mirror of
https://github.com/tcsenpai/ollama.git
synced 2025-06-07 11:45:21 +00:00
llama.go: Use dynamic buffer for TokenToPiece
The cgo binding for llama_token_to_piece uses a fixed 12-byte buffer, which is usually but not always enough to hold a token. This increases the buffer size if needed, similar to what llama.cpp does internally.
This commit is contained in:
parent
ed19fad862
commit
523d84c563
@ -260,15 +260,29 @@ type Model struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (m *Model) TokenToPiece(token int) string {
|
func (m *Model) TokenToPiece(token int) string {
|
||||||
buf := make([]byte, 12)
|
tokenLen := 12
|
||||||
C.llama_token_to_piece(
|
buf := make([]byte, tokenLen)
|
||||||
|
tokenLen = int(C.llama_token_to_piece(
|
||||||
m.c,
|
m.c,
|
||||||
C.int32_t(token),
|
C.int32_t(token),
|
||||||
(*C.char)(unsafe.Pointer(&buf[0])),
|
(*C.char)(unsafe.Pointer(&buf[0])),
|
||||||
C.int32_t(12),
|
C.int32_t(tokenLen),
|
||||||
C.int32_t(0),
|
C.int32_t(0),
|
||||||
C.bool(true),
|
C.bool(true),
|
||||||
)
|
))
|
||||||
|
if tokenLen < 0 {
|
||||||
|
tokenLen = -tokenLen
|
||||||
|
|
||||||
|
buf = make([]byte, tokenLen)
|
||||||
|
C.llama_token_to_piece(
|
||||||
|
m.c,
|
||||||
|
C.int32_t(token),
|
||||||
|
(*C.char)(unsafe.Pointer(&buf[0])),
|
||||||
|
C.int32_t(tokenLen),
|
||||||
|
C.int32_t(0),
|
||||||
|
C.bool(true),
|
||||||
|
)
|
||||||
|
}
|
||||||
return strings.TrimRight(string(buf), "\x00")
|
return strings.TrimRight(string(buf), "\x00")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user