mirror of
https://github.com/tcsenpai/ollama.git
synced 2025-06-07 11:45:21 +00:00
llama.go: Use dynamic buffer for TokenToPiece
The cgo binding for llama_token_to_piece uses a fixed 12-byte buffer, which is usually but not always enough to hold a token. This increases the buffer size if needed, similar to what llama.cpp does internally.
This commit is contained in:
parent
ed19fad862
commit
523d84c563
@ -260,15 +260,29 @@ type Model struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (m *Model) TokenToPiece(token int) string {
|
func (m *Model) TokenToPiece(token int) string {
|
||||||
buf := make([]byte, 12)
|
tokenLen := 12
|
||||||
C.llama_token_to_piece(
|
buf := make([]byte, tokenLen)
|
||||||
|
tokenLen = int(C.llama_token_to_piece(
|
||||||
m.c,
|
m.c,
|
||||||
C.int32_t(token),
|
C.int32_t(token),
|
||||||
(*C.char)(unsafe.Pointer(&buf[0])),
|
(*C.char)(unsafe.Pointer(&buf[0])),
|
||||||
C.int32_t(12),
|
C.int32_t(tokenLen),
|
||||||
C.int32_t(0),
|
C.int32_t(0),
|
||||||
C.bool(true),
|
C.bool(true),
|
||||||
)
|
))
|
||||||
|
if tokenLen < 0 {
|
||||||
|
tokenLen = -tokenLen
|
||||||
|
|
||||||
|
buf = make([]byte, tokenLen)
|
||||||
|
C.llama_token_to_piece(
|
||||||
|
m.c,
|
||||||
|
C.int32_t(token),
|
||||||
|
(*C.char)(unsafe.Pointer(&buf[0])),
|
||||||
|
C.int32_t(tokenLen),
|
||||||
|
C.int32_t(0),
|
||||||
|
C.bool(true),
|
||||||
|
)
|
||||||
|
}
|
||||||
return strings.TrimRight(string(buf), "\x00")
|
return strings.TrimRight(string(buf), "\x00")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user