From 7b8ca2dc0fcd578fcf4a3f1d6ea2ac045feefd35 Mon Sep 17 00:00:00 2001 From: Josh Yan Date: Thu, 1 Aug 2024 14:31:36 -0700 Subject: [PATCH] add q4_0_4_4/4_8/8_8 --- llm/filetype.go | 15 +++++++++++++++ llm/ggml.go | 38 +++++++++++++++++++++++++++++--------- 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/llm/filetype.go b/llm/filetype.go index 7a8e9f69..4232bb35 100644 --- a/llm/filetype.go +++ b/llm/filetype.go @@ -37,6 +37,9 @@ const ( fileTypeIQ2_M fileTypeIQ1_M fileTypeBF16 + fileTypeQ4_0_4_4 + fileTypeQ4_0_4_8 + fileTypeQ4_0_8_8 fileTypeUnknown ) @@ -103,6 +106,12 @@ func ParseFileType(s string) (fileType, error) { return fileTypeIQ1_M, nil case "BF16": return fileTypeBF16, nil + case "Q4_0_4_4": + return fileTypeQ4_0_4_4, nil + case "Q4_0_4_8": + return fileTypeQ4_0_4_8, nil + case "Q4_0_8_8": + return fileTypeQ4_0_8_8, nil default: return fileTypeUnknown, fmt.Errorf("unknown fileType: %s", s) } @@ -170,6 +179,12 @@ func (t fileType) String() string { return "IQ1_M" case fileTypeBF16: return "BF16" + case fileTypeQ4_0_4_4: + return "Q4_0_4_4" + case fileTypeQ4_0_4_8: + return "Q4_0_4_8" + case fileTypeQ4_0_8_8: + return "Q4_0_8_8" default: return "unknown" } diff --git a/llm/ggml.go b/llm/ggml.go index fddb5039..a4fc6390 100644 --- a/llm/ggml.go +++ b/llm/ggml.go @@ -156,9 +156,27 @@ type Tensor struct { func (t Tensor) blockSize() uint64 { switch t.Kind { - case 0, 1, 24, 25, 26, 27, 28, 30: // F32, F16, I8, I16, I32, I64, F64, BF16 + case 0, // F32 + 1, // F16 + 24, // I8 + 25, // I16 + 26, // I32 + 27, // I64 + 28, // F64 + 30: // BF16 return 1 - case 2, 3, 4, 5, 6, 7, 8, 9, 20: // Q4_0, Q4_1, Q5_0, Q5_1, Q8_0, Q8_1, IQ4_NL + case 2, // Q4_0 + 3, // Q4_1 + 4, // Q4_2 (removed) + 5, // Q4_3 (removed) + 6, // Q5_0 + 7, // Q5_1 + 8, // Q8_0 + 9, // Q8_1 + 20, // IQ4_NL + 31, // Q4_0_4_4 + 32, // Q4_0_4_8 + 33: // Q4_0_8_8 return 32 default: // All others return 256 @@ -169,11 +187,17 @@ func (t Tensor) typeSize() uint64 { blockSize := t.blockSize() switch 
t.Kind { - case 0: // FP32 + case 0, // FP32 + 26: // I32 return 4 - case 1: // FP16 + case 1, // FP16 + 25, // I16 + 30: // BF16 return 2 - case 2: // Q4_0 + case 2, // Q4_0 + 31, // Q4_0_4_4 + 32, // Q4_0_4_8 + 33: // Q4_0_8_8 return 2 + blockSize/2 case 3: // Q4_1 return 2 + 2 + blockSize/2 @@ -215,10 +239,6 @@ func (t Tensor) typeSize() uint64 { return 2 + 2 + blockSize/2 + blockSize/64 case 24: // I8 return 1 - case 25: // I16 - return 2 - case 26: // I32 - return 4 case 27: // I64 return 8 case 28: // F64