add q4_0_4_4/4_8/8_8

This commit is contained in:
Josh Yan 2024-08-01 14:31:36 -07:00
parent 3e614260af
commit 7b8ca2dc0f
2 changed files with 44 additions and 9 deletions

View File

@ -37,6 +37,9 @@ const (
fileTypeIQ2_M
fileTypeIQ1_M
fileTypeBF16
fileTypeQ4_0_4_4
fileTypeQ4_0_4_8
fileTypeQ4_0_8_8
fileTypeUnknown
)
@ -103,6 +106,12 @@ func ParseFileType(s string) (fileType, error) {
return fileTypeIQ1_M, nil
case "BF16":
return fileTypeBF16, nil
case "Q4_0_4_4":
return fileTypeQ4_0_4_4, nil
case "Q4_0_4_8":
return fileTypeQ4_0_4_8, nil
case "Q4_0_8_8":
return fileTypeQ4_0_8_8, nil
default:
return fileTypeUnknown, fmt.Errorf("unknown fileType: %s", s)
}
@ -170,6 +179,12 @@ func (t fileType) String() string {
return "IQ1_M"
case fileTypeBF16:
return "BF16"
case fileTypeQ4_0_4_4:
return "Q4_0_4_4"
case fileTypeQ4_0_4_8:
return "Q4_0_4_8"
case fileTypeQ4_0_8_8:
return "Q4_0_8_8"
default:
return "unknown"
}

View File

@ -156,9 +156,27 @@ type Tensor struct {
func (t Tensor) blockSize() uint64 {
switch t.Kind {
case 0, 1, 24, 25, 26, 27, 28, 30: // F32, F16, I8, I16, I32, I64, F64, BF16
case 0, // F32
1, // F16
24, // I8
25, // I16
26, // I32
27, // I64
28, // F64
30: // BF16
return 1
case 2, 3, 4, 5, 6, 7, 8, 9, 20: // Q4_0, Q4_1, Q5_0, Q5_1, Q8_0, Q8_1, IQ4_NL
case 2, // Q4_0
3, // Q4_1
4, // Q4_2 (removed from ggml)
5, // Q4_3 (removed from ggml)
6, // Q5_0
7, // Q5_1
8, // Q8_0
9, // Q8_1
20, // IQ4_NL
31, // Q4_0_4_4
32, // Q4_0_4_8
33: // Q4_0_8_8
return 32
default: // All others
return 256
@ -169,11 +187,17 @@ func (t Tensor) typeSize() uint64 {
blockSize := t.blockSize()
switch t.Kind {
case 0: // FP32
case 0, // FP32
26: // I32
return 4
case 1: // FP16
case 1, // FP16
25, // I16
30: // BF16
return 2
case 2: // Q4_0
case 2, // Q4_0
31, // Q4_0_4_4
32, // Q4_0_4_8
33: // Q4_0_8_8
return 2 + blockSize/2
case 3: // Q4_1
return 2 + 2 + blockSize/2
@ -215,10 +239,6 @@ func (t Tensor) typeSize() uint64 {
return 2 + 2 + blockSize/2 + blockSize/64
case 24: // I8
return 1
case 25: // I16
return 2
case 26: // I32
return 4
case 27: // I64
return 8
case 28: // F64