enable q8, q5, 5_1, and f32 for linux gpu (#699)

2025-06-13 22:47:07 +00:00 · 2023-10-05 12:53:47 -04:00 · 2023-10-05 12:53:47 -04:00 · d06bc0cb6e
commit d06bc0cb6e
parent d104b7e997
1 changed file with 17 additions and 14 deletions
--- a/llm/llm.go
+++ b/llm/llm.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"log"
 	"os"
+	"runtime"

 	"github.com/pbnjay/memory"

@@ -37,6 +38,7 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error
 		return nil, err
 	}

+	if runtime.GOOS == "darwin" {
 		switch ggml.FileType() {
 		case "Q8_0":
 			if ggml.Name() != "gguf" && opts.NumGPU != 0 {
@@ -53,6 +55,7 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error
 				opts.NumGPU = 0
 			}
 		}
+	}

 	totalResidentMemory := memory.TotalMemory()
 	switch ggml.ModelType() {