mirror of
https://github.com/tcsenpai/ollama.git
synced 2025-06-13 22:47:07 +00:00
enable q8, q5, 5_1, and f32 for linux gpu (#699)
This commit is contained in:
parent
d104b7e997
commit
d06bc0cb6e
@ -5,6 +5,7 @@ import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/pbnjay/memory"
|
||||
|
||||
@ -37,6 +38,7 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if runtime.GOOS == "darwin" {
|
||||
switch ggml.FileType() {
|
||||
case "Q8_0":
|
||||
if ggml.Name() != "gguf" && opts.NumGPU != 0 {
|
||||
@ -53,6 +55,7 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error
|
||||
opts.NumGPU = 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
totalResidentMemory := memory.TotalMemory()
|
||||
switch ggml.ModelType() {
|
||||
|
Loading…
x
Reference in New Issue
Block a user