Mirror of https://github.com/tcsenpai/ollama.git, synced 2025-06-16 07:47:33 +00:00
Compare commits
No commits in common. "main" and "v0.3.13" have entirely different histories.
@@ -330,7 +330,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [LLMChat](https://github.com/trendy-design/llmchat) (Privacy focused, 100% local, intuitive all-in-one chat interface)
 - [ARGO](https://github.com/xark-argo/argo) (Locally download and run Ollama and Huggingface models with RAG on Mac/Windows/Linux)
 - [G1](https://github.com/bklieger-groq/g1) (Prototype of using prompting strategies to improve the LLM's reasoning through o1-like reasoning chains.)
-- [Ollama App](https://github.com/JHubi1/ollama-app) (Modern and easy-to-use multi-platform client for Ollama)
 
 ### Terminal
 
@@ -417,7 +416,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 
 - [Enchanted](https://github.com/AugustDev/enchanted)
 - [Maid](https://github.com/Mobile-Artificial-Intelligence/maid)
-- [Ollama App](https://github.com/JHubi1/ollama-app) (Modern and easy-to-use multi-platform client for Ollama)
 - [ConfiChat](https://github.com/1runeberg/confichat) (Lightweight, standalone, multi-platform, and privacy focused LLM chat interface with optional encryption)
 
 ### Extensions & Plugins
@@ -47,11 +47,10 @@ var (
 )
 
 // Gather GPU information from the amdgpu driver if any supported GPUs are detected
-// Only called once during bootstrap
-func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
+func AMDGetGPUInfo() []RocmGPUInfo {
 	resp := []RocmGPUInfo{}
 	if !AMDDetected() {
-		return resp, fmt.Errorf("AMD GPUs not detected")
+		return resp
 	}
 
 	// Opportunistic logging of driver version to aid in troubleshooting
@@ -195,9 +194,13 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
 
 		// Shouldn't happen, but just in case...
 		if gpuID < 0 {
-			err := fmt.Errorf("unexpected amdgpu sysfs data resulted in negative GPU ID, please set OLLAMA_DEBUG=1 and report an issue")
-			slog.Error(err.Error())
-			return nil, err
+			slog.Error("unexpected amdgpu sysfs data resulted in negative GPU ID, please set OLLAMA_DEBUG=1 and report an issue")
+			return nil
+		}
+
+		if int(major) < RocmComputeMin {
+			slog.Warn(fmt.Sprintf("amdgpu too old gfx%d%x%x", major, minor, patch), "gpu", gpuID)
+			continue
 		}
 
 		// Look up the memory for the current node
@@ -267,12 +270,19 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
 			break
 		}
 
+		// iGPU detection, remove this check once we can support an iGPU variant of the rocm library
+		if totalMemory < IGPUMemLimit {
+			slog.Info("unsupported Radeon iGPU detected skipping", "id", gpuID, "total", format.HumanBytes2(totalMemory))
+			continue
+		}
 		var name string
 		// TODO - PCI ID lookup
 		if vendor > 0 && device > 0 {
 			name = fmt.Sprintf("%04x:%04x", vendor, device)
 		}
 
+		slog.Debug("amdgpu memory", "gpu", gpuID, "total", format.HumanBytes2(totalMemory))
+		slog.Debug("amdgpu memory", "gpu", gpuID, "available", format.HumanBytes2(totalMemory-usedMemory))
 		gpuInfo := RocmGPUInfo{
 			GpuInfo: GpuInfo{
 				Library: "rocm",
@@ -290,31 +300,6 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
 			usedFilepath: usedFile,
 		}
 
-		// iGPU detection, remove this check once we can support an iGPU variant of the rocm library
-		if totalMemory < IGPUMemLimit {
-			reason := "unsupported Radeon iGPU detected skipping"
-			slog.Info(reason, "id", gpuID, "total", format.HumanBytes2(totalMemory))
-			unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{
-				GpuInfo: gpuInfo.GpuInfo,
-				Reason:  reason,
-			})
-			continue
-		}
-
-		if int(major) < RocmComputeMin {
-			reason := fmt.Sprintf("amdgpu too old gfx%d%x%x", major, minor, patch)
-			slog.Warn(reason, "gpu", gpuID)
-			unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{
-				GpuInfo: gpuInfo.GpuInfo,
-				Reason:  reason,
-			})
-
-			continue
-		}
-
-		slog.Debug("amdgpu memory", "gpu", gpuID, "total", format.HumanBytes2(totalMemory))
-		slog.Debug("amdgpu memory", "gpu", gpuID, "available", format.HumanBytes2(totalMemory-usedMemory))
-
 		// If the user wants to filter to a subset of devices, filter out if we aren't a match
 		if len(visibleDevices) > 0 {
 			include := false
@@ -325,13 +310,7 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
 				}
 			}
 			if !include {
-				reason := "filtering out device per user request"
-				slog.Info(reason, "id", gpuInfo.ID, "visible_devices", visibleDevices)
-				unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{
-					GpuInfo: gpuInfo.GpuInfo,
-					Reason:  reason,
-				})
-
+				slog.Info("filtering out device per user request", "id", gpuInfo.ID, "visible_devices", visibleDevices)
 				continue
 			}
 		}
@@ -341,13 +320,8 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
 	if libDir == "" {
 		libDir, err = AMDValidateLibDir()
 		if err != nil {
-			err = fmt.Errorf("unable to verify rocm library: %w", err)
-			slog.Warn(err.Error())
-			unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{
-				GpuInfo: gpuInfo.GpuInfo,
-				Reason:  err.Error(),
-			})
-			return nil, err
+			slog.Warn("unable to verify rocm library, will use cpu", "error", err)
+			return nil
 		}
 	}
 	gpuInfo.DependencyPath = libDir
@@ -357,25 +331,14 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
 	if len(supported) == 0 {
 		supported, err = GetSupportedGFX(libDir)
 		if err != nil {
-			err = fmt.Errorf("failed to lookup supported GFX types: %w", err)
-			slog.Warn(err.Error())
-			unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{
-				GpuInfo: gpuInfo.GpuInfo,
-				Reason:  err.Error(),
-			})
-			return nil, err
+			slog.Warn("failed to lookup supported GFX types, falling back to CPU mode", "error", err)
+			return nil
 		}
 		slog.Debug("rocm supported GPUs", "types", supported)
 	}
 	gfx := gpuInfo.Compute
 	if !slices.Contains[[]string, string](supported, gfx) {
-		reason := fmt.Sprintf("amdgpu is not supported (supported types:%s)", supported)
-		slog.Warn(reason, "gpu_type", gfx, "gpu", gpuInfo.ID, "library", libDir)
-		unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{
-			GpuInfo: gpuInfo.GpuInfo,
-			Reason:  reason,
-		})
-
+		slog.Warn("amdgpu is not supported", "gpu", gpuInfo.ID, "gpu_type", gfx, "library", libDir, "supported_types", supported)
 		// TODO - consider discrete markdown just for ROCM troubleshooting?
 		slog.Warn("See https://github.com/ollama/ollama/blob/main/docs/gpu.md#overrides for HSA_OVERRIDE_GFX_VERSION usage")
 		continue
@@ -395,16 +358,13 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
 		resp = append(resp, gpuInfo)
 	}
 	if len(resp) == 0 {
-		err := fmt.Errorf("no compatible amdgpu devices detected")
-		slog.Info(err.Error())
-		return nil, err
+		slog.Info("no compatible amdgpu devices detected")
 	}
 	if err := verifyKFDDriverAccess(); err != nil {
-		err = fmt.Errorf("amdgpu devices detected but permission problems block access: %w", err)
-		slog.Error(err.Error())
-		return nil, err
+		slog.Error("amdgpu devices detected but permission problems block access", "error", err)
+		return nil
 	}
-	return resp, nil
+	return resp
 }
 
 // Quick check for AMD driver so we can skip amdgpu discovery if not present
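The "-" side of these Linux hunks turns logging-only failure paths into returned errors and records skipped devices in an unsupported-GPU list; the "+" side only logs and returns. The following is a minimal, self-contained sketch of that recording pattern, using stand-in types and a hypothetical detect() helper rather than the real ollama code:

```go
package main

import (
	"fmt"
	"log/slog"
)

// Stand-ins for illustration only; the real types live in the gpu package.
type GPUInfo struct{ ID string }

type UnsupportedGPUInfo struct {
	GpuInfo GPUInfo
	Reason  string
}

var unsupportedGPUs []UnsupportedGPUInfo

// detect skips devices below a minimum compute level, remembering why, and
// returns an error when nothing usable remains (the "-" side's behavior).
func detect(compute map[string]int, minCompute int) ([]GPUInfo, error) {
	supported := []GPUInfo{}
	for id, c := range compute {
		if c < minCompute {
			reason := fmt.Sprintf("gpu too old (compute %d < %d)", c, minCompute)
			slog.Warn(reason, "gpu", id)
			unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{GpuInfo: GPUInfo{ID: id}, Reason: reason})
			continue
		}
		supported = append(supported, GPUInfo{ID: id})
	}
	if len(supported) == 0 {
		return nil, fmt.Errorf("no compatible devices detected")
	}
	return supported, nil
}

func main() {
	gpus, err := detect(map[string]int{"0": 8, "1": 10}, 9)
	fmt.Println(gpus, err, unsupportedGPUs)
}
```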
@@ -3,7 +3,6 @@ package gpu
 import (
 	"bytes"
 	"errors"
-	"fmt"
 	"log/slog"
 	"os"
 	"path/filepath"
@@ -27,13 +26,12 @@ var (
 	RocmStandardLocations = []string{"C:\\Program Files\\AMD\\ROCm\\6.1\\bin"} // TODO glob?
 )
 
-// Only called once during bootstrap
-func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
+func AMDGetGPUInfo() []RocmGPUInfo {
 	resp := []RocmGPUInfo{}
 	hl, err := NewHipLib()
 	if err != nil {
 		slog.Debug(err.Error())
-		return nil, err
+		return nil
 	}
 	defer hl.Release()
 
@@ -46,15 +44,12 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
 	// Note: the HIP library automatically handles subsetting to any HIP_VISIBLE_DEVICES the user specified
 	count := hl.HipGetDeviceCount()
 	if count == 0 {
-		err := fmt.Errorf("no compatible amdgpu devices detected")
-		slog.Info(err.Error())
-		return nil, err
+		return nil
 	}
 	libDir, err := AMDValidateLibDir()
 	if err != nil {
-		err = fmt.Errorf("unable to verify rocm library: %w", err)
-		slog.Warn(err.Error())
-		return nil, err
+		slog.Warn("unable to verify rocm library, will use cpu", "error", err)
+		return nil
 	}
 
 	var supported []string
@@ -62,9 +57,8 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
 	if gfxOverride == "" {
 		supported, err = GetSupportedGFX(libDir)
 		if err != nil {
-			err = fmt.Errorf("failed to lookup supported GFX types: %w", err)
-			slog.Warn(err.Error())
-			return nil, err
+			slog.Warn("failed to lookup supported GFX types, falling back to CPU mode", "error", err)
+			return nil
 		}
 	} else {
 		slog.Info("skipping rocm gfx compatibility check", "HSA_OVERRIDE_GFX_VERSION", gfxOverride)
@@ -93,6 +87,21 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
 		slog.Debug("hip device", "id", i, "name", name, "gfx", gfx)
 		// slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY! Always 0
 		// TODO Why isn't props.iGPU accurate!?
+		if strings.EqualFold(name, iGPUName) {
+			slog.Info("unsupported Radeon iGPU detected skipping", "id", i, "name", name, "gfx", gfx)
+			continue
+		}
+		if gfxOverride == "" {
+			// Strip off Target Features when comparing
+			if !slices.Contains[[]string, string](supported, strings.Split(gfx, ":")[0]) {
+				slog.Warn("amdgpu is not supported", "gpu", i, "gpu_type", gfx, "library", libDir, "supported_types", supported)
+				// TODO - consider discrete markdown just for ROCM troubleshooting?
+				slog.Warn("See https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for HSA_OVERRIDE_GFX_VERSION usage")
+				continue
+			} else {
+				slog.Debug("amdgpu is supported", "gpu", i, "gpu_type", gfx)
+			}
+		}
 
 		freeMemory, totalMemory, err := hl.HipMemGetInfo()
 		if err != nil {
@@ -100,6 +109,14 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
 			continue
 		}
 
+		// iGPU detection, remove this check once we can support an iGPU variant of the rocm library
+		if totalMemory < IGPUMemLimit {
+			slog.Info("amdgpu appears to be an iGPU, skipping", "gpu", i, "total", format.HumanBytes2(totalMemory))
+			continue
+		}
+
+		slog.Debug("amdgpu memory", "gpu", i, "total", format.HumanBytes2(totalMemory))
+		slog.Debug("amdgpu memory", "gpu", i, "available", format.HumanBytes2(freeMemory))
 		gpuInfo := RocmGPUInfo{
 			GpuInfo: GpuInfo{
 				Library: "rocm",
@@ -121,38 +138,10 @@ func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
 			index: i,
 		}
 
-		// iGPU detection, remove this check once we can support an iGPU variant of the rocm library
-		if strings.EqualFold(name, iGPUName) || totalMemory < IGPUMemLimit {
-			reason := "unsupported Radeon iGPU detected skipping"
-			slog.Info(reason, "id", gpuInfo.ID, "total", format.HumanBytes2(totalMemory))
-			unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{
-				GpuInfo: gpuInfo.GpuInfo,
-				Reason:  reason,
-			})
-			continue
-		}
-
-		// Strip off Target Features when comparing
-		if !slices.Contains[[]string, string](supported, strings.Split(gfx, ":")[0]) {
-			reason := fmt.Sprintf("amdgpu is not supported (supported types:%s)", supported)
-			slog.Warn(reason, "gpu_type", gfx, "gpu", gpuInfo.ID, "library", libDir)
-			unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{
-				GpuInfo: gpuInfo.GpuInfo,
-				Reason:  reason,
-			})
-			// HSA_OVERRIDE_GFX_VERSION not supported on windows
-			continue
-		} else {
-			slog.Debug("amdgpu is supported", "gpu", i, "gpu_type", gfx)
-		}
-
-		slog.Debug("amdgpu memory", "gpu", i, "total", format.HumanBytes2(totalMemory))
-		slog.Debug("amdgpu memory", "gpu", i, "available", format.HumanBytes2(freeMemory))
-
 		resp = append(resp, gpuInfo)
 	}
 
-	return resp, nil
+	return resp
 }
 
 func AMDValidateLibDir() (string, error) {
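Both sides of the Windows hunks compare a device's gfx name against the ROCm library's supported list after stripping target features (the ":" suffixes). A small, self-contained sketch of that comparison, using only standard-library calls and made-up sample values:

```go
package main

import (
	"fmt"
	"slices"
	"strings"
)

// isSupported drops target features such as ":sramecc+:xnack-" before
// checking the base gfx name against the supported list, mirroring the
// strings.Split(gfx, ":")[0] comparison in the diff. Values are illustrative.
func isSupported(gfx string, supported []string) bool {
	base := strings.Split(gfx, ":")[0]
	return slices.Contains(supported, base)
}

func main() {
	supported := []string{"gfx1030", "gfx1100"}
	fmt.Println(isSupported("gfx1030:sramecc+:xnack-", supported)) // true
	fmt.Println(isSupported("gfx90c", supported))                  // false
}
```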
gpu/gpu.go (151 changed lines)
@@ -54,13 +54,6 @@ var (
 	nvmlLibPath string
 	rocmGPUs    []RocmGPUInfo
 	oneapiGPUs  []OneapiGPUInfo
-
-	// If any discovered GPUs are incompatible, report why
-	unsupportedGPUs []UnsupportedGPUInfo
-
-	// Keep track of errors during bootstrapping so that if GPUs are missing
-	// they expected to be present this may explain why
-	bootstrapErrors []error
 )
 
 // With our current CUDA compile flags, older than 5.0 will not work properly
@@ -77,17 +70,16 @@ func initCudaHandles() *cudaHandles {
 
 	cHandles := &cudaHandles{}
 	// Short Circuit if we already know which library to use
-	// ignore bootstrap errors in this case since we already recorded them
 	if nvmlLibPath != "" {
-		cHandles.nvml, _, _ = loadNVMLMgmt([]string{nvmlLibPath})
+		cHandles.nvml, _ = LoadNVMLMgmt([]string{nvmlLibPath})
 		return cHandles
 	}
 	if nvcudaLibPath != "" {
-		cHandles.deviceCount, cHandles.nvcuda, _, _ = loadNVCUDAMgmt([]string{nvcudaLibPath})
+		cHandles.deviceCount, cHandles.nvcuda, _ = LoadNVCUDAMgmt([]string{nvcudaLibPath})
 		return cHandles
 	}
 	if cudartLibPath != "" {
-		cHandles.deviceCount, cHandles.cudart, _, _ = loadCUDARTMgmt([]string{cudartLibPath})
+		cHandles.deviceCount, cHandles.cudart, _ = LoadCUDARTMgmt([]string{cudartLibPath})
 		return cHandles
 	}
 
@@ -110,21 +102,18 @@ func initCudaHandles() *cudaHandles {
 	if len(NvmlGlobs) > 0 {
 		nvmlLibPaths := FindGPULibs(NvmlMgmtName, NvmlGlobs)
 		if len(nvmlLibPaths) > 0 {
-			nvml, libPath, err := loadNVMLMgmt(nvmlLibPaths)
+			nvml, libPath := LoadNVMLMgmt(nvmlLibPaths)
 			if nvml != nil {
 				slog.Debug("nvidia-ml loaded", "library", libPath)
 				cHandles.nvml = nvml
 				nvmlLibPath = libPath
 			}
-			if err != nil {
-				bootstrapErrors = append(bootstrapErrors, err)
-			}
 		}
 	}
 
 	nvcudaLibPaths := FindGPULibs(NvcudaMgmtName, nvcudaMgmtPatterns)
 	if len(nvcudaLibPaths) > 0 {
-		deviceCount, nvcuda, libPath, err := loadNVCUDAMgmt(nvcudaLibPaths)
+		deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
 		if nvcuda != nil {
 			slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
 			cHandles.nvcuda = nvcuda
@@ -132,14 +121,11 @@ func initCudaHandles() *cudaHandles {
 			nvcudaLibPath = libPath
 			return cHandles
 		}
-		if err != nil {
-			bootstrapErrors = append(bootstrapErrors, err)
-		}
 	}
 
 	cudartLibPaths := FindGPULibs(CudartMgmtName, cudartMgmtPatterns)
 	if len(cudartLibPaths) > 0 {
-		deviceCount, cudart, libPath, err := loadCUDARTMgmt(cudartLibPaths)
+		deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
 		if cudart != nil {
 			slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
 			cHandles.cudart = cudart
@@ -147,9 +133,6 @@ func initCudaHandles() *cudaHandles {
 			cudartLibPath = libPath
 			return cHandles
 		}
-		if err != nil {
-			bootstrapErrors = append(bootstrapErrors, err)
-		}
 	}
 
 	return cHandles
@@ -160,19 +143,14 @@ func initOneAPIHandles() *oneapiHandles {
 	oHandles := &oneapiHandles{}
 
 	// Short Circuit if we already know which library to use
-	// ignore bootstrap errors in this case since we already recorded them
 	if oneapiLibPath != "" {
-		oHandles.deviceCount, oHandles.oneapi, _, _ = loadOneapiMgmt([]string{oneapiLibPath})
+		oHandles.deviceCount, oHandles.oneapi, _ = LoadOneapiMgmt([]string{oneapiLibPath})
 		return oHandles
 	}
 
 	oneapiLibPaths := FindGPULibs(OneapiMgmtName, OneapiGlobs)
 	if len(oneapiLibPaths) > 0 {
-		var err error
-		oHandles.deviceCount, oHandles.oneapi, oneapiLibPath, err = loadOneapiMgmt(oneapiLibPaths)
-		if err != nil {
-			bootstrapErrors = append(bootstrapErrors, err)
-		}
+		oHandles.deviceCount, oHandles.oneapi, oneapiLibPath = LoadOneapiMgmt(oneapiLibPaths)
 	}
 
 	return oHandles
@@ -219,7 +197,6 @@ func GetGPUInfo() GpuInfoList {
 
 	if !bootstrapped {
 		slog.Info("looking for compatible GPUs")
-		bootstrapErrors = []error{}
 		needRefresh = false
 		cpuCapability = GetCPUCapability()
 		var memInfo C.mem_info_t
@@ -229,10 +206,7 @@ func GetGPUInfo() GpuInfoList {
 			slog.Warn("error looking up system memory", "error", err)
 		}
 		depPath := LibraryDir()
-		details, err := GetCPUDetails()
-		if err != nil {
-			slog.Warn("failed to lookup CPU details", "error", err)
-		}
+
 		cpus = []CPUInfo{
 			{
 				GpuInfo: GpuInfo{
@@ -242,15 +216,12 @@ func GetGPUInfo() GpuInfoList {
 					ID:             "0",
 					DependencyPath: depPath,
 				},
-				CPUs: details,
 			},
 		}
 
 		// Fallback to CPU mode if we're lacking required vector extensions on x86
 		if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
-			err := fmt.Errorf("CPU does not have minimum vector extensions, GPU inference disabled. Required:%s Detected:%s", GPURunnerCPUCapability, cpuCapability)
-			slog.Warn(err.Error())
-			bootstrapErrors = append(bootstrapErrors, err)
+			slog.Warn("CPU does not have minimum vector extensions, GPU inference disabled", "required", GPURunnerCPUCapability, "detected", cpuCapability)
 			bootstrapped = true
 			// No need to do any GPU discovery, since we can't run on them
 			return GpuInfoList{cpus[0].GpuInfo}
@@ -282,6 +253,10 @@ func GetGPUInfo() GpuInfoList {
 				C.free(unsafe.Pointer(memInfo.err))
 				continue
 			}
+			if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
+				slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
+				continue
+			}
 			gpuInfo.TotalMemory = uint64(memInfo.total)
 			gpuInfo.FreeMemory = uint64(memInfo.free)
 			gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
@@ -304,15 +279,6 @@ func GetGPUInfo() GpuInfoList {
 			gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
 			gpuInfo.Variant = variant
 
-			if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
-				unsupportedGPUs = append(unsupportedGPUs,
-					UnsupportedGPUInfo{
-						GpuInfo: gpuInfo.GpuInfo,
-					})
-				slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
-				continue
-			}
-
 			// query the management library as well so we can record any skew between the two
 			// which represents overhead on the GPU we must set aside on subsequent updates
 			if cHandles.nvml != nil {
@@ -375,10 +341,7 @@ func GetGPUInfo() GpuInfoList {
 			}
 		}
 
-		rocmGPUs, err = AMDGetGPUInfo()
-		if err != nil {
-			bootstrapErrors = append(bootstrapErrors, err)
-		}
+		rocmGPUs = AMDGetGPUInfo()
 		bootstrapped = true
 		if len(cudaGPUs) == 0 && len(rocmGPUs) == 0 && len(oneapiGPUs) == 0 {
 			slog.Info("no compatible GPUs were discovered")
@@ -563,114 +526,92 @@ func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
 	return gpuLibPaths
 }
 
-// Bootstrap the runtime library
-// Returns: num devices, handle, libPath, error
-func loadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string, error) {
+func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) {
 	var resp C.cudart_init_resp_t
 	resp.ch.verbose = getVerboseState()
-	var err error
 	for _, libPath := range cudartLibPaths {
 		lib := C.CString(libPath)
 		defer C.free(unsafe.Pointer(lib))
 		C.cudart_init(lib, &resp)
 		if resp.err != nil {
-			err = fmt.Errorf("Unable to load cudart library %s: %s", libPath, C.GoString(resp.err))
-			slog.Debug(err.Error())
+			slog.Debug("Unable to load cudart", "library", libPath, "error", C.GoString(resp.err))
 			C.free(unsafe.Pointer(resp.err))
 		} else {
-			err = nil
-			return int(resp.num_devices), &resp.ch, libPath, err
+			return int(resp.num_devices), &resp.ch, libPath
 		}
 	}
-	return 0, nil, "", err
+	return 0, nil, ""
 }
 
-// Bootstrap the driver library
-// Returns: num devices, handle, libPath, error
-func loadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string, error) {
+func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
 	var resp C.nvcuda_init_resp_t
 	resp.ch.verbose = getVerboseState()
-	var err error
 	for _, libPath := range nvcudaLibPaths {
 		lib := C.CString(libPath)
 		defer C.free(unsafe.Pointer(lib))
 		C.nvcuda_init(lib, &resp)
 		if resp.err != nil {
 			// Decide what log level based on the type of error message to help users understand why
+			msg := C.GoString(resp.err)
 			switch resp.cudaErr {
 			case C.CUDA_ERROR_INSUFFICIENT_DRIVER, C.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH:
-				err = fmt.Errorf("version mismatch between driver and cuda driver library - reboot or upgrade may be required: library %s", libPath)
-				slog.Warn(err.Error())
+				slog.Warn("version mismatch between driver and cuda driver library - reboot or upgrade may be required", "library", libPath, "error", msg)
 			case C.CUDA_ERROR_NO_DEVICE:
-				err = fmt.Errorf("no nvidia devices detected by library %s", libPath)
-				slog.Info(err.Error())
+				slog.Info("no nvidia devices detected", "library", libPath)
 			case C.CUDA_ERROR_UNKNOWN:
-				err = fmt.Errorf("unknown error initializing cuda driver library %s: %s. see https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for more information", libPath, C.GoString(resp.err))
-				slog.Warn(err.Error())
+				slog.Warn("unknown error initializing cuda driver library", "library", libPath, "error", msg)
+				slog.Warn("see https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for more information")
 			default:
-				msg := C.GoString(resp.err)
 				if strings.Contains(msg, "wrong ELF class") {
 					slog.Debug("skipping 32bit library", "library", libPath)
 				} else {
-					err = fmt.Errorf("Unable to load cudart library %s: %s", libPath, C.GoString(resp.err))
-					slog.Info(err.Error())
+					slog.Info("unable to load cuda driver library", "library", libPath, "error", msg)
 				}
 			}
 			C.free(unsafe.Pointer(resp.err))
 		} else {
-			err = nil
-			return int(resp.num_devices), &resp.ch, libPath, err
+			return int(resp.num_devices), &resp.ch, libPath
 		}
 	}
-	return 0, nil, "", err
+	return 0, nil, ""
 }
 
-// Bootstrap the management library
-// Returns: handle, libPath, error
-func loadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string, error) {
+func LoadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string) {
 	var resp C.nvml_init_resp_t
 	resp.ch.verbose = getVerboseState()
-	var err error
 	for _, libPath := range nvmlLibPaths {
 		lib := C.CString(libPath)
 		defer C.free(unsafe.Pointer(lib))
 		C.nvml_init(lib, &resp)
 		if resp.err != nil {
-			err = fmt.Errorf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err))
-			slog.Info(err.Error())
+			slog.Info(fmt.Sprintf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err)))
			C.free(unsafe.Pointer(resp.err))
 		} else {
-			err = nil
-			return &resp.ch, libPath, err
+			return &resp.ch, libPath
 		}
 	}
-	return nil, "", err
+	return nil, ""
 }
 
-// bootstrap the Intel GPU library
-// Returns: num devices, handle, libPath, error
-func loadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string, error) {
+func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
 	var resp C.oneapi_init_resp_t
 	num_devices := 0
 	resp.oh.verbose = getVerboseState()
-	var err error
 	for _, libPath := range oneapiLibPaths {
 		lib := C.CString(libPath)
 		defer C.free(unsafe.Pointer(lib))
 		C.oneapi_init(lib, &resp)
 		if resp.err != nil {
-			err = fmt.Errorf("Unable to load oneAPI management library %s: %s", libPath, C.GoString(resp.err))
-			slog.Debug(err.Error())
+			slog.Debug("Unable to load oneAPI management library", "library", libPath, "error", C.GoString(resp.err))
 			C.free(unsafe.Pointer(resp.err))
 		} else {
-			err = nil
 			for i := range resp.oh.num_drivers {
 				num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
 			}
-			return num_devices, &resp.oh, libPath, err
+			return num_devices, &resp.oh, libPath
 		}
 	}
-	return 0, nil, "", err
+	return 0, nil, ""
 }
 
 func getVerboseState() C.uint16_t {
@@ -728,23 +669,3 @@ func LibraryDir() string {
 	slog.Warn("unable to locate gpu dependency libraries")
 	return ""
 }
-
-func GetSystemInfo() SystemInfo {
-	gpus := GetGPUInfo()
-	gpuMutex.Lock()
-	defer gpuMutex.Unlock()
-	discoveryErrors := []string{}
-	for _, err := range bootstrapErrors {
-		discoveryErrors = append(discoveryErrors, err.Error())
-	}
-	if len(gpus) == 1 && gpus[0].Library == "cpu" {
-		gpus = []GpuInfo{}
-	}
-
-	return SystemInfo{
-		System:          cpus[0],
-		GPUs:            gpus,
-		UnsupportedGPUs: unsupportedGPUs,
-		DiscoveryErrors: discoveryErrors,
-	}
-}
@@ -10,9 +10,7 @@ package gpu
 import "C"
 
 import (
-	"log/slog"
 	"runtime"
-	"syscall"
 
 	"github.com/ollama/ollama/format"
 )
@@ -68,34 +66,3 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
 	// No-op on darwin
 	return "", ""
 }
-
-func GetSystemInfo() SystemInfo {
-	mem, _ := GetCPUMem()
-	query := "hw.perflevel0.physicalcpu"
-	perfCores, err := syscall.SysctlUint32(query)
-	if err != nil {
-		slog.Warn("failed to discover physical CPU details", "query", query, "error", err)
-	}
-	query = "hw.perflevel1.physicalcpu"
-	efficiencyCores, _ := syscall.SysctlUint32(query) // On x86 xeon this wont return data
-
-	// Determine thread count
-	query = "hw.logicalcpu"
-	logicalCores, _ := syscall.SysctlUint32(query)
-
-	return SystemInfo{
-		System: CPUInfo{
-			GpuInfo: GpuInfo{
-				memInfo: mem,
-			},
-			CPUs: []CPU{
-				{
-					CoreCount:           int(perfCores + efficiencyCores),
-					EfficiencyCoreCount: int(efficiencyCores),
-					ThreadCount:         int(logicalCores),
-				},
-			},
-		},
-		GPUs: GetGPUInfo(),
-	}
-}
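The darwin GetSystemInfo removed on the "+" side derives core and thread counts from sysctl keys. A self-contained sketch of just those lookups (darwin-only, values vary by machine; not the real ollama function):

```go
//go:build darwin

package main

import (
	"fmt"
	"syscall"
)

// Illustrative only: performance cores, efficiency cores and logical CPUs come
// from hw.perflevel0.physicalcpu, hw.perflevel1.physicalcpu and hw.logicalcpu,
// as in the deleted GetSystemInfo above.
func main() {
	perf, _ := syscall.SysctlUint32("hw.perflevel0.physicalcpu")
	eff, _ := syscall.SysctlUint32("hw.perflevel1.physicalcpu") // empty on x86
	logical, _ := syscall.SysctlUint32("hw.logicalcpu")
	fmt.Printf("cores=%d efficiency=%d threads=%d\n", perf+eff, eff, logical)
}
```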
@@ -4,8 +4,6 @@ import (
 	"bufio"
 	"fmt"
 	"os"
-	"reflect"
-	"regexp"
 	"strings"
 
 	"github.com/ollama/ollama/format"
@@ -92,95 +90,3 @@ func GetCPUMem() (memInfo, error) {
 		}
 	}
 	return mem, nil
 }
-
-const CpuInfoFilename = "/proc/cpuinfo"
-
-type linuxCpuInfo struct {
-	ID         string `cpuinfo:"processor"`
-	VendorID   string `cpuinfo:"vendor_id"`
-	ModelName  string `cpuinfo:"model name"`
-	PhysicalID string `cpuinfo:"physical id"`
-	Siblings   string `cpuinfo:"siblings"`
-	CoreID     string `cpuinfo:"core id"`
-}
-
-func GetCPUDetails() ([]CPU, error) {
-	file, err := os.Open(CpuInfoFilename)
-	if err != nil {
-		return nil, err
-	}
-	reColumns := regexp.MustCompile("\t+: ")
-	scanner := bufio.NewScanner(file)
-	cpuInfos := []linuxCpuInfo{}
-	cpu := &linuxCpuInfo{}
-	for scanner.Scan() {
-		line := scanner.Text()
-		if sl := reColumns.Split(line, 2); len(sl) > 1 {
-			t := reflect.TypeOf(cpu).Elem()
-			s := reflect.ValueOf(cpu).Elem()
-			for i := range t.NumField() {
-				field := t.Field(i)
-				tag := field.Tag.Get("cpuinfo")
-				if tag == sl[0] {
-					s.FieldByName(field.Name).SetString(sl[1])
-					break
-				}
-			}
-		} else if strings.TrimSpace(line) == "" && cpu.ID != "" {
-			cpuInfos = append(cpuInfos, *cpu)
-			cpu = &linuxCpuInfo{}
-		}
-	}
-
-	// Process the sockets/cores/threads
-	socketByID := map[string]*CPU{}
-	coreBySocket := map[string]map[string]struct{}{}
-	threadsByCoreBySocket := map[string]map[string]int{}
-	for _, c := range cpuInfos {
-		if _, found := socketByID[c.PhysicalID]; !found {
-			socketByID[c.PhysicalID] = &CPU{
-				ID:        c.PhysicalID,
-				VendorID:  c.VendorID,
-				ModelName: c.ModelName,
-			}
-			coreBySocket[c.PhysicalID] = map[string]struct{}{}
-			threadsByCoreBySocket[c.PhysicalID] = map[string]int{}
-		}
-		if c.CoreID != "" {
-			coreBySocket[c.PhysicalID][c.PhysicalID+":"+c.CoreID] = struct{}{}
-			threadsByCoreBySocket[c.PhysicalID][c.PhysicalID+":"+c.CoreID]++
-		} else {
-			coreBySocket[c.PhysicalID][c.PhysicalID+":"+c.ID] = struct{}{}
-			threadsByCoreBySocket[c.PhysicalID][c.PhysicalID+":"+c.ID]++
-		}
-	}
-
-	// Tally up the values from the tracking maps
-	for id, s := range socketByID {
-		s.CoreCount = len(coreBySocket[id])
-		s.ThreadCount = 0
-		for _, tc := range threadsByCoreBySocket[id] {
-			s.ThreadCount += tc
-		}
-
-		// This only works if HT is enabled, consider a more reliable model, maybe cache size comparisons?
-		efficiencyCoreCount := 0
-		for _, threads := range threadsByCoreBySocket[id] {
-			if threads == 1 {
-				efficiencyCoreCount++
-			}
-		}
-		if efficiencyCoreCount == s.CoreCount {
-			// 1:1 mapping means they're not actually efficiency cores, but regular cores
-			s.EfficiencyCoreCount = 0
-		} else {
-			s.EfficiencyCoreCount = efficiencyCoreCount
-		}
-	}
-
-	result := []CPU{}
-	for _, c := range socketByID {
-		result = append(result, *c)
-	}
-	return result, nil
-}
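The Linux GetCPUDetails removed above groups /proc/cpuinfo records by physical id (socket), counts distinct core ids per socket, and counts threads per core. A condensed, self-contained sketch of that tally with illustrative sample data (not the real parser):

```go
package main

import "fmt"

// rec is a stand-in for one /proc/cpuinfo processor record.
type rec struct{ physicalID, coreID string }

// tally counts distinct cores and total threads for one socket, mirroring the
// coreBySocket / threadsByCoreBySocket maps in the deleted code.
func tally(recs []rec) (cores, threads int) {
	coreSet := map[string]struct{}{}
	for _, r := range recs {
		coreSet[r.physicalID+":"+r.coreID] = struct{}{}
		threads++
	}
	return len(coreSet), threads
}

func main() {
	// Two hyper-threads on each of two cores of socket "0".
	recs := []rec{{"0", "0"}, {"0", "0"}, {"0", "1"}, {"0", "1"}}
	c, t := tally(recs)
	fmt.Printf("cores=%d threads=%d\n", c, t) // cores=2 threads=4
}
```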
@@ -2,7 +2,6 @@ package gpu
 
 import (
 	"fmt"
-	"log/slog"
 	"syscall"
 	"unsafe"
 )
@@ -23,7 +22,6 @@ var (
 	k32                              = syscall.NewLazyDLL("kernel32.dll")
 	globalMemoryStatusExProc         = k32.NewProc("GlobalMemoryStatusEx")
 	sizeofMemoryStatusEx             = uint32(unsafe.Sizeof(MEMORYSTATUSEX{}))
-	GetLogicalProcessorInformationEx = k32.NewProc("GetLogicalProcessorInformationEx")
 )
 
 var CudartGlobs = []string{
@@ -57,178 +55,3 @@ func GetCPUMem() (memInfo, error) {
 	}
 	return memInfo{TotalMemory: memStatus.TotalPhys, FreeMemory: memStatus.AvailPhys, FreeSwap: memStatus.AvailPageFile}, nil
 }
-
-type LOGICAL_PROCESSOR_RELATIONSHIP uint32
-
-const (
-	RelationProcessorCore LOGICAL_PROCESSOR_RELATIONSHIP = iota
-	RelationNumaNode
-	RelationCache
-	RelationProcessorPackage
-	RelationGroup
-	RelationProcessorDie
-	RelationNumaNodeEx
-	RelationProcessorModule
-)
-const RelationAll LOGICAL_PROCESSOR_RELATIONSHIP = 0xffff
-
-type GROUP_AFFINITY struct {
-	Mask     uintptr // KAFFINITY
-	Group    uint16
-	Reserved [3]uint16
-}
-
-type PROCESSOR_RELATIONSHIP struct {
-	Flags           byte
-	EfficiencyClass byte
-	Reserved        [20]byte
-	GroupCount      uint16
-	GroupMask       [1]GROUP_AFFINITY // len GroupCount
-}
-
-// Omitted unused structs: NUMA_NODE_RELATIONSHIP CACHE_RELATIONSHIP GROUP_RELATIONSHIP
-
-type SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX struct {
-	Relationship LOGICAL_PROCESSOR_RELATIONSHIP
-	Size         uint32
-	U            [1]byte // Union len Size
-	// PROCESSOR_RELATIONSHIP
-	// NUMA_NODE_RELATIONSHIP
-	// CACHE_RELATIONSHIP
-	// GROUP_RELATIONSHIP
-}
-
-func (group *GROUP_AFFINITY) IsMember(target *GROUP_AFFINITY) bool {
-	if group == nil || target == nil {
-		return false
-	}
-	return group.Mask&target.Mask != 0
-}
-
-type winPackage struct {
-	groups              []*GROUP_AFFINITY
-	coreCount           int // performance cores = coreCount - efficiencyCoreCount
-	efficiencyCoreCount int
-	threadCount         int
-}
-
-func (pkg *winPackage) IsMember(target *GROUP_AFFINITY) bool {
-	for _, group := range pkg.groups {
-		if group.IsMember(target) {
-			return true
-		}
-	}
-	return false
-}
-
-func getLogicalProcessorInformationEx() ([]byte, error) {
-	buf := make([]byte, 1)
-	bufSize := len(buf)
-	ret, _, err := GetLogicalProcessorInformationEx.Call(
-		uintptr(RelationAll),
-		uintptr(unsafe.Pointer(&buf[0])),
-		uintptr(unsafe.Pointer(&bufSize)),
-	)
-	if ret != 0 {
-		return nil, fmt.Errorf("failed to determine size info ret:%d %w", ret, err)
-	}
-
-	buf = make([]byte, bufSize)
-	ret, _, err = GetLogicalProcessorInformationEx.Call(
-		uintptr(RelationAll),
-		uintptr(unsafe.Pointer(&buf[0])),
-		uintptr(unsafe.Pointer(&bufSize)),
-	)
-	if ret == 0 {
-		return nil, fmt.Errorf("failed to gather processor information ret:%d buflen:%d %w", ret, bufSize, err)
-	}
-	return buf, nil
-}
-
-func processSystemLogicalProcessorInforationList(buf []byte) []*winPackage {
-	var slpi *SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX
-	// Find all the packages first
-	packages := []*winPackage{}
-	for bufOffset := 0; bufOffset < len(buf); bufOffset += int(slpi.Size) {
-		slpi = (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buf[bufOffset]))
-		if slpi.Relationship != RelationProcessorPackage {
-			continue
-		}
-		pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
-		pkg := &winPackage{}
-		ga0 := unsafe.Pointer(&pr.GroupMask[0])
-		for j := range pr.GroupCount {
-			gm := (*GROUP_AFFINITY)(unsafe.Pointer(uintptr(ga0) + uintptr(j)*unsafe.Sizeof(GROUP_AFFINITY{})))
-			pkg.groups = append(pkg.groups, gm)
-		}
-		packages = append(packages, pkg)
-	}
-
-	slog.Info("packages", "count", len(packages))
-
-	// To identify efficiency cores we have to compare the relative values
-	// Larger values are "less efficient" (aka, more performant)
-	var maxEfficiencyClass byte
-	for bufOffset := 0; bufOffset < len(buf); bufOffset += int(slpi.Size) {
-		slpi = (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buf[bufOffset]))
-		if slpi.Relationship != RelationProcessorCore {
-			continue
-		}
-		pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
-		if pr.EfficiencyClass > maxEfficiencyClass {
-			maxEfficiencyClass = pr.EfficiencyClass
-		}
-	}
-	if maxEfficiencyClass > 0 {
-		slog.Info("efficiency cores detected", "maxEfficiencyClass", maxEfficiencyClass)
-	}
-
-	// then match up the Cores to the Packages, count up cores, threads and efficiency cores
-	for bufOffset := 0; bufOffset < len(buf); bufOffset += int(slpi.Size) {
-		slpi = (*SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)(unsafe.Pointer(&buf[bufOffset]))
-		if slpi.Relationship != RelationProcessorCore {
-			continue
-		}
-		pr := (*PROCESSOR_RELATIONSHIP)(unsafe.Pointer(&slpi.U[0]))
-		ga0 := unsafe.Pointer(&pr.GroupMask[0])
-		for j := range pr.GroupCount {
-			gm := (*GROUP_AFFINITY)(unsafe.Pointer(uintptr(ga0) + uintptr(j)*unsafe.Sizeof(GROUP_AFFINITY{})))
-			for _, pkg := range packages {
-				if pkg.IsMember(gm) {
-					pkg.coreCount++
-					if pr.Flags == 0 {
-						pkg.threadCount++
-					} else {
-						pkg.threadCount += 2
-					}
-					if pr.EfficiencyClass < maxEfficiencyClass {
-						pkg.efficiencyCoreCount++
-					}
-				}
-			}
-		}
-	}
-
-	// Sumarize the results
-	for i, pkg := range packages {
-		slog.Info("", "package", i, "cores", pkg.coreCount, "efficiency", pkg.efficiencyCoreCount, "threads", pkg.threadCount)
-	}
-
-	return packages
-}
-
-func GetCPUDetails() ([]CPU, error) {
-	buf, err := getLogicalProcessorInformationEx()
-	if err != nil {
-		return nil, err
-	}
-	packages := processSystemLogicalProcessorInforationList(buf)
-	cpus := make([]CPU, len(packages))
-
-	for i, pkg := range packages {
-		cpus[i].CoreCount = pkg.coreCount
-		cpus[i].EfficiencyCoreCount = pkg.efficiencyCoreCount
-		cpus[i].ThreadCount = pkg.threadCount
-	}
-	return cpus, nil
-}
File diff suppressed because one or more lines are too long
gpu/types.go (36 changed lines)
@@ -10,11 +10,11 @@ import (
 type memInfo struct {
 	TotalMemory uint64 `json:"total_memory,omitempty"`
 	FreeMemory  uint64 `json:"free_memory,omitempty"`
-	FreeSwap    uint64 `json:"free_swap,omitempty"` // TODO split this out for system only
+	FreeSwap    uint64 `json:"free_swap,omitempty"`
 }
 
 // Beginning of an `ollama info` command
-type GpuInfo struct { // TODO better name maybe "InferenceProcessor"?
+type GpuInfo struct {
 	memInfo
 	Library string `json:"library,omitempty"`
 
@@ -49,17 +49,6 @@ type GpuInfo struct {
 
 type CPUInfo struct {
 	GpuInfo
-	CPUs []CPU
-}
-
-// CPU type represents a CPU Package occupying a socket
-type CPU struct {
-	ID                  string `cpuinfo:"processor"`
-	VendorID            string `cpuinfo:"vendor_id"`
-	ModelName           string `cpuinfo:"model name"`
-	CoreCount           int
-	EfficiencyCoreCount int // Performance = CoreCount - Efficiency
-	ThreadCount         int
 }
 
 type CudaGPUInfo struct {
@@ -87,11 +76,6 @@ type OneapiGPUInfoList []OneapiGPUInfo
 
 type GpuInfoList []GpuInfo
 
-type UnsupportedGPUInfo struct {
-	GpuInfo
-	Reason string `json:"reason"`
-}
-
 // Split up the set of gpu info's by Library and variant
 func (l GpuInfoList) ByLibrary() []GpuInfoList {
 	resp := []GpuInfoList{}
@@ -162,19 +146,3 @@ func (c CPUCapability) String() string {
 		return "no vector extensions"
 	}
 }
-
-type SystemInfo struct {
-	System          CPUInfo              `json:"system"`
-	GPUs            []GpuInfo            `json:"gpus"`
-	UnsupportedGPUs []UnsupportedGPUInfo `json:"unsupported_gpus"`
-	DiscoveryErrors []string             `json:"discovery_errors"`
-}
-
-// Return the optimal number of threads to use for inference
-func (si SystemInfo) GetOptimalThreadCount() int {
-	if len(si.System.CPUs) == 0 {
-		return 0
-	}
-	// Allocate thread count matching the performance cores on a single socket
-	return si.System.CPUs[0].CoreCount - si.System.CPUs[0].EfficiencyCoreCount
-}
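The GetOptimalThreadCount removed above budgets one inference thread per performance core on the first socket, where performance cores are CoreCount minus EfficiencyCoreCount. A tiny illustration of that arithmetic, with a stand-in CPU struct rather than the real one from types.go:

```go
package main

import "fmt"

// CPU is a stand-in with just the two fields the heuristic needs.
type CPU struct {
	CoreCount           int
	EfficiencyCoreCount int
}

// optimalThreads mirrors the removed method: performance cores on socket 0.
func optimalThreads(cpus []CPU) int {
	if len(cpus) == 0 {
		return 0
	}
	return cpus[0].CoreCount - cpus[0].EfficiencyCoreCount
}

func main() {
	fmt.Println(optimalThreads([]CPU{{CoreCount: 14, EfficiencyCoreCount: 8}})) // 6
}
```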
@@ -3,12 +3,12 @@ package llama
 /*
 #cgo CFLAGS: -O2 -std=c11 -DGGML_BUILD=1 -DNDEBUG -DLOG_DISABLE_LOGS -DGGML_USE_LLAMAFILE
 #cgo CXXFLAGS: -O2 -std=c++11 -DGGML_BUILD=1 -DNDEBUG -DLOG_DISABLE_LOGS -DGGML_USE_LLAMAFILE
-#cgo darwin,arm64 CFLAGS: -DGGML_USE_METAL -DGGML_USE_ACCELERATE -DGGML_METAL_EMBED_LIBRARY -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64 -DGGML_USE_BLAS -mmacosx-version-min=11.3
-#cgo darwin,arm64 CXXFLAGS: -DGGML_USE_METAL -DGGML_USE_ACCELERATE -DGGML_METAL_EMBED_LIBRARY -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64 -DGGML_USE_BLAS -mmacosx-version-min=11.3
-#cgo darwin,arm64 LDFLAGS: -framework Foundation -framework Metal -framework MetalKit -framework Accelerate -mmacosx-version-min=11.3
-#cgo darwin,amd64 CFLAGS: -Wno-incompatible-pointer-types-discards-qualifiers -mmacosx-version-min=11.3
-#cgo darwin,amd64 CXXFLAGS: -Wno-incompatible-pointer-types-discards-qualifiers -mmacosx-version-min=11.3
-#cgo darwin,amd64 LDFLAGS: -framework Foundation -mmacosx-version-min=11.3
+#cgo darwin,arm64 CFLAGS: -DGGML_USE_METAL -DGGML_USE_ACCELERATE -DGGML_METAL_EMBED_LIBRARY -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64 -DGGML_USE_BLAS
+#cgo darwin,arm64 CXXFLAGS: -DGGML_USE_METAL -DGGML_USE_ACCELERATE -DGGML_METAL_EMBED_LIBRARY -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64 -DGGML_USE_BLAS
+#cgo darwin,arm64 LDFLAGS: -framework Foundation -framework Metal -framework MetalKit -framework Accelerate
+#cgo darwin,amd64 CFLAGS: -Wno-incompatible-pointer-types-discards-qualifiers
+#cgo darwin,amd64 CXXFLAGS: -Wno-incompatible-pointer-types-discards-qualifiers
+#cgo darwin,amd64 LDFLAGS: -framework Foundation
 #cgo darwin,amd64,avx2 CFLAGS: -DGGML_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
 #cgo darwin,amd64,avx2 CXXFLAGS: -DGGML_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
 #cgo darwin,amd64,avx2 LDFLAGS: -framework Accelerate
@@ -251,7 +251,7 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then
     ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocblas.so.*.*.????? | cut -f5 -d. || true)
     fi
     init_vars
-    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DGGML_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
+    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DGGML_HIPBLAS=on -DGGML_CUDA_NO_PEER_COPY=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
     # Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp
     if [ -n "${OLLAMA_CUSTOM_ROCM_DEFS}" ]; then
       echo "OLLAMA_CUSTOM_ROCM_DEFS=\"${OLLAMA_CUSTOM_ROCM_DEFS}\""
@ -340,6 +340,7 @@ function build_rocm() {
|
|||||||
"-DCMAKE_C_COMPILER=clang.exe",
|
"-DCMAKE_C_COMPILER=clang.exe",
|
||||||
"-DCMAKE_CXX_COMPILER=clang++.exe",
|
"-DCMAKE_CXX_COMPILER=clang++.exe",
|
||||||
"-DGGML_HIPBLAS=on",
|
"-DGGML_HIPBLAS=on",
|
||||||
|
"-DGGML_CUDA_NO_PEER_COPY=on",
|
||||||
"-DHIP_PLATFORM=amd",
|
"-DHIP_PLATFORM=amd",
|
||||||
"-DGGML_AVX=on",
|
"-DGGML_AVX=on",
|
||||||
"-DGGML_AVX2=off",
|
"-DGGML_AVX2=off",
|
||||||
|
@@ -244,8 +244,6 @@ func (t Tensor) typeSize() uint64 {
 		return 8
 	case 29: // IQ1_M
 		return blockSize/8 + blockSize/16 + blockSize/32
-	case 30: // BF16
-		return 2
 	default:
 		return 0
 	}
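The case removed above sizes BF16 tensors at 2 bytes per element. A tiny illustration of what that constant means for a tensor's byte footprint (values illustrative, not an ollama API):

```go
package main

import "fmt"

// bf16Bytes computes the byte size of a BF16 tensor: element count times the
// 2-byte element size that the removed "case 30" returned.
func bf16Bytes(elements uint64) uint64 {
	const bf16TypeSize = 2 // bytes per element
	return elements * bf16TypeSize
}

func main() {
	fmt.Println(bf16Bytes(4096 * 4096)) // 33554432 bytes for a 4096x4096 BF16 tensor
}
```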
@@ -98,11 +98,15 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 	var systemFreeMemory uint64
 	var systemSwapFreeMemory uint64
 
-	systemInfo := gpu.GetSystemInfo()
-	systemTotalMemory = systemInfo.System.TotalMemory
-	systemFreeMemory = systemInfo.System.FreeMemory
-	systemSwapFreeMemory = systemInfo.System.FreeSwap
-	slog.Info("system memory", "total", format.HumanBytes2(systemTotalMemory), "free", format.HumanBytes2(systemFreeMemory), "free_swap", format.HumanBytes2(systemSwapFreeMemory))
+	systemMemInfo, err := gpu.GetCPUMem()
+	if err != nil {
+		slog.Error("failed to lookup system memory", "error", err)
+	} else {
+		systemTotalMemory = systemMemInfo.TotalMemory
+		systemFreeMemory = systemMemInfo.FreeMemory
+		systemSwapFreeMemory = systemMemInfo.FreeSwap
+		slog.Info("system memory", "total", format.HumanBytes2(systemTotalMemory), "free", format.HumanBytes2(systemFreeMemory), "free_swap", format.HumanBytes2(systemSwapFreeMemory))
+	}
 
 	// If the user wants zero GPU layers, reset the gpu list to be CPU/system ram info
 	if opts.NumGPU == 0 {
|
|||||||
params = append(params, "--mmproj", projectors[0])
|
params = append(params, "--mmproj", projectors[0])
|
||||||
}
|
}
|
||||||
|
|
||||||
defaultThreads := systemInfo.GetOptimalThreadCount()
|
|
||||||
if opts.NumThread > 0 {
|
if opts.NumThread > 0 {
|
||||||
params = append(params, "--threads", strconv.Itoa(opts.NumThread))
|
params = append(params, "--threads", strconv.Itoa(opts.NumThread))
|
||||||
} else if defaultThreads > 0 {
|
|
||||||
params = append(params, "--threads", strconv.Itoa(defaultThreads))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if !opts.F16KV {
|
if !opts.F16KV {
|
||||||
@ -259,7 +260,15 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
|
|||||||
params = append(params, "--mlock")
|
params = append(params, "--mlock")
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO - NUMA support currently doesn't work properly
|
if gpu.IsNUMA() && gpus[0].Library == "cpu" {
|
||||||
|
numaMode := "distribute"
|
||||||
|
if runtime.GOOS == "linux" {
|
||||||
|
if _, err := exec.LookPath("numactl"); err == nil {
|
||||||
|
numaMode = "numactl"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
params = append(params, "--numa", numaMode)
|
||||||
|
}
|
||||||
|
|
||||||
params = append(params, "--parallel", strconv.Itoa(numParallel))
|
params = append(params, "--parallel", strconv.Itoa(numParallel))
|
||||||
|
|
||||||
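The block added on the "+" side above picks a NUMA policy for the llama.cpp runner: prefer numactl on Linux when the binary is on PATH, otherwise fall back to the "distribute" policy. A standalone sketch of just that selection, using only standard-library calls (the surrounding parameter plumbing is omitted):

```go
package main

import (
	"fmt"
	"os/exec"
	"runtime"
)

// numaMode mirrors the selection logic in the hunk: "numactl" when available
// on Linux, otherwise "distribute".
func numaMode() string {
	mode := "distribute"
	if runtime.GOOS == "linux" {
		if _, err := exec.LookPath("numactl"); err == nil {
			mode = "numactl"
		}
	}
	return mode
}

func main() {
	fmt.Println("--numa", numaMode())
}
```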