mirror of
https://github.com/tcsenpai/ollama.git
synced 2025-06-08 12:15:22 +00:00
improve cuda and hipblas build scripts
This commit is contained in:
parent
b22d78720e
commit
922d0acbdb
@ -1,24 +0,0 @@
|
|||||||
nvcc -t 12 `
|
|
||||||
--generate-code=arch=compute_50,code=[compute_50,sm_50] `
|
|
||||||
--generate-code=arch=compute_52,code=[compute_52,sm_52] `
|
|
||||||
--generate-code=arch=compute_61,code=[compute_61,sm_61] `
|
|
||||||
--generate-code=arch=compute_70,code=[compute_70,sm_70] `
|
|
||||||
--generate-code=arch=compute_75,code=[compute_75,sm_75] `
|
|
||||||
--generate-code=arch=compute_80,code=[compute_80,sm_80] `
|
|
||||||
-DGGML_CUDA_DMMV_X=32 `
|
|
||||||
-DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 `
|
|
||||||
-DGGML_CUDA_MMV_Y=1 `
|
|
||||||
-DGGML_USE_CUDA=1 `
|
|
||||||
-DGGML_SHARED=1 `
|
|
||||||
-DGGML_BUILD=1 `
|
|
||||||
-DGGML_USE_LLAMAFILE `
|
|
||||||
-Wno-deprecated-gpu-targets `
|
|
||||||
--forward-unknown-to-host-compiler `
|
|
||||||
-use_fast_math `
|
|
||||||
-link `
|
|
||||||
-shared `
|
|
||||||
-I. `
|
|
||||||
-lcuda -lcublas -lcudart -lcublasLt `
|
|
||||||
-O3 `
|
|
||||||
-o ggml-cuda.dll `
|
|
||||||
ggml-cuda.cu ggml-cuda/*.cu ggml.c ggml-backend.c ggml-alloc.c ggml-quants.c sgemm.cpp
|
|
@ -1,3 +1,13 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
os="$(uname -s)"
|
||||||
|
|
||||||
|
if [[ "$os" == "Windows_NT" || "$os" == "MINGW64_NT"* ]]; then
|
||||||
|
output="ggml-cuda.dll"
|
||||||
|
else
|
||||||
|
output="libggml-cuda.so"
|
||||||
|
fi
|
||||||
|
|
||||||
nvcc \
|
nvcc \
|
||||||
-t 12 \
|
-t 12 \
|
||||||
--generate-code=arch=compute_50,code=[compute_50,sm_50] \
|
--generate-code=arch=compute_50,code=[compute_50,sm_50] \
|
||||||
@ -14,6 +24,7 @@ nvcc \
|
|||||||
-DGGML_BUILD=1 \
|
-DGGML_BUILD=1 \
|
||||||
-DGGML_USE_LLAMAFILE \
|
-DGGML_USE_LLAMAFILE \
|
||||||
-D_GNU_SOURCE \
|
-D_GNU_SOURCE \
|
||||||
|
-DCMAKE_POSITION_INDEPENDENT_CODE=on \
|
||||||
-Wno-deprecated-gpu-targets \
|
-Wno-deprecated-gpu-targets \
|
||||||
--forward-unknown-to-host-compiler \
|
--forward-unknown-to-host-compiler \
|
||||||
-use_fast_math \
|
-use_fast_math \
|
||||||
@ -23,5 +34,5 @@ nvcc \
|
|||||||
-I. \
|
-I. \
|
||||||
-lcuda -lcublas -lcudart -lcublasLt \
|
-lcuda -lcublas -lcudart -lcublasLt \
|
||||||
-O3 \
|
-O3 \
|
||||||
-o libggml-cuda.so \
|
-o $output \
|
||||||
ggml-cuda.cu ggml-cuda/*.cu ggml.c ggml-backend.c ggml-alloc.c ggml-quants.c sgemm.cpp
|
ggml-cuda.cu ggml-cuda/*.cu ggml.c ggml-backend.c ggml-alloc.c ggml-quants.c sgemm.cpp
|
||||||
|
@ -1,16 +1,43 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
archs=(
|
||||||
|
gfx900
|
||||||
|
gfx940
|
||||||
|
gfx941
|
||||||
|
gfx942
|
||||||
|
gfx1010
|
||||||
|
gfx1012
|
||||||
|
gfx1030
|
||||||
|
gfx1100
|
||||||
|
gfx1101
|
||||||
|
gfx1102
|
||||||
|
)
|
||||||
|
|
||||||
|
linux_archs=(
|
||||||
|
gfx906:xnack-
|
||||||
|
gfx908:xnack-
|
||||||
|
gfx90a:xnack+
|
||||||
|
gfx90a:xnack-
|
||||||
|
)
|
||||||
|
|
||||||
|
os="$(uname -s)"
|
||||||
|
|
||||||
|
if [[ "$os" == "Windows_NT" || "$os" == "MINGW64_NT"* ]]; then
|
||||||
|
output="ggml-hipblas.dll"
|
||||||
|
else
|
||||||
|
output="libggml-hipblas.so"
|
||||||
|
archs+=("${linux_archs[@]}")
|
||||||
|
fi
|
||||||
|
|
||||||
|
offload_arch_flags=""
|
||||||
|
for arch in "${archs[@]}"; do
|
||||||
|
offload_arch_flags+=" --offload-arch=$arch"
|
||||||
|
done
|
||||||
|
|
||||||
hipcc \
|
hipcc \
|
||||||
-parallel-jobs=12 \
|
-parallel-jobs=12 \
|
||||||
-O3 \
|
-O3 \
|
||||||
--offload-arch=gfx900 \
|
$offload_arch_flags \
|
||||||
--offload-arch=gfx940 \
|
|
||||||
--offload-arch=gfx941 \
|
|
||||||
--offload-arch=gfx942 \
|
|
||||||
--offload-arch=gfx1010 \
|
|
||||||
--offload-arch=gfx1012 \
|
|
||||||
--offload-arch=gfx1030 \
|
|
||||||
--offload-arch=gfx1100 \
|
|
||||||
--offload-arch=gfx1101 \
|
|
||||||
--offload-arch=gfx1102 \
|
|
||||||
-DGGML_USE_CUDA \
|
-DGGML_USE_CUDA \
|
||||||
-DGGML_BUILD=1 \
|
-DGGML_BUILD=1 \
|
||||||
-DGGML_SHARED=1 \
|
-DGGML_SHARED=1 \
|
||||||
@ -23,6 +50,7 @@ hipcc \
|
|||||||
-DNDEBUG \
|
-DNDEBUG \
|
||||||
-DK_QUANTS_PER_ITERATION=2 \
|
-DK_QUANTS_PER_ITERATION=2 \
|
||||||
-D_CRT_SECURE_NO_WARNINGS \
|
-D_CRT_SECURE_NO_WARNINGS \
|
||||||
|
-DCMAKE_POSITION_INDEPENDENT_CODE=on \
|
||||||
-Xclang --dependent-lib=msvcrt -Wl,/subsystem:console \
|
-Xclang --dependent-lib=msvcrt -Wl,/subsystem:console \
|
||||||
-Wno-expansion-to-defined \
|
-Wno-expansion-to-defined \
|
||||||
-Wno-invalid-noreturn \
|
-Wno-invalid-noreturn \
|
||||||
@ -35,10 +63,6 @@ hipcc \
|
|||||||
-o ggml-hipblas.dll \
|
-o ggml-hipblas.dll \
|
||||||
ggml-cuda.cu ggml-cuda/*.cu ggml.c ggml-backend.c ggml-alloc.c ggml-quants.c sgemm.cpp
|
ggml-cuda.cu ggml-cuda/*.cu ggml.c ggml-backend.c ggml-alloc.c ggml-quants.c sgemm.cpp
|
||||||
|
|
||||||
# --offload-arch='gfx906:xnack-' \
|
|
||||||
# --offload-arch='gfx908:xnack-' \
|
|
||||||
# --offload-arch='gfx90a:xnack+' \
|
|
||||||
# --offload-arch='gfx90a:xnack-' \
|
|
||||||
# -D_DLL \
|
# -D_DLL \
|
||||||
# -D_MT \
|
# -D_MT \
|
||||||
# -D_XOPEN_SOURCE=600 \
|
# -D_XOPEN_SOURCE=600 \
|
||||||
|
Loading…
x
Reference in New Issue
Block a user