remove build scripts

This commit is contained in:
jmorganca 2024-06-10 02:56:37 -04:00
parent b8c1065ab6
commit 3375b82c56
5 changed files with 155 additions and 305 deletions

View File

@ -1,146 +1,150 @@
# Makefile for the ollama runner binaries and the GGML CUDA/ROCm shared
# libraries. CPU/AVX/AVX2 runners are always built; the CUDA and ROCm
# runners are added to `all` on non-Darwin hosts.
OS := $(shell uname -s)
ARCH := $(or $(ARCH), $(shell uname -m))
NVCC := nvcc

# Map uname's x86_64 to Go's GOARCH spelling.
ifeq ($(ARCH),x86_64)
  ARCH := amd64
endif

# Determine object/shared-library extensions and the HIP toolchain per OS.
# FIX: HIP_PATH/HIPCC were previously set unconditionally to the
# Windows-only hipcc.bin.exe path, which broke Linux ROCm builds.
ifneq (,$(findstring MINGW,$(OS)))
  OBJ_EXT := obj
  SHARED_EXT := dll
  # cygpath -w -s yields a short (8.3) Windows path with no spaces.
  HIP_PATH := $(shell cygpath -w -s "$(HIP_PATH)")
  HIPCC := "$(HIP_PATH)/bin/hipcc.bin.exe"
else
  OBJ_EXT := o
  SHARED_EXT := so
  # On non-Windows hosts hipcc is expected on PATH.
  HIPCC := hipcc
endif

# GGML sources compiled into both the CUDA and HIP shared libraries.
CUDA_SRCS := \
	ggml-cuda.cu \
	$(wildcard ggml-cuda/*.cu) \
	$(wildcard ggml-cuda/template-instances/fattn-wmma*.cu) \
	$(wildcard ggml-cuda/template-instances/mmq*.cu) \
	$(wildcard ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu) \
	$(wildcard ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu) \
	$(wildcard ggml-cuda/template-instances/fattn-vec*f16-f16.cu) \
	ggml.c ggml-backend.c ggml-alloc.c ggml-quants.c sgemm.cpp

# Derive per-backend object names by rewriting each source extension.
CUDA_OBJS := $(CUDA_SRCS:.cu=.cuda.$(OBJ_EXT))
CUDA_OBJS := $(CUDA_OBJS:.c=.cuda.$(OBJ_EXT))
CUDA_OBJS := $(CUDA_OBJS:.cpp=.cuda.$(OBJ_EXT))
HIP_OBJS := $(CUDA_SRCS:.cu=.hip.$(OBJ_EXT))
HIP_OBJS := $(HIP_OBJS:.c=.hip.$(OBJ_EXT))
HIP_OBJS := $(HIP_OBJS:.cpp=.hip.$(OBJ_EXT))

CUDA_FLAGS := \
	--generate-code=arch=compute_50,code=[compute_50,sm_50] \
	--generate-code=arch=compute_52,code=[compute_52,sm_52] \
	--generate-code=arch=compute_61,code=[compute_61,sm_61] \
	--generate-code=arch=compute_70,code=[compute_70,sm_70] \
	--generate-code=arch=compute_75,code=[compute_75,sm_75] \
	--generate-code=arch=compute_80,code=[compute_80,sm_80] \
	-DGGML_CUDA_DMMV_X=32 \
	-DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 \
	-DGGML_USE_CUDA=1 \
	-DGGML_SHARED=1 \
	-DGGML_BUILD=1 \
	-DGGML_USE_LLAMAFILE \
	-D_GNU_SOURCE \
	-DCMAKE_POSITION_INDEPENDENT_CODE=on \
	-Wno-deprecated-gpu-targets \
	--forward-unknown-to-host-compiler \
	-use_fast_math \
	-link \
	-shared \
	-I. \
	-O3

# GPU architectures built on every platform, plus extras only supported
# by the Linux ROCm stack.
HIP_ARCHS := gfx900 gfx940 gfx941 gfx942 gfx1010 gfx1012 gfx1030 gfx1100 gfx1101 gfx1102
LINUX_HIP_ARCHS := gfx906:xnack- gfx908:xnack- gfx90a:xnack+ gfx90a:xnack-

HIP_FLAGS := \
	-c \
	-O3 \
	-DGGML_USE_CUDA \
	-DGGML_BUILD=1 \
	-DGGML_SHARED=1 \
	-DGGML_CUDA_DMMV_X=32 \
	-DGGML_CUDA_MMV_Y=1 \
	-DGGML_SCHED_MAX_COPIES=4 \
	-DGGML_USE_HIPBLAS \
	-DGGML_USE_LLAMAFILE \
	-DHIP_FAST_MATH \
	-DNDEBUG \
	-DK_QUANTS_PER_ITERATION=2 \
	-D_CRT_SECURE_NO_WARNINGS \
	-DCMAKE_POSITION_INDEPENDENT_CODE=on \
	-D_GNU_SOURCE \
	-Wno-expansion-to-defined \
	-Wno-invalid-noreturn \
	-Wno-ignored-attributes \
	-Wno-pass-failed \
	-Wno-deprecated-declarations \
	-Wno-unused-result \
	-I. \
	$(foreach arch, $(HIP_ARCHS), --offload-arch=$(arch))

# FIX: was `ifeq ($(UNAME_S), Linux)` — UNAME_S is never defined in this
# file (the uname output is stored in OS), so the Linux-only offload
# architectures were silently skipped.
ifeq ($(OS),Linux)
  HIP_FLAGS += $(foreach arch, $(LINUX_HIP_ARCHS), --offload-arch=$(arch))
endif

ifeq ($(OS),Darwin)
ifeq ($(ARCH),arm64)
all: ollama_runner
else ifeq ($(ARCH),amd64)
all: ollama_runner ollama_runner_avx ollama_runner_avx2
endif
else
all: ollama_runner ollama_runner_avx ollama_runner_avx2 ollama_runner_cuda ollama_runner_rocm
endif

%.cuda.$(OBJ_EXT): %.cu
	$(NVCC) -c $(CUDA_FLAGS) -o $@ $<

%.cuda.$(OBJ_EXT): %.c
	$(NVCC) -c $(CFLAGS) -o $@ $<

%.cuda.$(OBJ_EXT): %.cpp
	$(NVCC) -c $(CXXFLAGS) -o $@ $<

# FIX: use $(NVCC) (was a hard-coded `nvcc`) so the toolchain can be
# overridden from the command line like every other rule.
ggml_cuda.$(SHARED_EXT): $(CUDA_OBJS)
	$(NVCC) --shared -lcuda -lcublas -lcudart -lcublasLt $(CUDA_FLAGS) $(CUDA_OBJS) -o $@

%.hip.$(OBJ_EXT): %.cu
	$(HIPCC) -c $(HIP_FLAGS) -o $@ $<

%.hip.$(OBJ_EXT): %.c
	$(HIPCC) -c $(CFLAGS) -o $@ $<

%.hip.$(OBJ_EXT): %.cpp
	$(HIPCC) -c $(CXXFLAGS) -o $@ $<

ggml_hipblas.$(SHARED_EXT): $(HIP_OBJS)
	$(HIPCC) --shared -lhipblas -lamdhip64 -lrocblas $(HIP_OBJS) -o $@

ollama_runner:
	CGO_ENABLED=1 GOARCH=$(ARCH) go build -ldflags "-s -w" -o $@ ./runner

ollama_runner_avx:
	CGO_ENABLED=1 GOARCH=$(ARCH) go build -ldflags "-s -w" -tags avx -o $@ ./runner

ollama_runner_avx2:
	CGO_ENABLED=1 GOARCH=$(ARCH) go build -ldflags "-s -w" -tags avx,avx2 -o $@ ./runner

# FIX: depend on ggml_cuda.$(SHARED_EXT) / ggml_hipblas.$(SHARED_EXT)
# instead of the hard-coded .dll names, which only exist on Windows.
ollama_runner_cuda: ggml_cuda.$(SHARED_EXT)
	CGO_ENABLED=1 GOARCH=$(ARCH) go build -ldflags "-s -w" -tags avx,cuda -o $@ ./runner

ollama_runner_rocm: ggml_hipblas.$(SHARED_EXT)
	CGO_ENABLED=1 GOARCH=$(ARCH) go build -ldflags "-s -w" -tags avx,rocm -o $@ ./runner

# ggml_cuda.* already covers ggml_cuda.$(SHARED_EXT); the redundant
# explicit entry was dropped.
clean:
	rm -f $(CUDA_OBJS) $(HIP_OBJS) ggml_cuda.* ggml_hipblas.* ollama_runner*

.PHONY: all clean ollama_runner ollama_runner_avx ollama_runner_avx2 ollama_runner_cuda ollama_runner_rocm
# Makefile for the ollama runner binaries and the GGML CUDA/ROCm shared
# libraries. CPU/AVX/AVX2 runners are always built; the CUDA and ROCm
# runners are added to `all` on non-Darwin hosts.
OS := $(shell uname -s)
ARCH := $(or $(ARCH), $(shell uname -m))
NVCC := nvcc

# cgo rejects -mfma/-mf16c by default; allow them through to the
# host C/C++ compiler for the AVX2 build.
export CGO_CFLAGS_ALLOW = -mfma|-mf16c
export CGO_CXXFLAGS_ALLOW = -mfma|-mf16c

# Map uname's x86_64 to Go's GOARCH spelling.
ifeq ($(ARCH),x86_64)
  ARCH := amd64
endif

# Determine object/shared-library extensions and the HIP toolchain per OS.
# FIX: HIPCC was previously set unconditionally to the Windows-only
# hipcc.bin.exe path, which broke Linux ROCm builds.
ifneq (,$(findstring MINGW,$(OS)))
  OBJ_EXT := obj
  SHARED_EXT := dll
  # cygpath -w -s yields a short (8.3) Windows path with no spaces.
  HIP_PATH := $(shell cygpath -w -s "$(HIP_PATH)")
  HIPCC := "$(HIP_PATH)/bin/hipcc.bin.exe"
else
  OBJ_EXT := o
  SHARED_EXT := so
  # On non-Windows hosts hipcc is expected on PATH.
  HIPCC := hipcc
endif

# GGML sources compiled into both the CUDA and HIP shared libraries.
CUDA_SRCS := \
	ggml-cuda.cu \
	$(wildcard ggml-cuda/*.cu) \
	$(wildcard ggml-cuda/template-instances/fattn-wmma*.cu) \
	$(wildcard ggml-cuda/template-instances/mmq*.cu) \
	$(wildcard ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu) \
	$(wildcard ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu) \
	$(wildcard ggml-cuda/template-instances/fattn-vec*f16-f16.cu) \
	ggml.c ggml-backend.c ggml-alloc.c ggml-quants.c sgemm.cpp

# Derive per-backend object names by rewriting each source extension.
CUDA_OBJS := $(CUDA_SRCS:.cu=.cuda.$(OBJ_EXT))
CUDA_OBJS := $(CUDA_OBJS:.c=.cuda.$(OBJ_EXT))
CUDA_OBJS := $(CUDA_OBJS:.cpp=.cuda.$(OBJ_EXT))
HIP_OBJS := $(CUDA_SRCS:.cu=.hip.$(OBJ_EXT))
HIP_OBJS := $(HIP_OBJS:.c=.hip.$(OBJ_EXT))
HIP_OBJS := $(HIP_OBJS:.cpp=.hip.$(OBJ_EXT))

CUDA_FLAGS := \
	--generate-code=arch=compute_50,code=[compute_50,sm_50] \
	--generate-code=arch=compute_52,code=[compute_52,sm_52] \
	--generate-code=arch=compute_61,code=[compute_61,sm_61] \
	--generate-code=arch=compute_70,code=[compute_70,sm_70] \
	--generate-code=arch=compute_75,code=[compute_75,sm_75] \
	--generate-code=arch=compute_80,code=[compute_80,sm_80] \
	-DGGML_CUDA_DMMV_X=32 \
	-DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 \
	-DGGML_USE_CUDA=1 \
	-DGGML_SHARED=1 \
	-DGGML_BUILD=1 \
	-DGGML_USE_LLAMAFILE \
	-D_GNU_SOURCE \
	-DCMAKE_POSITION_INDEPENDENT_CODE=on \
	-Wno-deprecated-gpu-targets \
	--forward-unknown-to-host-compiler \
	-use_fast_math \
	-link \
	-shared \
	-I. \
	-O3

# GPU architectures built on every platform, plus extras only supported
# by the Linux ROCm stack.
HIP_ARCHS := gfx900 gfx940 gfx941 gfx942 gfx1010 gfx1012 gfx1030 gfx1100 gfx1101 gfx1102
LINUX_HIP_ARCHS := gfx906:xnack- gfx908:xnack- gfx90a:xnack+ gfx90a:xnack-

HIP_FLAGS := \
	-c \
	-O3 \
	-DGGML_USE_CUDA \
	-DGGML_BUILD=1 \
	-DGGML_SHARED=1 \
	-DGGML_CUDA_DMMV_X=32 \
	-DGGML_CUDA_MMV_Y=1 \
	-DGGML_SCHED_MAX_COPIES=4 \
	-DGGML_USE_HIPBLAS \
	-DGGML_USE_LLAMAFILE \
	-DHIP_FAST_MATH \
	-DNDEBUG \
	-DK_QUANTS_PER_ITERATION=2 \
	-D_CRT_SECURE_NO_WARNINGS \
	-DCMAKE_POSITION_INDEPENDENT_CODE=on \
	-D_GNU_SOURCE \
	-Wno-expansion-to-defined \
	-Wno-invalid-noreturn \
	-Wno-ignored-attributes \
	-Wno-pass-failed \
	-Wno-deprecated-declarations \
	-Wno-unused-result \
	-I. \
	$(foreach arch, $(HIP_ARCHS), --offload-arch=$(arch))

# FIX: --dependent-lib=msvcrt is an MSVC-runtime flag; pass it only on
# Windows (it was previously unconditional — presumably unintended on
# Linux hipcc; confirm against the ROCm toolchain docs).
ifneq (,$(findstring MINGW,$(OS)))
  HIP_FLAGS += -Xclang --dependent-lib=msvcrt
endif

# FIX: was `ifeq ($(UNAME_S), Linux)` — UNAME_S is never defined in this
# file (the uname output is stored in OS), so the Linux-only offload
# architectures were silently skipped.
ifeq ($(OS),Linux)
  HIP_FLAGS += $(foreach arch, $(LINUX_HIP_ARCHS), --offload-arch=$(arch))
endif

ifeq ($(OS),Darwin)
ifeq ($(ARCH),arm64)
all: ollama_runner
else ifeq ($(ARCH),amd64)
all: ollama_runner ollama_runner_avx ollama_runner_avx2
endif
else
all: ollama_runner ollama_runner_avx ollama_runner_avx2 ollama_runner_cuda ollama_runner_rocm
endif

%.cuda.$(OBJ_EXT): %.cu
	$(NVCC) -c $(CUDA_FLAGS) -o $@ $<

%.cuda.$(OBJ_EXT): %.c
	$(NVCC) -c $(CFLAGS) -o $@ $<

%.cuda.$(OBJ_EXT): %.cpp
	$(NVCC) -c $(CXXFLAGS) -o $@ $<

# FIX: use $(NVCC) (was a hard-coded `nvcc`) so the toolchain can be
# overridden from the command line like every other rule.
ggml_cuda.$(SHARED_EXT): $(CUDA_OBJS)
	$(NVCC) --shared -lcuda -lcublas -lcudart -lcublasLt $(CUDA_FLAGS) $(CUDA_OBJS) -o $@

%.hip.$(OBJ_EXT): %.cu
	$(HIPCC) -c $(HIP_FLAGS) -o $@ $<

%.hip.$(OBJ_EXT): %.c
	$(HIPCC) -c $(CFLAGS) -o $@ $<

%.hip.$(OBJ_EXT): %.cpp
	$(HIPCC) -c $(CXXFLAGS) -o $@ $<

ggml_hipblas.$(SHARED_EXT): $(HIP_OBJS)
	$(HIPCC) --shared -lhipblas -lamdhip64 -lrocblas $(HIP_OBJS) -o $@

ollama_runner:
	CGO_ENABLED=1 GOARCH=$(ARCH) go build -ldflags "-s -w" -o $@ ./runner

ollama_runner_avx:
	CGO_ENABLED=1 GOARCH=$(ARCH) go build -ldflags "-s -w" -tags avx -o $@ ./runner

ollama_runner_avx2:
	CGO_ENABLED=1 GOARCH=$(ARCH) go build -ldflags "-s -w" -tags avx,avx2 -o $@ ./runner

# FIX: depend on ggml_cuda.$(SHARED_EXT) / ggml_hipblas.$(SHARED_EXT)
# instead of the hard-coded .dll names, which only exist on Windows.
ollama_runner_cuda: ggml_cuda.$(SHARED_EXT)
	CGO_ENABLED=1 GOARCH=$(ARCH) go build -ldflags "-s -w" -tags avx,cuda -o $@ ./runner

ollama_runner_rocm: ggml_hipblas.$(SHARED_EXT)
	CGO_ENABLED=1 GOARCH=$(ARCH) go build -ldflags "-s -w" -tags avx,rocm -o $@ ./runner

# ggml_cuda.* already covers ggml_cuda.$(SHARED_EXT); the redundant
# explicit entry was dropped.
clean:
	rm -f $(CUDA_OBJS) $(HIP_OBJS) ggml_cuda.* ggml_hipblas.* ollama_runner*

.PHONY: all clean ollama_runner ollama_runner_avx ollama_runner_avx2 ollama_runner_cuda ollama_runner_rocm

View File

@ -13,11 +13,6 @@ Supported:
- [x] Linux ROCm
- [x] Llava
Extra build steps are required for CUDA and ROCm on Windows, since `nvcc` and `hipcc` both require using MSVC as the host compiler. For these, small DLLs are created:
- `ggml-cuda.dll`
- `ggml-hipblas.dll`
> Note: it's important that memory is allocated and freed by the same compiler (e.g. entirely by code compiled with msvc or mingw). Issues from this should be rare, but there are some places where pointers are returned by the CUDA or HIP runtimes and freed elsewhere, causing a crash. In a future change the same runtime should be used in both cases to avoid crashes.
## Building
@ -46,11 +41,7 @@ go build -tags=avx,avx2 .
### CUDA
Install the [CUDA toolkit v11.3.1](https://developer.nvidia.com/cuda-11-3-1-download-archive) then build `libggml-cuda.so`:
```shell
./build_cuda.sh
```
Install the [CUDA toolkit v11.3.1](https://developer.nvidia.com/cuda-11-3-1-download-archive):
Then build the package with the `cuda` tag:
@ -69,7 +60,7 @@ Install the [CUDA toolkit v11.3.1](https://developer.nvidia.com/cuda-11-3-1-down
Build `ggml-cuda.dll`:
```shell
./build_cuda.ps1
make ggml_cuda.dll
```
Then build the package with the `cuda` tag:
@ -82,10 +73,8 @@ go build -tags=cuda .
Install [ROCm 5.7.1](https://rocm.docs.amd.com/en/docs-5.7.1/) and [Strawberry Perl](https://strawberryperl.com/).
Then, build `ggml-hipblas.dll`:
```shell
./build_hipblas.sh
make ggml_hipblas.dll
```
Then build the package with the `rocm` tag:

View File

@ -1,47 +0,0 @@
#!/bin/bash
# Build the GGML CUDA shared library (libggml-cuda.so on Linux,
# ggml-cuda.dll on Windows) in a single nvcc compile-and-link step.
#
# FIX: fail fast on command errors, undefined variables, and pipe
# failures instead of silently continuing after a failed compile.
set -euo pipefail

os="$(uname -s)"

# NOTE(review): `uname -s` reports MINGW64_NT-* under MSYS2/Git Bash;
# "Windows_NT" is the cmd.exe %OS% value — kept as a defensive match,
# confirm whether any supported shell actually reports it.
if [[ "$os" == "Windows_NT" || "$os" == "MINGW64_NT"* ]]; then
    output="ggml-cuda.dll"
else
    output="libggml-cuda.so"
fi

# -t $(nproc): parallelize nvcc's internal compilation steps.
# FIX: quote command substitutions and $output so the script survives
# unexpected whitespace in either value.
nvcc \
    -t "$(nproc)" \
    --generate-code=arch=compute_50,code=[compute_50,sm_50] \
    --generate-code=arch=compute_52,code=[compute_52,sm_52] \
    --generate-code=arch=compute_61,code=[compute_61,sm_61] \
    --generate-code=arch=compute_70,code=[compute_70,sm_70] \
    --generate-code=arch=compute_75,code=[compute_75,sm_75] \
    --generate-code=arch=compute_80,code=[compute_80,sm_80] \
    -DGGML_CUDA_DMMV_X=32 \
    -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 \
    -DGGML_CUDA_MMV_Y=1 \
    -DGGML_USE_CUDA=1 \
    -DGGML_SHARED=1 \
    -DGGML_BUILD=1 \
    -DGGML_USE_LLAMAFILE \
    -D_GNU_SOURCE \
    -DCMAKE_POSITION_INDEPENDENT_CODE=on \
    -Wno-deprecated-gpu-targets \
    --forward-unknown-to-host-compiler \
    -use_fast_math \
    -link \
    -shared \
    -I. \
    -lcuda -lcublas -lcudart -lcublasLt \
    -O3 \
    -o "$output" \
    ggml-cuda.cu \
    ggml-cuda/*.cu \
    ggml-cuda/template-instances/fattn-wmma*.cu \
    ggml-cuda/template-instances/mmq*.cu \
    ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu \
    ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu \
    ggml-cuda/template-instances/fattn-vec*f16-f16.cu \
    ggml.c ggml-backend.c ggml-alloc.c ggml-quants.c sgemm.cpp

# Optional defines kept for reference (not currently enabled):
# -DGGML_CUDA_USE_GRAPHS=1
# -DGGML_CUDA_FA_ALL_QUANTS=1

View File

@ -1,96 +0,0 @@
#!/bin/bash
# Build the GGML HIP/ROCm shared library (libggml-hipblas.so on Linux,
# ggml-hipblas.dll on Windows), compiling each source in a parallel
# background job and linking the objects at the end.

# GPU architectures built on every platform.
archs=(
    gfx900
    gfx940
    gfx941
    gfx942
    gfx1010
    gfx1012
    gfx1030
    gfx1100
    gfx1101
    gfx1102
)

# Additional architectures only built on Linux (appended below).
linux_archs=(
    gfx906:xnack-
    gfx908:xnack-
    gfx90a:xnack+
    gfx90a:xnack-
)

os="$(uname -s)"
additional_flags=""

# NOTE(review): `uname -s` reports MINGW64_NT-* under MSYS2/Git Bash;
# "Windows_NT" is the cmd.exe %OS% value — confirm which shells hit it.
if [[ "$os" == "Windows_NT" || "$os" == "MINGW64_NT"* ]]; then
    output="ggml-hipblas.dll"
    # --dependent-lib=msvcrt: link against the MSVC C runtime on Windows.
    additional_flags=" -Xclang --dependent-lib=msvcrt"
else
    output="libggml-hipblas.so"
    archs+=("${linux_archs[@]}")
fi

# One --offload-arch flag per requested GPU architecture.
for arch in "${archs[@]}"; do
    additional_flags+=" --offload-arch=$arch"
done

# Create an array of all source files, expanding globs
# (the unquoted $(echo ...) substitutions rely on shell glob expansion).
sources=(
    $(echo ggml-cuda/template-instances/fattn-wmma*.cu)
    $(echo ggml-cuda/template-instances/mmq*.cu)
    $(echo ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu)
    $(echo ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu)
    $(echo ggml-cuda/template-instances/fattn-vec*f16-f16.cu)
    ggml-cuda.cu
    $(echo ggml-cuda/*.cu)
    ggml.c
    ggml-backend.c
    ggml-alloc.c
    ggml-quants.c
    sgemm.cpp
)

# Function to compile a single source file.
# ${src%.cu} strips only a .cu suffix, so .c/.cpp sources produce
# objects named e.g. ggml.c.o — still matched by the *.o link globs.
compile_source() {
    src="$1"
    hipcc -c -O3 -DGGML_USE_CUDA -DGGML_BUILD=1 -DGGML_SHARED=1 -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1 \
        -DGGML_SCHED_MAX_COPIES=4 -DGGML_USE_HIPBLAS -DGGML_USE_LLAMAFILE -DHIP_FAST_MATH -DNDEBUG \
        -DK_QUANTS_PER_ITERATION=2 -D_CRT_SECURE_NO_WARNINGS -DCMAKE_POSITION_INDEPENDENT_CODE=on \
        -D_GNU_SOURCE -Wno-expansion-to-defined -Wno-invalid-noreturn -Wno-ignored-attributes -Wno-pass-failed \
        -Wno-deprecated-declarations -Wno-unused-result -I. \
        $additional_flags -o "${src%.cu}.o" "$src"
}

# Function to handle Ctrl+C: kill 0 signals the whole process group,
# taking down every background compile job.
cleanup() {
    echo "Terminating all background processes..."
    kill 0
}

# Set trap to handle SIGINT (Ctrl+C)
trap cleanup SIGINT

# Limit the number of concurrent jobs to the CPU count.
max_jobs=$(nproc)
job_count=0
for src in "${sources[@]}"; do
    echo "$src"
    compile_source "$src" &
    job_count=$((job_count + 1))
    # wait -n (bash 4.3+) blocks until any one background job exits,
    # keeping at most max_jobs compiles in flight.
    if [[ $job_count -ge $max_jobs ]]; then
        wait -n
        job_count=$((job_count - 1))
    fi
done
# Wait for the remaining in-flight jobs before linking.
wait

# Link all object files into a shared library
echo "Linking object files..."
hipcc -v -shared -o $output *.o ggml-cuda/*.o ggml-cuda/template-instances/*.o -lhipblas -lamdhip64 -lrocblas

# Clean up object files after linking
rm -f *.o ggml-cuda/*.o ggml-cuda/template-instances/*.o

View File

@ -23,8 +23,8 @@ package llama
// #cgo rocm CFLAGS: -DGGML_USE_CUDA -DGGML_USE_HIPBLAS -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1
// #cgo rocm CXXFLAGS: -DGGML_USE_CUDA -DGGML_USE_HIPBLAS -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_CUDA_MMV_Y=1 -DGGML_BUILD=1
// #cgo rocm LDFLAGS: -L${SRCDIR} -lggml_hipblas -lhipblas -lamdhip64 -lrocblas
// #cgo windows,cuda LDFLAGS: -L. -L"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.3/lib/x64" -lggml_cuda -lcuda -lcudart -lcublas -lcublasLt
// #cgo windows,rocm LDFLAGS: -L. -L"C:/Program Files/AMD/ROCm/5.7/lib"
// #cgo windows,cuda LDFLAGS: -L${SRCDIR} -L"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.3/lib/x64" -lggml_cuda -lcuda -lcudart -lcublas -lcublasLt
// #cgo windows,rocm LDFLAGS: -L${SRCDIR} -L"C:/Program Files/AMD/ROCm/5.7/lib" -lggml_hipblas -lhipblas -lamdhip64 -lrocblas
// #cgo linux,cuda LDFLAGS: -L${SRCDIR} -L/usr/local/cuda/lib64 -lggml_cuda -lcuda -lcudart -lcublas -lcublasLt -lpthread -ldl -lrt
// #cgo linux,rocm LDFLAGS: -L/opt/rocm/lib
// #include <stdlib.h>