Mirror of https://github.com/tcsenpai/ollama.git (synced 2025-06-09 04:35:21 +00:00)

Commit e1dfc757b3: "revert llm changes"
Parent: 7d0a452938
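In scope: the revert restores callback-based model loading and the four-argument Tokenize in the llama example, re-unexports the llm package's FileType as fileType, re-adds the static llama.cpp build steps to the Linux and Windows generate scripts, restores the cgo-backed llm/llm.go (SystemInfo and Quantize), and points the server's CreateModel back at llm.Quantize.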
Go example using the llama package (file path not preserved on the mirror page)

@@ -6,6 +6,7 @@ import (
 	"io"
 	"log"
 	"os"
+	"runtime"
 	"strings"
 
 	"github.com/ollama/ollama/llama"
@@ -28,9 +29,11 @@ func main() {
 
 	// load the model
 	llama.BackendInit()
-	params := llama.NewModelParams()
+	params := llama.NewModelParams(999, 0, func(p float32) {
+		fmt.Printf("loading... %f\n", p)
+	})
 	model := llama.LoadModelFromFile(*mpath, params)
-	ctxParams := llama.NewContextParams()
+	ctxParams := llama.NewContextParams(2048, runtime.NumCPU(), false)
 
 	// language model context
 	lc := llama.NewContextWithModel(model, ctxParams)
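The two hunks above restore model loading with a progress callback plus explicit context parameters. A minimal end-to-end sketch, assuming the fork's llama bindings behave as shown in this diff (the 999 and 0 passed to NewModelParams are presumably a GPU-layer count and a main-GPU index, the final boolean of NewContextParams is not explained on this page, and the model path is hypothetical):

package main

import (
	"fmt"
	"runtime"

	"github.com/ollama/ollama/llama"
)

func main() {
	llama.BackendInit()

	// progress callback: p looks like a load fraction in [0, 1]
	params := llama.NewModelParams(999, 0, func(p float32) {
		fmt.Printf("loading... %f\n", p)
	})
	model := llama.LoadModelFromFile("model.gguf", params) // hypothetical path

	// 2048-token context with one thread per CPU core
	ctxParams := llama.NewContextParams(2048, runtime.NumCPU(), false)
	lc := llama.NewContextWithModel(model, ctxParams)
	_ = lc // ready for the tokenize/decode calls shown in the hunks below
}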
@@ -65,7 +68,7 @@ func main() {
 		panic("prompt must contain exactly one <image>")
 	}
 
-	beforeTokens, err := lc.Model().Tokenize(parts[0], 2048, true, true)
+	beforeTokens, err := lc.Model().Tokenize(parts[0], true, true)
 	if err != nil {
 		panic(err)
 	}
@@ -82,7 +85,7 @@ func main() {
 
 	llama.LlavaEvalImageEmbed(lc, embedding, 512, &nPast)
 
-	afterTokens, err := lc.Model().Tokenize(parts[1], 2048, true, true)
+	afterTokens, err := lc.Model().Tokenize(parts[1], true, true)
 	if err != nil {
 		panic(err)
 	}
@@ -92,7 +95,7 @@ func main() {
 			nPast++
 		}
 	} else {
-		tokens, err := lc.Model().Tokenize(*prompt, 2048, true, true)
+		tokens, err := lc.Model().Tokenize(*prompt, true, true)
 		if err != nil {
 			panic(err)
 		}
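All three Tokenize call sites in this file gain back a capacity argument. A hedged fragment, reusing lc and the prompt flag from the example above; the 2048 bounds the returned token count, and the two booleans are assumed to mirror llama.cpp's add-special/parse-special flags (the page itself does not name them):

// tokenize at most 2048 tokens from the prompt
tokens, err := lc.Model().Tokenize(*prompt, 2048, true, true)
if err != nil {
	panic(err)
}
fmt.Println("token count:", len(tokens))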
package llm: fileType declarations (file path not shown on the mirror page)

@@ -2,10 +2,10 @@ package llm
 
 import "fmt"
 
-type FileType uint32
+type fileType uint32
 
 const (
-	fileTypeF32 FileType = iota
+	fileTypeF32 fileType = iota
 	fileTypeF16
 	fileTypeQ4_0
 	fileTypeQ4_1
@@ -41,7 +41,7 @@ const (
 	fileTypeUnknown
 )
 
-func ParseFileType(s string) (FileType, error) {
+func ParseFileType(s string) (fileType, error) {
 	switch s {
 	case "F32":
 		return fileTypeF32, nil
@@ -108,7 +108,7 @@ func ParseFileType(s string) (FileType, error) {
 	}
 }
 
-func (t FileType) String() string {
+func (t fileType) String() string {
 	switch t {
 	case fileTypeF32:
 		return "F32"
@@ -175,6 +175,6 @@ func (t FileType) String() string {
 	}
 }
 
-func (t FileType) Value() uint32 {
+func (t fileType) Value() uint32 {
 	return uint32(t)
 }
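With fileType unexported again, code outside the llm package can only obtain a value through the exported helpers. A small round-trip sketch, using the "F32" arm that is visible in the hunks above:

// parse a file-type name into the package's (unexported) fileType;
// the inferred-type assignment works even though callers cannot name the type
want, err := llm.ParseFileType("F32")
if err != nil {
	log.Fatal(err)
}
fmt.Println(want.String(), want.Value()) // "F32" and its raw uint32 tag

Returning an unexported type from an exported function is a deliberate narrowing: callers can hold a fileType and pass it along (for example to Quantize, re-added below) but cannot name or construct values of it themselves.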
Linux generate script (shell; file path not shown on the mirror page)

@@ -58,6 +58,19 @@ init_vars
 git_module_setup
 apply_patches
 
+init_vars
+if [ -z "${OLLAMA_SKIP_STATIC_GENERATE}" -o "${OLLAMA_CPU_TARGET}" = "static" ]; then
+    # Builds by default, allows skipping, forces build if OLLAMA_CPU_TARGET="static"
+    # Enables optimized Dockerfile builds using a blanket skip and targeted overrides
+    # Static build for linking into the Go binary
+    init_vars
+    CMAKE_TARGETS="--target llama --target ggml"
+    CMAKE_DEFS="-DBUILD_SHARED_LIBS=off -DLLAMA_NATIVE=off -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
+    BUILD_DIR="../build/linux/${ARCH}_static"
+    echo "Building static library"
+    build
+fi
+
 init_vars
 if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
     # Users building from source can tune the exact flags we pass to cmake for configuring
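Per the comments in this hunk, the static library builds by default; setting OLLAMA_SKIP_STATIC_GENERATE skips it, and OLLAMA_CPU_TARGET="static" forces it even when the blanket skip is set, which is what the optimized Dockerfile builds rely on. Running the script with OLLAMA_CPU_TARGET=static set should therefore produce only the static archive under ../build/linux/${ARCH}_static (a reading based on this fragment; the rest of the script is not shown).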
Windows generate script (PowerShell; file path not shown on the mirror page)

@@ -177,6 +177,39 @@ function cleanup {
 # -DGGML_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
 # -DGGML_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
 
+
+function build_static() {
+    if ((-not "${env:OLLAMA_SKIP_STATIC_GENERATE}") -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "static"))) {
+        # GCC build for direct linking into the Go binary
+        init_vars
+        # cmake will silently fallback to msvc compilers if mingw isn't in the path, so detect and fail fast
+        # as we need this to be compiled by gcc for golang to be able to link with it
+        write-host "Checking for MinGW..."
+        # error action ensures we exit on failure
+        get-command gcc
+        get-command mingw32-make
+        $oldTargets = $script:cmakeTargets
+        $script:cmakeTargets = @("llama", "ggml")
+        $script:cmakeDefs = @(
+            "-G", "MinGW Makefiles"
+            "-DCMAKE_C_COMPILER=gcc.exe",
+            "-DCMAKE_CXX_COMPILER=g++.exe",
+            "-DBUILD_SHARED_LIBS=off",
+            "-DLLAMA_NATIVE=off",
+            "-DLLAMA_AVX=off",
+            "-DLLAMA_AVX2=off",
+            "-DLLAMA_AVX512=off",
+            "-DLLAMA_F16C=off",
+            "-DLLAMA_FMA=off")
+        $script:buildDir="../build/windows/${script:ARCH}_static"
+        write-host "Building static library"
+        build
+        $script:cmakeTargets = $oldTargets
+    } else {
+        write-host "Skipping CPU generation step as requested"
+    }
+}
+
 function build_cpu($gen_arch) {
     if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu"))) {
     # remaining llama.cpp builds use MSVC
@@ -364,6 +397,7 @@ init_vars
 if ($($args.count) -eq 0) {
     git_module_setup
     apply_patches
+    build_static
     if ($script:ARCH -eq "arm64") {
         build_cpu("ARM64")
     } else { # amd64
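The hunk above wires build_static into the default Windows flow ahead of the per-architecture CPU builds. Because the static step drives gcc and mingw32-make through "MinGW Makefiles", running the script without MinGW on PATH should now fail fast at the get-command probes rather than letting cmake silently fall back to MSVC (a reading based on the comments in build_static).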
package llm: GGUF key/value helpers (file path not shown on the mirror page)

@@ -55,9 +55,9 @@ func (kv KV) ParameterCount() uint64 {
 	return kv.u64("general.parameter_count")
 }
 
-func (kv KV) FileType() FileType {
+func (kv KV) FileType() fileType {
 	if u64 := kv.u64("general.file_type"); u64 > 0 {
-		return FileType(uint32(u64))
+		return fileType(uint32(u64))
 	}
 
 	return fileTypeUnknown
llm/llm.go (new file, 39 lines)

@@ -0,0 +1,39 @@
+package llm
+
+// #cgo CFLAGS: -Illama.cpp
+// #cgo darwin,arm64 LDFLAGS: ${SRCDIR}/build/darwin/arm64_static/libllama.a -lstdc++
+// #cgo darwin,amd64 LDFLAGS: ${SRCDIR}/build/darwin/x86_64_static/libllama.a -lstdc++
+// #cgo windows,amd64 LDFLAGS: ${SRCDIR}/build/windows/amd64_static/libllama.a -static -lstdc++
+// #cgo windows,arm64 LDFLAGS: ${SRCDIR}/build/windows/arm64_static/libllama.a -static -lstdc++
+// #cgo linux,amd64 LDFLAGS: ${SRCDIR}/build/linux/x86_64_static/libllama.a -lstdc++
+// #cgo linux,arm64 LDFLAGS: ${SRCDIR}/build/linux/arm64_static/libllama.a -lstdc++
+// #include <stdlib.h>
+// #include "llama.h"
+import "C"
+import (
+	"fmt"
+	"unsafe"
+)
+
+// SystemInfo is an unused example of calling llama.cpp functions using CGo
+func SystemInfo() string {
+	return C.GoString(C.llama_print_system_info())
+}
+
+func Quantize(infile, outfile string, ftype fileType) error {
+	cinfile := C.CString(infile)
+	defer C.free(unsafe.Pointer(cinfile))
+
+	coutfile := C.CString(outfile)
+	defer C.free(unsafe.Pointer(coutfile))
+
+	params := C.llama_model_quantize_default_params()
+	params.nthread = -1
+	params.ftype = ftype.Value()
+
+	if rc := C.llama_model_quantize(cinfile, coutfile, &params); rc != 0 {
+		return fmt.Errorf("llama_model_quantize: %d", rc)
+	}
+
+	return nil
+}
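The restored llm.Quantize is what CreateModel switches back to in the server hunk below. A hedged helper showing how it composes with ParseFileType from earlier in this diff; the helper itself is hypothetical, not part of the commit:

// quantizeBlob is a hypothetical helper: it parses a target file-type name
// and converts the input GGUF via the cgo-backed Quantize shown above
func quantizeBlob(in, out, target string) error {
	want, err := llm.ParseFileType(target)
	if err != nil {
		return err
	}
	return llm.Quantize(in, out, want)
}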
server package: imports and CreateModel (file path not shown on the mirror page)

@@ -26,7 +26,6 @@ import (
 	"github.com/ollama/ollama/auth"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/format"
-	"github.com/ollama/ollama/llama"
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/parser"
 	"github.com/ollama/ollama/template"

@@ -454,7 +453,7 @@ func CreateModel(ctx context.Context, name model.Name, modelFileDir, quantizatio
 	defer temp.Close()
 	defer os.Remove(temp.Name())
 
-	if err := llama.Quantize(blob, temp.Name(), want); err != nil {
+	if err := llm.Quantize(blob, temp.Name(), want); err != nil {
 		return err
 	}
 