mirror of
https://github.com/tcsenpai/ollama.git
synced 2025-06-06 19:25:21 +00:00
fix template / imageproc issues
This commit is contained in:
parent
a2d33ee390
commit
5486c57364
@ -42,7 +42,7 @@ func min(a, b int) int {
|
|||||||
return b
|
return b
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetImageSizeFitToCanvas(imageSize, canvasSize image.Point, tileSize int) image.Point {
|
func getImageSizeFitToCanvas(imageSize, canvasSize image.Point, tileSize int) image.Point {
|
||||||
targetWidth := clip(imageSize.X, tileSize, canvasSize.X)
|
targetWidth := clip(imageSize.X, tileSize, canvasSize.X)
|
||||||
targetHeight := clip(imageSize.Y, tileSize, canvasSize.Y)
|
targetHeight := clip(imageSize.Y, tileSize, canvasSize.Y)
|
||||||
|
|
||||||
@ -62,7 +62,7 @@ func GetImageSizeFitToCanvas(imageSize, canvasSize image.Point, tileSize int) im
|
|||||||
return image.Point{w, h}
|
return image.Point{w, h}
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetOptimalTiledCanvas(imageSize image.Point, maxImageTiles, tileSize int) image.Point {
|
func getOptimalTiledCanvas(imageSize image.Point, maxImageTiles, tileSize int) image.Point {
|
||||||
possibleTileArrangements := GetSupportedAspectRatios(maxImageTiles)
|
possibleTileArrangements := GetSupportedAspectRatios(maxImageTiles)
|
||||||
possibleCanvasSizes := []image.Point{}
|
possibleCanvasSizes := []image.Point{}
|
||||||
for _, pta := range possibleTileArrangements {
|
for _, pta := range possibleTileArrangements {
|
||||||
@ -104,11 +104,13 @@ func GetOptimalTiledCanvas(imageSize image.Point, maxImageTiles, tileSize int) i
|
|||||||
selectedScale = minUpscale
|
selectedScale = minUpscale
|
||||||
}
|
}
|
||||||
|
|
||||||
selectedCanvas := possibleCanvasSizes[0]
|
var selectedCanvas image.Point
|
||||||
for n, pcs := range possibleCanvasSizes {
|
for n, pcs := range possibleCanvasSizes {
|
||||||
if scales[n] == selectedScale {
|
if scales[n] == selectedScale {
|
||||||
// choose the largest possible canvas
|
// choose the smallest possible canvas
|
||||||
if pcs.X*pcs.Y > selectedCanvas.X*selectedCanvas.Y {
|
if selectedCanvas.X == 0 && selectedCanvas.Y == 0 {
|
||||||
|
selectedCanvas = pcs
|
||||||
|
} else if pcs.X*pcs.Y < selectedCanvas.X*selectedCanvas.Y {
|
||||||
selectedCanvas = pcs
|
selectedCanvas = pcs
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -116,7 +118,7 @@ func GetOptimalTiledCanvas(imageSize image.Point, maxImageTiles, tileSize int) i
|
|||||||
return selectedCanvas
|
return selectedCanvas
|
||||||
}
|
}
|
||||||
|
|
||||||
func SplitToTiles(img image.Image, numTilesSize image.Point) []image.Image {
|
func splitToTiles(img image.Image, numTilesSize image.Point) []image.Image {
|
||||||
b := img.Bounds()
|
b := img.Bounds()
|
||||||
width := b.Max.X - b.Min.X
|
width := b.Max.X - b.Min.X
|
||||||
height := b.Max.Y - b.Min.Y
|
height := b.Max.Y - b.Min.Y
|
||||||
@ -141,10 +143,9 @@ func ResizeImage(img image.Image, outputSize image.Point, maxImageTiles int) (im
|
|||||||
b := img.Bounds()
|
b := img.Bounds()
|
||||||
tileSize := outputSize.Y
|
tileSize := outputSize.Y
|
||||||
|
|
||||||
canvasSize := GetOptimalTiledCanvas(b.Max, maxImageTiles, tileSize)
|
canvasSize := getOptimalTiledCanvas(b.Max, maxImageTiles, tileSize)
|
||||||
aspectRatio := image.Point{canvasSize.X / tileSize, canvasSize.Y / tileSize}
|
aspectRatio := image.Point{canvasSize.X / tileSize, canvasSize.Y / tileSize}
|
||||||
|
newSize := getImageSizeFitToCanvas(b.Max, canvasSize, tileSize)
|
||||||
newSize := GetImageSizeFitToCanvas(b.Max, canvasSize, tileSize)
|
|
||||||
|
|
||||||
dst := image.NewRGBA(image.Rect(0, 0, newSize.X, newSize.Y))
|
dst := image.NewRGBA(image.Rect(0, 0, newSize.X, newSize.Y))
|
||||||
draw.ApproxBiLinear.Scale(dst, dst.Rect, img, b, draw.Over, nil)
|
draw.ApproxBiLinear.Scale(dst, dst.Rect, img, b, draw.Over, nil)
|
||||||
@ -165,7 +166,7 @@ func PadImage(img image.Image, outputSize, aspectRatio image.Point) image.Image
|
|||||||
}
|
}
|
||||||
|
|
||||||
func PackImages(img image.Image, aspectRatio image.Point, mean, std [3]float32) []float32 {
|
func PackImages(img image.Image, aspectRatio image.Point, mean, std [3]float32) []float32 {
|
||||||
subImages := SplitToTiles(img, aspectRatio)
|
subImages := splitToTiles(img, aspectRatio)
|
||||||
|
|
||||||
var pixelVals []float32
|
var pixelVals []float32
|
||||||
|
|
||||||
@ -218,8 +219,6 @@ func Preprocess(imageData []byte) ([]float32, int, error) {
|
|||||||
newImage, aspectRatio := ResizeImage(img, outputSize, maxTiles)
|
newImage, aspectRatio := ResizeImage(img, outputSize, maxTiles)
|
||||||
newImage = PadImage(newImage, outputSize, aspectRatio)
|
newImage = PadImage(newImage, outputSize, aspectRatio)
|
||||||
|
|
||||||
// todo: need to scale (dim) by 1/256
|
|
||||||
|
|
||||||
data := PackImages(newImage, aspectRatio, mean, std)
|
data := PackImages(newImage, aspectRatio, mean, std)
|
||||||
supportedRatios := GetSupportedAspectRatios(maxTiles)
|
supportedRatios := GetSupportedAspectRatios(maxTiles)
|
||||||
var aspectRatioIndex int
|
var aspectRatioIndex int
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
package imageproc
|
package imageproc
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"image"
|
"image"
|
||||||
|
"image/png"
|
||||||
"reflect"
|
"reflect"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
@ -27,12 +29,12 @@ func testEq(a, b any) bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestAspectRatios(t *testing.T) {
|
func TestAspectRatios(t *testing.T) {
|
||||||
type AspectCase struct {
|
type aspectCase struct {
|
||||||
MaxTiles int
|
MaxTiles int
|
||||||
Expected []image.Point
|
Expected []image.Point
|
||||||
}
|
}
|
||||||
|
|
||||||
cases := []AspectCase{
|
cases := []aspectCase{
|
||||||
{
|
{
|
||||||
MaxTiles: 1,
|
MaxTiles: 1,
|
||||||
Expected: []image.Point{{1, 1}},
|
Expected: []image.Point{{1, 1}},
|
||||||
@ -61,14 +63,14 @@ func TestAspectRatios(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestGetImageSizeFitToCanvas(t *testing.T) {
|
func TestGetImageSizeFitToCanvas(t *testing.T) {
|
||||||
type ImageSizeCase struct {
|
type imageSizeCase struct {
|
||||||
ImageRect image.Point
|
ImageRect image.Point
|
||||||
CanvasRect image.Point
|
CanvasRect image.Point
|
||||||
TileSize int
|
TileSize int
|
||||||
Expected image.Point
|
Expected image.Point
|
||||||
}
|
}
|
||||||
|
|
||||||
cases := []ImageSizeCase{
|
cases := []imageSizeCase{
|
||||||
{
|
{
|
||||||
ImageRect: image.Point{400, 400},
|
ImageRect: image.Point{400, 400},
|
||||||
CanvasRect: image.Point{640, 480},
|
CanvasRect: image.Point{640, 480},
|
||||||
@ -108,7 +110,7 @@ func TestGetImageSizeFitToCanvas(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
actual := GetImageSizeFitToCanvas(c.ImageRect, c.CanvasRect, c.TileSize)
|
actual := getImageSizeFitToCanvas(c.ImageRect, c.CanvasRect, c.TileSize)
|
||||||
|
|
||||||
if actual != c.Expected {
|
if actual != c.Expected {
|
||||||
t.Errorf("incorrect image rect: '%#v'. expected: '%#v'", actual, c.Expected)
|
t.Errorf("incorrect image rect: '%#v'. expected: '%#v'", actual, c.Expected)
|
||||||
@ -117,19 +119,19 @@ func TestGetImageSizeFitToCanvas(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestGetOptimalTiledCanvas(t *testing.T) {
|
func TestGetOptimalTiledCanvas(t *testing.T) {
|
||||||
type TiledCanvasSizeCase struct {
|
type tiledCanvasSizeCase struct {
|
||||||
ImageSize image.Point
|
ImageSize image.Point
|
||||||
MaxImageTiles int
|
MaxImageTiles int
|
||||||
TileSize int
|
TileSize int
|
||||||
Expected image.Point
|
Expected image.Point
|
||||||
}
|
}
|
||||||
|
|
||||||
cases := []TiledCanvasSizeCase{
|
cases := []tiledCanvasSizeCase{
|
||||||
{
|
{
|
||||||
ImageSize: image.Point{1024, 768},
|
ImageSize: image.Point{1024, 768},
|
||||||
MaxImageTiles: 4,
|
MaxImageTiles: 4,
|
||||||
TileSize: 1000,
|
TileSize: 1000,
|
||||||
Expected: image.Point{4000, 1000},
|
Expected: image.Point{2000, 1000},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ImageSize: image.Point{1024, 768},
|
ImageSize: image.Point{1024, 768},
|
||||||
@ -140,7 +142,7 @@ func TestGetOptimalTiledCanvas(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
actual := GetOptimalTiledCanvas(c.ImageSize, c.MaxImageTiles, c.TileSize)
|
actual := getOptimalTiledCanvas(c.ImageSize, c.MaxImageTiles, c.TileSize)
|
||||||
|
|
||||||
if actual != c.Expected {
|
if actual != c.Expected {
|
||||||
t.Errorf("incorrect tiled canvas: '%#v'. expected: '%#v'", actual, c.Expected)
|
t.Errorf("incorrect tiled canvas: '%#v'. expected: '%#v'", actual, c.Expected)
|
||||||
@ -149,13 +151,13 @@ func TestGetOptimalTiledCanvas(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestSplitToTiles(t *testing.T) {
|
func TestSplitToTiles(t *testing.T) {
|
||||||
type SplitCase struct {
|
type splitCase struct {
|
||||||
TestImage image.Image
|
TestImage image.Image
|
||||||
NumTilesSize image.Point
|
NumTilesSize image.Point
|
||||||
Expected []image.Image
|
Expected []image.Image
|
||||||
}
|
}
|
||||||
|
|
||||||
cases := []SplitCase{
|
cases := []splitCase{
|
||||||
{
|
{
|
||||||
TestImage: image.NewRGBA(image.Rect(0, 0, 1024, 768)),
|
TestImage: image.NewRGBA(image.Rect(0, 0, 1024, 768)),
|
||||||
NumTilesSize: image.Point{1, 1},
|
NumTilesSize: image.Point{1, 1},
|
||||||
@ -182,7 +184,7 @@ func TestSplitToTiles(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
actual := SplitToTiles(c.TestImage, c.NumTilesSize)
|
actual := splitToTiles(c.TestImage, c.NumTilesSize)
|
||||||
|
|
||||||
if len(actual) != len(c.Expected) {
|
if len(actual) != len(c.Expected) {
|
||||||
t.Errorf("incorrect number of images '%d': expected: '%d'", len(actual), len(c.Expected))
|
t.Errorf("incorrect number of images '%d': expected: '%d'", len(actual), len(c.Expected))
|
||||||
@ -197,7 +199,7 @@ func TestSplitToTiles(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestResize(t *testing.T) {
|
func TestResize(t *testing.T) {
|
||||||
type ResizeCase struct {
|
type resizeCase struct {
|
||||||
TestImage image.Image
|
TestImage image.Image
|
||||||
OutputSize image.Point
|
OutputSize image.Point
|
||||||
MaxImageTiles int
|
MaxImageTiles int
|
||||||
@ -205,7 +207,7 @@ func TestResize(t *testing.T) {
|
|||||||
ExpectedAspectRatio image.Point
|
ExpectedAspectRatio image.Point
|
||||||
}
|
}
|
||||||
|
|
||||||
cases := []ResizeCase{
|
cases := []resizeCase{
|
||||||
{
|
{
|
||||||
TestImage: image.NewRGBA(image.Rect(0, 0, 200, 200)),
|
TestImage: image.NewRGBA(image.Rect(0, 0, 200, 200)),
|
||||||
OutputSize: image.Point{100, 100},
|
OutputSize: image.Point{100, 100},
|
||||||
@ -218,7 +220,14 @@ func TestResize(t *testing.T) {
|
|||||||
OutputSize: image.Point{100, 100},
|
OutputSize: image.Point{100, 100},
|
||||||
MaxImageTiles: 2,
|
MaxImageTiles: 2,
|
||||||
ExpectedImage: image.NewRGBA(image.Rect(0, 0, 100, 100)),
|
ExpectedImage: image.NewRGBA(image.Rect(0, 0, 100, 100)),
|
||||||
ExpectedAspectRatio: image.Point{1, 2},
|
ExpectedAspectRatio: image.Point{1, 1},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
TestImage: image.NewRGBA(image.Rect(0, 0, 10, 10)),
|
||||||
|
OutputSize: image.Point{560, 560},
|
||||||
|
MaxImageTiles: 4,
|
||||||
|
ExpectedImage: image.NewRGBA(image.Rect(0, 0, 560, 560)),
|
||||||
|
ExpectedAspectRatio: image.Point{1, 1},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
TestImage: image.NewRGBA(image.Rect(0, 0, 2560, 1920)),
|
TestImage: image.NewRGBA(image.Rect(0, 0, 2560, 1920)),
|
||||||
@ -244,20 +253,20 @@ func TestResize(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if actualAspectRatio != c.ExpectedAspectRatio {
|
if actualAspectRatio != c.ExpectedAspectRatio {
|
||||||
t.Errorf("canvas size incorrect: '%#v': expected: '%#v'", actualAspectRatio, c.ExpectedAspectRatio)
|
t.Errorf("aspect ratio incorrect: '%#v': expected: '%#v'", actualAspectRatio, c.ExpectedAspectRatio)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestPad(t *testing.T) {
|
func TestPad(t *testing.T) {
|
||||||
type PadCase struct {
|
type padCase struct {
|
||||||
TestImage image.Image
|
TestImage image.Image
|
||||||
OutputSize image.Point
|
OutputSize image.Point
|
||||||
AspectRatio image.Point
|
AspectRatio image.Point
|
||||||
Expected image.Image
|
Expected image.Image
|
||||||
}
|
}
|
||||||
|
|
||||||
cases := []PadCase{
|
cases := []padCase{
|
||||||
{
|
{
|
||||||
TestImage: image.NewRGBA(image.Rect(0, 0, 1000, 667)),
|
TestImage: image.NewRGBA(image.Rect(0, 0, 1000, 667)),
|
||||||
OutputSize: image.Point{560, 560},
|
OutputSize: image.Point{560, 560},
|
||||||
@ -276,30 +285,79 @@ func TestPad(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestPackImages(t *testing.T) {
|
func TestPackImages(t *testing.T) {
|
||||||
type PackCase struct {
|
type packCase struct {
|
||||||
TestImage image.Image
|
TestImage image.Image
|
||||||
AspectRatio image.Point
|
AspectRatio image.Point
|
||||||
|
ExpectedVals int
|
||||||
}
|
}
|
||||||
|
|
||||||
mean := [3]float32{0.48145466, 0.4578275, 0.40821073}
|
mean := [3]float32{0.48145466, 0.4578275, 0.40821073}
|
||||||
std := [3]float32{0.26862954, 0.26130258, 0.27577711}
|
std := [3]float32{0.26862954, 0.26130258, 0.27577711}
|
||||||
|
|
||||||
cases := []PackCase{
|
cases := []packCase{
|
||||||
{
|
{
|
||||||
TestImage: image.NewRGBA(image.Rect(0, 0, 1120, 1120)),
|
TestImage: image.NewRGBA(image.Rect(0, 0, 1120, 1120)),
|
||||||
AspectRatio: image.Point{2, 2},
|
AspectRatio: image.Point{2, 2},
|
||||||
|
ExpectedVals: 2 * 2 * 3 * 560 * 560,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
TestImage: image.NewRGBA(image.Rect(0, 0, 560, 560)),
|
TestImage: image.NewRGBA(image.Rect(0, 0, 560, 560)),
|
||||||
AspectRatio: image.Point{1, 1},
|
AspectRatio: image.Point{1, 1},
|
||||||
|
ExpectedVals: 1 * 1 * 3 * 560 * 560,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
TestImage: image.NewRGBA(image.Rect(0, 0, 1120, 560)),
|
TestImage: image.NewRGBA(image.Rect(0, 0, 1120, 560)),
|
||||||
AspectRatio: image.Point{1, 2},
|
AspectRatio: image.Point{1, 2},
|
||||||
|
ExpectedVals: 1 * 2 * 3 * 560 * 560,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
PackImages(c.TestImage, c.AspectRatio, mean, std)
|
actualVals := PackImages(c.TestImage, c.AspectRatio, mean, std)
|
||||||
|
if len(actualVals) != c.ExpectedVals {
|
||||||
|
t.Errorf("packed image size incorrect: '%d': expected: '%d'", len(actualVals), c.ExpectedVals)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestPreprocess(t *testing.T) {
|
||||||
|
type preprocessCase struct {
|
||||||
|
TestImage image.Image
|
||||||
|
ExpectedVals int
|
||||||
|
ExpectedAspectRatioID int
|
||||||
|
}
|
||||||
|
|
||||||
|
cases := []preprocessCase{
|
||||||
|
{
|
||||||
|
TestImage: image.NewRGBA(image.Rect(0, 0, 10, 10)),
|
||||||
|
ExpectedVals: 0,
|
||||||
|
ExpectedAspectRatioID: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
TestImage: image.NewRGBA(image.Rect(0, 0, 1024, 768)),
|
||||||
|
ExpectedVals: 0,
|
||||||
|
ExpectedAspectRatioID: 6,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, c := range cases {
|
||||||
|
var buf bytes.Buffer
|
||||||
|
err := png.Encode(&buf, c.TestImage)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
imgData, aspectRatioID, err := Preprocess(buf.Bytes())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error processing: %q", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(imgData) == 0 {
|
||||||
|
t.Errorf("no image data returned")
|
||||||
|
}
|
||||||
|
|
||||||
|
if aspectRatioID != c.ExpectedAspectRatioID {
|
||||||
|
t.Errorf("aspect ratio incorrect: '%d': expected: '%d'", aspectRatioID, c.ExpectedAspectRatioID)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3,6 +3,8 @@ package server
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
|
"image"
|
||||||
|
"image/png"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/google/go-cmp/cmp"
|
"github.com/google/go-cmp/cmp"
|
||||||
@ -13,18 +15,41 @@ import (
|
|||||||
|
|
||||||
func TestChatPrompt(t *testing.T) {
|
func TestChatPrompt(t *testing.T) {
|
||||||
type expect struct {
|
type expect struct {
|
||||||
prompt string
|
prompt string
|
||||||
images [][]byte
|
images [][]byte
|
||||||
|
aspectRatioID int
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tmpl, err := template.Parse(`
|
||||||
|
{{- if .System }}{{ .System }} {{ end }}
|
||||||
|
{{- if .Prompt }}{{ .Prompt }} {{ end }}
|
||||||
|
{{- if .Response }}{{ .Response }} {{ end }}`)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
visionModel := Model{Template: tmpl, ProjectorPaths: []string{"vision"}}
|
||||||
|
mllamaModel := Model{Template: tmpl, ProjectorPaths: []string{"vision"}, Config: ConfigV2{ModelFamilies: []string{"mllama"}}}
|
||||||
|
|
||||||
|
img := image.NewRGBA(image.Rect(0, 0, 5, 5))
|
||||||
|
var buf bytes.Buffer
|
||||||
|
|
||||||
|
err = png.Encode(&buf, img)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
imgBuf := buf.Bytes()
|
||||||
|
|
||||||
cases := []struct {
|
cases := []struct {
|
||||||
name string
|
name string
|
||||||
|
model Model
|
||||||
limit int
|
limit int
|
||||||
msgs []api.Message
|
msgs []api.Message
|
||||||
expect
|
expect
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
name: "messages",
|
name: "messages",
|
||||||
|
model: visionModel,
|
||||||
limit: 64,
|
limit: 64,
|
||||||
msgs: []api.Message{
|
msgs: []api.Message{
|
||||||
{Role: "user", Content: "You're a test, Harry!"},
|
{Role: "user", Content: "You're a test, Harry!"},
|
||||||
@ -37,6 +62,7 @@ func TestChatPrompt(t *testing.T) {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "truncate messages",
|
name: "truncate messages",
|
||||||
|
model: visionModel,
|
||||||
limit: 1,
|
limit: 1,
|
||||||
msgs: []api.Message{
|
msgs: []api.Message{
|
||||||
{Role: "user", Content: "You're a test, Harry!"},
|
{Role: "user", Content: "You're a test, Harry!"},
|
||||||
@ -49,6 +75,7 @@ func TestChatPrompt(t *testing.T) {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "truncate messages with image",
|
name: "truncate messages with image",
|
||||||
|
model: visionModel,
|
||||||
limit: 64,
|
limit: 64,
|
||||||
msgs: []api.Message{
|
msgs: []api.Message{
|
||||||
{Role: "user", Content: "You're a test, Harry!"},
|
{Role: "user", Content: "You're a test, Harry!"},
|
||||||
@ -64,6 +91,7 @@ func TestChatPrompt(t *testing.T) {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "truncate messages with images",
|
name: "truncate messages with images",
|
||||||
|
model: visionModel,
|
||||||
limit: 64,
|
limit: 64,
|
||||||
msgs: []api.Message{
|
msgs: []api.Message{
|
||||||
{Role: "user", Content: "You're a test, Harry!", Images: []api.ImageData{[]byte("something")}},
|
{Role: "user", Content: "You're a test, Harry!", Images: []api.ImageData{[]byte("something")}},
|
||||||
@ -79,6 +107,7 @@ func TestChatPrompt(t *testing.T) {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "messages with images",
|
name: "messages with images",
|
||||||
|
model: visionModel,
|
||||||
limit: 2048,
|
limit: 2048,
|
||||||
msgs: []api.Message{
|
msgs: []api.Message{
|
||||||
{Role: "user", Content: "You're a test, Harry!", Images: []api.ImageData{[]byte("something")}},
|
{Role: "user", Content: "You're a test, Harry!", Images: []api.ImageData{[]byte("something")}},
|
||||||
@ -95,6 +124,7 @@ func TestChatPrompt(t *testing.T) {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "message with image tag",
|
name: "message with image tag",
|
||||||
|
model: visionModel,
|
||||||
limit: 2048,
|
limit: 2048,
|
||||||
msgs: []api.Message{
|
msgs: []api.Message{
|
||||||
{Role: "user", Content: "You're a test, Harry! [img]", Images: []api.ImageData{[]byte("something")}},
|
{Role: "user", Content: "You're a test, Harry! [img]", Images: []api.ImageData{[]byte("something")}},
|
||||||
@ -111,6 +141,7 @@ func TestChatPrompt(t *testing.T) {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "messages with interleaved images",
|
name: "messages with interleaved images",
|
||||||
|
model: visionModel,
|
||||||
limit: 2048,
|
limit: 2048,
|
||||||
msgs: []api.Message{
|
msgs: []api.Message{
|
||||||
{Role: "user", Content: "You're a test, Harry!"},
|
{Role: "user", Content: "You're a test, Harry!"},
|
||||||
@ -129,6 +160,7 @@ func TestChatPrompt(t *testing.T) {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "truncate message with interleaved images",
|
name: "truncate message with interleaved images",
|
||||||
|
model: visionModel,
|
||||||
limit: 1024,
|
limit: 1024,
|
||||||
msgs: []api.Message{
|
msgs: []api.Message{
|
||||||
{Role: "user", Content: "You're a test, Harry!"},
|
{Role: "user", Content: "You're a test, Harry!"},
|
||||||
@ -146,6 +178,7 @@ func TestChatPrompt(t *testing.T) {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "message with system prompt",
|
name: "message with system prompt",
|
||||||
|
model: visionModel,
|
||||||
limit: 2048,
|
limit: 2048,
|
||||||
msgs: []api.Message{
|
msgs: []api.Message{
|
||||||
{Role: "system", Content: "You are the Test Who Lived."},
|
{Role: "system", Content: "You are the Test Who Lived."},
|
||||||
@ -159,6 +192,7 @@ func TestChatPrompt(t *testing.T) {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "out of order system",
|
name: "out of order system",
|
||||||
|
model: visionModel,
|
||||||
limit: 2048,
|
limit: 2048,
|
||||||
msgs: []api.Message{
|
msgs: []api.Message{
|
||||||
{Role: "user", Content: "You're a test, Harry!"},
|
{Role: "user", Content: "You're a test, Harry!"},
|
||||||
@ -170,19 +204,39 @@ func TestChatPrompt(t *testing.T) {
|
|||||||
prompt: "You're a test, Harry! I-I'm a what? You are the Test Who Lived. A test. And a thumping good one at that, I'd wager. ",
|
prompt: "You're a test, Harry! I-I'm a what? You are the Test Who Lived. A test. And a thumping good one at that, I'd wager. ",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
{
|
||||||
|
name: "messages with mllama (no images)",
|
||||||
tmpl, err := template.Parse(`
|
model: mllamaModel,
|
||||||
{{- if .System }}{{ .System }} {{ end }}
|
limit: 2048,
|
||||||
{{- if .Prompt }}{{ .Prompt }} {{ end }}
|
msgs: []api.Message{
|
||||||
{{- if .Response }}{{ .Response }} {{ end }}`)
|
{Role: "user", Content: "You're a test, Harry!"},
|
||||||
if err != nil {
|
{Role: "assistant", Content: "I-I'm a what?"},
|
||||||
t.Fatal(err)
|
{Role: "user", Content: "A test. And a thumping good one at that, I'd wager."},
|
||||||
|
},
|
||||||
|
expect: expect{
|
||||||
|
prompt: "You're a test, Harry! I-I'm a what? A test. And a thumping good one at that, I'd wager. ",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "messages with mllama",
|
||||||
|
model: mllamaModel,
|
||||||
|
limit: 2048,
|
||||||
|
msgs: []api.Message{
|
||||||
|
{Role: "user", Content: "You're a test, Harry!"},
|
||||||
|
{Role: "assistant", Content: "I-I'm a what?"},
|
||||||
|
{Role: "user", Content: "A test. And a thumping good one at that, I'd wager.", Images: []api.ImageData{imgBuf}},
|
||||||
|
},
|
||||||
|
expect: expect{
|
||||||
|
prompt: "You're a test, Harry! I-I'm a what? <|image|>A test. And a thumping good one at that, I'd wager. ",
|
||||||
|
images: [][]byte{imgBuf},
|
||||||
|
aspectRatioID: 1,
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tt := range cases {
|
for _, tt := range cases {
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
model := Model{Template: tmpl, ProjectorPaths: []string{"vision"}}
|
model := tt.model
|
||||||
opts := api.Options{Runner: api.Runner{NumCtx: tt.limit}}
|
opts := api.Options{Runner: api.Runner{NumCtx: tt.limit}}
|
||||||
prompt, images, err := chatPrompt(context.TODO(), &model, mockRunner{}.Tokenize, &opts, tt.msgs, nil)
|
prompt, images, err := chatPrompt(context.TODO(), &model, mockRunner{}.Tokenize, &opts, tt.msgs, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -202,8 +256,14 @@ func TestChatPrompt(t *testing.T) {
|
|||||||
t.Errorf("expected ID %d, got %d", i, images[i].ID)
|
t.Errorf("expected ID %d, got %d", i, images[i].ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
if !bytes.Equal(images[i].Data, tt.images[i]) {
|
if len(model.Config.ModelFamilies) == 0 {
|
||||||
t.Errorf("expected %q, got %q", tt.images[i], images[i].Data)
|
if !bytes.Equal(images[i].Data, tt.images[i]) {
|
||||||
|
t.Errorf("expected %q, got %q", tt.images[i], images[i].Data)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if images[i].AspectRatioID != tt.aspectRatioID {
|
||||||
|
t.Errorf("expected aspect ratio %d, got %d", tt.aspectRatioID, images[i].AspectRatioID)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
@ -317,45 +317,6 @@ What is your name?<|im_end|>
|
|||||||
<|im_start|>assistant
|
<|im_start|>assistant
|
||||||
`,
|
`,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"moondream",
|
|
||||||
[]template{
|
|
||||||
// this does not have a "no response" test because it's impossible to render the same output
|
|
||||||
{"response", `{{ if .Prompt }}Question: {{ .Prompt }}
|
|
||||||
|
|
||||||
{{ end }}Answer: {{ .Response }}
|
|
||||||
|
|
||||||
`},
|
|
||||||
{"messages", `
|
|
||||||
{{- range .Messages }}
|
|
||||||
{{- if eq .Role "user" }}Question: {{ .Content }}
|
|
||||||
|
|
||||||
{{ else if eq .Role "assistant" }}Answer: {{ .Content }}
|
|
||||||
|
|
||||||
{{ end }}
|
|
||||||
{{- end }}Answer: `},
|
|
||||||
},
|
|
||||||
Values{
|
|
||||||
Messages: []api.Message{
|
|
||||||
{Role: "user", Content: "What's in this image?", Images: []api.ImageData{[]byte("")}},
|
|
||||||
{Role: "assistant", Content: "It's a hot dog."},
|
|
||||||
{Role: "user", Content: "What's in _this_ image?"},
|
|
||||||
{Role: "user", Images: []api.ImageData{[]byte("")}},
|
|
||||||
{Role: "user", Content: "Is it a hot dog?"},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
`Question: [img-0] What's in this image?
|
|
||||||
|
|
||||||
Answer: It's a hot dog.
|
|
||||||
|
|
||||||
Question: What's in _this_ image?
|
|
||||||
|
|
||||||
[img-1]
|
|
||||||
|
|
||||||
Is it a hot dog?
|
|
||||||
|
|
||||||
Answer: `,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tt := range cases {
|
for _, tt := range cases {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user