Mirror of https://github.com/tcsenpai/ollama.git (synced 2025-06-06 19:25:21 +00:00)
fix cuda unpad kernel
commit ce6fee43ee
parent a795cbc934
llama/ggml-cuda/pad.cu (vendored): 4 changed lines
@@ -100,9 +100,9 @@ static __global__ void unpad_f32(const float * x, float * dst, const int ne0, co
 static void unpad_f32_cuda(const float * x, float * dst,
         const int ne00, const int ne01, const int ne02, const int ne03,
         const int ne0, const int ne1, const int ne2, const int ne3, cudaStream_t stream) {
-    int num_blocks = (ne0 + CUDA_unpad_BLOCK_SIZE - 1) / CUDA_unpad_BLOCK_SIZE;
+    int num_blocks = (ne0 + CUDA_PAD_BLOCK_SIZE - 1) / CUDA_PAD_BLOCK_SIZE;
     dim3 gridDim(num_blocks, ne1, ne2*ne3);
-    unpad_f32<<<gridDim, CUDA_unpad_BLOCK_SIZE, 0, stream>>>(x, dst, ne0, ne00, ne01, ne02, ne03);
+    unpad_f32<<<gridDim, CUDA_PAD_BLOCK_SIZE, 0, stream>>>(x, dst, ne0, ne00, ne01, ne02, ne03);
 }

 void ggml_cuda_op_unpad(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
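The change itself is just a constant rename in the launch configuration: the grid covers ne0 by ceiling division with the block size, maps ne1 to the y axis, and flattens ne2*ne3 onto the z axis. Below is a minimal sketch of that pattern, assuming CUDA_PAD_BLOCK_SIZE is 256 as in upstream ggml-cuda, and using a simplified stand-in kernel rather than the real unpad_f32:

```cuda
// Minimal sketch of the launch-configuration pattern the fix restores.
// Assumptions: CUDA_PAD_BLOCK_SIZE = 256 (as in upstream ggml-cuda);
// unpad_f32_sketch is a simplified stand-in, not the real unpad_f32.
#include <cuda_runtime.h>

#define CUDA_PAD_BLOCK_SIZE 256

// Copy the leading ne0 elements of each padded row (ne00 wide) into dst.
__global__ void unpad_f32_sketch(const float * x, float * dst,
                                 const int ne0, const int ne00) {
    const int i0 = blockIdx.x * blockDim.x + threadIdx.x; // index along dim 0
    const int i1 = blockIdx.y;                            // index along dim 1
    if (i0 >= ne0) {
        return; // grid is rounded up, so trailing threads must bail out
    }
    dst[i1 * ne0 + i0] = x[i1 * ne00 + i0];
}

static void unpad_f32_sketch_cuda(const float * x, float * dst,
                                  const int ne00,               // padded extent of dim 0
                                  const int ne0, const int ne1, // unpadded extents
                                  cudaStream_t stream) {
    // Ceiling division: enough CUDA_PAD_BLOCK_SIZE-thread blocks to cover ne0.
    const int num_blocks = (ne0 + CUDA_PAD_BLOCK_SIZE - 1) / CUDA_PAD_BLOCK_SIZE;
    dim3 grid_dim(num_blocks, ne1, 1);
    unpad_f32_sketch<<<grid_dim, CUDA_PAD_BLOCK_SIZE, 0, stream>>>(x, dst, ne0, ne00);
}
```

Because num_blocks is rounded up, the last block along dim 0 can launch up to CUDA_PAD_BLOCK_SIZE - 1 threads past ne0, which is why the kernel needs the early-return bound check.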
(second changed file: the vendored patch that adds the unpad operator; its filename is not shown in this view)

@@ -1,4 +1,4 @@
-From eb77f6e32425e9f18914f3dec66faf4129c30301 Mon Sep 17 00:00:00 2001
+From a749b1d9635ee48b25dba95a12fe032abf427ddc Mon Sep 17 00:00:00 2001
 From: Michael Yang <mxyng@pm.me>
 Date: Fri, 11 Oct 2024 16:19:43 -0700
 Subject: [PATCH] add unpad operator
@@ -64,7 +64,7 @@ index 8a844b02..7e4611fb 100644
         case GGML_OP_TIMESTEP_EMBEDDING:
         case GGML_OP_LEAKY_RELU:
 diff --git a/ggml/src/ggml-cuda/pad.cu b/ggml/src/ggml-cuda/pad.cu
-index aba539e8..3d4c4ca4 100644
+index aba539e8..39fd4b16 100644
 --- a/ggml/src/ggml-cuda/pad.cu
 +++ b/ggml/src/ggml-cuda/pad.cu
 @@ -47,3 +47,49 @@ void ggml_cuda_op_pad(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
@@ -98,9 +98,9 @@ index aba539e8..3d4c4ca4 100644
 +static void unpad_f32_cuda(const float * x, float * dst,
 +        const int ne00, const int ne01, const int ne02, const int ne03,
 +        const int ne0, const int ne1, const int ne2, const int ne3, cudaStream_t stream) {
-+    int num_blocks = (ne0 + CUDA_unpad_BLOCK_SIZE - 1) / CUDA_unpad_BLOCK_SIZE;
++    int num_blocks = (ne0 + CUDA_PAD_BLOCK_SIZE - 1) / CUDA_PAD_BLOCK_SIZE;
 +    dim3 gridDim(num_blocks, ne1, ne2*ne3);
-+    unpad_f32<<<gridDim, CUDA_unpad_BLOCK_SIZE, 0, stream>>>(x, dst, ne0, ne00, ne01, ne02, ne03);
++    unpad_f32<<<gridDim, CUDA_PAD_BLOCK_SIZE, 0, stream>>>(x, dst, ne0, ne00, ne01, ne02, ne03);
 +}
 +
 +void ggml_cuda_op_unpad(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
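The hunks above apply the same rename inside the vendored patch, keeping the patch consistent with the pad.cu it generates. As a hypothetical illustration (none of this code is from the repo) of why the old spelling presumably failed to build, assuming CUDA_unpad_BLOCK_SIZE was never defined anywhere while CUDA_PAD_BLOCK_SIZE is:

```cuda
// Hypothetical host-side illustration, not from the repo: only
// CUDA_PAD_BLOCK_SIZE exists, so any use of CUDA_unpad_BLOCK_SIZE is an
// undeclared identifier and the translation unit fails to compile.
#define CUDA_PAD_BLOCK_SIZE 256

int main(void) {
    const int ne0 = 1000;
    // Compiles: ceil(1000 / 256) == 4 blocks cover all 1000 elements.
    const int num_blocks = (ne0 + CUDA_PAD_BLOCK_SIZE - 1) / CUDA_PAD_BLOCK_SIZE;
    // Would not compile: CUDA_unpad_BLOCK_SIZE is not defined.
    // const int bad = (ne0 + CUDA_unpad_BLOCK_SIZE - 1) / CUDA_unpad_BLOCK_SIZE;
    return num_blocks == 4 ? 0 : 1;
}
```

Sharing CUDA_PAD_BLOCK_SIZE between pad and unpad also seems deliberate rather than a shortcut, since the two kernels do the same per-element copy work.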