Skip to content

Commit 6b86bcf

Browse files
committed
cuda : increase max block size to 1024
1 parent 62532c0 commit 6b86bcf

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

ggml-cuda.cu

+1-1
Original file line numberDiff line numberDiff line change
@@ -443,7 +443,7 @@ static_assert(sizeof(block_q6_K) == sizeof(ggml_fp16_t) + 13*QK_K/16, "wrong q6_
 #define CUDA_SCALE_BLOCK_SIZE 256
 #define CUDA_CLAMP_BLOCK_SIZE 256
 #define CUDA_ROPE_BLOCK_SIZE 256
-#define CUDA_SOFT_MAX_BLOCK_SIZE 512
+#define CUDA_SOFT_MAX_BLOCK_SIZE 1024
 #define CUDA_ALIBI_BLOCK_SIZE 32
 #define CUDA_DIAG_MASK_INF_BLOCK_SIZE 32
 #define CUDA_QUANTIZE_BLOCK_SIZE 256

0 commit comments

Comments
 (0)