Skip to content

Commit 40a34fe

Browse files
speculative : fix prompt tokenization in speculative example (#4025)
* Support special tokens and not adding BOS to prompt in speculative. * Adapt to the new should_add_bos function. * Ensure tgt and dft have the same add_bos setting.
1 parent dae06c0 commit 40a34fe

File tree

1 file changed

+15
-2
lines changed

1 file changed

+15
-2
lines changed

examples/speculative/speculative.cpp

+15-2
Original file line numberDiff line numberDiff line change
@@ -94,9 +94,22 @@ int main(int argc, char ** argv) {
9494
}
9595
}
9696

97-
// tokenize the prompt
97+
98+
// Tokenize the prompt
99+
const bool add_bos_tgt = llama_should_add_bos_token(model_tgt);
100+
LOG("add_bos tgt: %d\n", add_bos_tgt);
101+
102+
const bool add_bos_dft = llama_should_add_bos_token(model_dft);
103+
LOG("add_bos dft: %d\n", add_bos_dft);
104+
105+
if (add_bos_tgt != add_bos_dft) {
106+
fprintf(stderr, "%s: error: draft model add_bos must match target model to use speculation but ", __func__);
107+
fprintf(stderr, "add_bos_dft = %d while add_bos_tgt = %d\n", add_bos_dft, add_bos_tgt);
108+
return 1;
109+
}
110+
98111
std::vector<llama_token> inp;
99-
inp = ::llama_tokenize(ctx_tgt, params.prompt, true);
112+
inp = ::llama_tokenize(ctx_tgt, params.prompt, add_bos_tgt, true);
100113

101114
const int max_context_size = llama_n_ctx(ctx_tgt);
102115
const int max_tokens_list_size = max_context_size - 4;

0 commit comments

Comments (0)