@@ -4,22 +4,21 @@
 #include "llama-mmap.h"
 #include "llama-model.h"
 
-#include <algorithm>
 #include <map>
 #include <cassert>
 #include <stdexcept>
 
 // vec
 
-struct ggml_tensor * llama_adapter_cvec::tensor_for(int il) const {
+ggml_tensor * llama_adapter_cvec::tensor_for(int il) const {
     if (il < 0 || il < layer_start || il > layer_end || (size_t) il >= tensors.size()) {
         return nullptr;
     }
 
     return tensors[il];
 }
 
-struct ggml_tensor * llama_adapter_cvec::apply_to(struct ggml_context * ctx, struct ggml_tensor * cur, int il) const {
+ggml_tensor * llama_adapter_cvec::apply_to(ggml_context * ctx, ggml_tensor * cur, int il) const {
     ggml_tensor * layer_dir = tensor_for(il);
     if (layer_dir != nullptr) {
         cur = ggml_add(ctx, cur, layer_dir);
@@ -40,7 +39,7 @@ bool llama_adapter_cvec::init(const llama_model & model) {
     auto ctx_for_buft = [&](ggml_backend_buffer_type_t buft) -> ggml_context * {
         auto it = ctx_map.find(buft);
         if (it == ctx_map.end()) {
-            struct ggml_init_params params = {
+            ggml_init_params params = {
                 /*.mem_size   =*/ hparams.n_layer*ggml_tensor_overhead(),
                 /*.mem_buffer =*/ NULL,
                 /*.no_alloc   =*/ true,
@@ -135,7 +134,7 @@ bool llama_adapter_cvec::apply(
 
 // lora
 
-llama_adapter_lora_weight * llama_adapter_lora::get_weight(struct ggml_tensor * w) {
+llama_adapter_lora_weight * llama_adapter_lora::get_weight(ggml_tensor * w) {
     const std::string name(w->name);
 
     const auto pos = ab_map.find(name);
@@ -146,11 +145,11 @@ llama_adapter_lora_weight * llama_adapter_lora::get_weight(struct ggml_tensor *
     return nullptr;
 }
 
-static void llama_adapter_lora_init_impl(struct llama_model & model, const char * path_lora, struct llama_adapter_lora & adapter) {
+static void llama_adapter_lora_init_impl(llama_model & model, const char * path_lora, llama_adapter_lora & adapter) {
     LLAMA_LOG_INFO("%s: loading lora adapter from '%s' ...\n", __func__, path_lora);
 
     ggml_context * ctx_init;
-    struct gguf_init_params meta_gguf_params = {
+    gguf_init_params meta_gguf_params = {
         /* .no_alloc = */ true,
         /* .ctx      = */ &ctx_init,
     };
@@ -201,7 +200,7 @@ static void llama_adapter_lora_init_impl(struct llama_model & model, const char
         auto it = ctx_map.find(buft);
         if (it == ctx_map.end()) {
             // add a new context
-            struct ggml_init_params params = {
+            ggml_init_params params = {
                 /*.mem_size   =*/ n_tensors*ggml_tensor_overhead(),
                 /*.mem_buffer =*/ NULL,
                 /*.no_alloc   =*/ true,
@@ -264,7 +263,7 @@ static void llama_adapter_lora_init_impl(struct llama_model & model, const char
             throw std::runtime_error("LoRA tensor '" + name + "' does not exist in base model (hint: maybe wrong base model?)");
         }
 
-        struct ggml_context * dev_ctx = ctx_for_buft(ggml_backend_buffer_get_type(model_tensor->buffer));
+        ggml_context * dev_ctx = ctx_for_buft(ggml_backend_buffer_get_type(model_tensor->buffer));
         // validate tensor shape
         if (is_token_embd) {
             // expect B to be non-transposed, A and B are flipped; see llm_build_inp_embd()
@@ -281,8 +280,8 @@ static void llama_adapter_lora_init_impl(struct llama_model & model, const char
         }
 
         // save tensor to adapter
-        struct ggml_tensor * tensor_a = ggml_dup_tensor(dev_ctx, w.a);
-        struct ggml_tensor * tensor_b = ggml_dup_tensor(dev_ctx, w.b);
+        ggml_tensor * tensor_a = ggml_dup_tensor(dev_ctx, w.a);
+        ggml_tensor * tensor_b = ggml_dup_tensor(dev_ctx, w.b);
         ggml_set_name(tensor_a, w.a->name);
         ggml_set_name(tensor_b, w.b->name);
         adapter.ab_map[name] = llama_adapter_lora_weight(tensor_a, tensor_b);
@@ -308,7 +307,7 @@ static void llama_adapter_lora_init_impl(struct llama_model & model, const char
     {
         llama_file gguf_file(path_lora, "rb");
         std::vector<uint8_t> read_buf;
-        auto set_tensor = [&](struct ggml_tensor * orig, struct ggml_tensor * dev) {
+        auto set_tensor = [&](ggml_tensor * orig, ggml_tensor * dev) {
            size_t offs = gguf_get_data_offset(ctx_gguf.get()) + gguf_get_tensor_offset(ctx_gguf.get(), gguf_find_tensor(ctx_gguf.get(), orig->name));
            size_t size = ggml_nbytes(orig);
            read_buf.resize(size);
@@ -327,8 +326,8 @@ static void llama_adapter_lora_init_impl(struct llama_model & model, const char
     LLAMA_LOG_INFO("%s: loaded %zu tensors from lora file\n", __func__, adapter.ab_map.size()*2);
 }
 
-struct llama_adapter_lora * llama_adapter_lora_init(struct llama_model * model, const char * path_lora) {
-    struct llama_adapter_lora * adapter = new llama_adapter_lora();
+llama_adapter_lora * llama_adapter_lora_init(llama_model * model, const char * path_lora) {
+    llama_adapter_lora * adapter = new llama_adapter_lora();
 
     try {
         llama_adapter_lora_init_impl(*model, path_lora, *adapter);
@@ -342,6 +341,6 @@ struct llama_adapter_lora * llama_adapter_lora_init(struct llama_model * model,
     return nullptr;
 }
 
-void llama_adapter_lora_free(struct llama_adapter_lora * adapter) {
+void llama_adapter_lora_free(llama_adapter_lora * adapter) {
     delete adapter;
 }
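For context, a minimal usage sketch of the two public entry points touched above, llama_adapter_lora_init and llama_adapter_lora_free. It is not part of the diff; the backend and model-loading calls (llama_backend_init, llama_model_default_params, llama_model_load_from_file, llama_model_free, llama_backend_free) are assumed from the public llama.h of the same tree.

// Illustrative sketch only: load a base model, attach a LoRA adapter, release both.
#include "llama.h"

#include <cstdio>

int main(int argc, char ** argv) {
    if (argc < 3) {
        fprintf(stderr, "usage: %s <model.gguf> <lora.gguf>\n", argv[0]);
        return 1;
    }

    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_model_load_from_file(argv[1], mparams);
    if (model == nullptr) {
        return 1;
    }

    // returns nullptr (and logs the error) if the file is not a valid LoRA adapter
    // or does not match the base model
    llama_adapter_lora * adapter = llama_adapter_lora_init(model, argv[2]);
    if (adapter == nullptr) {
        llama_model_free(model);
        return 1;
    }

    // ... create a llama_context and enable the adapter on it ...

    llama_adapter_lora_free(adapter);
    llama_model_free(model);
    llama_backend_free();
    return 0;
}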