@@ -4,22 +4,21 @@
 #include "llama-mmap.h"
 #include "llama-model.h"
 
-#include <algorithm>
 #include <map>
 #include <cassert>
 #include <stdexcept>
 
 // vec
 
-struct ggml_tensor * llama_adapter_cvec::tensor_for(int il) const {
+ggml_tensor * llama_adapter_cvec::tensor_for(int il) const {
     if (il < 0 || il < layer_start || il > layer_end || (size_t) il >= tensors.size()) {
         return nullptr;
     }
 
     return tensors[il];
 }
 
-struct ggml_tensor * llama_adapter_cvec::apply_to(struct ggml_context * ctx, struct ggml_tensor * cur, int il) const {
+ggml_tensor * llama_adapter_cvec::apply_to(ggml_context * ctx, ggml_tensor * cur, int il) const {
     ggml_tensor * layer_dir = tensor_for(il);
     if (layer_dir != nullptr) {
         cur = ggml_add(ctx, cur, layer_dir);
@@ -40,7 +39,7 @@ bool llama_adapter_cvec::init(const llama_model & model) {
     auto ctx_for_buft = [&](ggml_backend_buffer_type_t buft) -> ggml_context * {
         auto it = ctx_map.find(buft);
         if (it == ctx_map.end()) {
-            struct ggml_init_params params = {
+            ggml_init_params params = {
                 /*.mem_size   =*/ hparams.n_layer*ggml_tensor_overhead(),
                 /*.mem_buffer =*/ NULL,
                 /*.no_alloc   =*/ true,
@@ -135,7 +134,7 @@ bool llama_adapter_cvec::apply(
 
 // lora
 
-llama_adapter_lora_weight * llama_adapter_lora::get_weight(struct ggml_tensor * w) {
+llama_adapter_lora_weight * llama_adapter_lora::get_weight(ggml_tensor * w) {
     const std::string name(w->name);
 
     const auto pos = ab_map.find(name);
@@ -146,11 +145,11 @@ llama_adapter_lora_weight * llama_adapter_lora::get_weight(struct ggml_tensor *
     return nullptr;
 }
 
-static void llama_adapter_lora_init_impl(struct llama_model & model, const char * path_lora, struct llama_adapter_lora & adapter) {
+static void llama_adapter_lora_init_impl(llama_model & model, const char * path_lora, llama_adapter_lora & adapter) {
     LLAMA_LOG_INFO("%s: loading lora adapter from '%s' ...\n", __func__, path_lora);
 
     ggml_context * ctx_init;
-    struct gguf_init_params meta_gguf_params = {
+    gguf_init_params meta_gguf_params = {
         /* .no_alloc = */ true,
         /* .ctx      = */ &ctx_init,
     };
@@ -201,7 +200,7 @@ static void llama_adapter_lora_init_impl(struct llama_model & model, const char
         auto it = ctx_map.find(buft);
         if (it == ctx_map.end()) {
             // add a new context
-            struct ggml_init_params params = {
+            ggml_init_params params = {
                 /*.mem_size   =*/ n_tensors*ggml_tensor_overhead(),
                 /*.mem_buffer =*/ NULL,
                 /*.no_alloc   =*/ true,
@@ -264,7 +263,7 @@ static void llama_adapter_lora_init_impl(struct llama_model & model, const char
             throw std::runtime_error("LoRA tensor '" + name + "' does not exist in base model (hint: maybe wrong base model?)");
         }
 
-        struct ggml_context * dev_ctx = ctx_for_buft(ggml_backend_buffer_get_type(model_tensor->buffer));
+        ggml_context * dev_ctx = ctx_for_buft(ggml_backend_buffer_get_type(model_tensor->buffer));
         // validate tensor shape
         if (is_token_embd) {
             // expect B to be non-transposed, A and B are flipped; see llm_build_inp_embd()
@@ -281,8 +280,8 @@ static void llama_adapter_lora_init_impl(struct llama_model & model, const char
         }
 
         // save tensor to adapter
-        struct ggml_tensor * tensor_a = ggml_dup_tensor(dev_ctx, w.a);
-        struct ggml_tensor * tensor_b = ggml_dup_tensor(dev_ctx, w.b);
+        ggml_tensor * tensor_a = ggml_dup_tensor(dev_ctx, w.a);
+        ggml_tensor * tensor_b = ggml_dup_tensor(dev_ctx, w.b);
         ggml_set_name(tensor_a, w.a->name);
         ggml_set_name(tensor_b, w.b->name);
         adapter.ab_map[name] = llama_adapter_lora_weight(tensor_a, tensor_b);
@@ -308,7 +307,7 @@ static void llama_adapter_lora_init_impl(struct llama_model & model, const char
     {
         llama_file gguf_file(path_lora, "rb");
         std::vector<uint8_t> read_buf;
-        auto set_tensor = [&](struct ggml_tensor * orig, struct ggml_tensor * dev) {
+        auto set_tensor = [&](ggml_tensor * orig, ggml_tensor * dev) {
            size_t offs = gguf_get_data_offset(ctx_gguf.get()) + gguf_get_tensor_offset(ctx_gguf.get(), gguf_find_tensor(ctx_gguf.get(), orig->name));
            size_t size = ggml_nbytes(orig);
            read_buf.resize(size);
@@ -327,8 +326,8 @@ static void llama_adapter_lora_init_impl(struct llama_model & model, const char
     LLAMA_LOG_INFO("%s: loaded %zu tensors from lora file\n", __func__, adapter.ab_map.size()*2);
 }
 
-struct llama_adapter_lora * llama_adapter_lora_init(struct llama_model * model, const char * path_lora) {
-    struct llama_adapter_lora * adapter = new llama_adapter_lora();
+llama_adapter_lora * llama_adapter_lora_init(llama_model * model, const char * path_lora) {
+    llama_adapter_lora * adapter = new llama_adapter_lora();
 
     try {
         llama_adapter_lora_init_impl(*model, path_lora, *adapter);
@@ -342,6 +341,6 @@ struct llama_adapter_lora * llama_adapter_lora_init(struct llama_model * model,
     return nullptr;
 }
 
-void llama_adapter_lora_free(struct llama_adapter_lora * adapter) {
+void llama_adapter_lora_free(llama_adapter_lora * adapter) {
     delete adapter;
 }
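For context, a minimal usage sketch of the two public entry points touched above, llama_adapter_lora_init and llama_adapter_lora_free. It is not part of the diff; the backend and model-loading calls (llama_backend_init, llama_model_default_params, llama_model_load_from_file, llama_model_free, llama_backend_free) are assumed from the public llama.h of the same tree.

// Illustrative sketch only: load a base model, attach a LoRA adapter, release both.
#include "llama.h"

#include <cstdio>

int main(int argc, char ** argv) {
    if (argc < 3) {
        fprintf(stderr, "usage: %s <model.gguf> <lora.gguf>\n", argv[0]);
        return 1;
    }

    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_model_load_from_file(argv[1], mparams);
    if (model == nullptr) {
        return 1;
    }

    // returns nullptr (and logs the error) if the file is not a valid LoRA adapter
    // or does not match the base model
    llama_adapter_lora * adapter = llama_adapter_lora_init(model, argv[2]);
    if (adapter == nullptr) {
        llama_model_free(model);
        return 1;
    }

    // ... create a llama_context and enable the adapter on it ...

    llama_adapter_lora_free(adapter);
    llama_model_free(model);
    llama_backend_free();
    return 0;
}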