llama.cpp updated to b3982
guinmoon committed Oct 26, 2024
1 parent 337ec7d commit a6aab90
Showing 5 changed files with 18 additions and 14 deletions.
4 changes: 3 additions & 1 deletion Package.swift
@@ -3,10 +3,12 @@

import PackageDescription



var sources = [ "llama.cpp/ggml/src/ggml.c",
"llama.cpp/ggml/src/ggml-quants.c",
"llama.cpp/ggml/src/ggml-alloc.c",
"llama.cpp/ggml/src/ggml-backend.c",
"llama.cpp/ggml/src/ggml-backend.cpp",
"llama.cpp/ggml/src/ggml-metal.m",
"llama.cpp/ggml/src/ggml-blas.cpp",
"llama.cpp/ggml/src/ggml-aarch64.c",
6 changes: 4 additions & 2 deletions Sources/llmfarm_core/LLaMa.swift
@@ -206,7 +206,8 @@ public class LLaMa: LLMBase {

public override func llm_eval(inputBatch: inout [ModelToken]) throws -> Bool {

-        if llama_decode(context,llama_batch_get_one(&inputBatch, Int32(inputBatch.count), self.nPast, 0)) != 0 {
+        if llama_decode(context,llama_batch_get_one(&inputBatch, Int32(inputBatch.count))) != 0 {
+        // if llama_decode(context,llama_batch_get_one(&inputBatch, Int32(inputBatch.count), self.nPast, 0)) != 0 {
print("failed to evaluate llama!")
return false
}
@@ -411,7 +412,8 @@ public class LLaMa: LLMBase {
}

if (llama_model_has_encoder(model)) {
-        if (llama_encode(context, llama_batch_get_one(&embeddings, Int32(embeddings.count), 0, 0)) != 0) {
+        if (llama_encode(context, llama_batch_get_one(&embeddings, Int32(embeddings.count))) != 0) {
+        // if (llama_encode(context, llama_batch_get_one(&embeddings, Int32(embeddings.count), 0, 0)) != 0) {
print("failed to eval encode.")
return [];
}
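Both hunks above track the same upstream API change: as of this llama.cpp revision, llama_batch_get_one() takes only a token pointer and a count, and the context tracks positions through its KV cache, so the explicit self.nPast position and sequence-id arguments are dropped (llama_encode takes the same two-argument batch). Below is a minimal C++ sketch of the new decode call, assuming the b3982 llama.h interface; eval_tokens is a hypothetical helper, not part of this commit.

    #include "llama.h"
    #include <cstdio>
    #include <vector>

    // Hypothetical helper: feed one chunk of tokens to the model.
    // Positions are no longer passed in; the context's KV cache tracks them.
    static bool eval_tokens(llama_context * ctx, std::vector<llama_token> & tokens) {
        // before this update: llama_batch_get_one(tokens.data(), n, n_past, /*seq_id=*/0)
        llama_batch batch = llama_batch_get_one(tokens.data(), (int32_t) tokens.size());
        if (llama_decode(ctx, batch) != 0) {
            fprintf(stderr, "failed to evaluate llama!\n");
            return false;
        }
        return true;
    }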
14 changes: 7 additions & 7 deletions Sources/llmfarm_core_cpp/gpt_spm.cpp
@@ -311,7 +311,7 @@ int check_tensor_name(struct ggml_tensor * t){


/*struct llama_sampling_context*/
-struct gpt_sampler* init_sampling(llama_model* model,
+struct common_sampler* init_sampling(llama_model* model,
int32_t n_prev = 64, // number of previous tokens to remember
int32_t top_k = 40, // <= 0 to use vocab size
float top_p = 0.95f, // 1.0 = disabled
@@ -332,7 +332,7 @@ int check_tensor_name(struct ggml_tensor * t){
uint32_t seed = LLAMA_DEFAULT_SEED,
const char * grammar_path = ""){
// sparams
-    struct gpt_sampler_params sparams;
+    struct common_sampler_params sparams;
sparams.n_prev = n_prev;
sparams.top_k = top_k;
sparams.top_p = top_p; // 1.0 = disabled
@@ -363,29 +363,29 @@ int check_tensor_name(struct ggml_tensor * t){
);
}

-    struct gpt_sampler * ctx_sampling = gpt_sampler_init(model, sparams);
+    struct common_sampler * ctx_sampling = common_sampler_init(model, sparams);
return ctx_sampling;
}

llama_token spm_llama_sampling_sample(
-    /*llama_sampling_context*/gpt_sampler * ctx_sampling,
+    /*llama_sampling_context*/common_sampler * ctx_sampling,
struct llama_context * ctx_main,
// struct llama_context * ctx_cfg,
int idx = -1,
bool grammar_first = false)
{

// llama_sampling_sample(ctx_sampling,ctx_main,ctx_cfg,idx);
-    gpt_sampler_sample(ctx_sampling, ctx_main, idx, grammar_first);
+    common_sampler_sample(ctx_sampling, ctx_main, idx, grammar_first);
}

void spm_llama_sampling_accept(
-    struct /*llama_sampling_context*/gpt_sampler * ctx_sampling,
+    struct /*llama_sampling_context*/common_sampler * ctx_sampling,
struct llama_context * ctx_main,
llama_token id,
bool apply_grammar)
{
// llama_sampling_accept(ctx_sampling,ctx_main,id,apply_grammar);
-    gpt_sampler_accept(ctx_sampling, id, apply_grammar);
+    common_sampler_accept(ctx_sampling, id, apply_grammar);
}
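
The change set above is the upstream rename of the gpt_* sampling layer to common_* (gpt_sampler -> common_sampler, plus the matching params struct and the init/sample/accept calls); the bridge keeps its own entry points and only swaps the types underneath. Note that spm_llama_sampling_sample is declared to return a llama_token, yet the body shown never returns the sampled value; that appears to predate this commit. Below is a minimal sketch of a generation loop against the renamed API, assuming the b3982 common sampling interface; generate_n and the parameter values are illustrative, not part of this commit.

    #include "llama.h"
    #include "sampling.h"  // llama.cpp common library; include path may vary by build setup

    // Illustrative loop: sample -> accept -> decode with the renamed API.
    // Assumes a prompt has already been decoded so logits are available.
    static void generate_n(llama_model * model, llama_context * ctx, int n_tokens) {
        common_sampler_params sparams;  // formerly gpt_sampler_params
        sparams.top_k = 40;
        sparams.top_p = 0.95f;

        common_sampler * smpl = common_sampler_init(model, sparams);  // formerly gpt_sampler_init

        for (int i = 0; i < n_tokens; i++) {
            // sample the next token from the most recent logits
            llama_token id = common_sampler_sample(smpl, ctx, /*idx=*/-1, /*grammar_first=*/false);
            // record it in the sampler state (and grammar, if any)
            common_sampler_accept(smpl, id, /*accept_grammar=*/true);

            // feed it back with the two-argument batch, as in the Swift hunks above
            llama_batch batch = llama_batch_get_one(&id, 1);
            if (llama_decode(ctx, batch) != 0) {
                break;
            }
        }

        common_sampler_free(smpl);  // formerly gpt_sampler_free
    }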

2 changes: 1 addition & 1 deletion Sources/llmfarm_core_cpp/llama.cpp
Submodule llama.cpp updated 179 files
6 changes: 3 additions & 3 deletions Sources/llmfarm_core_cpp/spm-headers/gpt_spm.h
@@ -185,7 +185,7 @@ char * get_tensor_name(struct ggml_tensor * t);
int check_tensor_name(struct ggml_tensor * t);

// struct llama_sampling_context *
-struct gpt_sampler* init_sampling(struct llama_model* model,
+struct common_sampler* init_sampling(struct llama_model* model,
int32_t n_prev, // number of previous tokens to remember
int32_t top_k, // <= 0 to use vocab size
float top_p, // 1.0 = disabled
@@ -209,15 +209,15 @@ struct gpt_sampler* init_sampling(struct llama_model* model,

llama_token spm_llama_sampling_sample(
// struct llama_sampling_context * ctx_sampling,
-    struct gpt_sampler * ctx_sampling,
+    struct common_sampler * ctx_sampling,
struct llama_context * ctx_main,
// struct llama_context * ctx_cfg,
int idx,
bool grammar_first);

void spm_llama_sampling_accept(
// struct llama_sampling_context * ctx_sampling,
-    struct gpt_sampler * ctx_sampling,
+    struct common_sampler * ctx_sampling,
struct llama_context * ctx_main,
llama_token id,
bool apply_grammar);
