llama.cpp updated to b3982
guinmoon committed Oct 26, 2024
1 parent 337ec7d commit a6aab90
Showing 5 changed files with 18 additions and 14 deletions.
4 changes: 3 additions & 1 deletion Package.swift
@@ -3,10 +3,12 @@

import PackageDescription



var sources = [ "llama.cpp/ggml/src/ggml.c",
"llama.cpp/ggml/src/ggml-quants.c",
"llama.cpp/ggml/src/ggml-alloc.c",
"llama.cpp/ggml/src/ggml-backend.c",
"llama.cpp/ggml/src/ggml-backend.cpp",
"llama.cpp/ggml/src/ggml-metal.m",
"llama.cpp/ggml/src/ggml-blas.cpp",
"llama.cpp/ggml/src/ggml-aarch64.c",
6 changes: 4 additions & 2 deletions Sources/llmfarm_core/LLaMa.swift
@@ -206,7 +206,8 @@ public class LLaMa: LLMBase {

public override func llm_eval(inputBatch: inout [ModelToken]) throws -> Bool {

-        if llama_decode(context,llama_batch_get_one(&inputBatch, Int32(inputBatch.count), self.nPast, 0)) != 0 {
+        if llama_decode(context,llama_batch_get_one(&inputBatch, Int32(inputBatch.count))) != 0 {
+        // if llama_decode(context,llama_batch_get_one(&inputBatch, Int32(inputBatch.count), self.nPast, 0)) != 0 {
print("failed to evaluate llama!")
return false
}
@@ -411,7 +412,8 @@ public class LLaMa: LLMBase {
}

if (llama_model_has_encoder(model)) {
-        if (llama_encode(context, llama_batch_get_one(&embeddings, Int32(embeddings.count), 0, 0)) != 0) {
+        if (llama_encode(context, llama_batch_get_one(&embeddings, Int32(embeddings.count))) != 0) {
+        // if (llama_encode(context, llama_batch_get_one(&embeddings, Int32(embeddings.count), 0, 0)) != 0) {
print("failed to eval encode.")
return [];
}
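Both hunks above track the same upstream API change: as of this llama.cpp revision, llama_batch_get_one() takes only a token pointer and a count, and the context tracks positions through its KV cache, so the explicit self.nPast position and sequence-id arguments are dropped (llama_encode takes the same two-argument batch). Below is a minimal C++ sketch of the new decode call, assuming the b3982 llama.h interface; eval_tokens is a hypothetical helper, not part of this commit.

    #include "llama.h"
    #include <cstdio>
    #include <vector>

    // Hypothetical helper: feed one chunk of tokens to the model.
    // Positions are no longer passed in; the context's KV cache tracks them.
    static bool eval_tokens(llama_context * ctx, std::vector<llama_token> & tokens) {
        // before this update: llama_batch_get_one(tokens.data(), n, n_past, /*seq_id=*/0)
        llama_batch batch = llama_batch_get_one(tokens.data(), (int32_t) tokens.size());
        if (llama_decode(ctx, batch) != 0) {
            fprintf(stderr, "failed to evaluate llama!\n");
            return false;
        }
        return true;
    }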
14 changes: 7 additions & 7 deletions Sources/llmfarm_core_cpp/gpt_spm.cpp
@@ -311,7 +311,7 @@ int check_tensor_name(struct ggml_tensor * t){


/*struct llama_sampling_context*/
-struct gpt_sampler* init_sampling(llama_model* model,
+struct common_sampler* init_sampling(llama_model* model,
int32_t n_prev = 64, // number of previous tokens to remember
int32_t top_k = 40, // <= 0 to use vocab size
float top_p = 0.95f, // 1.0 = disabled
@@ -332,7 +332,7 @@ int check_tensor_name(struct ggml_tensor * t){
uint32_t seed = LLAMA_DEFAULT_SEED,
const char * grammar_path = ""){
// sparams
-    struct gpt_sampler_params sparams;
+    struct common_sampler_params sparams;
sparams.n_prev = n_prev;
sparams.top_k = top_k;
sparams.top_p = top_p; // 1.0 = disabled
@@ -363,29 +363,29 @@ int check_tensor_name(struct ggml_tensor * t){
);
}

-    struct gpt_sampler * ctx_sampling = gpt_sampler_init(model, sparams);
+    struct common_sampler * ctx_sampling = common_sampler_init(model, sparams);
return ctx_sampling;
}

llama_token spm_llama_sampling_sample(
-    /*llama_sampling_context*/gpt_sampler * ctx_sampling,
+    /*llama_sampling_context*/common_sampler * ctx_sampling,
struct llama_context * ctx_main,
// struct llama_context * ctx_cfg,
int idx = -1,
bool grammar_first = false)
{

// llama_sampling_sample(ctx_sampling,ctx_main,ctx_cfg,idx);
-    gpt_sampler_sample(ctx_sampling, ctx_main, idx, grammar_first);
+    common_sampler_sample(ctx_sampling, ctx_main, idx, grammar_first);
}

void spm_llama_sampling_accept(
-    struct /*llama_sampling_context*/gpt_sampler * ctx_sampling,
+    struct /*llama_sampling_context*/common_sampler * ctx_sampling,
struct llama_context * ctx_main,
llama_token id,
bool apply_grammar)
{
// llama_sampling_accept(ctx_sampling,ctx_main,id,apply_grammar);
-    gpt_sampler_accept(ctx_sampling, id, apply_grammar);
+    common_sampler_accept(ctx_sampling, id, apply_grammar);
}
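
The change set above is the upstream rename of the gpt_* sampling layer to common_* (gpt_sampler -> common_sampler, plus the matching params struct and the init/sample/accept calls); the bridge keeps its own entry points and only swaps the types underneath. Note that spm_llama_sampling_sample is declared to return a llama_token, yet the body shown never returns the sampled value; that appears to predate this commit. Below is a minimal sketch of a generation loop against the renamed API, assuming the b3982 common sampling interface; generate_n and the parameter values are illustrative, not part of this commit.

    #include "llama.h"
    #include "sampling.h"  // llama.cpp common library; include path may vary by build setup

    // Illustrative loop: sample -> accept -> decode with the renamed API.
    // Assumes a prompt has already been decoded so logits are available.
    static void generate_n(llama_model * model, llama_context * ctx, int n_tokens) {
        common_sampler_params sparams;  // formerly gpt_sampler_params
        sparams.top_k = 40;
        sparams.top_p = 0.95f;

        common_sampler * smpl = common_sampler_init(model, sparams);  // formerly gpt_sampler_init

        for (int i = 0; i < n_tokens; i++) {
            // sample the next token from the most recent logits
            llama_token id = common_sampler_sample(smpl, ctx, /*idx=*/-1, /*grammar_first=*/false);
            // record it in the sampler state (and grammar, if any)
            common_sampler_accept(smpl, id, /*accept_grammar=*/true);

            // feed it back with the two-argument batch, as in the Swift hunks above
            llama_batch batch = llama_batch_get_one(&id, 1);
            if (llama_decode(ctx, batch) != 0) {
                break;
            }
        }

        common_sampler_free(smpl);  // formerly gpt_sampler_free
    }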

2 changes: 1 addition & 1 deletion Sources/llmfarm_core_cpp/llama.cpp
Submodule llama.cpp updated 179 files
6 changes: 3 additions & 3 deletions Sources/llmfarm_core_cpp/spm-headers/gpt_spm.h
@@ -185,7 +185,7 @@ char * get_tensor_name(struct ggml_tensor * t);
int check_tensor_name(struct ggml_tensor * t);

// struct llama_sampling_context *
-struct gpt_sampler* init_sampling(struct llama_model* model,
+struct common_sampler* init_sampling(struct llama_model* model,
int32_t n_prev, // number of previous tokens to remember
int32_t top_k, // <= 0 to use vocab size
float top_p, // 1.0 = disabled
@@ -209,15 +209,15 @@ struct gpt_sampler* init_sampling(struct llama_model* model,

llama_token spm_llama_sampling_sample(
// struct llama_sampling_context * ctx_sampling,
-    struct gpt_sampler * ctx_sampling,
+    struct common_sampler * ctx_sampling,
struct llama_context * ctx_main,
// struct llama_context * ctx_cfg,
int idx,
bool grammar_first);

void spm_llama_sampling_accept(
// struct llama_sampling_context * ctx_sampling,
-    struct gpt_sampler * ctx_sampling,
+    struct common_sampler * ctx_sampling,
struct llama_context * ctx_main,
llama_token id,
bool apply_grammar);
