
Commit 3ac8072

ggml : sync with llama.cpp
- int64_t number of elements
- remove mlock
- expose quantization functions
- expose ggml_object
- add ggml_view_3d()
- multi-thread ggml_rope()
- fix ggml_cpy()
- add ggml_init_params.no_alloc
- fix ggml_mul_mat() backward
1 parent 42cbb07 commit 3ac8072

18 files changed (+1086, -716 lines); the example sources below are the ones shown in this view.
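
The change most likely to affect downstream callers of these examples is the new no_alloc field appended to struct ggml_init_params: aggregate initializers such as { 0, NULL } now need a third member, as every file below shows. A minimal sketch of the updated initialization, assuming only the three fields visible in this commit (the buffer size is illustrative, not taken from the repository):

    #include "ggml.h"

    int main(void) {
        struct ggml_init_params params = {
            .mem_size   = 16*1024*1024, // illustrative scratch size; real callers compute this from the model
            .mem_buffer = NULL,         // let ggml allocate the working buffer itself
            .no_alloc   = false,        // new field introduced by this sync
        };

        struct ggml_context * ctx = ggml_init(params);
        // ... create tensors / build and run a graph ...
        ggml_free(ctx);
        return 0;
    }

All the examples in this commit pass false to keep the previous behavior; as the name suggests, passing true makes the context track tensor metadata without allocating the tensor data buffers.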

examples/gpt-2/main.cpp (+8, -4)

@@ -199,6 +199,7 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
     struct ggml_init_params params = {
         .mem_size = ctx_size,
         .mem_buffer = NULL,
+        .no_alloc = false,
     };

     model.ctx = ggml_init(params);
@@ -315,9 +316,11 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
         }

         int32_t nelements = 1;
-        int32_t ne[2] = { 1, 1 };
+        int64_t ne[2] = { 1, 1 };
         for (int i = 0; i < n_dims; ++i) {
-            fin.read(reinterpret_cast<char *>(&ne[i]), sizeof(ne[i]));
+            int32_t ne_cur;
+            fin.read(reinterpret_cast<char *>(&ne_cur), sizeof(ne_cur));
+            ne[i] = ne_cur;
             nelements *= ne[i];
         }

@@ -336,14 +339,14 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
         }

         if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) {
-            fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d], expected [%d, %d]\n",
+            fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%lld, %lld], expected [%lld, %lld]\n",
                     __func__, name.data(), tensor->ne[0], tensor->ne[1], ne[0], ne[1]);
             return false;
         }

         if (0) {
             static const char * ftype_str[] = { "f32", "f16", "q4_0", "q4_1", };
-            printf("%24s - [%5d, %5d], type = %6s, %6.2f MB, %9zu bytes\n", name.data(), ne[0], ne[1], ftype_str[ftype], ggml_nbytes(tensor)/1024.0/1024.0, ggml_nbytes(tensor));
+            printf("%24s - [%5lld, %5lld], type = %6s, %6.2f MB, %9zu bytes\n", name.data(), ne[0], ne[1], ftype_str[ftype], ggml_nbytes(tensor)/1024.0/1024.0, ggml_nbytes(tensor));
         }

         size_t bpe = 0;
@@ -432,6 +435,7 @@ bool gpt2_eval(
     struct ggml_init_params params = {
         .mem_size = buf_size,
         .mem_buffer = buf,
+        .no_alloc = false,
     };

     struct ggml_context * ctx0 = ggml_init(params);
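
Both the gpt-2 and gpt-j loaders (next file but one) apply the same adjustment here: the model file still serializes each dimension as a 32-bit integer, so the loader reads into a temporary int32_t and widens it into ggml's new int64_t shape array. The helper below is hypothetical (it does not exist in the commit); it only restates that pattern in a self-contained form, with fin and n_dims mirroring the loader code above:

    // Hypothetical helper: dims are stored on disk as int32_t but kept in memory as int64_t.
    #include <cstdint>
    #include <fstream>

    static bool read_tensor_dims(std::ifstream & fin, int n_dims, int64_t ne[2], int64_t & nelements) {
        ne[0] = ne[1] = 1;
        nelements = 1;
        for (int i = 0; i < n_dims; ++i) {
            int32_t ne_cur;                                              // 32-bit on disk
            fin.read(reinterpret_cast<char *>(&ne_cur), sizeof(ne_cur));
            ne[i] = ne_cur;                                              // widened to int64_t in memory
            nelements *= ne[i];
        }
        return fin.good();
    }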

examples/gpt-2/quantize.cpp (+1, -1)

@@ -291,7 +291,7 @@ int main(int argc, char ** argv) {

     // needed to initialize f16 tables
     {
-        struct ggml_init_params params = { 0, NULL };
+        struct ggml_init_params params = { 0, NULL, false };
         struct ggml_context * ctx = ggml_init(params);
         ggml_free(ctx);
     }

examples/gpt-j/main.cpp (+10, -6)

@@ -198,6 +198,7 @@ bool gptj_model_load(const std::string & fname, gptj_model & model, gpt_vocab &
     struct ggml_init_params params = {
         .mem_size = ctx_size,
         .mem_buffer = NULL,
+        .no_alloc = false,
     };

     model.ctx = ggml_init(params);
@@ -310,10 +311,12 @@ bool gptj_model_load(const std::string & fname, gptj_model & model, gpt_vocab &
             break;
         }

-        int32_t nelements = 1;
-        int32_t ne[2] = { 1, 1 };
+        int64_t nelements = 1;
+        int64_t ne[2] = { 1, 1 };
         for (int i = 0; i < n_dims; ++i) {
-            fin.read(reinterpret_cast<char *>(&ne[i]), sizeof(ne[i]));
+            int32_t ne_cur;
+            fin.read(reinterpret_cast<char *>(&ne_cur), sizeof(ne_cur));
+            ne[i] = ne_cur;
             nelements *= ne[i];
         }

@@ -332,14 +335,14 @@ bool gptj_model_load(const std::string & fname, gptj_model & model, gpt_vocab &
         }

        if (tensor->ne[0] != ne[0] || tensor->ne[1] != ne[1]) {
-            fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%d, %d], expected [%d, %d]\n",
+            fprintf(stderr, "%s: tensor '%s' has wrong shape in model file: got [%lld, %lld], expected [%lld, %lld]\n",
                    __func__, name.data(), tensor->ne[0], tensor->ne[1], ne[0], ne[1]);
            return false;
        }

        if (0) {
            static const char * ftype_str[] = { "f32", "f16", "q4_0", "q4_1", };
-            printf("%24s - [%5d, %5d], type = %6s, %6.2f MB, %9zu bytes\n", name.data(), ne[0], ne[1], ftype_str[ftype], ggml_nbytes(tensor)/1024.0/1024.0, ggml_nbytes(tensor));
+            printf("%24s - [%5lld, %5lld], type = %6s, %6.2f MB, %9zu bytes\n", name.data(), ne[0], ne[1], ftype_str[ftype], ggml_nbytes(tensor)/1024.0/1024.0, ggml_nbytes(tensor));
        }

        size_t bpe = 0;
@@ -357,7 +360,7 @@ bool gptj_model_load(const std::string & fname, gptj_model & model, gpt_vocab &
        };

        if ((nelements*bpe)/ggml_blck_size(tensor->type) != ggml_nbytes(tensor)) {
-            fprintf(stderr, "%s: tensor '%s' has wrong size in model file: got %zu, expected %zu\n",
+            fprintf(stderr, "%s: tensor '%s' has wrong size in model file: got %zu, expected %llu\n",
                    __func__, name.data(), ggml_nbytes(tensor), nelements*bpe);
            return false;
        }
@@ -431,6 +434,7 @@ bool gptj_eval(
     struct ggml_init_params params = {
         .mem_size = buf_size,
         .mem_buffer = buf,
+        .no_alloc = false,
     };

     struct ggml_context * ctx0 = ggml_init(params);

examples/gpt-j/quantize.cpp (+1, -1)

@@ -292,7 +292,7 @@ int main(int argc, char ** argv) {

     // needed to initialize f16 tables
     {
-        struct ggml_init_params params = { 0, NULL };
+        struct ggml_init_params params = { 0, NULL, false };
         struct ggml_context * ctx = ggml_init(params);
         ggml_free(ctx);
     }

examples/whisper/main.cpp (+40, -21)

@@ -8,6 +8,7 @@
 #include <string>
 #include <thread>
 #include <vector>
+#include <cstring>

 // Terminal color map. 10 colors grouped in ranges [0.0, 0.1, ..., 0.9]
 // Lowest is red, middle is yellow, highest is green.
@@ -371,6 +372,39 @@ bool output_csv(struct whisper_context * ctx, const char * fname) {
     return true;
 }

+char *escape_double_quotes(const char *str) {
+    if (str == NULL) {
+        return NULL;
+    }
+
+    size_t escaped_length = strlen(str) + 1;
+
+    for (size_t i = 0; str[i] != '\0'; i++) {
+        if (str[i] == '"') {
+            escaped_length++;
+        }
+    }
+
+    char *escaped = (char *)calloc(escaped_length, 1); // pre-zeroed
+    if (escaped == NULL) {
+        return NULL;
+    }
+
+    size_t pos = 0;
+    for (size_t i = 0; str[i] != '\0'; i++) {
+        if (str[i] == '"') {
+            escaped[pos++] = '\\';
+            escaped[pos++] = '"';
+        } else {
+            escaped[pos++] = str[i];
+        }
+    }
+
+    // no need to set zero due to calloc() being used prior
+
+    return escaped;
+}
+
 bool output_json(struct whisper_context * ctx, const char * fname, const whisper_params & params) {
     std::ofstream fout(fname);
     int indent = 0;
@@ -414,7 +448,9 @@ bool output_json(struct whisper_context * ctx, const char * fname, const whisper

     auto value_s = [&](const char *name, const char *val, bool end = false) {
         start_value(name);
-        fout << "\"" << val << (end ? "\"\n" : "\",\n");
+        char * val_escaped = escape_double_quotes(val);
+        fout << "\"" << val_escaped << (end ? "\"\n" : "\",\n");
+        free(val_escaped);
     };

     auto end_value = [&](bool end = false) {
@@ -455,7 +491,7 @@ bool output_json(struct whisper_context * ctx, const char * fname, const whisper
                value_i("ctx", whisper_model_n_text_ctx(ctx));
                value_i("state", whisper_model_n_text_state(ctx));
                value_i("head", whisper_model_n_text_head(ctx));
-                value_i("leyer", whisper_model_n_text_layer(ctx), true);
+                value_i("layer", whisper_model_n_text_layer(ctx), true);
            end_obj();
            value_i("mels", whisper_model_n_mels(ctx));
            value_i("f16", whisper_model_f16(ctx), true);
@@ -477,7 +513,7 @@ bool output_json(struct whisper_context * ctx, const char * fname, const whisper
        const int64_t t1 = whisper_full_get_segment_t1(ctx, i);

        start_obj();
-            start_obj("timestanps");
+            start_obj("timestamps");
                value_s("from", to_timestamp(t0, true).c_str());
                value_s("to", to_timestamp(t1, true).c_str(), true);
            end_obj();
@@ -639,22 +675,6 @@ int main(int argc, char ** argv) {
        return 3;
    }

-    // initial prompt
-    std::vector<whisper_token> prompt_tokens;
-
-    if (!params.prompt.empty()) {
-        prompt_tokens.resize(1024);
-        prompt_tokens.resize(whisper_tokenize(ctx, params.prompt.c_str(), prompt_tokens.data(), prompt_tokens.size()));
-
-        fprintf(stderr, "\n");
-        fprintf(stderr, "initial prompt: '%s'\n", params.prompt.c_str());
-        fprintf(stderr, "initial tokens: [ ");
-        for (int i = 0; i < (int) prompt_tokens.size(); ++i) {
-            fprintf(stderr, "%d ", prompt_tokens[i]);
-        }
-        fprintf(stderr, "]\n");
-    }
-
    for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
        const auto fname_inp = params.fname_inp[f];
        const auto fname_out = f < (int) params.fname_out.size() && !params.fname_out[f].empty() ? params.fname_out[f] : params.fname_inp[f];
@@ -718,8 +738,7 @@ int main(int argc, char ** argv) {

        wparams.speed_up = params.speed_up;

-        wparams.prompt_tokens = prompt_tokens.empty() ? nullptr : prompt_tokens.data();
-        wparams.prompt_n_tokens = prompt_tokens.empty() ? 0 : prompt_tokens.size();
+        wparams.initial_prompt = params.prompt.c_str();

        wparams.greedy.best_of = params.best_of;
        wparams.beam_search.beam_size = params.beam_size;
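
On the whisper side, the example no longer tokenizes the initial prompt itself: the prompt_tokens/prompt_n_tokens pair is gone and the caller now hands the raw string to the library through the new initial_prompt parameter. A rough sketch of the resulting calling pattern; only initial_prompt itself is confirmed by this diff, while the sampling strategy and the ctx/params/pcmf32 variables are assumed from the surrounding example code:

    // Sketch (not part of the commit): passing the prompt as plain text.
    whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);

    wparams.initial_prompt = params.prompt.c_str();   // raw text; tokenization now happens inside the library

    if (whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()) != 0) {
        fprintf(stderr, "failed to process audio\n");
    }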

examples/whisper/quantize.cpp (+1, -1)

@@ -334,7 +334,7 @@ int main(int argc, char ** argv) {

     // needed to initialize f16 tables
     {
-        struct ggml_init_params params = { 0, NULL };
+        struct ggml_init_params params = { 0, NULL, false };
         struct ggml_context * ctx = ggml_init(params);
         ggml_free(ctx);
     }
