8
8
#include < string>
9
9
#include < thread>
10
10
#include < vector>
11
+ #include < cstring>
11
12
12
13
// Terminal color map. 10 colors grouped in ranges [0.0, 0.1, ..., 0.9]
13
14
// Lowest is red, middle is yellow, highest is green.
@@ -371,6 +372,39 @@ bool output_csv(struct whisper_context * ctx, const char * fname) {
371
372
return true ;
372
373
}
373
374
375
// Returns a newly allocated, NUL-terminated copy of `str` in which every
// double quote (") and every backslash is preceded by a backslash, so the
// result can be placed between the quotes of a JSON string literal.
//
// Backslashes must be escaped together with the quotes: otherwise an input
// backslash that is directly followed by a quote would turn into an escaped
// backslash followed by a bare quote, terminating the JSON string early.
//
// Returns nullptr when `str` is nullptr or when the allocation fails.
// The buffer is obtained from calloc(); the caller owns it and must release
// it with free(). All other characters are copied through unchanged.
char *escape_double_quotes(const char *str) {
    if (str == nullptr) {
        return nullptr;
    }

    // First pass: compute the output size. Start at 1 for the terminating
    // NUL; each character needing an escape occupies two bytes.
    size_t escaped_length = 1;
    for (const char *p = str; *p != '\0'; ++p) {
        escaped_length += (*p == '"' || *p == '\\') ? 2 : 1;
    }

    // calloc() zero-fills the buffer, so the terminator is already in place.
    char *escaped = (char *)calloc(escaped_length, 1);
    if (escaped == nullptr) {
        return nullptr;
    }

    // Second pass: copy, inserting a backslash before each escaped character.
    size_t pos = 0;
    for (const char *p = str; *p != '\0'; ++p) {
        if (*p == '"' || *p == '\\') {
            escaped[pos++] = '\\';
        }
        escaped[pos++] = *p;
    }

    return escaped;
}
407
+
374
408
bool output_json (struct whisper_context * ctx, const char * fname, const whisper_params & params) {
375
409
std::ofstream fout (fname);
376
410
int indent = 0 ;
@@ -414,7 +448,9 @@ bool output_json(struct whisper_context * ctx, const char * fname, const whisper
414
448
415
449
auto value_s = [&](const char *name, const char *val, bool end = false ) {
416
450
start_value (name);
417
- fout << " \" " << val << (end ? " \"\n " : " \" ,\n " );
451
+ char * val_escaped = escape_double_quotes (val);
452
+ fout << " \" " << val_escaped << (end ? " \"\n " : " \" ,\n " );
453
+ free (val_escaped);
418
454
};
419
455
420
456
auto end_value = [&](bool end = false ) {
@@ -455,7 +491,7 @@ bool output_json(struct whisper_context * ctx, const char * fname, const whisper
455
491
value_i (" ctx" , whisper_model_n_text_ctx (ctx));
456
492
value_i (" state" , whisper_model_n_text_state (ctx));
457
493
value_i (" head" , whisper_model_n_text_head (ctx));
458
- value_i (" leyer " , whisper_model_n_text_layer (ctx), true );
494
+ value_i (" layer " , whisper_model_n_text_layer (ctx), true );
459
495
end_obj ();
460
496
value_i (" mels" , whisper_model_n_mels (ctx));
461
497
value_i (" f16" , whisper_model_f16 (ctx), true );
@@ -477,7 +513,7 @@ bool output_json(struct whisper_context * ctx, const char * fname, const whisper
477
513
const int64_t t1 = whisper_full_get_segment_t1 (ctx, i);
478
514
479
515
start_obj ();
480
- start_obj (" timestanps " );
516
+ start_obj (" timestamps " );
481
517
value_s (" from" , to_timestamp (t0, true ).c_str ());
482
518
value_s (" to" , to_timestamp (t1, true ).c_str (), true );
483
519
end_obj ();
@@ -639,22 +675,6 @@ int main(int argc, char ** argv) {
639
675
return 3 ;
640
676
}
641
677
642
- // initial prompt
643
- std::vector<whisper_token> prompt_tokens;
644
-
645
- if (!params.prompt .empty ()) {
646
- prompt_tokens.resize (1024 );
647
- prompt_tokens.resize (whisper_tokenize (ctx, params.prompt .c_str (), prompt_tokens.data (), prompt_tokens.size ()));
648
-
649
- fprintf (stderr, " \n " );
650
- fprintf (stderr, " initial prompt: '%s'\n " , params.prompt .c_str ());
651
- fprintf (stderr, " initial tokens: [ " );
652
- for (int i = 0 ; i < (int ) prompt_tokens.size (); ++i) {
653
- fprintf (stderr, " %d " , prompt_tokens[i]);
654
- }
655
- fprintf (stderr, " ]\n " );
656
- }
657
-
658
678
for (int f = 0 ; f < (int ) params.fname_inp .size (); ++f) {
659
679
const auto fname_inp = params.fname_inp [f];
660
680
const auto fname_out = f < (int ) params.fname_out .size () && !params.fname_out [f].empty () ? params.fname_out [f] : params.fname_inp [f];
@@ -718,8 +738,7 @@ int main(int argc, char ** argv) {
718
738
719
739
wparams.speed_up = params.speed_up ;
720
740
721
- wparams.prompt_tokens = prompt_tokens.empty () ? nullptr : prompt_tokens.data ();
722
- wparams.prompt_n_tokens = prompt_tokens.empty () ? 0 : prompt_tokens.size ();
741
+ wparams.initial_prompt = params.prompt .c_str ();
723
742
724
743
wparams.greedy .best_of = params.best_of ;
725
744
wparams.beam_search .beam_size = params.beam_size ;
0 commit comments