forked from coqui-ai/STT
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodelstate.cc
76 lines (63 loc) · 2 KB
/
modelstate.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#include <cstdlib>
#include <cstring>
#include <vector>
#include "ctcdecode/ctc_beam_search_decoder.h"
#include "modelstate.h"
using std::vector;
// Default constructor: every numeric model parameter starts at -1. The
// constructor itself loads nothing; it only sets these placeholder values.
ModelState::ModelState()
  : beam_width_(-1),
    n_steps_(-1),
    n_context_(-1),
    n_features_(-1),
    mfcc_feats_per_timestep_(-1),
    sample_rate_(-1),
    audio_win_len_(-1),
    audio_win_step_(-1),
    state_size_(-1)
{}
// Destructor: performs no explicit cleanup of its own; members are destroyed
// by their own destructors.
ModelState::~ModelState() = default;
// Initialisation hook. This implementation does not inspect any of its
// arguments and unconditionally reports success.
//
// Returns: STT_ERR_OK, always.
int ModelState::init(const char* model_string, bool init_from_bytes, size_t bufferSize)
{
  return STT_ERR_OK;
}
char*
ModelState::decode(const DecoderState& state) const
{
vector<Output> out = state.decode();
return strdup(alphabet_.Decode(out[0].tokens).c_str());
}
Metadata*
ModelState::decode_metadata(const DecoderState& state,
size_t num_results)
{
vector<Output> out = state.decode(num_results);
unsigned int num_returned = out.size();
CandidateTranscript* transcripts = (CandidateTranscript*)malloc(sizeof(CandidateTranscript)*num_returned);
for (int i = 0; i < num_returned; ++i) {
TokenMetadata* tokens = (TokenMetadata*)malloc(sizeof(TokenMetadata)*out[i].tokens.size());
for (int j = 0; j < out[i].tokens.size(); ++j) {
TokenMetadata token {
strdup(alphabet_.DecodeSingle(out[i].tokens[j]).c_str()), // text
static_cast<unsigned int>(out[i].timesteps[j]), // timestep
out[i].timesteps[j] * ((float)audio_win_step_ / sample_rate_), // start_time
};
memcpy(&tokens[j], &token, sizeof(TokenMetadata));
}
CandidateTranscript transcript {
tokens, // tokens
static_cast<unsigned int>(out[i].tokens.size()), // num_tokens
out[i].confidence, // confidence
};
memcpy(&transcripts[i], &transcript, sizeof(CandidateTranscript));
}
Metadata* ret = (Metadata*)malloc(sizeof(Metadata));
Metadata metadata {
transcripts, // transcripts
num_returned, // num_transcripts
NULL,
};
memcpy(ret, &metadata, sizeof(Metadata));
return ret;
}