forked from aalto-speech/AaltoASR
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'develop' of github.com:aalto-speech/AaltoASR into develop
- Loading branch information
Showing
49 changed files
with
3,187 additions
and
1,717 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
#include "Fst.hh" | ||
#include "misc/str.hh" | ||
|
||
#include <cstdlib> | ||
#define strtof strtod | ||
|
||
// Default constructor: the node and arc containers start out empty and
// initial_node_idx is set to -1 until read() assigns it from an "I" line.
Fst::Fst()
  : initial_node_idx(-1)
{
}
|
||
void Fst::read(std::string &fname) { | ||
std::string line; | ||
|
||
FILE *ifh = fopen(fname.c_str(), "r"); | ||
if (ifh==nullptr) { | ||
perror("Error"); | ||
exit(-1); // FIXME: we should use exceptions | ||
} | ||
|
||
str::read_line(line, ifh, true); | ||
if (line != "#FSTBasic MaxPlus") { | ||
fprintf(stderr, "Unknown header '%s'.\n", line.c_str()); | ||
throw ReadError(); | ||
} | ||
std::vector<std::string> fields; | ||
while (str::read_line(line, ifh, true)) { | ||
fields = str::split(line, " ", true); | ||
if (fields.size()<2) { | ||
fprintf(stderr, "Too few fields '%s'.\n", line.c_str()); | ||
throw ReadError(); | ||
} | ||
|
||
// Resize nodes to the size of the first mentioned node | ||
auto first_node_idx = atoi(fields[1].c_str()); | ||
if (nodes.size() <= first_node_idx) { | ||
nodes.resize(first_node_idx+1); | ||
} | ||
|
||
if (fields[0]=="I") { | ||
initial_node_idx = first_node_idx; | ||
if (fields.size()>2) { | ||
fprintf(stderr, "Too many fields for I: '%s'.\n", line.c_str()); | ||
throw ReadError(); | ||
} | ||
continue; | ||
} | ||
|
||
if (fields[0]=="F") { | ||
nodes[first_node_idx].end_node = true; | ||
if (fields.size()>2) { | ||
fprintf(stderr, "Too many fields for F: '%s'.\n", line.c_str()); | ||
throw ReadError(); | ||
} | ||
continue; | ||
} | ||
|
||
if (fields[0]=="T") { | ||
if (fields.size()<3 || fields.size()>6) { | ||
fprintf(stderr, "Weird number of fields for T: '%s'.\n", line.c_str()); | ||
throw ReadError(); | ||
} | ||
|
||
auto second_node_idx = atoi(fields[2].c_str()); | ||
if (nodes.size() <= second_node_idx) { | ||
nodes.resize(second_node_idx+1); | ||
} | ||
|
||
auto aidx=arcs.size(); | ||
arcs.resize(aidx+1); | ||
Arc &a = arcs[aidx]; | ||
a.source = first_node_idx; | ||
a.target = second_node_idx; | ||
|
||
if (fields.size()>=5) { | ||
if (fields[4] != ",") { | ||
a.emit_symbol = fields[4]; | ||
} | ||
} | ||
|
||
if (fields.size()>=6) { | ||
a.transition_logprob = strtof(fields[5].c_str(), nullptr); | ||
} | ||
nodes[first_node_idx].arcidxs.push_back(aidx); | ||
|
||
// Move emission pdf indices from arcs to nodes | ||
auto emission_pdf_idx = atoi(fields[3].c_str()); | ||
if (nodes[second_node_idx].emission_pdf_idx==-1) { | ||
nodes[second_node_idx].emission_pdf_idx = emission_pdf_idx; | ||
} else if (nodes[second_node_idx].emission_pdf_idx != emission_pdf_idx) { | ||
fprintf(stderr, "Conflicting emission_pdf_indices for node %d: %d != %d.\n", | ||
second_node_idx, nodes[second_node_idx].emission_pdf_idx, emission_pdf_idx); | ||
throw ReadError(); | ||
} | ||
|
||
|
||
} else { | ||
fprintf(stderr, "Weird type indicator: '%s'.\n", fields[0].c_str()); | ||
throw ReadError(); | ||
} | ||
|
||
} | ||
fclose(ifh); | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
#ifndef FST_HH | ||
#define FST_HH | ||
/* | ||
Simple class to handle mitfst (http://people.csail.mit.edu/ilh/fst/) format networks. | ||
AT&T fst toolkit and openfst have very similar formats, so this may work directly or | ||
with small adjustments with those networks. | ||
*/ | ||
|
||
#include <exception>
#include <sstream>
#include <string>
#include <vector>
|
||
/**
 * Container for a network loaded from a text file by read(): a list of
 * nodes, a list of arcs, and the index of the initial node.
 */
class Fst {
public:
  /** Exception thrown by read() when the input file is malformed. */
  struct ReadError : public std::exception {
    const char *what() const noexcept override {
      return "Fst: read error"; }
  };

  /** A directed transition between two nodes, identified by node index. */
  struct Arc {
    int source = 0;                  // index into Fst::nodes
    int target = 0;                  // index into Fst::nodes
    float transition_logprob = 0.0f;
    std::string emit_symbol;         // empty when the arc emits nothing

    /** Human-readable one-line description of this arc. */
    std::string str() const {
      std::ostringstream os;
      os << "Arc " << source << " -> " << target << " (" << transition_logprob << "): " << emit_symbol;
      return os.str();
    }
  };

  /** A node together with the indices of the arcs that leave it. */
  struct Node {
    Node() : emission_pdf_idx(-1), end_node(false) {}
    int emission_pdf_idx;            // -1 until read() assigns an index
    std::vector<int> arcidxs;        // indices into Fst::arcs
    bool end_node;

    /** Human-readable description: emission pdf index and arc count. */
    std::string str() const {
      std::ostringstream os;
      os << "Node " << emission_pdf_idx << " (" << arcidxs.size() << ")";
      return os.str();
    }
  };

  Fst();

  /** Read a network from the named file; throws ReadError on bad input. */
  void read(std::string &);

  /** Convenience overload taking a C string file name. */
  inline void read(const char *s) {std::string ss(s); read(ss);}

  int initial_node_idx;
  std::vector<Node> nodes;
  std::vector<Arc> arcs;
};
|
||
#endif | ||
|
Oops, something went wrong.