-
Notifications
You must be signed in to change notification settings - Fork 36
/
NiuTrans.hierarchy.config
114 lines (83 loc) · 3.42 KB
/
NiuTrans.hierarchy.config
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
############################################
### NiuTrans decoder configuration file ###
### hierarchical phrase-based system ###
### 2011-07-01 ###
############################################
#>>>>>>>>>>>>>>>>>>>>>>
#>>> runtime resource tables
# language model
param="Ngram-LanguageModel-File" value="../../data/lm/lm.trie.large.data"
# target-side vocabulary
param="Target-Vocab-File" value="../../data/lm/lm.large.vocab"
# phrase translation model
param="SCFG-Rule-Set" value="../../data/model/rule.hiero.txt"
# List of punctuation symbols (the sample file is encoded in UTF-8).
# Users can modify or replace this file as needed.
param="Punct-Vocab-File" value="punct.txt"
#>>>>>>>>>>>>>>>>>>>>>>
#>>> runtime parameters
# number of MERT iterations
param="nround" value="12"
# order of n-gram language model
param="ngram" value="3"
# maximum phrase length
param="maxphraselength" value="7"
# use punctuation pruning (1) or not (0)
param="usepuncpruning" value="1"
# use cube-pruning (1) or not (0)
param="usecubepruning" value="1"
# use maxent reordering model (1) or not (0)
param="use-me-reorder" value="1"
# use msd reordering model (1) or not (0)
param="use-msd-reorder" value="1"
# number of threads
param="nthread" value="4"
# how many translations are dumped
param="nbest" value="30"
# output OOV words
param="outputoov" value="0"
# output source words that are explicitly deleted
param="outputnull" value="0"
# beam size (or beam width)
param="beamsize" value="30"
# beam scale. This parameter controls the number of hypotheses
# that are computed in decoding.
# e.g., beamscale=3 means we search over 3 * beamsize hypotheses
param="beamscale" value="1"
# number of references of dev. set
param="nref" value="1"
# allow null-translation (i.e. word-deletion)
param="usenulltrans" value="0"
# allow sequence of null-translations
param="snulltrans" value="1"
# normalize output (1) or not (0)
param="normalizeoutput" value="0"
# distortion limit
param="maxdd" value="10"
#>>>>>>>>>>>>>>>>>>>>>>
#>>> model parameters
# features used
# 0: n-gram language model
# 1: number of target-words
# 2: Pr(e|f). f->e translation probability.
# 3: Lex(e|f). f->e lexical weight
# 4: Pr(f|e). e->f translation probability.
# 5: Lex(f|e). e->f lexical weight
# 6: number of rules
# 7: number of bi-lex links (not fired in current version)
# 8: number of NULL-translation (i.e. word deletion)
# 9: number of phrasal rules
# 10: number of glue rules
# 11: <UNDEFINED>
# 12: <UNDEFINED>
# 13: <UNDEFINED>
# 14: <UNDEFINED>
# 15: <UNDEFINED>
# 16: <UNDEFINED>
# feature weights
param="weights" value="2.000 1.000 1.000 0.200 1.000 0.200 0.000 0.500 0.000 0.500 0.000 0.000 0.000 0.000 0.000 0.000 0.000"
# bound the feature weight in MERT
# e.g. the first number "-3:7" means that the first feature weight ranges in [-3, 7]
param="ranges" value="-3:7 -1:3 0:3 0:0.4 0:3 0:0.4 -3:3 -3:3 -3:0 -3:3 -3:3 0:0 0:0 0:0 0:0 0:0 0:0"
# fix a dimension (1) or not (0)
param="fixedfs" value="0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0"