forked from hlt-mt/TranscRater
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcompute_WER.py
67 lines (50 loc) · 2.11 KB
/
compute_WER.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
'''
Created on June 9, 2016
@author: jalalvand
Compute the word error rate (WER) between two text files. Each line starts
with the ID of the sentence, followed by the words of that sentence.
e.g.:
python compute_WER.py reference hypothesis
P.S. The core is written by Martin Thoma, https://martin-thoma.com/word-error-rate-calculation/#comment-2727808425
'''
from __future__ import division
import sys
import argparse
import math
import numpy as np
from itertools import izip
#!/usr/bin/env python
def _edit_distance(ref_words, hyp_words):
    """Return the word-level Levenshtein distance between two token lists.

    Substitutions, insertions and deletions all cost 1. Plain Python
    integers are used, so the result cannot overflow however long the
    sentences are.
    """
    # Only the previous row of the dynamic-programming table is needed to
    # compute the current one, so the full matrix is never materialised.
    previous = list(range(len(hyp_words) + 1))
    for i, ref_word in enumerate(ref_words, 1):
        current = [i]
        for j, hyp_word in enumerate(hyp_words, 1):
            if ref_word == hyp_word:
                current.append(previous[j - 1])
            else:
                substitution = previous[j - 1] + 1
                insertion = current[j - 1] + 1
                deletion = previous[j] + 1
                current.append(min(substitution, insertion, deletion))
        previous = current
    return previous[-1]


def main(ref, hyp):
    """Compute the per-sentence word error rate between two text files.

    Both files are read in lockstep, one sentence per line. The first
    whitespace-separated token of a line is the sentence ID and the
    remaining tokens are the words. Reading stops at the end of the shorter
    file. Line pairs that are both blank are skipped.

    Args:
        ref: path of the reference transcription file.
        hyp: path of the hypothesis transcription file.

    Returns:
        A 1-D float numpy array with one WER value (edit distance divided by
        the number of reference words) per compared line pair, or None if
        the sentence IDs of a line pair differ (an error is printed).
        When a reference has no words the WER is 0.0 if the hypothesis is
        empty as well and inf otherwise.
    """
    # Lazy pairing of the two files on both Python 2 and Python 3.
    try:
        from itertools import izip as lazy_zip
    except ImportError:
        lazy_zip = zip

    # Collect in a list and convert once: np.append copies the whole array
    # on every call.
    wer_values = []
    with open(ref, 'r') as rf, open(hyp, 'r') as hf:
        for rl, hl in lazy_zip(rf, hf):
            ref_tokens = rl.split()
            hyp_tokens = hl.split()
            if not ref_tokens and not hyp_tokens:
                # e.g. trailing blank lines in both files
                continue

            ref_id = ref_tokens[0] if ref_tokens else ""
            hyp_id = hyp_tokens[0] if hyp_tokens else ""
            if ref_id != hyp_id:
                print("ERROR!!! hyp id:" + hyp_id + " does not match with ref id:" + ref_id)
                print("HINT: You might need to reorder the transcriptions and the references to be compatible\n")
                return None

            ref_words = ref_tokens[1:]
            hyp_words = hyp_tokens[1:]
            distance = _edit_distance(ref_words, hyp_words)
            if ref_words:
                # float() keeps this a true division even without
                # "from __future__ import division".
                wer = float(distance) / len(ref_words)
            elif distance:
                wer = float('inf')
            else:
                wer = 0.0
            wer_values.append(wer)
    return np.array(wer_values, dtype=float)
if __name__ == '__main__':
    # Usage: python compute_WER.py <reference> <hypothesis>
    # NOTE: main()'s return value is passed straight to sys.exit(). A None
    # result (ID mismatch) therefore exits with status 0, while a returned
    # array is printed to stderr and the process exits with status 1.
    reference_path = sys.argv[1]
    hypothesis_path = sys.argv[2]
    sys.exit(main(reference_path, hypothesis_path))