-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathndu_breakdown.py~
executable file
·142 lines (111 loc) · 4.19 KB
/
ndu_breakdown.py~
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/python
'''
This tool computes the amount of time spent by the driver and the
copilot in each of the domains, reported per run and in total.
'''
import argparse
import os
import read_write_annotation_files as rw
import annotation_schema
import sys
import metadata
# Short module-level aliases for the reader/writer functions exposed by
# read_write_annotation_files (imported above as ``rw``).
read_complex = rw.read_annotation_file
read_simple = rw.read_simple_annotation_file
write_complex = rw.write_annotation_file
write_simple = rw.write_simple_annotation_file
def _add_word_duration(word, domLabel, counters):
    """Add the duration of ``word`` to ``counter[domLabel][word.speaker]``.

    The duration is ``float(word.e_time) - float(word.s_time)``; it is added
    to every table in ``counters``.

    Returns True when the tables were updated. Returns False, after printing
    ``error for <word.name>``, when ``domLabel`` or the word's speaker is not
    a key of the tables.
    """
    duration = float(word.e_time) - float(word.s_time)
    spk = word.speaker
    try:
        for counter in counters:
            counter[domLabel][spk] += duration
    except KeyError:
        # Unknown domain label or speaker: report the word and let the
        # caller count the failure instead of aborting the whole analysis.
        print('error for %s' % (word.name,))
        return False
    return True


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='''
        Script to report the amount of time spent by the driver
        and the copilot in each of the domains.
        ''')
    parser.add_argument('data',
                        metavar='<data directory>',
                        help='''
                        directory where all the annotations
                        are stored
                        ''')
    parser.add_argument('list',
                        metavar='<run list>',
                        help='''
                        list of all the runs for which
                        analysis is to be performed
                        ''')
    # --config is still accepted so existing command lines keep working,
    # but nothing in this script reads its value.
    parser.add_argument('--config',
                        type=str,
                        default='',
                        help='''
                        file where other optional configurations
                        are stored
                        ''')
    args = vars(parser.parse_args())
    dataDir = args['data']
    runListId = args['list']

    # One run id per line; blank lines are skipped so they do not turn into
    # an empty run id (and a bogus annotation path) further down.
    with open(runListId, 'r') as runListP:
        runList = [line.strip() for line in runListP if line.strip()]

    domainLabels = annotation_schema.domainLabels
    domNames = [domainLabel.name for domainLabel in domainLabels]
    spkList = ['copilot', 'driver']

    # analysisCount[domain][speaker] -> duration accumulated over all runs.
    analysisCount = {domName: {spk: 0 for spk in spkList}
                     for domName in domNames}
    errCount = 0

    # Header row: one column per domain.
    print('\t'.join(domNames))
    for runId in runList:
        # Same layout as analysisCount, but restarted from zero for each run.
        runAnalysisCount = {domName: {spk: 0 for spk in spkList}
                            for domName in domNames}
        counters = (analysisCount, runAnalysisCount)

        domainId = os.path.join(dataDir, runId, 'domain-annotated.xml')
        domWords, domAnnotations, domNotes = read_simple(domainId)

        for domAnnotation in domAnnotations:
            domLabel = domAnnotation.label
            for word in domAnnotation.words:
                # Words covered by an annotation are taken out of domWords so
                # that only un-annotated words remain for the OOD pass below.
                if word in domWords:
                    domWords.remove(word)
                # NOTE(review): the original indentation was lost; this
                # assumes the duration is added for every annotated word,
                # not only for words still present in domWords - confirm.
                if not _add_word_duration(word, domLabel, counters):
                    errCount += 1

        # All remaining words are a part of OOD
        for word in domWords:
            if not _add_word_duration(word, 'OOD', counters):
                errCount += 1

        # Per-run report: the run id, then one row per speaker.
        print(runId)
        for spk in spkList:
            domVals = [str(runAnalysisCount[domName][spk])
                       for domName in domNames]
            print('\t'.join(['\t' + spk] + domVals))

    print('\nTotal:')
    for spk in spkList:
        domVals = [str(analysisCount[domName][spk]) for domName in domNames]
        print('\t'.join(['\t' + spk] + domVals))

    # Surface the number of skipped words on stderr so the tab-separated
    # report on stdout stays unchanged.
    if errCount:
        sys.stderr.write('%d word(s) could not be counted\n' % errCount)