#!/usr/bin/python
import sys,os,re,glob
import codecs
try:
    _TEXT_TYPE = unicode  # Python 2: decoded text is ``unicode``
except NameError:
    _TEXT_TYPE = str  # Python 3: the text type is ``str``


def conv_encoding(data, to_enc='utf_8'):
    """Re-encode a byte string of unknown Japanese encoding as *to_enc*.

    The input is decoded with the first codec that accepts it, trying
    utf_8, euc_jp, euc_jis_2004, euc_jisx0213 and shift_jis in that order,
    and the decoded text is then encoded with *to_enc*.

    Args:
        data: Byte string to convert.  Already-decoded text is accepted
            too and is simply encoded with *to_enc*.
        to_enc: Name of the target codec (default ``'utf_8'``).

    Returns:
        The re-encoded byte string.  If none of the candidate codecs can
        decode *data*, or *data* has no ``decode`` method at all, *data*
        is returned unchanged (best effort).
    """
    # Already text: nothing to guess, just encode.
    if isinstance(data, _TEXT_TYPE):
        return data.encode(to_enc)

    # Not a byte-like object: keep the best-effort contract and hand it back.
    decode = getattr(data, 'decode', None)
    if decode is None:
        return data

    # Order matters: the first codec that decodes without error wins.
    lookup = ('utf_8', 'euc_jp', 'euc_jis_2004', 'euc_jisx0213', 'shift_jis')
    for enc in lookup:
        try:
            text = decode(enc)
        except UnicodeError:
            # Wrong guess; only decoding failures are ignored here so that
            # unrelated errors (e.g. KeyboardInterrupt) are not swallowed.
            continue
        return text.encode(to_enc)
    return data
def parse_julius_result(julius_result):
    """Collect source ids, azimuths and utterances from a Julius result file.

    Every line of the file is scanned independently for three patterns:
    ``source_id = <int>``, ``, azimuth = <number>`` and ``sentence1: <text>``.
    Each hit is appended to its own list, so the three lists line up only
    if the file reports all three fields for every recognition result.

    Args:
        julius_result: Path of the result text file to read.

    Returns:
        A tuple ``(id_list, dir_list, utter_list)`` holding the ids as
        ``int``, the azimuths as ``float`` and the utterances as returned
        by ``conv_encoding``.

    Raises:
        IOError: If the file cannot be opened.
        ValueError: If an azimuth matches the pattern but is not a valid
            number (e.g. contains two dots).
    """
    id_list = []     # recognized source ids
    dir_list = []    # directions (azimuth values)
    utter_list = []  # contents of the utterances

    re_id = re.compile(r"source_id = ([0-9]+)")
    re_dir = re.compile(r", azimuth = (-?[0-9.]+)")
    # Switch the prefix to "pass1_best: " to read first-pass results instead.
    re_utter = re.compile(r"sentence1: .*")

    # Number of leading characters dropped from a matched utterance:
    # "sentence1: " plus, presumably, the "<s> " start marker -- TODO confirm
    # against the actual Julius output.
    utter_start = 15
    # Presumably the end-of-sentence marker; with the preceding space it
    # accounts for the 5 trailing characters that are dropped.
    end_marker = "</s>"
    end_trim = 5

    # "with" guarantees the file is closed even if parsing raises.
    with open(julius_result, 'r') as result_file:
        for line in result_file:
            found = re_id.search(line)
            if found:
                id_list.append(int(found.group(1)))

            found = re_dir.search(line)
            if found:
                dir_list.append(float(found.group(1)))

            found = re_utter.search(line)
            if found:
                sentence = found.group()
                # Only trim the tail when the end marker is really there;
                # otherwise the last characters of the utterance would be
                # cut off.
                if sentence.endswith(end_marker):
                    sentence = sentence[utter_start:-end_trim]
                else:
                    sentence = sentence[utter_start:]
                utter_list.append(conv_encoding(sentence))

    return id_list, dir_list, utter_list
if __name__ == '__main__':
if len(sys.argv) < 2:
print 'this result.txt'
exit()
deg = 0
margin = 180
if len(sys.argv) >= 4:
deg = float(sys.argv[2])
margin = float(sys.argv[3])
(id_list, dir_list, utter_list) = parse_julius_result(sys.argv[1])
print 'id\tdir\trecognition'
for (i,d,u) in filter(lambda x: abs(x[1]-deg)