#!/usr/bin/python
"""Extract source ids, directions and recognized sentences from a Julius log.

Command line: the first argument is the result file to parse; when at least
three arguments are given, the second and third are read as ``deg`` and
``margin`` (floats).
"""
import codecs
import glob
import os
import re
import sys

# Text type of the running interpreter: ``unicode`` on Python 2, ``str`` on 3.
_TEXT_TYPE = type(u"")

# Encodings tried, in this order, when decoding raw bytes.
_CANDIDATE_ENCODINGS = (
    'utf_8', 'euc_jp', 'euc_jis_2004', 'euc_jisx0213', 'shift_jis',
)

# Patterns are bytes so the log can be scanned before its encoding is known.
_RE_SOURCE_ID = re.compile(br"source_id = ([0-9]+)")
_RE_AZIMUTH = re.compile(br", azimuth = (-?[0-9.]+)")
# Only the "sentence1:" line is used; the source carries a commented-out
# alternative pattern for "pass1_best:" lines.
# NOTE(review): the source tested ``re.compile("")``, which matches every
# line, so the trailing 5 characters were always removed.  Its 15/5-character
# slices match "sentence1: <s> " and " </s>", so those markers are assumed
# here and stripped only when actually present -- confirm against real logs.
_RE_SENTENCE = re.compile(br"sentence1: (?:<s> ?)?(.*?)(?: ?</s>)?$")


def conv_encoding(data, to_enc='utf_8'):
    """Re-encode *data* as *to_enc*, guessing the encoding of byte input.

    Byte strings are decoded with the first candidate encoding that accepts
    them (see ``_CANDIDATE_ENCODINGS``); text is used as is.

    Args:
        data: byte string or text to convert.
        to_enc: name of the target encoding.

    Returns:
        The value encoded as a byte string in *to_enc*.  If *data* is bytes
        that none of the candidate encodings can decode, or is not a string
        at all, it is returned unchanged.
    """
    if isinstance(data, (bytes, bytearray)):
        for enc in _CANDIDATE_ENCODINGS:
            try:
                data = data.decode(enc)
                break
            except UnicodeError:
                # Wrong guess; fall through to the next candidate.
                continue
    if isinstance(data, _TEXT_TYPE):
        return data.encode(to_enc)
    return data


def parse_julius_result(julius_result):
    """Collect ids, directions and utterances from a recognition log.

    Each line is checked independently for a ``source_id = N`` field, an
    ``, azimuth = X`` field and a ``sentence1: ...`` result, so one line may
    contribute to more than one list.

    Args:
        julius_result: path of the log file to read.

    Returns:
        A tuple ``(id_list, dir_list, utter_list)``: ids as ``int``,
        directions as ``float`` and utterances as byte strings converted by
        ``conv_encoding`` with its default target encoding.

    Raises:
        IOError/OSError: if the file cannot be opened.
        ValueError: if an azimuth field is not a valid float (e.g. "1.2.3").
    """
    id_list = []     # recognized source ids
    dir_list = []    # directions (azimuth values)
    utter_list = []  # utterance contents

    # Binary mode keeps the raw bytes so conv_encoding can detect the
    # encoding itself, and ``with`` guarantees the file is closed.
    with open(julius_result, 'rb') as log:
        for raw_line in log:
            line = raw_line.rstrip(b"\r\n")

            found = _RE_SOURCE_ID.search(line)
            if found:
                id_list.append(int(found.group(1).decode('ascii')))

            found = _RE_AZIMUTH.search(line)
            if found:
                dir_list.append(float(found.group(1).decode('ascii')))

            found = _RE_SENTENCE.search(line)
            if found:
                utter_list.append(conv_encoding(found.group(1)))

    return id_list, dir_list, utter_list


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('this result.txt')
        sys.exit()

    deg = 0
    margin = 180
    if len(sys.argv) >= 4:
        deg = float(sys.argv[2])
        margin = float(sys.argv[3])

    (id_list, dir_list, utter_list) = parse_julius_result(sys.argv[1])
    print('id\tdir\trecognition')
    # NOTE(review): the source is cut off in the middle of the statement
    # below, so it is preserved verbatim as a comment rather than guessed at.
    # It presumably filters the parsed rows by |dir - deg| against ``margin``
    # and prints them -- TODO restore from the original file.
    # for (i,d,u) in filter(lambda x: abs(x[1]-deg)