parent
e04f111b8a
commit
881618dc2a
@ -0,0 +1,63 @@
|
|||||||
|
# -*- encoding:utf-8 -*-
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
'''
|
||||||
|
@author: david_95
|
||||||
|
|
||||||
|
Assume you executed the g2p test twice and the WER rates differ; you would like to see which sentence errors caused your rate to go up.
|
||||||
|
So put the test results (exp/g2p) of the two runs into two directories, e.g. exp/prefolder and exp/curfolder,
|
||||||
|
run this program as "python compare_badcase.py prefolder curfolder"
|
||||||
|
then you will get the differences between the two runs: uuid, phonetics, and Chinese samples
|
||||||
|
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
|
def compare(prefolder, curfolder, text_path="data/g2p/text"):
    '''
    Print the utterances whose "Eval:" line differs between two g2p runs.

    Reads ``text.g2p.pra`` from both folders in lockstep. For every line of
    the first file that starts with "Eval:" and differs from the matching
    line of the second file, prints the source sentence found in
    ``text_path``, followed by the reference line and the hypothesis / eval
    lines of both runs. "P1" marks lines from ``prefolder`` and "P2" lines
    from ``curfolder``, i.e. the order the folders were given in.
    After the scan, the matched sentences are printed again with the
    ``#1``..``#4`` markers removed.

    Args:
        prefolder: directory holding the ``text.g2p.pra`` of the first run.
        curfolder: directory holding the ``text.g2p.pra`` of the second run.
        text_path: file with one sentence per line, each starting with the
            utterance id. It is only opened if at least one difference is
            found.

    Raises:
        IndexError: if a differing "Eval:" line has fewer than four collected
            lines before it, so the id line cannot be located.
    '''
    pre_path = prefolder + "/text.g2p.pra"
    cur_path = curfolder + "/text.g2p.pra"

    linecnt = 0
    pre_block = []
    cur_block = []
    zh_lines = []
    # Sentences from text_path; loaded once, on the first mismatch, instead
    # of re-reading the whole file for every differing utterance.
    text_lines = None

    # The files contain Chinese text, so do not depend on the locale default.
    with open(pre_path, "r", encoding="utf-8") as pre_file, \
            open(cur_path, "r", encoding="utf-8") as cur_file:
        # zip() stops at the shorter file; surplus lines are ignored.
        for pre_line, cur_line in zip(pre_file, cur_file):
            linecnt += 1
            if linecnt < 11:  # skip non-data head in files
                continue

            pre_line = pre_line.strip()
            cur_line = cur_line.strip()
            pre_block.append(pre_line)
            cur_block.append(cur_line)

            if not pre_line.startswith("Eval:") or pre_line == cur_line:
                continue

            # The id line sits four lines above the "Eval:" line.
            uuid = pre_block[-5].replace("id: (baker_", "").replace(")", "")
            uuid = uuid.strip()

            if text_lines is None:
                with open(text_path, "r", encoding="utf-8") as txt:
                    text_lines = txt.readlines()

            for line in text_lines:
                if line.strip().startswith(uuid):
                    print(line)
                    zh_lines.append(re.sub(r"#[1234]", "", line))
                    break

            print("*" + cur_block[-3])  # ref
            print("P1 " + pre_block[-2])
            print("P2 " + cur_block[-2])
            print("P1 " + pre_block[-1])
            print("P2 " + cur_block[-1] + "\n\n")
            # Start collecting the next utterance from scratch.
            pre_block = []
            cur_block = []

    print("\n")
    print("\n".join(zh_lines))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Validate the command line explicitly: an assert would be stripped when
    # Python runs with -O, and would show a traceback instead of the usage.
    if len(sys.argv) != 3:
        sys.exit("Usage: python compare_badcase.py %prefolder %curfolder")
    # argv[1] is reported as P1 and argv[2] as P2.
    compare(sys.argv[1], sys.argv[2])
|
Loading…
Reference in new issue