You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
49 lines
1.3 KiB
49 lines
1.3 KiB
4 years ago
|
# -*- coding: utf-8 -*-
|
||
|
"""生成初始的 kMandarin_8105.txt"""
|
||
|
from merge_unihan import parse_pinyins, code_to_hanzi
|
||
|
|
||
|
|
||
|
def parse_china_x():
    """Yield the first whitespace-separated field of each data line.

    Reads ``tools/china-8105-06062014.txt`` relative to the current
    working directory.  Blank lines and lines whose first non-blank
    character is ``#`` are skipped.  This is a generator, so the file is
    only opened once iteration starts and stays open while it is consumed.
    """
    with open('tools/china-8105-06062014.txt') as fp:
        for raw_line in fp:
            fields = raw_line.split()
            # No fields means a blank line; a leading '#' marks a comment.
            if not fields or fields[0].startswith('#'):
                continue
            yield fields[0]
|
||
|
|
||
|
|
||
|
def parse_zdic():
    """Parse ``zdic.txt`` (in the current directory) with ``parse_pinyins``.

    Returns whatever ``parse_pinyins`` returns for the open file object.
    """
    with open('zdic.txt') as zdic_file:
        pinyins = parse_pinyins(zdic_file)
    return pinyins
|
||
|
|
||
|
|
||
|
def parse_kmandain():
    """Parse ``pinyin.txt`` (in the current directory) with ``parse_pinyins``.

    Returns whatever ``parse_pinyins`` returns for the open file object.
    """
    with open('pinyin.txt') as pinyin_file:
        pinyins = parse_pinyins(pinyin_file)
    return pinyins
|
||
|
|
||
|
|
||
|
def diff(kmandarin, zdic, commons):
    """Yield one output line per key in *commons*.

    *kmandarin* and *zdic* are looked up by key, and the first element of
    each stored value is used.  For every key, in iteration order:

    * key in *kmandarin*: emit ``key: value # hanzi``; when *zdic* also has
      the key and its first element differs, append ``-> <zdic value>``.
    * key only in *zdic*: emit ``key: value # hanzi`` using the zdic value.
    * key in neither: emit a commented-out line with ``<-`` as the value.

    ``hanzi`` is the result of ``code_to_hanzi(key)``.
    """
    plain = '{0}: {1} # {2}'
    for code in commons:
        char = code_to_hanzi(code)

        if code not in kmandarin:
            # Fall back to zdic, or emit a commented-out placeholder line.
            if code in zdic:
                yield plain.format(code, zdic[code][0], char)
            else:
                yield '# {0}: {1} # {2}'.format(code, '<-', char)
            continue

        reading = kmandarin[code][0]
        disagrees = code in zdic and reading != zdic[code][0]
        if disagrees:
            # Keep the kmandarin value but record the zdic alternative.
            yield '{0}: {1} # {2} -> {3}'.format(
                code, reading, char, zdic[code][0]
            )
        else:
            yield plain.format(code, reading, char)
|
||
|
|
||
|
if __name__ == '__main__':
    # Load both pinyin tables first; the key list is a generator and is
    # consumed while diff() runs.
    zdic_table = parse_zdic()
    kmandarin_table = parse_kmandain()
    for entry in diff(kmandarin_table, zdic_table, parse_china_x()):
        print(entry)
|