You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
59 lines
1.2 KiB
59 lines
1.2 KiB
# -*- coding: utf-8 -*-
|
|
import sys
|
|
import codecs
|
|
|
|
|
|
def parse(lines):
    """Yield ``(hanzi, others)`` pairs parsed from ``hanzi: others`` lines.

    Blank lines and lines that start with ``#`` (after stripping surrounding
    whitespace) are skipped.  Every other line is split on its first ``:``
    only, so later colons stay in ``others``.  Both parts are stripped of
    surrounding whitespace.

    :param lines: iterable of text lines (for example an open file)
    :yield: hanzi, others
    :raises ValueError: if a non-blank, non-comment line contains no ``:``
    """
    for lineno, raw in enumerate(lines, 1):
        line = raw.strip()
        if not line or line.startswith('#'):
            continue

        hanzi, sep, others = line.partition(':')
        if not sep:
            # Name the offending line instead of failing with a bare
            # tuple-unpacking error that gives no hint where the bad data is.
            raise ValueError(
                'line {0}: missing ":" separator: {1!r}'.format(lineno, line)
            )
        yield hanzi.strip(), others.strip()
|
|
|
|
|
|
def merge(pinyin_d_list):
    """Combine a sequence of dicts into one new dict.

    The dicts are applied in order, so when a key appears in more than one
    of them the value from the last one is kept.  The inputs are not
    modified.

    :rtype: dict
    """
    return {
        key: value
        for layer in pinyin_d_list
        for key, value in layer.items()
    }
|
|
|
|
|
|
def sort(pinyin_d):
    """Return the dict's ``(key, value)`` pairs as a list ordered by key.

    :rtype: list
    """
    pairs = list(pinyin_d.items())
    pairs.sort(key=lambda pair: pair[0])
    return pairs
|
|
|
|
|
|
def output(pinyin_s):
    """Print two header comment lines, then one ``hanzi: pinyin`` line per pair.

    Only the part of each key before its first ``_`` is printed.

    :param pinyin_s: iterable of ``(hanzi, pinyin)`` pairs
    """
    header = (
        '# version: 0.10.5',
        '# source: https://github.com/mozillazg/phrase-pinyin-data',
    )
    for comment in header:
        print(comment)

    for key, reading in pinyin_s:
        word = key.partition('_')[0]
        print('{hanzi}: {pinyin}'.format(hanzi=word, pinyin=reading))
|
|
|
|
|
|
def main(files):
    """Merge the given data files and print the sorted result.

    Each file is opened with the ``utf-8-sig`` codec and read through
    ``parse``.  Within one file the first value seen for a key is kept;
    across files the per-file dicts are combined by ``merge`` in the order
    given.  The merged entries are then passed through ``sort`` and written
    by ``output``.

    :param files: iterable of file paths, processed in order
    """
    # ``codecs.open`` is deprecated as of Python 3.14.  ``io.open`` takes the
    # same encoding argument and is also available on Python 2.
    import io

    pinyin_d_list = []
    for name in files:
        with io.open(name, 'r', encoding='utf-8-sig') as fp:
            d = {}
            for h, p in parse(fp):
                # setdefault keeps the first value seen for a repeated key.
                d.setdefault(h, p)
        pinyin_d_list.append(d)

    pinyin_d = merge(pinyin_d_list)
    output(sort(pinyin_d))
|
|
|
|
|
|
# Script entry point: every command-line argument is passed to main()
# as an input file path.
if __name__ == '__main__':
    main(sys.argv[1:])
|