From 28974ab7ec4b3848fd82b2dbf58e04575edcc312 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Fri, 21 May 2021 07:57:34 +0000 Subject: [PATCH] jieba as default wordseg --- third_party/python-pinyin/pypinyin/core.py | 4 +++- third_party/python-pinyin/requirements.txt | 1 + third_party/python-pinyin/setup.py | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/third_party/python-pinyin/pypinyin/core.py b/third_party/python-pinyin/pypinyin/core.py index ef9103760..3b81820d5 100644 --- a/third_party/python-pinyin/pypinyin/core.py +++ b/third_party/python-pinyin/pypinyin/core.py @@ -16,6 +16,7 @@ from pypinyin.converter import DefaultConverter from pypinyin.seg import mmseg from pypinyin.seg import simpleseg from pypinyin.utils import (_replace_tone2_style_dict_to_default) +import jieba TStyle = Style TErrors = Union[Callable[[Text], Text], Text] @@ -139,7 +140,8 @@ class Pinyin(): :param hans: 分词前的字符串 :return: ``None`` or ``list`` """ - pass + outs = list(jieba.cut(hans)) # Segment with jieba by default (semantic word segmentation). + return outs def post_seg(self, hans: Text, seg_data: List[Text], **kwargs: Any) -> Optional[List[Text]]: diff --git a/third_party/python-pinyin/requirements.txt b/third_party/python-pinyin/requirements.txt index baa1f0152..aca786bcc 100644 --- a/third_party/python-pinyin/requirements.txt +++ b/third_party/python-pinyin/requirements.txt @@ -10,3 +10,4 @@ Sphinx tox twine wheel>=0.21 +jieba diff --git a/third_party/python-pinyin/setup.py b/third_party/python-pinyin/setup.py index 9d86ccb9e..b30f7962b 100644 --- a/third_party/python-pinyin/setup.py +++ b/third_party/python-pinyin/setup.py @@ -17,7 +17,7 @@ packages = [ 'pypinyin.style', ] -requirements = [] +requirements = ["jieba"] if sys.version_info[:2] < (3, 4): requirements.append('enum34') if sys.version_info[:2] < (3, 5):