format code

pull/658/head
chenfeiyu 3 years ago
parent 7779f33e74
commit ae92fa7498

@ -1,7 +1,22 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from text_processing import normalization
# Command-line interface: two positional string arguments, the input
# sentences and the path where the output file is saved.
parser = argparse.ArgumentParser(
    description="Normalize text in Chinese with some rules.")
parser.add_argument("input", type=str, help="the input sentences")
parser.add_argument("output", type=str, help="path to save the output file.")
args = parser.parse_args()

@ -1,5 +1,4 @@
export MAIN_ROOT=${PWD}/../../
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export LC_ALL=C

@ -1,5 +1,4 @@
#!/usr/bin/env bash
source path.sh
stage=-1

@ -2,6 +2,7 @@
`opencc <https://github.com/BYVoid/OpenCC>`_.
"""
import opencc
# Module-level OpenCC converter built from the "t2s.json" configuration;
# tranditional_to_simplified() delegates to its convert() method.
_t2s_converter = opencc.OpenCC("t2s.json")
@ -11,4 +12,4 @@ def tranditional_to_simplified(text: str) -> str:
return _t2s_converter.convert(text)
def simplified_to_traditional(text: str) -> str:
    """Return *text* converted by the module-level ``_s2t_converter``.

    Args:
        text: The string to convert.

    Returns:
        Whatever ``_s2t_converter.convert(text)`` returns.

    NOTE(review): ``_s2t_converter`` is defined outside this view; it is
    presumably an ``opencc.OpenCC`` instance configured for
    simplified-to-traditional conversion -- confirm against its definition.
    """
    return _s2t_converter.convert(text)

@ -1,6 +1,7 @@
import re
from .num import verbalize_cardinal, verbalize_digit, num2str, DIGITS
def _time_num2str(num_string: str) -> str:
"""A special case for verbalizing number in time."""
result = num2str(num_string.lstrip('0'))
@ -60,4 +61,4 @@ def replace_date2(match: re.Match) -> str:
result += f"{verbalize_cardinal(month)}"
if day:
result += f"{verbalize_cardinal(day)}"
return result
return result

@ -2,6 +2,7 @@ import string
import re
from pypinyin.constants import SUPPORT_UCS4
# Full-width / half-width conversion
# Mapping table from full-width to half-width English letters (num: 52)
F2H_ASCII_LETTERS = {

@ -2,6 +2,7 @@
Rules to verbalize numbers into Chinese characters.
https://zh.wikipedia.org/wiki/中文数字#現代中文
"""
import re
from typing import List
from collections import OrderedDict

@ -1,6 +1,7 @@
import re
from .num import verbalize_digit
# Normalize landline / mobile phone numbers
# Mobile phones
# http://www.jihaoba.com/news/show/13680
@ -27,4 +28,4 @@ def phone2str(phone_string: str, mobile=True) -> str:
def replace_phone(match: re.Match) -> str:
    """Verbalize the full text of a phone-number regex match.

    Args:
        match: A regex match object; its entire matched text
            (``match.group(0)``) is used.

    Returns:
        The result of ``phone2str`` on the matched text, called with its
        default ``mobile=True``.
    """
    return phone2str(match.group(0))

@ -1,6 +1,7 @@
import re
from .num import num2str
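# Temperature expressions; a temperature changes how the minus sign is read
# e.g. -3°C is read as 零下三度 ("three degrees below zero")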
RE_TEMPERATURE = re.compile(
@ -14,4 +15,4 @@ def replace_temperature(match: re.Match) -> str:
temperature: str = num2str(temperature)
unit: str = "摄氏度" if unit == "摄氏度" else ""
result = f"{sign}{temperature}{unit}"
return result
return result

@ -1,6 +1,7 @@
import re
from typing import List
# Matches one sentence-final mark (。, ! or ?) together with an optional
# closing quotation mark (” or ’) right after it; the whole match is
# captured as group 1.
SENTENCE_SPLITOR = re.compile(r'([。!?][”’]?)')
def split(text: str) -> List[str]:

Loading…
Cancel
Save