parent aba37810ff
commit 3f3442b98a
@@ -1,2 +0,0 @@
data
exp
@@ -1,3 +0,0 @@
# G2P

* zh - Chinese G2P
@@ -1,53 +0,0 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import re

import jieba
from pypinyin import lazy_pinyin
from pypinyin import Style


def extract_pinyin(source, target, use_jieba=False):
    with open(source, 'rt', encoding='utf-8') as fin:
        with open(target, 'wt', encoding='utf-8') as fout:
            for i, line in enumerate(fin):
                if i % 2 == 0:
                    # even lines hold "<sentence_id> <text with #N prosody marks>"
                    sentence_id, raw_text = line.strip().split()
                    raw_text = re.sub(r'#\d', '', raw_text)
                    if use_jieba:
                        raw_text = jieba.lcut(raw_text)
                    syllables = lazy_pinyin(
                        raw_text,
                        errors='ignore',
                        style=Style.TONE3,
                        neutral_tone_with_five=True)
                    transcription = ' '.join(syllables)
                    fout.write(f'{sentence_id} {transcription}\n')
                else:
                    # odd lines carry the hand-annotated pinyin; skip them here
                    continue


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="extract baker pinyin labels")
    parser.add_argument(
        "input", type=str, help="source file of baker's prosody label file")
    parser.add_argument(
        "output", type=str, help="target file to write pinyin labels")
    parser.add_argument(
        "--use-jieba",
        action='store_true',
        help="use jieba for word segmentation.")
    args = parser.parse_args()
    extract_pinyin(args.input, args.output, use_jieba=args.use_jieba)
@@ -1,37 +0,0 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse


def extract_pinyin_labels(source, target):
    """Extract pinyin labels from Baker's prosody labeling."""
    with open(source, 'rt', encoding='utf-8') as fin:
        with open(target, 'wt', encoding='utf-8') as fout:
            for i, line in enumerate(fin):
                if i % 2 == 0:
                    # even lines: "<sentence_id> <raw text>"; keep only the id
                    sentence_id, raw_text = line.strip().split()
                    fout.write(f'{sentence_id} ')
                else:
                    # odd lines hold the annotated pinyin transcription
                    transcription = line.strip()
                    fout.write(f'{transcription}\n')


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="extract baker pinyin labels")
    parser.add_argument(
        "input", type=str, help="source file of baker's prosody label file")
    parser.add_argument(
        "output", type=str, help="target file to write pinyin labels")
    args = parser.parse_args()
    extract_pinyin_labels(args.input, args.output)
@@ -1,103 +0,0 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from pathlib import Path
from typing import List
from typing import Union


def erized(syllable: str) -> bool:
    """Whether the syllable contains an erhua effect.

    Example
    --------
    huar -> True
    guanr -> True
    er -> False
    """
    # note: for pinyin, len(syllable) >= 2 is always true
    # if not, there is something wrong in the data
    assert len(syllable) >= 2, f"invalid syllable {syllable}"
    return syllable[:2] != "er" and syllable[-2] == 'r'


def ignore_sandhi(reference: List[str], generated: List[str]) -> List[str]:
    """
    Given a sequence of syllables from human annotation (reference),
    which makes sandhi explicit, and a sequence of syllables from a
    simple g2p program (generated), which does not consider sandhi,
    return the reference sequence with sandhi ignored.

    Example
    --------
    ['lao2', 'hu3'], ['lao3', 'hu3'] -> ['lao3', 'hu3']
    """
    i = 0
    j = 0

    # sandhi is ignored in the result, while other differences are kept
    result = []
    while i < len(reference):
        if erized(reference[i]):
            # an erized reference syllable corresponds to two generated syllables
            result.append(reference[i])
            i += 1
            j += 2
        elif (reference[i][:-1] == generated[j][:-1] and
              reference[i][-1] == '2' and generated[j][-1] == '3'):
            # a 3-3 -> 2-3 tone-sandhi case: keep the generated (unsandhied) tone.
            # compare with generated[j]; i and j can diverge after erhua syllables
            result.append(generated[j])
            i += 1
            j += 1
        else:
            result.append(reference[i])
            i += 1
            j += 1
    assert j == len(
        generated
    ), "length of transcriptions mismatch; there may be some characters that are ignored in the generated transcription."
    return result


def convert_transcriptions(reference: Union[str, Path],
                           generated: Union[str, Path],
                           output: Union[str, Path]):
    with open(reference, 'rt') as f_ref:
        with open(generated, 'rt') as f_gen:
            with open(output, 'wt') as f_out:
                for i, (ref, gen) in enumerate(zip(f_ref, f_gen)):
                    sentence_id, ref_transcription = ref.strip().split(' ', 1)
                    _, gen_transcription = gen.strip().split(' ', 1)
                    try:
                        result = ignore_sandhi(ref_transcription.split(),
                                               gen_transcription.split())
                        result = ' '.join(result)
                    except Exception:
                        print(
                            f"sentence_id: {sentence_id}: there is some annotation error in the reference or generated transcription; using the reference."
                        )
                        result = ref_transcription
                    f_out.write(f"{sentence_id} {result}\n")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="get the reference transcription with tone sandhi ignored.")
    parser.add_argument(
        "--reference",
        type=str,
        help="path to the reference transcription of baker dataset.")
    parser.add_argument(
        "--generated", type=str, help="path to the generated transcription.")
    parser.add_argument("--output", type=str, help="path to save result.")
    args = parser.parse_args()
    convert_transcriptions(args.reference, args.generated, args.output)
@@ -1,33 +0,0 @@
#!/bin/bash

exp_dir="exp"
data_dir="data"

source ${MAIN_ROOT}/utils/parse_options.sh || exit -1

archive=${data_dir}/"BZNSYP.rar"
if [ ! -f ${archive} ]; then
    echo "Baker Dataset not found! Download it first to the data_dir."
    exit -1
fi

MD5='c4350563bf7dc298f7dd364b2607be83'
md5_result=$(md5sum ${archive} | awk -F[' '] '{print $1}')
if [ ${md5_result} != ${MD5} ]; then
    echo "MD5 mismatch! The archive has been changed."
    exit -1
fi

label_file='ProsodyLabeling/000001-010000.txt'
filename='000001-010000.txt'
unrar e ${archive} ${label_file}
cp ${filename} ${exp_dir}
rm -f ${filename}

if [ ! -f ${exp_dir}/${filename} ]; then
    echo "File extraction failed!"
    exit 1
fi

exit 0
@@ -1,8 +0,0 @@
export MAIN_ROOT=`realpath ${PWD}/../../../../`

export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export LC_ALL=C

# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
@@ -1 +0,0 @@
jieba
@@ -1,37 +0,0 @@
#!/usr/bin/env bash

source path.sh

stage=-1
stop_stage=100

exp_dir=exp
data=data

source ${MAIN_ROOT}/utils/parse_options.sh || exit -1

mkdir -p ${exp_dir}

if [ $stage -le -1 ] && [ $stop_stage -ge -1 ]; then
    mkdir -p ${data}
    test -e ${data}/BZNSYP.rar || wget -c https://weixinxcxdb.oss-cn-beijing.aliyuncs.com/gwYinPinKu/BZNSYP.rar -P ${data}
fi

if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then
    echo "stage 0: Extracting Prosody Labeling"
    bash local/prepare_dataset.sh --exp-dir ${exp_dir} --data-dir ${data}
fi

# convert the Chinese transcription into pinyin with pypinyin or jieba+pypinyin
filename="000001-010000.txt"

if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
    echo "stage 1: Processing transcriptions..."
    python3 local/extract_pinyin_label.py ${exp_dir}/${filename} ${exp_dir}/ref.pinyin

    python3 local/convert_transcription.py ${exp_dir}/${filename} ${exp_dir}/trans.pinyin
    python3 local/convert_transcription.py --use-jieba ${exp_dir}/${filename} ${exp_dir}/trans.jieba.pinyin
fi

echo "done"
exit 0
@@ -1 +0,0 @@
exp
@@ -1,29 +0,0 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse

from text_processing import normalization

parser = argparse.ArgumentParser(
    description="Normalize text in Chinese with some rules.")
parser.add_argument("input", type=str, help="the input sentences")
parser.add_argument("output", type=str, help="path to save the output file.")
args = parser.parse_args()

with open(args.input, 'rt') as fin:
    with open(args.output, 'wt') as fout:
        for sent in fin:
            sent = normalization.normalize_sentence(sent.strip())
            fout.write(sent)
            fout.write('\n')
@@ -1,8 +0,0 @@
export MAIN_ROOT=`realpath ${PWD}/../../../`

export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export LC_ALL=C

# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${MAIN_ROOT}/third_party:${PYTHONPATH}
@@ -1,26 +0,0 @@
#!/usr/bin/env bash
source path.sh

stage=-1
stop_stage=100

exp_dir=exp
data_dir=data
filename="sentences.txt"

source ${MAIN_ROOT}/utils/parse_options.sh || exit -1

mkdir -p ${exp_dir}

if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
    echo "stage 1: Processing"
    python3 local/test_normalization.py ${data_dir}/${filename} ${exp_dir}/normalized.txt
    if [ -f "${exp_dir}/normalized.txt" ]; then
        echo "Normalized text saved at ${exp_dir}/normalized.txt"
    fi
    # TODO(chenfeiyu): compute edit distance against ground-truth
fi

echo "done"
exit 0
@@ -1,2 +0,0 @@
*~
*.far
@@ -1,21 +0,0 @@
MIT License

Copyright (c) 2020 SpeechIO

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@@ -1,8 +0,0 @@
# for plain text
python3 cn_tn.py example_plain.txt output_plain.txt
diff example_plain.txt output_plain.txt

# for Kaldi's trans format
python3 cn_tn.py --has_key example_kaldi.txt output_kaldi.txt
diff example_kaldi.txt output_kaldi.txt
@@ -1,24 +0,0 @@
0. Place install_thrax.sh into $KALDI_ROOT/tools/extras/.

1. Recompile OpenFst with the "--enable-grm" option needed to support Thrax:
   * cd $KALDI_ROOT/tools
   * make clean
   * edit $KALDI_ROOT/tools/Makefile and append the "--enable-grm" option to OPENFST_CONFIGURE:
     OPENFST_CONFIGURE ?= --enable-static --enable-shared --enable-far --enable-ngram-fsts --enable-lookahead-fsts --with-pic --enable-grm
   * make -j 10

2. Install Thrax:
   cd $KALDI_ROOT/tools
   sh extras/install_thrax.sh

3. Add the Thrax binary path to $KALDI_ROOT/tools/env.sh:
   export PATH=/path/to/your/kaldi_root/tools/thrax-1.2.9/src/bin:${PATH}

Usage:
Before running anything related to Thrax, source
   . $KALDI_ROOT/tools/env.sh
so that the Thrax binaries can be found, as is standard practice in Kaldi.

Sample usage:
   sh run_en.sh
   sh run_cn.sh
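Taken together, the numbered steps above amount to roughly the following session (a sketch only, assuming a Kaldi checkout at $KALDI_ROOT and the thrax-1.2.9 tarball named in install_thrax.sh):

   cd $KALDI_ROOT/tools
   make clean
   # append --enable-grm to OPENFST_CONFIGURE in this Makefile before rebuilding
   make -j 10
   sh extras/install_thrax.sh
   # after adding the Thrax bin directory to env.sh as in step 3:
   . ./env.sh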
@@ -1,12 +0,0 @@
#!/bin/bash
## This script should be placed under $KALDI_ROOT/tools/extras/; see INSTALL.txt for the installation guide
if [ ! -f thrax-1.2.9.tar.gz ]; then
    wget http://www.openfst.org/twiki/pub/GRM/ThraxDownload/thrax-1.2.9.tar.gz
    tar -zxf thrax-1.2.9.tar.gz
fi
cd thrax-1.2.9
OPENFSTPREFIX=`pwd`/../openfst
LDFLAGS="-L${OPENFSTPREFIX}/lib" CXXFLAGS="-I${OPENFSTPREFIX}/include" ./configure --prefix ${OPENFSTPREFIX}
make -j 10; make install
cd ..
Binary file not shown.
Binary file not shown.
@@ -1,6 +0,0 @@
cd src/cn
thraxmakedep itn.grm
make
#thraxrewrite-tester --far=itn.far --rules=ITN
cat ../../testcase_cn.txt | thraxrewrite-tester --far=itn.far --rules=ITN
cd -
@@ -1,6 +0,0 @@
cd src
thraxmakedep en/verbalizer/podspeech.grm
make
cat ../testcase_en.txt
cat ../testcase_en.txt | thraxrewrite-tester --far=en/verbalizer/podspeech.far --rules=POD_SPEECH_TN
cd -
@@ -1,202 +0,0 @@

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
@@ -1,65 +0,0 @@
en/verbalizer/podspeech.far: en/verbalizer/podspeech.grm util/util.far util/case.far en/verbalizer/extra_numbers.far en/verbalizer/float.far en/verbalizer/math.far en/verbalizer/miscellaneous.far en/verbalizer/money.far en/verbalizer/numbers.far en/verbalizer/numbers_plus.far en/verbalizer/spelled.far en/verbalizer/spoken_punct.far en/verbalizer/time.far en/verbalizer/urls.far
	thraxcompiler --input_grammar=$< --output_far=$@

util/util.far: util/util.grm util/byte.far util/case.far
	thraxcompiler --input_grammar=$< --output_far=$@

util/byte.far: util/byte.grm
	thraxcompiler --input_grammar=$< --output_far=$@

util/case.far: util/case.grm util/byte.far
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/extra_numbers.far: en/verbalizer/extra_numbers.grm util/byte.far en/verbalizer/numbers.far
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/numbers.far: en/verbalizer/numbers.grm en/verbalizer/number_names.far util/byte.far universal/thousands_punct.far
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/number_names.far: en/verbalizer/number_names.grm util/arithmetic.far en/verbalizer/g.fst en/verbalizer/cardinals.tsv en/verbalizer/ordinals.tsv
	thraxcompiler --input_grammar=$< --output_far=$@

util/arithmetic.far: util/arithmetic.grm util/byte.far util/germanic.tsv
	thraxcompiler --input_grammar=$< --output_far=$@

universal/thousands_punct.far: universal/thousands_punct.grm util/byte.far util/util.far
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/float.far: en/verbalizer/float.grm en/verbalizer/factorization.far en/verbalizer/lexical_map.far en/verbalizer/numbers.far
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/factorization.far: en/verbalizer/factorization.grm util/byte.far util/util.far en/verbalizer/numbers.far
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/lexical_map.far: en/verbalizer/lexical_map.grm util/byte.far en/verbalizer/lexical_map.tsv
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/math.far: en/verbalizer/math.grm en/verbalizer/float.far en/verbalizer/lexical_map.far en/verbalizer/numbers.far
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/miscellaneous.far: en/verbalizer/miscellaneous.grm util/byte.far ru/classifier/cyrillic.far en/verbalizer/extra_numbers.far en/verbalizer/lexical_map.far en/verbalizer/numbers.far en/verbalizer/spelled.far
	thraxcompiler --input_grammar=$< --output_far=$@

ru/classifier/cyrillic.far: ru/classifier/cyrillic.grm
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/spelled.far: en/verbalizer/spelled.grm util/byte.far ru/classifier/cyrillic.far en/verbalizer/lexical_map.far en/verbalizer/numbers.far
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/money.far: en/verbalizer/money.grm util/byte.far en/verbalizer/lexical_map.far en/verbalizer/numbers.far en/verbalizer/money.tsv
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/numbers_plus.far: en/verbalizer/numbers_plus.grm en/verbalizer/factorization.far en/verbalizer/lexical_map.far en/verbalizer/numbers.far
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/spoken_punct.far: en/verbalizer/spoken_punct.grm en/verbalizer/lexical_map.far
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/time.far: en/verbalizer/time.grm util/byte.far en/verbalizer/lexical_map.far en/verbalizer/numbers.far
	thraxcompiler --input_grammar=$< --output_far=$@

en/verbalizer/urls.far: en/verbalizer/urls.grm util/byte.far en/verbalizer/lexical_map.far
	thraxcompiler --input_grammar=$< --output_far=$@

clean:
	rm -f util/util.far util/case.far en/verbalizer/extra_numbers.far en/verbalizer/float.far en/verbalizer/math.far en/verbalizer/miscellaneous.far en/verbalizer/money.far en/verbalizer/numbers.far en/verbalizer/numbers_plus.far en/verbalizer/spelled.far en/verbalizer/spoken_punct.far en/verbalizer/time.far en/verbalizer/urls.far util/byte.far en/verbalizer/number_names.far universal/thousands_punct.far util/arithmetic.far en/verbalizer/factorization.far en/verbalizer/lexical_map.far ru/classifier/cyrillic.far
@@ -1,24 +0,0 @@
# Text normalization covering grammars

This repository provides covering grammars for English and Russian text normalization as
documented in:

Gorman, K., and Sproat, R. 2016. Minimally supervised number normalization.
_Transactions of the Association for Computational Linguistics_ 4: 507-519.

Ng, A. H., Gorman, K., and Sproat, R. 2017. Minimally supervised
written-to-spoken text normalization. In _ASRU_, pages 665-670.

If you use these grammars in a publication, we would appreciate it if you cite these works.

## Building

The grammars are written in [Thrax](thrax.opengrm.org) and compile into [OpenFst](openfst.org) FAR (FstARchive) files. To compile, simply run `make` in the `src/` directory.
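For example, the `run_en.sh` script elsewhere in this diff drives the build and then feeds a test file through `thraxrewrite-tester` roughly as follows (a sketch taken from that script; the test file and rule name are the ones it uses):

    cd src
    thraxmakedep en/verbalizer/podspeech.grm
    make
    cat ../testcase_en.txt | thraxrewrite-tester --far=en/verbalizer/podspeech.far --rules=POD_SPEECH_TN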
## License

See `LICENSE`.

## Mandatory disclaimer

This is not an official Google product.
@@ -1,23 +0,0 @@
itn.far: itn.grm byte.far number.far hotfix.far percentage.far date.far amount.far
	thraxcompiler --input_grammar=$< --output_far=$@

byte.far: byte.grm
	thraxcompiler --input_grammar=$< --output_far=$@

number.far: number.grm byte.far
	thraxcompiler --input_grammar=$< --output_far=$@

hotfix.far: hotfix.grm byte.far hotfix.list
	thraxcompiler --input_grammar=$< --output_far=$@

percentage.far: percentage.grm byte.far number.far
	thraxcompiler --input_grammar=$< --output_far=$@

date.far: date.grm byte.far number.far
	thraxcompiler --input_grammar=$< --output_far=$@

amount.far: amount.grm byte.far number.far
	thraxcompiler --input_grammar=$< --output_far=$@

clean:
	rm -f byte.far number.far hotfix.far percentage.far date.far amount.far
@@ -1,24 +0,0 @@
import 'byte.grm' as b;
import 'number.grm' as n;

unit = (
    "匹"|"张"|"座"|"回"|"场"|"尾"|"条"|"个"|"首"|"阙"|"阵"|"网"|"炮"|
    "顶"|"丘"|"棵"|"只"|"支"|"袭"|"辆"|"挑"|"担"|"颗"|"壳"|"窠"|"曲"|
    "墙"|"群"|"腔"|"砣"|"座"|"客"|"贯"|"扎"|"捆"|"刀"|"令"|"打"|"手"|
    "罗"|"坡"|"山"|"岭"|"江"|"溪"|"钟"|"队"|"单"|"双"|"对"|"出"|"口"|
    "头"|"脚"|"板"|"跳"|"枝"|"件"|"贴"|"针"|"线"|"管"|"名"|"位"|"身"|
    "堂"|"课"|"本"|"页"|"家"|"户"|"层"|"丝"|"毫"|"厘"|"分"|"钱"|"两"|
    "斤"|"担"|"铢"|"石"|"钧"|"锱"|"忽"|"毫"|"厘"|"分"|"寸"|"尺"|"丈"|
    "里"|"寻"|"常"|"铺"|"程"|"撮"|"勺"|"合"|"升"|"斗"|"石"|"盘"|"碗"|
    "碟"|"叠"|"桶"|"笼"|"盆"|"盒"|"杯"|"钟"|"斛"|"锅"|"簋"|"篮"|"盘"|
    "桶"|"罐"|"瓶"|"壶"|"卮"|"盏"|"箩"|"箱"|"煲"|"啖"|"袋"|"钵"|"年"|
    "月"|"日"|"季"|"刻"|"时"|"周"|"天"|"秒"|"分"|"旬"|"纪"|"岁"|"世"|
    "更"|"夜"|"春"|"夏"|"秋"|"冬"|"代"|"伏"|"辈"|"丸"|"泡"|"粒"|"颗"|
    "幢"|"堆"|"条"|"根"|"支"|"道"|"面"|"片"|"张"|"颗"|"块"|
    (("千克":"kg")|("毫克":"mg")|("微克":"µg"))|
    (("千米":"km")|("厘米":"cm")|("毫米":"mm")|("微米":"µm")|("纳米":"nm"))
);

amount = n.number unit;
export AMOUNT = CDRewrite[amount, "", "", b.kBytes*];
@@ -1,76 +0,0 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Copyright 2005-2011 Google, Inc.
# Author: ttai@google.com (Terry Tai)

# Standard constants for ASCII (byte) based strings. This mirrors the
# functions provided by C/C++'s ctype.h library.

# Note that [0] is missing. Matching the string-termination character is kinda weird.
export kBytes = Optimize[
  "[1]" | "[2]" | "[3]" | "[4]" | "[5]" | "[6]" | "[7]" | "[8]" | "[9]" | "[10]" |
  "[11]" | "[12]" | "[13]" | "[14]" | "[15]" | "[16]" | "[17]" | "[18]" | "[19]" | "[20]" |
  "[21]" | "[22]" | "[23]" | "[24]" | "[25]" | "[26]" | "[27]" | "[28]" | "[29]" | "[30]" |
  "[31]" | "[32]" | "[33]" | "[34]" | "[35]" | "[36]" | "[37]" | "[38]" | "[39]" | "[40]" |
  "[41]" | "[42]" | "[43]" | "[44]" | "[45]" | "[46]" | "[47]" | "[48]" | "[49]" | "[50]" |
  "[51]" | "[52]" | "[53]" | "[54]" | "[55]" | "[56]" | "[57]" | "[58]" | "[59]" | "[60]" |
  "[61]" | "[62]" | "[63]" | "[64]" | "[65]" | "[66]" | "[67]" | "[68]" | "[69]" | "[70]" |
  "[71]" | "[72]" | "[73]" | "[74]" | "[75]" | "[76]" | "[77]" | "[78]" | "[79]" | "[80]" |
  "[81]" | "[82]" | "[83]" | "[84]" | "[85]" | "[86]" | "[87]" | "[88]" | "[89]" | "[90]" |
  "[91]" | "[92]" | "[93]" | "[94]" | "[95]" | "[96]" | "[97]" | "[98]" | "[99]" | "[100]" |
  "[101]" | "[102]" | "[103]" | "[104]" | "[105]" | "[106]" | "[107]" | "[108]" | "[109]" | "[110]" |
  "[111]" | "[112]" | "[113]" | "[114]" | "[115]" | "[116]" | "[117]" | "[118]" | "[119]" | "[120]" |
  "[121]" | "[122]" | "[123]" | "[124]" | "[125]" | "[126]" | "[127]" | "[128]" | "[129]" | "[130]" |
  "[131]" | "[132]" | "[133]" | "[134]" | "[135]" | "[136]" | "[137]" | "[138]" | "[139]" | "[140]" |
  "[141]" | "[142]" | "[143]" | "[144]" | "[145]" | "[146]" | "[147]" | "[148]" | "[149]" | "[150]" |
  "[151]" | "[152]" | "[153]" | "[154]" | "[155]" | "[156]" | "[157]" | "[158]" | "[159]" | "[160]" |
  "[161]" | "[162]" | "[163]" | "[164]" | "[165]" | "[166]" | "[167]" | "[168]" | "[169]" | "[170]" |
  "[171]" | "[172]" | "[173]" | "[174]" | "[175]" | "[176]" | "[177]" | "[178]" | "[179]" | "[180]" |
  "[181]" | "[182]" | "[183]" | "[184]" | "[185]" | "[186]" | "[187]" | "[188]" | "[189]" | "[190]" |
  "[191]" | "[192]" | "[193]" | "[194]" | "[195]" | "[196]" | "[197]" | "[198]" | "[199]" | "[200]" |
  "[201]" | "[202]" | "[203]" | "[204]" | "[205]" | "[206]" | "[207]" | "[208]" | "[209]" | "[210]" |
  "[211]" | "[212]" | "[213]" | "[214]" | "[215]" | "[216]" | "[217]" | "[218]" | "[219]" | "[220]" |
  "[221]" | "[222]" | "[223]" | "[224]" | "[225]" | "[226]" | "[227]" | "[228]" | "[229]" | "[230]" |
  "[231]" | "[232]" | "[233]" | "[234]" | "[235]" | "[236]" | "[237]" | "[238]" | "[239]" | "[240]" |
  "[241]" | "[242]" | "[243]" | "[244]" | "[245]" | "[246]" | "[247]" | "[248]" | "[249]" | "[250]" |
  "[251]" | "[252]" | "[253]" | "[254]" | "[255]"
];

export kDigit = Optimize[
  "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
];

export kLower = Optimize[
  "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" |
  "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
];
export kUpper = Optimize[
  "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" |
  "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
];
export kAlpha = Optimize[kLower | kUpper];

export kAlnum = Optimize[kDigit | kAlpha];

export kSpace = Optimize[
  " " | "\t" | "\n" | "\r"
];
export kNotSpace = Optimize[kBytes - kSpace];

export kPunct = Optimize[
  "!" | "\"" | "#" | "$" | "%" | "&" | "'" | "(" | ")" | "*" | "+" | "," |
  "-" | "." | "/" | ":" | ";" | "<" | "=" | ">" | "?" | "@" | "\[" | "\\" |
  "\]" | "^" | "_" | "`" | "{" | "|" | "}" | "~"
];

export kGraph = Optimize[kAlnum | kPunct];
@@ -1,10 +0,0 @@
import 'byte.grm' as b;
import 'number.grm' as n;

date_day = n.number_1_to_99 ("日"|"号");
date_month_day = n.number_1_to_99 "月" date_day;
date_year_month_day = ((n.number_0_to_9){2,4} | n.number) "年" date_month_day;

date = date_year_month_day | date_month_day | date_day;

export DATE = CDRewrite[date, "", "", b.kBytes*];
@@ -1,5 +0,0 @@
import 'byte.grm' as b;
hotfix = StringFile['hotfix.list'];

export HOTFIX = CDRewrite[hotfix, "", "", b.kBytes*];
@@ -1,18 +0,0 @@
0头	零头
10字	十字
东4环	东4环	-1.0
东4	东四	-0.5
4惠	四惠
3元桥	三元桥
4平市	四平市
5台山	五台山
西2旗	西二旗
西3旗	西三旗
4道口	四道口	-1.0
5道口	五道口	-1.0
6道口	六道口	-1.0
6里桥	六里桥
7里庄	七里庄
8宝山	八宝山
9颗松	九棵松
10里堡	十里堡
@@ -1,9 +0,0 @@
import 'byte.grm' as b;
import 'number.grm' as number;
import 'hotfix.grm' as hotfix;
import 'percentage.grm' as percentage;
import 'date.grm' as date;
import 'amount.grm' as amount; # seems not useful for now

export ITN = Optimize[percentage.PERCENTAGE @ (date.DATE <-1>) @ number.NUMBER @ hotfix.HOTFIX];
@@ -1,61 +0,0 @@
import 'byte.grm' as b;

number_1_to_9 = (
    ("一":"1") | ("幺":"1") |
    ("二":"2") | ("两":"2") |
    ("三":"3") |
    ("四":"4") |
    ("五":"5") |
    ("六":"6") |
    ("七":"7") |
    ("八":"8") |
    ("九":"9")
);

export number_0_to_9 = (("零":"0") | number_1_to_9);

number_10_to_19 = (
    ("十":"10") |
    ("十一":"11") |
    ("十二":"12") |
    ("十三":"13") |
    ("十四":"14") |
    ("十五":"15") |
    ("十六":"16") |
    ("十七":"17") |
    ("十八":"18") |
    ("十九":"19")
);

number_10s = (number_1_to_9 ("十":""));
number_100s = (number_1_to_9 ("百":""));
number_1000s = (number_1_to_9 ("千":""));
number_10000s = (number_1_to_9 ("万":""));

number_10_to_99 = (
    ((number_10s number_1_to_9)<-0.3>) |
    ((number_10s ("":"0"))<-0.2>) |
    (number_10_to_19 <-0.1>)
);

export number_1_to_99 = (number_1_to_9 | number_10_to_99);

number_100_to_999 = (
    ((number_100s ("零":"0") number_1_to_9)<0.0>)|
    ((number_100s number_10_to_99)<0.0>) |
    ((number_100s number_1_to_9 ("":"0"))<0.0>) |
    ((number_100s ("":"00"))<0.1>)
);

number_1000_to_9999 = (
    ((number_1000s number_100_to_999)<0.0>) |
    ((number_1000s ("零":"0") number_10_to_99)<0.0>)|
    ((number_1000s ("零":"00") number_1_to_9)<0.0>)|
    ((number_1000s ("":"000"))<1>) |
    ((number_1000s number_1_to_9 ("":"00"))<0.0>)
);

export number = number_1_to_99 | (number_100_to_999 <-1>) | (number_1000_to_9999 <-2>);

export NUMBER = CDRewrite[number, "", "", b.kBytes*];
@@ -1,8 +0,0 @@
import 'byte.grm' as b;
import 'number.grm' as n;

percentage = (
    ("百分之":"") n.number_1_to_99 ("":"%")
);

export PERCENTAGE = CDRewrite[percentage, "", "", b.kBytes*];
@@ -1,6 +0,0 @@
# English covering grammar definitions

This directory defines an English text normalization covering grammar. The
primary entry-point is the FST `VERBALIZER`, defined in
`verbalizer/verbalizer.grm` and compiled in the FST archive
`verbalizer/verbalizer.far`.
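As a rough usage sketch (not part of the original README): once the archive is built, the `VERBALIZER` rule can be exercised with `thraxrewrite-tester` in the same way `run_en.sh` drives `podspeech.far`; the input file name below is only a placeholder.

    # from the src/ directory, after make
    cat my_inputs.txt | thraxrewrite-tester --far=en/verbalizer/verbalizer.far --rules=VERBALIZER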
@@ -1,3 +0,0 @@
verbalizer.far: verbalizer.grm util/util.far en/verbalizer/extra_numbers.far en/verbalizer/float.far en/verbalizer/math.far en/verbalizer/miscellaneous.far en/verbalizer/money.far en/verbalizer/numbers.far en/verbalizer/numbers_plus.far en/verbalizer/spelled.far en/verbalizer/spoken_punct.far en/verbalizer/time.far en/verbalizer/urls.far
	thraxcompiler --input_grammar=$< --output_far=$@
@@ -1,35 +0,0 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import 'util/byte.grm' as b;
import 'en/verbalizer/numbers.grm' as n;

digit = b.kDigit @ n.CARDINAL_NUMBERS | ("0" : "@@OTHER_ZERO_VERBALIZATIONS@@");

export DIGITS = digit (n.I[" "] digit)*;

# Various common factorizations

two_digits = b.kDigit{2} @ n.CARDINAL_NUMBERS;

three_digits = b.kDigit{3} @ n.CARDINAL_NUMBERS;

mixed =
    (digit n.I[" "] two_digits)
  | (two_digits n.I[" "] two_digits)
  | (two_digits n.I[" "] three_digits)
  | (two_digits n.I[" "] two_digits n.I[" "] two_digits)
;

export MIXED_NUMBERS = Optimize[mixed];
@@ -1,40 +0,0 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import 'util/byte.grm' as b;
import 'util/util.grm' as u;
import 'en/verbalizer/numbers.grm' as n;

func ToNumberName[expr] {
  number_name_seq = n.CARDINAL_NUMBERS (" " n.CARDINAL_NUMBERS)*;
  return Optimize[expr @ number_name_seq];
}

d = b.kDigit;

leading_zero = CDRewrite[n.I[" "], ("[BOS]" | " ") "0", "", b.kBytes*];

by_ones = d n.I[" "];
by_twos = (d{2} @ leading_zero) n.I[" "];
by_threes = (d{3} @ leading_zero) n.I[" "];

groupings = by_twos* (by_threes | by_twos | by_ones);

export FRACTIONAL_PART_UNGROUPED =
  Optimize[ToNumberName[by_ones+ @ u.CLEAN_SPACES]]
;
export FRACTIONAL_PART_GROUPED =
  Optimize[ToNumberName[groupings @ u.CLEAN_SPACES]]
;
export FRACTIONAL_PART_UNPARSED = Optimize[ToNumberName[d*]];
@@ -1,30 +0,0 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import 'en/verbalizer/factorization.grm' as f;
import 'en/verbalizer/lexical_map.grm' as l;
import 'en/verbalizer/numbers.grm' as n;

fractional_part_ungrouped = f.FRACTIONAL_PART_UNGROUPED;
fractional_part_grouped = f.FRACTIONAL_PART_GROUPED;
fractional_part_unparsed = f.FRACTIONAL_PART_UNPARSED;

__fractional_part__ = fractional_part_ungrouped | fractional_part_unparsed;
__decimal_marker__ = ".";

export FLOAT = Optimize[
  (n.CARDINAL_NUMBERS
   (__decimal_marker__ : " @@DECIMAL_DOT_EXPRESSION@@ ")
   __fractional_part__) @ l.LEXICAL_MAP]
;
Binary file not shown.
@@ -1,25 +0,0 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import 'util/byte.grm' as b;

lexical_map = StringFile['en/verbalizer/lexical_map.tsv'];

sigma_star = b.kBytes*;

del_null = CDRewrite["__NULL__" : "", "", "", sigma_star];

export LEXICAL_MAP = Optimize[
  CDRewrite[lexical_map, "", "", sigma_star] @ del_null]
;
Can't render this file because it has a wrong number of fields in line 37.
@@ -1,34 +0,0 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import 'en/verbalizer/float.grm' as f;
import 'en/verbalizer/lexical_map.grm' as l;
import 'en/verbalizer/numbers.grm' as n;

float = f.FLOAT;
card = n.CARDINAL_NUMBERS;
number = card | float;

plus = "+" : " @@ARITHMETIC_PLUS@@ ";
times = "*" : " @@ARITHMETIC_TIMES@@ ";
minus = "-" : " @@ARITHMETIC_MINUS@@ ";
division = "/" : " @@ARITHMETIC_DIVISION@@ ";

operator = plus | times | minus | division;

percent = "%" : " @@PERCENT@@";

export ARITHMETIC =
  Optimize[((number operator number) | (number percent)) @ l.LEXICAL_MAP]
;
@@ -1,78 +0,0 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import 'util/byte.grm' as b;
import 'ru/classifier/cyrillic.grm' as c;
import 'en/verbalizer/extra_numbers.grm' as e;
import 'en/verbalizer/lexical_map.grm' as l;
import 'en/verbalizer/numbers.grm' as n;
import 'en/verbalizer/spelled.grm' as s;

letter = b.kAlpha | c.kCyrillicAlpha;
dash = "-";
word = letter+;
possibly_split_word = word (((dash | ".") : " ") word)* n.D["."]?;

post_word_symbol =
    ("+" : ("@@ARITHMETIC_PLUS@@" | "@@POSITIVE@@")) |
    ("-" : ("@@ARITHMETIC_MINUS@@" | "@@NEGATIVE@@")) |
    ("*" : "@@STAR@@")
;

pre_word_symbol =
    ("@" : "@@AT@@") |
    ("/" : "@@SLASH@@") |
    ("#" : "@@HASH@@")
;

post_word = possibly_split_word n.I[" "] post_word_symbol;

pre_word = pre_word_symbol n.I[" "] possibly_split_word;

## Number/digit sequence combos, maybe with a dash

spelled_word = word @ s.SPELLED_NO_LETTER;

word_number =
    (word | spelled_word)
    (n.I[" "] | (dash : " "))
    (e.DIGITS | n.CARDINAL_NUMBERS | e.MIXED_NUMBERS)
;

number_word =
    (e.DIGITS | n.CARDINAL_NUMBERS | e.MIXED_NUMBERS)
    (n.I[" "] | (dash : " "))
    (word | spelled_word)
;

## Two-digit year.

# Note that in this case to be fair we really have to allow ordinals too since
# in some languages that's what you would have.

two_digit_year = n.D["'"] (b.kDigit{2} @ (n.CARDINAL_NUMBERS | e.DIGITS));

dot_com = ("." : "@@URL_DOT_EXPRESSION@@") n.I[" "] "com";

miscellaneous = Optimize[
    possibly_split_word
  | post_word
  | pre_word
  | word_number
  | number_word
  | two_digit_year
  | dot_com
];

export MISCELLANEOUS = Optimize[miscellaneous @ l.LEXICAL_MAP];
@ -1,44 +0,0 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import 'util/byte.grm' as b;
import 'en/verbalizer/lexical_map.grm' as l;
import 'en/verbalizer/numbers.grm' as n;

card = n.CARDINAL_NUMBERS;

__currency__ = StringFile['en/verbalizer/money.tsv'];

d = b.kDigit;
D = d - "0";

cents = ((n.D["0"] | D) d) @ card;

# Only dollar for the verbalizer tests for English. Will need to add other
# currencies.
usd_maj = Project["usd_maj" @ __currency__, 'output'];
usd_min = Project["usd_min" @ __currency__, 'output'];
and = " @@MONEY_AND@@ " | " ";

dollar1 =
  n.D["$"] card n.I[" " usd_maj] n.I[and] n.D["."] cents n.I[" " usd_min]
;

dollar2 = n.D["$"] card n.I[" " usd_maj] n.D["."] n.D["00"];

dollar3 = n.D["$"] card n.I[" " usd_maj];

dollar = Optimize[dollar1 | dollar2 | dollar3];

export MONEY = Optimize[dollar @ l.LEXICAL_MAP];
@ -1,54 +0,0 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# English minimally supervised number grammar.
#
# Supports both cardinals and ordinals without overt marking.
#
# The language-specific acceptor G was compiled with digit, teen, and decade
# preterminals. The lexicon transducer L is unambiguous so no LM is used.

import 'util/arithmetic.grm' as a;

# Intersects the universal factorization transducer (F) with the
# language-specific acceptor (G).

d = a.DELTA_STAR;
f = a.IARITHMETIC_RESTRICTED;
g = LoadFst['en/verbalizer/g.fst'];
fg = Optimize[d @ Optimize[f @ Optimize[f @ Optimize[f @ g]]]];
test1 = AssertEqual["230" @ fg, "(+ (* 2 100 *) 30 +)"];

# Compiles lexicon transducer (L).

cardinal_name = StringFile['en/verbalizer/cardinals.tsv'];
cardinal_l = Optimize[(cardinal_name " ")* cardinal_name];
test2 = AssertEqual["2 100 30" @ cardinal_l, "two hundred thirty"];

ordinal_name = StringFile['en/verbalizer/ordinals.tsv'];
# In English, ordinals have the same syntax as cardinals and all but the final
# element is verbalized using a cardinal number word; e.g., "two hundred
# thirtieth".
ordinal_l = Optimize[(cardinal_name " ")* ordinal_name];
test3 = AssertEqual["2 100 30" @ ordinal_l, "two hundred thirtieth"];

# Composes L with the leaf transducer (P), then composes that with FG.

p = a.LEAVES;

export CARDINAL_NUMBER_NAME = Optimize[fg @ (p @ cardinal_l)];
test4 = AssertEqual["230" @ CARDINAL_NUMBER_NAME, "two hundred thirty"];

export ORDINAL_NUMBER_NAME = Optimize[fg @ (p @ ordinal_l)];
test5 = AssertEqual["230" @ ORDINAL_NUMBER_NAME, "two hundred thirtieth"];
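The grammar above composes a factorization FG with a leaf transducer P and a lexicon L built from cardinals.tsv/ordinals.tsv. The sketch below reproduces only the lexicon stage of test2 in Python with pynini; the three-entry map is an assumption standing in for cardinals.tsv, and F, G and P from util/arithmetic.grm and g.fst are not modeled.

    # Lexicon (L) stage only: the factorization "2 100 30" is verbalized
    # term by term, mirroring test2 above. The map contents are assumed.
    import pynini

    cardinal_name = pynini.string_map(
        [("2", "two"), ("100", "hundred"), ("30", "thirty")])
    cardinal_l = (pynini.closure(cardinal_name + " ") + cardinal_name).optimize()

    print(pynini.shortestpath("2 100 30" @ cardinal_l).string())
    # -> "two hundred thirty"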
@ -1,57 +0,0 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import 'en/verbalizer/number_names.grm' as n;
import 'util/byte.grm' as bytelib;
import 'universal/thousands_punct.grm' as t;

cardinal = n.CARDINAL_NUMBER_NAME;
ordinal = n.ORDINAL_NUMBER_NAME;

# Putting these here since this grammar gets incorporated by all the others.

func I[expr] {
  return "" : expr;
}

func D[expr] {
  return expr : "";
}

separators = t.comma_thousands | t.no_delimiter;

# Language specific endings for ordinals.
d = bytelib.kDigit;
endings = "st" | "nd" | "rd" | "th";

st = (d* "1") - (d* "11");
nd = (d* "2") - (d* "12");
rd = (d* "3") - (d* "13");
th = Optimize[d* - st - nd - rd];
first = st ("st" : "");
second = nd ("nd" : "");
third = rd ("rd" : "");
other = th ("th" : "");
marked_ordinal = Optimize[first | second | third | other];

# The separator is a no-op here but will be needed once we replace
# the above targets.

export CARDINAL_NUMBERS = Optimize[separators @ cardinal];

export ORDINAL_NUMBERS =
  Optimize[(separators endings) @ marked_ordinal @ ordinal]
;

export ORDINAL_NUMBERS_UNMARKED = Optimize[separators @ ordinal];
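The st/nd/rd/th machinery above pairs each digit string with the one ending English allows, then deletes the ending before the digits are verbalized as an ordinal. A plain-Python restatement of that check, with a hypothetical helper name:

    # Plain-Python restatement of the marked-ordinal check: "st" is only
    # valid after ...1 (but not ...11), "nd" after ...2 (not ...12),
    # "rd" after ...3 (not ...13), and "th" everywhere else.
    def strip_ordinal_ending(token: str) -> str:
        digits, ending = token[:-2], token[-2:]
        if not digits.isdigit() or ending not in ("st", "nd", "rd", "th"):
            raise ValueError(f"not a marked ordinal: {token!r}")
        if digits.endswith("1") and not digits.endswith("11"):
            expected = "st"
        elif digits.endswith("2") and not digits.endswith("12"):
            expected = "nd"
        elif digits.endswith("3") and not digits.endswith("13"):
            expected = "rd"
        else:
            expected = "th"
        if ending != expected:
            raise ValueError(f"expected {expected!r} after {digits}")
        return digits  # the digits then go through the ordinal name grammar

    assert strip_ordinal_ending("231st") == "231"
    assert strip_ordinal_ending("12th") == "12"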
@ -1,133 +0,0 @@
|
|||||||
# Copyright 2017 Google Inc.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
# Grammar for things built mostly on numbers.
|
|
||||||
|
|
||||||
import 'en/verbalizer/factorization.grm' as f;
|
|
||||||
import 'en/verbalizer/lexical_map.grm' as l;
|
|
||||||
import 'en/verbalizer/numbers.grm' as n;
|
|
||||||
|
|
||||||
num = n.CARDINAL_NUMBERS;
|
|
||||||
ord = n.ORDINAL_NUMBERS_UNMARKED;
|
|
||||||
digits = f.FRACTIONAL_PART_UNGROUPED;
|
|
||||||
|
|
||||||
# Various symbols.
|
|
||||||
|
|
||||||
plus = "+" : "@@ARITHMETIC_PLUS@@";
|
|
||||||
minus = "-" : "@@ARITHMETIC_MINUS@@";
|
|
||||||
slash = "/" : "@@SLASH@@";
|
|
||||||
dot = "." : "@@URL_DOT_EXPRESSION@@";
|
|
||||||
dash = "-" : "@@DASH@@";
|
|
||||||
equals = "=" : "@@ARITHMETIC_EQUALS@@";
|
|
||||||
|
|
||||||
degree = "°" : "@@DEGREE@@";
|
|
||||||
|
|
||||||
division = ("/" | "÷") : "@@ARITHMETIC_DIVISION@@";
|
|
||||||
|
|
||||||
times = ("x" | "*") : "@@ARITHMETIC_TIMES@@";
|
|
||||||
|
|
||||||
power = "^" : "@@DECIMAL_EXPONENT@@";
|
|
||||||
|
|
||||||
square_root = "√" : "@@SQUARE_ROOT@@";
|
|
||||||
|
|
||||||
percent = "%" : "@@PERCENT@@";
|
|
||||||
|
|
||||||
# Safe roman numbers.
|
|
||||||
|
|
||||||
# NB: Do not change the formatting here. NO_EDIT must be on the same
|
|
||||||
# line as the path.
|
|
||||||
rfile =
|
|
||||||
'universal/roman_numerals.tsv' # NO_EDIT
|
|
||||||
;
|
|
||||||
|
|
||||||
roman = StringFile[rfile];
|
|
||||||
|
|
||||||
## Main categories.
|
|
||||||
|
|
||||||
cat_dot_number =
|
|
||||||
num
|
|
||||||
n.I[" "] dot n.I[" "] num
|
|
||||||
(n.I[" "] dot n.I[" "] num)+
|
|
||||||
;
|
|
||||||
|
|
||||||
cat_slash_number =
|
|
||||||
num
|
|
||||||
n.I[" "] slash n.I[" "] num
|
|
||||||
(n.I[" "] slash n.I[" "] num)*
|
|
||||||
;
|
|
||||||
|
|
||||||
cat_dash_number =
|
|
||||||
num
|
|
||||||
n.I[" "] dash n.I[" "] num
|
|
||||||
(n.I[" "] dash n.I[" "] num)*
|
|
||||||
;
|
|
||||||
|
|
||||||
cat_signed_number = ((plus | minus) n.I[" "])? num;
|
|
||||||
|
|
||||||
cat_degree = cat_signed_number n.I[" "] degree;
|
|
||||||
|
|
||||||
cat_country_code = plus n.I[" "] (num | digits);
|
|
||||||
|
|
||||||
cat_math_operations =
|
|
||||||
plus
|
|
||||||
| minus
|
|
||||||
| division
|
|
||||||
| times
|
|
||||||
| equals
|
|
||||||
| percent
|
|
||||||
| power
|
|
||||||
| square_root
|
|
||||||
;
|
|
||||||
|
|
||||||
# Roman numbers are often either cardinals or ordinals in various languages.
|
|
||||||
cat_roman = roman @ (num | ord);
|
|
||||||
|
|
||||||
# Allow
|
|
||||||
#
|
|
||||||
# number:number
|
|
||||||
# number-number
|
|
||||||
#
|
|
||||||
# to just be
|
|
||||||
#
|
|
||||||
# number number.
|
|
||||||
|
|
||||||
cat_number_number =
|
|
||||||
num ((":" | "-") : " ") num
|
|
||||||
;
|
|
||||||
|
|
||||||
# Some additional readings for these symbols.
|
|
||||||
|
|
||||||
cat_additional_readings =
|
|
||||||
("/" : "@@PER@@") |
|
|
||||||
("+" : "@@AND@@") |
|
|
||||||
("-" : ("@@HYPHEN@@" | "@@CONNECTOR_TO@@")) |
|
|
||||||
("*" : "@@STAR@@") |
|
|
||||||
("x" : ("x" | "@@CONNECTOR_BY@@")) |
|
|
||||||
("@" : "@@AT@@")
|
|
||||||
;
|
|
||||||
|
|
||||||
numbers_plus = Optimize[
|
|
||||||
cat_dot_number
|
|
||||||
| cat_slash_number
|
|
||||||
| cat_dash_number
|
|
||||||
| cat_signed_number
|
|
||||||
| cat_degree
|
|
||||||
| cat_country_code
|
|
||||||
| cat_math_operations
|
|
||||||
| cat_roman
|
|
||||||
| cat_number_number
|
|
||||||
| cat_additional_readings
|
|
||||||
];
|
|
||||||
|
|
||||||
export NUMBERS_PLUS = Optimize[numbers_plus @ l.LEXICAL_MAP];
|
|
|
@ -1,46 +0,0 @@
|
|||||||
# Copyright 2017 Google Inc.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
import 'util/util.grm' as util;
|
|
||||||
import 'util/case.grm' as case;
|
|
||||||
import 'en/verbalizer/extra_numbers.grm' as e;
|
|
||||||
import 'en/verbalizer/float.grm' as f;
|
|
||||||
import 'en/verbalizer/math.grm' as ma;
|
|
||||||
import 'en/verbalizer/miscellaneous.grm' as mi;
|
|
||||||
import 'en/verbalizer/money.grm' as mo;
|
|
||||||
import 'en/verbalizer/numbers.grm' as n;
|
|
||||||
import 'en/verbalizer/numbers_plus.grm' as np;
|
|
||||||
import 'en/verbalizer/spelled.grm' as s;
|
|
||||||
import 'en/verbalizer/spoken_punct.grm' as sp;
|
|
||||||
import 'en/verbalizer/time.grm' as t;
|
|
||||||
import 'en/verbalizer/urls.grm' as u;
|
|
||||||
|
|
||||||
export POD_SPEECH_TN = Optimize[RmWeight[
|
|
||||||
(u.URL
|
|
||||||
| e.MIXED_NUMBERS
|
|
||||||
| e.DIGITS
|
|
||||||
| f.FLOAT
|
|
||||||
| ma.ARITHMETIC
|
|
||||||
| mo.MONEY
|
|
||||||
| n.CARDINAL_NUMBERS
|
|
||||||
| n.ORDINAL_NUMBERS
|
|
||||||
| np.NUMBERS_PLUS
|
|
||||||
| s.SPELLED
|
|
||||||
| sp.SPOKEN_PUNCT
|
|
||||||
| t.TIME
|
|
||||||
| u.URL
|
|
||||||
| u.EMAILS) @ util.CLEAN_SPACES @ case.TOUPPER
|
|
||||||
]];
|
|
||||||
|
|
||||||
#export POD_SPEECH_TN = Optimize[RmWeight[(mi.MISCELLANEOUS) @ util.CLEAN_SPACES @ case.TOUPPER]];
|
|
@ -1,77 +0,0 @@
|
|||||||
# Copyright 2017 Google Inc.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
# This verbalizer is used whenever there is an LM symbol that consists of
|
|
||||||
# letters immediately followed by "{spelled}". This strips the "{spelled}"
|
|
||||||
# suffix.
|
|
||||||
|
|
||||||
import 'util/byte.grm' as b;
|
|
||||||
import 'ru/classifier/cyrillic.grm' as c;
|
|
||||||
import 'en/verbalizer/lexical_map.grm' as l;
|
|
||||||
import 'en/verbalizer/numbers.grm' as n;
|
|
||||||
|
|
||||||
digit = b.kDigit @ n.CARDINAL_NUMBERS;
|
|
||||||
|
|
||||||
char_set = (("a" | "A") : "letter-a")
|
|
||||||
| (("b" | "B") : "letter-b")
|
|
||||||
| (("c" | "C") : "letter-c")
|
|
||||||
| (("d" | "D") : "letter-d")
|
|
||||||
| (("e" | "E") : "letter-e")
|
|
||||||
| (("f" | "F") : "letter-f")
|
|
||||||
| (("g" | "G") : "letter-g")
|
|
||||||
| (("h" | "H") : "letter-h")
|
|
||||||
| (("i" | "I") : "letter-i")
|
|
||||||
| (("j" | "J") : "letter-j")
|
|
||||||
| (("k" | "K") : "letter-k")
|
|
||||||
| (("l" | "L") : "letter-l")
|
|
||||||
| (("m" | "M") : "letter-m")
|
|
||||||
| (("n" | "N") : "letter-n")
|
|
||||||
| (("o" | "O") : "letter-o")
|
|
||||||
| (("p" | "P") : "letter-p")
|
|
||||||
| (("q" | "Q") : "letter-q")
|
|
||||||
| (("r" | "R") : "letter-r")
|
|
||||||
| (("s" | "S") : "letter-s")
|
|
||||||
| (("t" | "T") : "letter-t")
|
|
||||||
| (("u" | "U") : "letter-u")
|
|
||||||
| (("v" | "V") : "letter-v")
|
|
||||||
| (("w" | "W") : "letter-w")
|
|
||||||
| (("x" | "X") : "letter-x")
|
|
||||||
| (("y" | "Y") : "letter-y")
|
|
||||||
| (("z" | "Z") : "letter-z")
|
|
||||||
| (digit)
|
|
||||||
| ("&" : "@@AND@@")
|
|
||||||
| ("." : "")
|
|
||||||
| ("-" : "")
|
|
||||||
| ("_" : "")
|
|
||||||
| ("/" : "")
|
|
||||||
| (n.I["letter-"] c.kCyrillicAlpha)
|
|
||||||
;
|
|
||||||
|
|
||||||
ins_space = "" : " ";
|
|
||||||
|
|
||||||
suffix = "{spelled}" : "";
|
|
||||||
|
|
||||||
spelled = Optimize[char_set (ins_space char_set)* suffix];
|
|
||||||
|
|
||||||
export SPELLED = Optimize[spelled @ l.LEXICAL_MAP];
|
|
||||||
|
|
||||||
sigma_star = b.kBytes*;
|
|
||||||
|
|
||||||
# Gets rid of the letter- prefix since in some cases we don't want it.
|
|
||||||
|
|
||||||
del_letter = CDRewrite[n.D["letter-"], "", "", sigma_star];
|
|
||||||
|
|
||||||
spelled_no_tag = Optimize[char_set (ins_space char_set)*];
|
|
||||||
|
|
||||||
export SPELLED_NO_LETTER = Optimize[spelled_no_tag @ del_letter];
|
|
@ -1,24 +0,0 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import 'en/verbalizer/lexical_map.grm' as l;

punct =
    ("." : "@@PERIOD@@")
  | ("," : "@@COMMA@@")
  | ("!" : "@@EXCLAMATION_MARK@@")
  | ("?" : "@@QUESTION_MARK@@")
;

export SPOKEN_PUNCT = Optimize[punct @ l.LEXICAL_MAP];
@ -1,108 +0,0 @@
|
|||||||
# Copyright 2017 Google Inc.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
import 'util/byte.grm' as b;
|
|
||||||
import 'en/verbalizer/lexical_map.grm' as l;
|
|
||||||
import 'en/verbalizer/numbers.grm' as n;
|
|
||||||
|
|
||||||
# Only handles 24-hour time with quarter-to, half-past and quarter-past.
|
|
||||||
|
|
||||||
increment_hour =
|
|
||||||
("0" : "1")
|
|
||||||
| ("1" : "2")
|
|
||||||
| ("2" : "3")
|
|
||||||
| ("3" : "4")
|
|
||||||
| ("4" : "5")
|
|
||||||
| ("5" : "6")
|
|
||||||
| ("6" : "7")
|
|
||||||
| ("7" : "8")
|
|
||||||
| ("8" : "9")
|
|
||||||
| ("9" : "10")
|
|
||||||
| ("10" : "11")
|
|
||||||
| ("11" : "12")
|
|
||||||
| ("12" : "1") # If someone uses 12, we assume 12-hour by default.
|
|
||||||
| ("13" : "14")
|
|
||||||
| ("14" : "15")
|
|
||||||
| ("15" : "16")
|
|
||||||
| ("16" : "17")
|
|
||||||
| ("17" : "18")
|
|
||||||
| ("18" : "19")
|
|
||||||
| ("19" : "20")
|
|
||||||
| ("20" : "21")
|
|
||||||
| ("21" : "22")
|
|
||||||
| ("22" : "23")
|
|
||||||
| ("23" : "12")
|
|
||||||
;
|
|
||||||
|
|
||||||
hours = Project[increment_hour, 'input'];
|
|
||||||
|
|
||||||
d = b.kDigit;
|
|
||||||
D = d - "0";
|
|
||||||
|
|
||||||
minutes09 = "0" D;
|
|
||||||
|
|
||||||
minutes = ("1" | "2" | "3" | "4" | "5") d;
|
|
||||||
|
|
||||||
__sep__ = ":";
|
|
||||||
sep_space = __sep__ : " ";
|
|
||||||
|
|
||||||
verbalize_hours = hours @ n.CARDINAL_NUMBERS;
|
|
||||||
|
|
||||||
verbalize_minutes =
|
|
||||||
("00" : "@@HOUR@@")
|
|
||||||
| (minutes09 @ (("0" : "@@TIME_ZERO@@") n.I[" "] n.CARDINAL_NUMBERS))
|
|
||||||
| (minutes @ n.CARDINAL_NUMBERS)
|
|
||||||
;
|
|
||||||
|
|
||||||
time_basic = Optimize[verbalize_hours sep_space verbalize_minutes];
|
|
||||||
|
|
||||||
# Special cases we handle right now.
|
|
||||||
# TODO: Need to allow for cases like
|
|
||||||
#
|
|
||||||
# half twelve (in the UK English sense)
|
|
||||||
# half twaalf (in the Dutch sense)
|
|
||||||
|
|
||||||
time_quarter_past =
|
|
||||||
n.I["@@TIME_QUARTER@@ @@TIME_AFTER@@ "]
|
|
||||||
verbalize_hours
|
|
||||||
n.D[__sep__ "15"];
|
|
||||||
|
|
||||||
time_half_past =
|
|
||||||
n.I["@@TIME_HALF@@ @@TIME_AFTER@@ "]
|
|
||||||
verbalize_hours
|
|
||||||
n.D[__sep__ "30"];
|
|
||||||
|
|
||||||
time_quarter_to =
|
|
||||||
n.I["@@TIME_QUARTER@@ @@TIME_BEFORE@@ "]
|
|
||||||
(increment_hour @ verbalize_hours)
|
|
||||||
n.D[__sep__ "45"];
|
|
||||||
|
|
||||||
time_extra = Optimize[
|
|
||||||
time_quarter_past | time_half_past | time_quarter_to]
|
|
||||||
;
|
|
||||||
|
|
||||||
# Basic time periods which most languages can be expected to have.
|
|
||||||
__am__ = "a.m." | "am" | "AM";
|
|
||||||
__pm__ = "p.m." | "pm" | "PM";
|
|
||||||
|
|
||||||
period = (__am__ : "@@TIME_AM@@") | (__pm__ : "@@TIME_PM@@");
|
|
||||||
|
|
||||||
time_variants = time_basic | time_extra;
|
|
||||||
|
|
||||||
time = Optimize[
|
|
||||||
(period (" " | n.I[" "]))? time_variants
|
|
||||||
| time_variants ((" " | n.I[" "]) period)?]
|
|
||||||
;
|
|
||||||
|
|
||||||
export TIME = Optimize[time @ l.LEXICAL_MAP];
|
|
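The time grammar that precedes this point only special-cases HH:15, HH:30 and HH:45 on top of a plain hours-and-minutes reading, bumping the hour for quarter-to (with 12 wrapping to 1 and 23 to 12). A plain-Python restatement of just that branching, with hypothetical names and the @@...@@ markup left unexpanded:

    # Plain-Python restatement of the quarter/half special cases above;
    # everything else falls back to "hours minutes". Helper names are
    # hypothetical and the hour is left as digits for brevity.
    def special_time_reading(hh: int, mm: int) -> str:
        next_hour = 1 if hh == 12 else 12 if hh == 23 else hh + 1
        if mm == 15:
            return f"@@TIME_QUARTER@@ @@TIME_AFTER@@ {hh}"
        if mm == 30:
            return f"@@TIME_HALF@@ @@TIME_AFTER@@ {hh}"
        if mm == 45:
            return f"@@TIME_QUARTER@@ @@TIME_BEFORE@@ {next_hour}"
        return f"{hh} {mm:02d}"

    assert special_time_reading(6, 45) == "@@TIME_QUARTER@@ @@TIME_BEFORE@@ 7"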
@ -1,68 +0,0 @@
|
|||||||
# Copyright 2017 Google Inc.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
# Rules for URLs and email addresses.
|
|
||||||
|
|
||||||
import 'util/byte.grm' as bytelib;
|
|
||||||
import 'en/verbalizer/lexical_map.grm' as l;
|
|
||||||
|
|
||||||
ins_space = "" : " ";
|
|
||||||
dot = "." : "@@URL_DOT_EXPRESSION@@";
|
|
||||||
at = "@" : "@@AT@@";
|
|
||||||
|
|
||||||
url_suffix =
|
|
||||||
(".com" : dot ins_space "com") |
|
|
||||||
(".gov" : dot ins_space "gov") |
|
|
||||||
(".edu" : dot ins_space "e d u") |
|
|
||||||
(".org" : dot ins_space "org") |
|
|
||||||
(".net" : dot ins_space "net")
|
|
||||||
;
|
|
||||||
|
|
||||||
letter_string = (bytelib.kAlnum)* bytelib.kAlnum;
|
|
||||||
|
|
||||||
letter_string_dot =
|
|
||||||
((letter_string ins_space dot ins_space)* letter_string)
|
|
||||||
;
|
|
||||||
|
|
||||||
# Rules for URLs.
|
|
||||||
export URL = Optimize[
|
|
||||||
((letter_string_dot) (ins_space)
|
|
||||||
(url_suffix)) @ l.LEXICAL_MAP
|
|
||||||
];
|
|
||||||
|
|
||||||
# Rules for email addresses.
|
|
||||||
letter_by_letter = ((bytelib.kAlnum ins_space)* bytelib.kAlnum);
|
|
||||||
|
|
||||||
letter_by_letter_dot =
|
|
||||||
((letter_by_letter ins_space dot ins_space)*
|
|
||||||
letter_by_letter)
|
|
||||||
;
|
|
||||||
|
|
||||||
export EMAIL1 = Optimize[
|
|
||||||
((letter_by_letter) (ins_space)
|
|
||||||
(at) (ins_space)
|
|
||||||
(letter_by_letter_dot) (ins_space)
|
|
||||||
(url_suffix)) @ l.LEXICAL_MAP
|
|
||||||
];
|
|
||||||
|
|
||||||
export EMAIL2 = Optimize[
|
|
||||||
((letter_by_letter) (ins_space)
|
|
||||||
(at) (ins_space)
|
|
||||||
(letter_string_dot) (ins_space)
|
|
||||||
(url_suffix)) @ l.LEXICAL_MAP
|
|
||||||
];
|
|
||||||
|
|
||||||
export EMAILS = Optimize[
|
|
||||||
EMAIL1 | EMAIL2
|
|
||||||
];
|
|
@ -1,42 +0,0 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import 'util/util.grm' as util;
import 'en/verbalizer/extra_numbers.grm' as e;
import 'en/verbalizer/float.grm' as f;
import 'en/verbalizer/math.grm' as ma;
import 'en/verbalizer/miscellaneous.grm' as mi;
import 'en/verbalizer/money.grm' as mo;
import 'en/verbalizer/numbers.grm' as n;
import 'en/verbalizer/numbers_plus.grm' as np;
import 'en/verbalizer/spelled.grm' as s;
import 'en/verbalizer/spoken_punct.grm' as sp;
import 'en/verbalizer/time.grm' as t;
import 'en/verbalizer/urls.grm' as u;

export VERBALIZER = Optimize[RmWeight[
  ( e.MIXED_NUMBERS
  | e.DIGITS
  | f.FLOAT
  | ma.ARITHMETIC
  | mi.MISCELLANEOUS
  | mo.MONEY
  | n.CARDINAL_NUMBERS
  | n.ORDINAL_NUMBERS
  | np.NUMBERS_PLUS
  | s.SPELLED
  | sp.SPOKEN_PUNCT
  | t.TIME
  | u.URL) @ util.CLEAN_SPACES
]];
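The top-level VERBALIZER is just the unweighted union of the per-category verbalizers, composed with a space cleaner. A minimal pynini sketch of that shape, with two toy sub-grammars standing in for the real ones; RmWeight and util.CLEAN_SPACES are omitted here.

    # Minimal sketch of the top-level union: input accepted by any one of
    # the sub-grammars is verbalized by that sub-grammar. The two toy
    # sub-grammars stand in for TIME, MONEY, CARDINAL_NUMBERS, etc.
    import pynini

    spoken_punct = pynini.cross("!", "exclamation mark")
    cardinal = pynini.string_map([("2", "two"), ("3", "three")])

    verbalizer = pynini.union(spoken_punct, cardinal).optimize()

    print(pynini.shortestpath("3" @ verbalizer).string())  # -> "three"
    print(pynini.shortestpath("!" @ verbalizer).string())  # -> "exclamation mark"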
@ -1,17 +0,0 @@
This directory contains data used in:

Gorman, K., and Sproat, R. 2016. Minimally supervised number normalization.
Transactions of the Association for Computational Linguistics 4: 507-519.

* `minimal.txt`: A list of 30 curated numbers used as the "minimal" training
  set.
* `random-trn.txt`: A list of 9000 randomly-generated numbers used as the
  "medium" training set.
* `random-tst.txt`: A list of 1000 randomly-generated numbers used as the test
  set.

Note that `random-trn.txt` and `random-tst.txt` are totally disjoint, but that
a small number of examples occur both in `minimal.txt` and `random-tst.txt`.

For information about the sampling procedure used to generate the random data
sets, see appendix A of the aforementioned paper.
@ -1,300 +0,0 @@
|
|||||||
0
|
|
||||||
1
|
|
||||||
2
|
|
||||||
3
|
|
||||||
4
|
|
||||||
5
|
|
||||||
6
|
|
||||||
7
|
|
||||||
8
|
|
||||||
9
|
|
||||||
10
|
|
||||||
11
|
|
||||||
12
|
|
||||||
13
|
|
||||||
14
|
|
||||||
15
|
|
||||||
16
|
|
||||||
17
|
|
||||||
18
|
|
||||||
19
|
|
||||||
20
|
|
||||||
21
|
|
||||||
22
|
|
||||||
23
|
|
||||||
24
|
|
||||||
25
|
|
||||||
26
|
|
||||||
27
|
|
||||||
28
|
|
||||||
29
|
|
||||||
30
|
|
||||||
31
|
|
||||||
32
|
|
||||||
33
|
|
||||||
34
|
|
||||||
35
|
|
||||||
36
|
|
||||||
37
|
|
||||||
38
|
|
||||||
39
|
|
||||||
40
|
|
||||||
41
|
|
||||||
42
|
|
||||||
43
|
|
||||||
44
|
|
||||||
45
|
|
||||||
46
|
|
||||||
47
|
|
||||||
48
|
|
||||||
49
|
|
||||||
50
|
|
||||||
51
|
|
||||||
52
|
|
||||||
53
|
|
||||||
54
|
|
||||||
55
|
|
||||||
56
|
|
||||||
57
|
|
||||||
58
|
|
||||||
59
|
|
||||||
60
|
|
||||||
61
|
|
||||||
62
|
|
||||||
63
|
|
||||||
64
|
|
||||||
65
|
|
||||||
66
|
|
||||||
67
|
|
||||||
68
|
|
||||||
69
|
|
||||||
70
|
|
||||||
71
|
|
||||||
72
|
|
||||||
73
|
|
||||||
74
|
|
||||||
75
|
|
||||||
76
|
|
||||||
77
|
|
||||||
78
|
|
||||||
79
|
|
||||||
80
|
|
||||||
81
|
|
||||||
82
|
|
||||||
83
|
|
||||||
84
|
|
||||||
85
|
|
||||||
86
|
|
||||||
87
|
|
||||||
88
|
|
||||||
89
|
|
||||||
90
|
|
||||||
91
|
|
||||||
92
|
|
||||||
93
|
|
||||||
94
|
|
||||||
95
|
|
||||||
96
|
|
||||||
97
|
|
||||||
98
|
|
||||||
99
|
|
||||||
100
|
|
||||||
101
|
|
||||||
102
|
|
||||||
103
|
|
||||||
104
|
|
||||||
105
|
|
||||||
106
|
|
||||||
107
|
|
||||||
108
|
|
||||||
109
|
|
||||||
110
|
|
||||||
111
|
|
||||||
112
|
|
||||||
113
|
|
||||||
114
|
|
||||||
115
|
|
||||||
116
|
|
||||||
117
|
|
||||||
118
|
|
||||||
119
|
|
||||||
120
|
|
||||||
121
|
|
||||||
122
|
|
||||||
123
|
|
||||||
124
|
|
||||||
125
|
|
||||||
126
|
|
||||||
127
|
|
||||||
128
|
|
||||||
129
|
|
||||||
130
|
|
||||||
131
|
|
||||||
132
|
|
||||||
133
|
|
||||||
134
|
|
||||||
135
|
|
||||||
136
|
|
||||||
137
|
|
||||||
138
|
|
||||||
139
|
|
||||||
140
|
|
||||||
141
|
|
||||||
142
|
|
||||||
143
|
|
||||||
144
|
|
||||||
145
|
|
||||||
146
|
|
||||||
147
|
|
||||||
148
|
|
||||||
149
|
|
||||||
150
|
|
||||||
151
|
|
||||||
152
|
|
||||||
153
|
|
||||||
154
|
|
||||||
155
|
|
||||||
156
|
|
||||||
157
|
|
||||||
158
|
|
||||||
159
|
|
||||||
160
|
|
||||||
161
|
|
||||||
162
|
|
||||||
163
|
|
||||||
164
|
|
||||||
165
|
|
||||||
166
|
|
||||||
167
|
|
||||||
168
|
|
||||||
169
|
|
||||||
170
|
|
||||||
171
|
|
||||||
172
|
|
||||||
173
|
|
||||||
174
|
|
||||||
175
|
|
||||||
176
|
|
||||||
177
|
|
||||||
178
|
|
||||||
179
|
|
||||||
180
|
|
||||||
181
|
|
||||||
182
|
|
||||||
183
|
|
||||||
184
|
|
||||||
185
|
|
||||||
186
|
|
||||||
187
|
|
||||||
188
|
|
||||||
189
|
|
||||||
190
|
|
||||||
191
|
|
||||||
192
|
|
||||||
193
|
|
||||||
194
|
|
||||||
195
|
|
||||||
196
|
|
||||||
197
|
|
||||||
198
|
|
||||||
199
|
|
||||||
200
|
|
||||||
201
|
|
||||||
202
|
|
||||||
203
|
|
||||||
204
|
|
||||||
205
|
|
||||||
206
|
|
||||||
207
|
|
||||||
208
|
|
||||||
209
|
|
||||||
210
|
|
||||||
211
|
|
||||||
212
|
|
||||||
220
|
|
||||||
221
|
|
||||||
230
|
|
||||||
300
|
|
||||||
400
|
|
||||||
500
|
|
||||||
600
|
|
||||||
700
|
|
||||||
800
|
|
||||||
900
|
|
||||||
1000
|
|
||||||
1001
|
|
||||||
1002
|
|
||||||
1003
|
|
||||||
1004
|
|
||||||
1005
|
|
||||||
1006
|
|
||||||
1007
|
|
||||||
1008
|
|
||||||
1009
|
|
||||||
1010
|
|
||||||
1011
|
|
||||||
1012
|
|
||||||
1020
|
|
||||||
1021
|
|
||||||
1030
|
|
||||||
1200
|
|
||||||
2000
|
|
||||||
2001
|
|
||||||
2002
|
|
||||||
2003
|
|
||||||
2004
|
|
||||||
2005
|
|
||||||
2006
|
|
||||||
2007
|
|
||||||
2008
|
|
||||||
2009
|
|
||||||
2010
|
|
||||||
2011
|
|
||||||
2012
|
|
||||||
2020
|
|
||||||
2021
|
|
||||||
2030
|
|
||||||
2100
|
|
||||||
2200
|
|
||||||
5001
|
|
||||||
10000
|
|
||||||
12000
|
|
||||||
20000
|
|
||||||
21000
|
|
||||||
50001
|
|
||||||
100000
|
|
||||||
120000
|
|
||||||
200000
|
|
||||||
210000
|
|
||||||
500001
|
|
||||||
1000000
|
|
||||||
1001000
|
|
||||||
1200000
|
|
||||||
2000000
|
|
||||||
2100000
|
|
||||||
5000001
|
|
||||||
10000000
|
|
||||||
10001000
|
|
||||||
12000000
|
|
||||||
20000000
|
|
||||||
50000001
|
|
||||||
100000000
|
|
||||||
100001000
|
|
||||||
120000000
|
|
||||||
200000000
|
|
||||||
500000001
|
|
||||||
1000000000
|
|
||||||
1000001000
|
|
||||||
1200000000
|
|
||||||
2000000000
|
|
||||||
5000000001
|
|
||||||
10000000000
|
|
||||||
10000001000
|
|
||||||
12000000000
|
|
||||||
20000000000
|
|
||||||
50000000001
|
|
||||||
100000000000
|
|
||||||
100000001000
|
|
||||||
120000000000
|
|
||||||
200000000000
|
|
||||||
500000000001
|
|
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -1,6 +0,0 @@
# Russian covering grammar definitions

This directory defines a Russian text normalization covering grammar. The
primary entry-point is the FST `VERBALIZER`, defined in
`verbalizer/verbalizer.grm` and compiled in the FST archive
`verbalizer/verbalizer.far`.
@ -1,338 +0,0 @@
|
|||||||
# Copyright 2017 Google Inc.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
# AUTOMATICALLY GENERATED: DO NOT EDIT.
|
|
||||||
import 'util/byte.grm' as b;
|
|
||||||
|
|
||||||
# Utilities for insertion and deletion.
|
|
||||||
|
|
||||||
func I[expr] {
|
|
||||||
return "" : expr;
|
|
||||||
}
|
|
||||||
|
|
||||||
func D[expr] {
|
|
||||||
return expr : "";
|
|
||||||
}
|
|
||||||
|
|
||||||
# Powers of base 10.
|
|
||||||
export POWERS =
|
|
||||||
"[E15]"
|
|
||||||
| "[E14]"
|
|
||||||
| "[E13]"
|
|
||||||
| "[E12]"
|
|
||||||
| "[E11]"
|
|
||||||
| "[E10]"
|
|
||||||
| "[E9]"
|
|
||||||
| "[E8]"
|
|
||||||
| "[E7]"
|
|
||||||
| "[E6]"
|
|
||||||
| "[E5]"
|
|
||||||
| "[E4]"
|
|
||||||
| "[E3]"
|
|
||||||
| "[E2]"
|
|
||||||
| "[E1]"
|
|
||||||
;
|
|
||||||
|
|
||||||
export SIGMA = b.kBytes | POWERS;
|
|
||||||
|
|
||||||
export SIGMA_STAR = SIGMA*;
|
|
||||||
|
|
||||||
export SIGMA_PLUS = SIGMA+;
|
|
||||||
|
|
||||||
################################################################################
|
|
||||||
# BEGIN LANGUAGE SPECIFIC DATA
|
|
||||||
revaluations =
|
|
||||||
("[E4]" : "[E1]")
|
|
||||||
| ("[E5]" : "[E2]")
|
|
||||||
| ("[E7]" : "[E1]")
|
|
||||||
| ("[E8]" : "[E2]")
|
|
||||||
;
|
|
||||||
|
|
||||||
Ms = "[E3]" | "[E6]" | "[E9]";
|
|
||||||
|
|
||||||
|
|
||||||
func Zero[expr] {
|
|
||||||
return expr : ("");
|
|
||||||
}
|
|
||||||
|
|
||||||
space = " ";
|
|
||||||
|
|
||||||
lexset3 = Optimize[
|
|
||||||
("1[E1]+1" : "одиннадцати")
|
|
||||||
| ("1[E1]+1" : "одиннадцать")
|
|
||||||
| ("1[E1]+1" : "одиннадцатью")
|
|
||||||
| ("1[E1]+2" : "двенадцати")
|
|
||||||
| ("1[E1]+2" : "двенадцать")
|
|
||||||
| ("1[E1]+2" : "двенадцатью")
|
|
||||||
| ("1[E1]+3" : "тринадцати")
|
|
||||||
| ("1[E1]+3" : "тринадцать")
|
|
||||||
| ("1[E1]+3" : "тринадцатью")
|
|
||||||
| ("1[E1]+4" : "четырнадцати")
|
|
||||||
| ("1[E1]+4" : "четырнадцать")
|
|
||||||
| ("1[E1]+4" : "четырнадцатью")
|
|
||||||
| ("1[E1]+5" : "пятнадцати")
|
|
||||||
| ("1[E1]+5" : "пятнадцать")
|
|
||||||
| ("1[E1]+5" : "пятнадцатью")
|
|
||||||
| ("1[E1]+6" : "шестнадцати")
|
|
||||||
| ("1[E1]+6" : "шестнадцать")
|
|
||||||
| ("1[E1]+6" : "шестнадцатью")
|
|
||||||
| ("1[E1]+7" : "семнадцати")
|
|
||||||
| ("1[E1]+7" : "семнадцать")
|
|
||||||
| ("1[E1]+7" : "семнадцатью")
|
|
||||||
| ("1[E1]+8" : "восемнадцати")
|
|
||||||
| ("1[E1]+8" : "восемнадцать")
|
|
||||||
| ("1[E1]+8" : "восемнадцатью")
|
|
||||||
| ("1[E1]+9" : "девятнадцати")
|
|
||||||
| ("1[E1]+9" : "девятнадцать")
|
|
||||||
| ("1[E1]+9" : "девятнадцатью")]
|
|
||||||
;
|
|
||||||
|
|
||||||
lex3 = CDRewrite[lexset3 I[space], "", "", SIGMA_STAR];
|
|
||||||
|
|
||||||
lexset2 = Optimize[
|
|
||||||
("1[E1]" : "десяти")
|
|
||||||
| ("1[E1]" : "десять")
|
|
||||||
| ("1[E1]" : "десятью")
|
|
||||||
| ("1[E2]" : "ста")
|
|
||||||
| ("1[E2]" : "сто")
|
|
||||||
| ("2[E1]" : "двадцати")
|
|
||||||
| ("2[E1]" : "двадцать")
|
|
||||||
| ("2[E1]" : "двадцатью")
|
|
||||||
| ("2[E2]" : "двести")
|
|
||||||
| ("2[E2]" : "двумстам")
|
|
||||||
| ("2[E2]" : "двумястами")
|
|
||||||
| ("2[E2]" : "двухсот")
|
|
||||||
| ("2[E2]" : "двухстах")
|
|
||||||
| ("3[E1]" : "тридцати")
|
|
||||||
| ("3[E1]" : "тридцать")
|
|
||||||
| ("3[E1]" : "тридцатью")
|
|
||||||
| ("3[E2]" : "тремстам")
|
|
||||||
| ("3[E2]" : "тремястами")
|
|
||||||
| ("3[E2]" : "трехсот")
|
|
||||||
| ("3[E2]" : "трехстах")
|
|
||||||
| ("3[E2]" : "триста")
|
|
||||||
| ("4[E1]" : "сорок")
|
|
||||||
| ("4[E1]" : "сорока")
|
|
||||||
| ("4[E2]" : "четыремстам")
|
|
||||||
| ("4[E2]" : "четыреста")
|
|
||||||
| ("4[E2]" : "четырехсот")
|
|
||||||
| ("4[E2]" : "четырехстах")
|
|
||||||
| ("4[E2]" : "четырьмястами")
|
|
||||||
| ("5[E1]" : "пятидесяти")
|
|
||||||
| ("5[E1]" : "пятьдесят")
|
|
||||||
| ("5[E1]" : "пятьюдесятью")
|
|
||||||
| ("5[E2]" : "пятисот")
|
|
||||||
| ("5[E2]" : "пятистам")
|
|
||||||
| ("5[E2]" : "пятистах")
|
|
||||||
| ("5[E2]" : "пятьсот")
|
|
||||||
| ("5[E2]" : "пятьюстами")
|
|
||||||
| ("6[E1]" : "шестидесяти")
|
|
||||||
| ("6[E1]" : "шестьдесят")
|
|
||||||
| ("6[E1]" : "шестьюдесятью")
|
|
||||||
| ("6[E2]" : "шестисот")
|
|
||||||
| ("6[E2]" : "шестистам")
|
|
||||||
| ("6[E2]" : "шестистах")
|
|
||||||
| ("6[E2]" : "шестьсот")
|
|
||||||
| ("6[E2]" : "шестьюстами")
|
|
||||||
| ("7[E1]" : "семидесяти")
|
|
||||||
| ("7[E1]" : "семьдесят")
|
|
||||||
| ("7[E1]" : "семьюдесятью")
|
|
||||||
| ("7[E2]" : "семисот")
|
|
||||||
| ("7[E2]" : "семистам")
|
|
||||||
| ("7[E2]" : "семистах")
|
|
||||||
| ("7[E2]" : "семьсот")
|
|
||||||
| ("7[E2]" : "семьюстами")
|
|
||||||
| ("8[E1]" : "восемьдесят")
|
|
||||||
| ("8[E1]" : "восьмидесяти")
|
|
||||||
| ("8[E1]" : "восьмьюдесятью")
|
|
||||||
| ("8[E2]" : "восемьсот")
|
|
||||||
| ("8[E2]" : "восемьюстами")
|
|
||||||
| ("8[E2]" : "восьмисот")
|
|
||||||
| ("8[E2]" : "восьмистам")
|
|
||||||
| ("8[E2]" : "восьмистах")
|
|
||||||
| ("8[E2]" : "восьмьюстами")
|
|
||||||
| ("9[E1]" : "девяноста")
|
|
||||||
| ("9[E1]" : "девяносто")
|
|
||||||
| ("9[E2]" : "девятисот")
|
|
||||||
| ("9[E2]" : "девятистам")
|
|
||||||
| ("9[E2]" : "девятистах")
|
|
||||||
| ("9[E2]" : "девятьсот")
|
|
||||||
| ("9[E2]" : "девятьюстами")]
|
|
||||||
;
|
|
||||||
|
|
||||||
lex2 = CDRewrite[lexset2 I[space], "", "", SIGMA_STAR];
|
|
||||||
|
|
||||||
lexset1 = Optimize[
|
|
||||||
("+" : "")
|
|
||||||
| ("1" : "один")
|
|
||||||
| ("1" : "одна")
|
|
||||||
| ("1" : "одни")
|
|
||||||
| ("1" : "одним")
|
|
||||||
| ("1" : "одними")
|
|
||||||
| ("1" : "одних")
|
|
||||||
| ("1" : "одно")
|
|
||||||
| ("1" : "одного")
|
|
||||||
| ("1" : "одной")
|
|
||||||
| ("1" : "одном")
|
|
||||||
| ("1" : "одному")
|
|
||||||
| ("1" : "одною")
|
|
||||||
| ("1" : "одну")
|
|
||||||
| ("2" : "два")
|
|
||||||
| ("2" : "две")
|
|
||||||
| ("2" : "двум")
|
|
||||||
| ("2" : "двумя")
|
|
||||||
| ("2" : "двух")
|
|
||||||
| ("3" : "трем")
|
|
||||||
| ("3" : "тремя")
|
|
||||||
| ("3" : "трех")
|
|
||||||
| ("3" : "три")
|
|
||||||
| ("4" : "четыре")
|
|
||||||
| ("4" : "четырем")
|
|
||||||
| ("4" : "четырех")
|
|
||||||
| ("4" : "четырьмя")
|
|
||||||
| ("5" : "пяти")
|
|
||||||
| ("5" : "пять")
|
|
||||||
| ("5" : "пятью")
|
|
||||||
| ("6" : "шести")
|
|
||||||
| ("6" : "шесть")
|
|
||||||
| ("6" : "шестью")
|
|
||||||
| ("7" : "семи")
|
|
||||||
| ("7" : "семь")
|
|
||||||
| ("7" : "семью")
|
|
||||||
| ("8" : "восемь")
|
|
||||||
| ("8" : "восьми")
|
|
||||||
| ("8" : "восьмью")
|
|
||||||
| ("9" : "девяти")
|
|
||||||
| ("9" : "девять")
|
|
||||||
| ("9" : "девятью")
|
|
||||||
| ("[E3]" : "тысяч")
|
|
||||||
| ("[E3]" : "тысяча")
|
|
||||||
| ("[E3]" : "тысячам")
|
|
||||||
| ("[E3]" : "тысячами")
|
|
||||||
| ("[E3]" : "тысячах")
|
|
||||||
| ("[E3]" : "тысяче")
|
|
||||||
| ("[E3]" : "тысячей")
|
|
||||||
| ("[E3]" : "тысячи")
|
|
||||||
| ("[E3]" : "тысячу")
|
|
||||||
| ("[E3]" : "тысячью")
|
|
||||||
| ("[E6]" : "миллион")
|
|
||||||
| ("[E6]" : "миллиона")
|
|
||||||
| ("[E6]" : "миллионам")
|
|
||||||
| ("[E6]" : "миллионами")
|
|
||||||
| ("[E6]" : "миллионах")
|
|
||||||
| ("[E6]" : "миллионе")
|
|
||||||
| ("[E6]" : "миллионов")
|
|
||||||
| ("[E6]" : "миллионом")
|
|
||||||
| ("[E6]" : "миллиону")
|
|
||||||
| ("[E6]" : "миллионы")
|
|
||||||
| ("[E9]" : "миллиард")
|
|
||||||
| ("[E9]" : "миллиарда")
|
|
||||||
| ("[E9]" : "миллиардам")
|
|
||||||
| ("[E9]" : "миллиардами")
|
|
||||||
| ("[E9]" : "миллиардах")
|
|
||||||
| ("[E9]" : "миллиарде")
|
|
||||||
| ("[E9]" : "миллиардов")
|
|
||||||
| ("[E9]" : "миллиардом")
|
|
||||||
| ("[E9]" : "миллиарду")
|
|
||||||
| ("[E9]" : "миллиарды")
|
|
||||||
| ("|0|" : "ноле")
|
|
||||||
| ("|0|" : "нолем")
|
|
||||||
| ("|0|" : "ноль")
|
|
||||||
| ("|0|" : "нолю")
|
|
||||||
| ("|0|" : "ноля")
|
|
||||||
| ("|0|" : "нуле")
|
|
||||||
| ("|0|" : "нулем")
|
|
||||||
| ("|0|" : "нуль")
|
|
||||||
| ("|0|" : "нулю")
|
|
||||||
| ("|0|" : "нуля")]
|
|
||||||
;
|
|
||||||
|
|
||||||
lex1 = CDRewrite[lexset1 I[space], "", "", SIGMA_STAR];
|
|
||||||
|
|
||||||
export LEX = Optimize[lex3 @ lex2 @ lex1];
|
|
||||||
|
|
||||||
export INDEPENDENT_EXPONENTS = "[E3]" | "[E6]" | "[E9]";
|
|
||||||
|
|
||||||
# END LANGUAGE SPECIFIC DATA
|
|
||||||
################################################################################
|
|
||||||
# Inserts a marker after the Ms.
|
|
||||||
export INSERT_BOUNDARY = CDRewrite["" : "%", Ms, "", SIGMA_STAR];
|
|
||||||
|
|
||||||
# Deletes all powers and "+".
|
|
||||||
export DELETE_POWERS = CDRewrite[D[POWERS | "+"], "", "", SIGMA_STAR];
|
|
||||||
|
|
||||||
# Deletes trailing zeros at the beginning of a number, so that "0003" does not
|
|
||||||
# get treated as an ordinary number.
|
|
||||||
export DELETE_INITIAL_ZEROS =
|
|
||||||
CDRewrite[("0" POWERS "+") : "", "[BOS]", "", SIGMA_STAR]
|
|
||||||
;
|
|
||||||
|
|
||||||
NonMs = Optimize[POWERS - Ms];
|
|
||||||
|
|
||||||
# Deletes (usually) zeros before a non-M. E.g., +0[E1] should be deleted.
|
|
||||||
export DELETE_INTERMEDIATE_ZEROS1 =
|
|
||||||
CDRewrite[Zero["+0" NonMs], "", "", SIGMA_STAR]
|
|
||||||
;
|
|
||||||
|
|
||||||
# Deletes (usually) zeros before an M, if there is no non-zero element between
|
|
||||||
# that and the previous boundary. Thus, if after the result of the rule above we
|
|
||||||
# end up with "%+0[E3]", then that gets deleted. Also (really) deletes a final
|
|
||||||
# zero.
|
|
||||||
export DELETE_INTERMEDIATE_ZEROS2 = Optimize[
|
|
||||||
CDRewrite[Zero["%+0" Ms], "", "", SIGMA_STAR]
|
|
||||||
@ CDRewrite[D["+0"], "", "[EOS]", SIGMA_STAR]]
|
|
||||||
;
|
|
||||||
|
|
||||||
# Final clean up of stray zeros.
|
|
||||||
export DELETE_REMAINING_ZEROS = Optimize[
|
|
||||||
CDRewrite[Zero["+0"], "", "", SIGMA_STAR]
|
|
||||||
@ CDRewrite[Zero["0"], "", "", SIGMA_STAR]]
|
|
||||||
;
|
|
||||||
|
|
||||||
# Applies the revaluation map. For example in English, changes [E4] to [E1] as a
|
|
||||||
# modifier of [E3].
|
|
||||||
export REVALUE = CDRewrite[revaluations, "", "", SIGMA_STAR];
|
|
||||||
|
|
||||||
# Deletes the various marks and powers in the input and output.
|
|
||||||
export DELETE_MARKS = CDRewrite[D["%" | "+" | POWERS], "", "", SIGMA_STAR];
|
|
||||||
|
|
||||||
export CLEAN_SPACES = Optimize[
|
|
||||||
CDRewrite[" "+ : " ", b.kNotSpace, b.kNotSpace, SIGMA_STAR]
|
|
||||||
@ CDRewrite[" "* : "", "[BOS]", "", SIGMA_STAR]
|
|
||||||
@ CDRewrite[" "* : "", "", "[EOS]", SIGMA_STAR]]
|
|
||||||
;
|
|
||||||
|
|
||||||
d = b.kDigit;
|
|
||||||
|
|
||||||
# Germanic inversion rule.
|
|
||||||
germanic =
|
|
||||||
(I["1+"] d "[E1]" D["+1"])
|
|
||||||
| (I["2+"] d "[E1]" D["+2"])
|
|
||||||
| (I["3+"] d "[E1]" D["+3"])
|
|
||||||
| (I["4+"] d "[E1]" D["+4"])
|
|
||||||
| (I["5+"] d "[E1]" D["+5"])
|
|
||||||
| (I["6+"] d "[E1]" D["+6"])
|
|
||||||
| (I["7+"] d "[E1]" D["+7"])
|
|
||||||
| (I["8+"] d "[E1]" D["+8"])
|
|
||||||
| (I["9+"] d "[E1]" D["+9"])
|
|
||||||
;
|
|
||||||
|
|
||||||
germanic_inversion =
|
|
||||||
CDRewrite[germanic, "", "", SIGMA_STAR, 'ltr', 'opt']
|
|
||||||
;
|
|
||||||
|
|
||||||
export GERMANIC_INVERSION = SIGMA_STAR;
|
|
||||||
export ORDINAL_RESTRICTION = SIGMA_STAR;
|
|
||||||
nondigits = b.kBytes - b.kDigit;
|
|
||||||
export ORDINAL_SUFFIX = D[nondigits*];
|
|
|
@ -1,35 +0,0 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import 'util/byte.grm' as b;
import 'ru/verbalizer/numbers.grm' as n;

digit = b.kDigit @ n.CARDINAL_NUMBERS | ("0" : "@@OTHER_ZERO_VERBALIZATIONS@@");

export DIGITS = digit (n.I[" "] digit)*;

# Various common factorizations

two_digits = b.kDigit{2} @ n.CARDINAL_NUMBERS;

three_digits = b.kDigit{3} @ n.CARDINAL_NUMBERS;

mixed =
    (digit n.I[" "] two_digits)
  | (two_digits n.I[" "] two_digits)
  | (two_digits n.I[" "] three_digits)
  | (two_digits n.I[" "] two_digits n.I[" "] two_digits)
;

export MIXED_NUMBERS = Optimize[mixed];
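DIGITS above reads a number digit by digit, inserting a space between readings and giving "0" its own @@OTHER_ZERO_VERBALIZATIONS@@ token. A rough pynini sketch of that pattern, with a two-word digit map assumed in place of the full CARDINAL_NUMBERS grammar:

    # Sketch of the DIGITS pattern: each digit verbalized on its own, with
    # a space inserted between readings. The digit map is an assumption.
    import pynini

    digit = pynini.union(
        pynini.string_map([("1", "один"), ("2", "два")]),
        pynini.cross("0", "@@OTHER_ZERO_VERBALIZATIONS@@"),
    )
    insert_space = pynini.cross("", " ")
    DIGITS = (digit + pynini.closure(insert_space + digit)).optimize()

    print(pynini.shortestpath("102" @ DIGITS).string())
    # -> "один @@OTHER_ZERO_VERBALIZATIONS@@ два"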
@ -1,40 +0,0 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import 'util/byte.grm' as b;
import 'util/util.grm' as u;
import 'ru/verbalizer/numbers.grm' as n;

func ToNumberName[expr] {
  number_name_seq = n.CARDINAL_NUMBERS (" " n.CARDINAL_NUMBERS)*;
  return Optimize[expr @ number_name_seq];
}

d = b.kDigit;

leading_zero = CDRewrite[n.I[" "], ("[BOS]" | " ") "0", "", b.kBytes*];

by_ones = d n.I[" "];
by_twos = (d{2} @ leading_zero) n.I[" "];
by_threes = (d{3} @ leading_zero) n.I[" "];

groupings = by_twos* (by_threes | by_twos | by_ones);

export FRACTIONAL_PART_UNGROUPED =
  Optimize[ToNumberName[by_ones+ @ u.CLEAN_SPACES]]
;
export FRACTIONAL_PART_GROUPED =
  Optimize[ToNumberName[groupings @ u.CLEAN_SPACES]]
;
export FRACTIONAL_PART_UNPARSED = Optimize[ToNumberName[d*]];
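The grouping scheme above reads a decimal's fractional digits in pairs, allows a final group of one, two or three digits, and splits a leading zero off a group so that "05" is read as "0 5". The FST admits several factorizations; the plain-Python sketch below picks one deterministically and uses hypothetical helper names.

    # One deterministic rendering of the grouping scheme above, for
    # illustration only; the FST itself is nondeterministic.
    def group_fractional_digits(digits: str) -> list[str]:
        groups, i = [], 0
        while len(digits) - i > 3:
            groups.append(digits[i:i + 2])
            i += 2
        groups.append(digits[i:])  # final group of one to three digits
        return groups

    def expand_leading_zero(group: str) -> str:
        return "0 " + group[1:] if group.startswith("0") and len(group) > 1 else group

    print([expand_leading_zero(g) for g in group_fractional_digits("140522")])
    # -> ['14', '0 5', '22']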
@ -1,30 +0,0 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import 'ru/verbalizer/factorization.grm' as f;
import 'ru/verbalizer/lexical_map.grm' as l;
import 'ru/verbalizer/numbers.grm' as n;

fractional_part_ungrouped = f.FRACTIONAL_PART_UNGROUPED;
fractional_part_grouped = f.FRACTIONAL_PART_GROUPED;
fractional_part_unparsed = f.FRACTIONAL_PART_UNPARSED;

__fractional_part__ = fractional_part_unparsed;
__decimal_marker__ = ",";

export FLOAT = Optimize[
  (n.CARDINAL_NUMBERS
  (__decimal_marker__ : " @@DECIMAL_DOT_EXPRESSION@@ ")
  __fractional_part__) @ l.LEXICAL_MAP]
;
Binary file not shown.
@ -1,25 +0,0 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import 'util/byte.grm' as b;

lexical_map = StringFile['ru/verbalizer/lexical_map.tsv'];

sigma_star = b.kBytes*;

del_null = CDRewrite["__NULL__" : "", "", "", sigma_star];

export LEXICAL_MAP = Optimize[
  CDRewrite[lexical_map, "", "", sigma_star] @ del_null]
;
@ -1,34 +0,0 @@
|
|||||||
# Copyright 2017 Google Inc.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
import 'ru/verbalizer/float.grm' as f;
|
|
||||||
import 'ru/verbalizer/lexical_map.grm' as l;
|
|
||||||
import 'ru/verbalizer/numbers.grm' as n;
|
|
||||||
|
|
||||||
float = f.FLOAT;
|
|
||||||
card = n.CARDINAL_NUMBERS;
|
|
||||||
number = card | float;
|
|
||||||
|
|
||||||
plus = "+" : " @@ARITHMETIC_PLUS@@ ";
|
|
||||||
times = "*" : " @@ARITHMETIC_TIMES@@ ";
|
|
||||||
minus = "-" : " @@ARITHMETIC_MINUS@@ ";
|
|
||||||
division = "/" : " @@ARITHMETIC_DIVISION@@ ";
|
|
||||||
|
|
||||||
operator = plus | times | minus | division;
|
|
||||||
|
|
||||||
percent = "%" : " @@PERCENT@@";
|
|
||||||
|
|
||||||
export ARITHMETIC =
|
|
||||||
Optimize[((number operator number) | (number percent)) @ l.LEXICAL_MAP]
|
|
||||||
;
|
|
@ -1,78 +0,0 @@
|
|||||||
# Copyright 2017 Google Inc.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
import 'util/byte.grm' as b;
|
|
||||||
import 'ru/classifier/cyrillic.grm' as c;
|
|
||||||
import 'ru/verbalizer/extra_numbers.grm' as e;
|
|
||||||
import 'ru/verbalizer/lexical_map.grm' as l;
|
|
||||||
import 'ru/verbalizer/numbers.grm' as n;
|
|
||||||
import 'ru/verbalizer/spelled.grm' as s;
|
|
||||||
|
|
||||||
letter = b.kAlpha | c.kCyrillicAlpha;
|
|
||||||
dash = "-";
|
|
||||||
word = letter+;
|
|
||||||
possibly_split_word = word (((dash | ".") : " ") word)* n.D["."]?;
|
|
||||||
|
|
||||||
post_word_symbol =
|
|
||||||
("+" : ("@@ARITHMETIC_PLUS@@" | "@@POSITIVE@@")) |
|
|
||||||
("-" : ("@@ARITHMETIC_MINUS@@" | "@@NEGATIVE@@")) |
|
|
||||||
("*" : "@@STAR@@")
|
|
||||||
;
|
|
||||||
|
|
||||||
pre_word_symbol =
|
|
||||||
("@" : "@@AT@@") |
|
|
||||||
("/" : "@@SLASH@@") |
|
|
||||||
("#" : "@@HASH@@")
|
|
||||||
;
|
|
||||||
|
|
||||||
post_word = possibly_split_word n.I[" "] post_word_symbol;
|
|
||||||
|
|
||||||
pre_word = pre_word_symbol n.I[" "] possibly_split_word;
|
|
||||||
|
|
||||||
## Number/digit sequence combos, maybe with a dash
|
|
||||||
|
|
||||||
spelled_word = word @ s.SPELLED_NO_LETTER;
|
|
||||||
|
|
||||||
word_number =
|
|
||||||
(word | spelled_word)
|
|
||||||
(n.I[" "] | (dash : " "))
|
|
||||||
(e.DIGITS | n.CARDINAL_NUMBERS | e.MIXED_NUMBERS)
|
|
||||||
;
|
|
||||||
|
|
||||||
number_word =
|
|
||||||
(e.DIGITS | n.CARDINAL_NUMBERS | e.MIXED_NUMBERS)
|
|
||||||
(n.I[" "] | (dash : " "))
|
|
||||||
(word | spelled_word)
|
|
||||||
;
|
|
||||||
|
|
||||||
## Two-digit year.
|
|
||||||
|
|
||||||
# Note that in this case to be fair we really have to allow ordinals too since
|
|
||||||
# in some languages that's what you would have.
|
|
||||||
|
|
||||||
two_digit_year = n.D["'"] (b.kDigit{2} @ (n.CARDINAL_NUMBERS | e.DIGITS));
|
|
||||||
|
|
||||||
dot_com = ("." : "@@URL_DOT_EXPRESSION@@") n.I[" "] "com";
|
|
||||||
|
|
||||||
miscellaneous = Optimize[
|
|
||||||
possibly_split_word
|
|
||||||
| post_word
|
|
||||||
| pre_word
|
|
||||||
| word_number
|
|
||||||
| number_word
|
|
||||||
| two_digit_year
|
|
||||||
| dot_com
|
|
||||||
];
|
|
||||||
|
|
||||||
export MISCELLANEOUS = Optimize[miscellaneous @ l.LEXICAL_MAP];
|
|
@ -1,44 +0,0 @@
|
|||||||
# Copyright 2017 Google Inc.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
import 'util/byte.grm' as b;
|
|
||||||
import 'ru/verbalizer/lexical_map.grm' as l;
|
|
||||||
import 'ru/verbalizer/numbers.grm' as n;
|
|
||||||
|
|
||||||
card = n.CARDINAL_NUMBERS;
|
|
||||||
|
|
||||||
__currency__ = StringFile['ru/verbalizer/money.tsv'];
|
|
||||||
|
|
||||||
d = b.kDigit;
|
|
||||||
D = d - "0";
|
|
||||||
|
|
||||||
cents = ((n.D["0"] | D) d) @ card;
|
|
||||||
|
|
||||||
# Only dollar for the verbalizer tests for English. Will need to add other
|
|
||||||
# currencies.
|
|
||||||
usd_maj = Project["usd_maj" @ __currency__, 'output'];
|
|
||||||
usd_min = Project["usd_min" @ __currency__, 'output'];
|
|
||||||
and = " @@MONEY_AND@@ " | " ";
|
|
||||||
|
|
||||||
dollar1 =
|
|
||||||
n.D["$"] card n.I[" " usd_maj] n.I[and] n.D["."] cents n.I[" " usd_min]
|
|
||||||
;
|
|
||||||
|
|
||||||
dollar2 = n.D["$"] card n.I[" " usd_maj] n.D["."] n.D["00"];
|
|
||||||
|
|
||||||
dollar3 = n.D["$"] card n.I[" " usd_maj];
|
|
||||||
|
|
||||||
dollar = Optimize[dollar1 | dollar2 | dollar3];
|
|
||||||
|
|
||||||
export MONEY = Optimize[dollar @ l.LEXICAL_MAP];
|
|
|
|
@ -1,48 +0,0 @@
|
|||||||
# Copyright 2017 Google Inc.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
# Russian minimally supervised number grammar.
|
|
||||||
#
|
|
||||||
# Supports cardinals and ordinals in all inflected forms.
|
|
||||||
#
|
|
||||||
# The language-specific acceptor G was compiled with digit, teen, decade,
|
|
||||||
# century, and big power-of-ten preterminals. The lexicon transducer is
|
|
||||||
# highly ambiguous, but no LM is used.
|
|
||||||
|
|
||||||
import 'util/arithmetic.grm' as a;
|
|
||||||
|
|
||||||
# Intersects the universal factorization transducer (F) with language-specific
|
|
||||||
# acceptor (G).
|
|
||||||
|
|
||||||
d = a.DELTA_STAR;
|
|
||||||
f = a.IARITHMETIC_RESTRICTED;
|
|
||||||
g = LoadFst['ru/verbalizer/g.fst'];
|
|
||||||
fg = Optimize[d @ Optimize[f @ Optimize[f @ Optimize[f @ g]]]];
|
|
||||||
test1 = AssertEqual["230" @ fg, "(+ 200 30 +)"];
|
|
||||||
|
|
||||||
# Compiles lexicon transducers (L).
|
|
||||||
|
|
||||||
cardinal_name = StringFile['ru/verbalizer/cardinals.tsv'];
|
|
||||||
cardinal_l = Optimize[(cardinal_name " ")* cardinal_name];
|
|
||||||
|
|
||||||
ordinal_name = StringFile['ru/verbalizer/ordinals.tsv'];
|
|
||||||
ordinal_l = Optimize[(cardinal_name " ")* ordinal_name];
|
|
||||||
|
|
||||||
# Composes L with the leaf transducer (P), then composes that with FG.
|
|
||||||
|
|
||||||
p = a.LEAVES;
|
|
||||||
|
|
||||||
export CARDINAL_NUMBER_NAME = Optimize[fg @ (p @ cardinal_l)];
|
|
||||||
|
|
||||||
export ORDINAL_NUMBER_NAME = Optimize[fg @ (p @ ordinal_l)];
|
|
@ -1,68 +0,0 @@
|
|||||||
# Copyright 2017 Google Inc.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
import 'ru/verbalizer/number_names.grm' as n;
|
|
||||||
import 'universal/thousands_punct.grm' as t;
|
|
||||||
import 'util/byte.grm' as b;
|
|
||||||
|
|
||||||
nominatives = StringFile['ru/verbalizer/nominatives.tsv'];
|
|
||||||
|
|
||||||
sigma_star = b.kBytes*;
|
|
||||||
|
|
||||||
nominative_filter =
|
|
||||||
CDRewrite[nominatives ("" : "" <-1>), "[BOS]" | " ", " " | "[EOS]", sigma_star]
|
|
||||||
;
|
|
||||||
|
|
||||||
cardinal = n.CARDINAL_NUMBER_NAME;
|
|
||||||
ordinal = n.ORDINAL_NUMBER_NAME;
|
|
||||||
|
|
||||||
# Putting these here since this grammar gets incorporated by all the others.
|
|
||||||
|
|
||||||
func I[expr] {
|
|
||||||
return "" : expr;
|
|
||||||
}
|
|
||||||
|
|
||||||
func D[expr] {
|
|
||||||
return expr : "";
|
|
||||||
}
|
|
||||||
|
|
||||||
# Since we know this is the default for Russian, it's fair game to set it.
|
|
||||||
separators = t.dot_thousands | t.no_delimiter;
|
|
||||||
|
|
||||||
export CARDINAL_NUMBERS = Optimize[
|
|
||||||
separators
|
|
||||||
@ cardinal
|
|
||||||
];
|
|
||||||
|
|
||||||
export ORDINAL_NUMBERS_UNMARKED = Optimize[
|
|
||||||
separators
|
|
||||||
@ ordinal
|
|
||||||
];
|
|
||||||
|
|
||||||
|
|
||||||
endings = StringFile['ru/verbalizer/ordinal_endings.tsv'];
|
|
||||||
|
|
||||||
not_dash = (b.kBytes - "-")+;
|
|
||||||
del_ending = CDRewrite[("-" not_dash) : "", "", "[EOS]", sigma_star];
|
|
||||||
|
|
||||||
# Needs nominative_filter here if we take out Kyle's models.
|
|
||||||
export ORDINAL_NUMBERS_MARKED = Optimize[
|
|
||||||
Optimize[Optimize[separators @ ordinal] "-" not_dash]
|
|
||||||
@ Optimize[sigma_star endings]
|
|
||||||
@ del_ending]
|
|
||||||
;
|
|
||||||
|
|
||||||
export ORDINAL_NUMBERS =
|
|
||||||
Optimize[ORDINAL_NUMBERS_MARKED | ORDINAL_NUMBERS_UNMARKED]
|
|
||||||
;
|
|
@ -1,133 +0,0 @@
|
|||||||
# Copyright 2017 Google Inc.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
# Grammar for things built mostly on numbers.
|
|
||||||
|
|
||||||
import 'ru/verbalizer/factorization.grm' as f;
|
|
||||||
import 'ru/verbalizer/lexical_map.grm' as l;
|
|
||||||
import 'ru/verbalizer/numbers.grm' as n;
|
|
||||||
|
|
||||||
num = n.CARDINAL_NUMBERS;
|
|
||||||
ord = n.ORDINAL_NUMBERS_UNMARKED;
|
|
||||||
digits = f.FRACTIONAL_PART_UNGROUPED;
|
|
||||||
|
|
||||||
# Various symbols.
|
|
||||||
|
|
||||||
plus = "+" : "@@ARITHMETIC_PLUS@@";
|
|
||||||
minus = "-" : "@@ARITHMETIC_MINUS@@";
|
|
||||||
slash = "/" : "@@SLASH@@";
|
|
||||||
dot = "." : "@@URL_DOT_EXPRESSION@@";
|
|
||||||
dash = "-" : "@@DASH@@";
|
|
||||||
equals = "=" : "@@ARITHMETIC_EQUALS@@";
|
|
||||||
|
|
||||||
degree = "°" : "@@DEGREE@@";
|
|
||||||
|
|
||||||
division = ("/" | "÷") : "@@ARITHMETIC_DIVISION@@";
|
|
||||||
|
|
||||||
times = ("x" | "*") : "@@ARITHMETIC_TIMES@@";
|
|
||||||
|
|
||||||
power = "^" : "@@DECIMAL_EXPONENT@@";
|
|
||||||
|
|
||||||
square_root = "√" : "@@SQUARE_ROOT@@";
|
|
||||||
|
|
||||||
percent = "%" : "@@PERCENT@@";
|
|
||||||
|
|
||||||
# Safe roman numbers.
|
|
||||||
|
|
||||||
# NB: Do not change the formatting here. NO_EDIT must be on the same
|
|
||||||
# line as the path.
|
|
||||||
rfile =
|
|
||||||
'universal/roman_numerals.tsv' # NO_EDIT
|
|
||||||
;
|
|
||||||
|
|
||||||
roman = StringFile[rfile];
|
|
||||||
|
|
||||||
## Main categories.
|
|
||||||
|
|
||||||
cat_dot_number =
|
|
||||||
num
|
|
||||||
n.I[" "] dot n.I[" "] num
|
|
||||||
(n.I[" "] dot n.I[" "] num)+
|
|
||||||
;
|
|
||||||
|
|
||||||
cat_slash_number =
|
|
||||||
num
|
|
||||||
n.I[" "] slash n.I[" "] num
|
|
||||||
(n.I[" "] slash n.I[" "] num)*
|
|
||||||
;
|
|
||||||
|
|
||||||
cat_dash_number =
|
|
||||||
num
|
|
||||||
n.I[" "] dash n.I[" "] num
|
|
||||||
(n.I[" "] dash n.I[" "] num)*
|
|
||||||
;
|
|
||||||
|
|
||||||
cat_signed_number = ((plus | minus) n.I[" "])? num;
|
|
||||||
|
|
||||||
cat_degree = cat_signed_number n.I[" "] degree;
|
|
||||||
|
|
||||||
cat_country_code = plus n.I[" "] (num | digits);
|
|
||||||
|
|
||||||
cat_math_operations =
|
|
||||||
plus
|
|
||||||
| minus
|
|
||||||
| division
|
|
||||||
| times
|
|
||||||
| equals
|
|
||||||
| percent
|
|
||||||
| power
|
|
||||||
| square_root
|
|
||||||
;
|
|
||||||
|
|
||||||
# Roman numbers are often either cardinals or ordinals in various languages.
|
|
||||||
cat_roman = roman @ (num | ord);
|
|
||||||
|
|
||||||
# Allow
|
|
||||||
#
|
|
||||||
# number:number
|
|
||||||
# number-number
|
|
||||||
#
|
|
||||||
# to just be
|
|
||||||
#
|
|
||||||
# number number.
|
|
||||||
|
|
||||||
cat_number_number =
|
|
||||||
num ((":" | "-") : " ") num
|
|
||||||
;
|
|
||||||
|
|
||||||
# Some additional readings for these symbols.
|
|
||||||
|
|
||||||
cat_additional_readings =
|
|
||||||
("/" : "@@PER@@") |
|
|
||||||
("+" : "@@AND@@") |
|
|
||||||
("-" : ("@@HYPHEN@@" | "@@CONNECTOR_TO@@")) |
|
|
||||||
("*" : "@@STAR@@") |
|
|
||||||
("x" : ("x" | "@@CONNECTOR_BY@@")) |
|
|
||||||
("@" : "@@AT@@")
|
|
||||||
;
|
|
||||||
|
|
||||||
numbers_plus = Optimize[
|
|
||||||
cat_dot_number
|
|
||||||
| cat_slash_number
|
|
||||||
| cat_dash_number
|
|
||||||
| cat_signed_number
|
|
||||||
| cat_degree
|
|
||||||
| cat_country_code
|
|
||||||
| cat_math_operations
|
|
||||||
| cat_roman
|
|
||||||
| cat_number_number
|
|
||||||
| cat_additional_readings
|
|
||||||
];
|
|
||||||
|
|
||||||
export NUMBERS_PLUS = Optimize[numbers_plus @ l.LEXICAL_MAP];
|
|
|
@ -1,804 +0,0 @@
|
|||||||
# Copyright 2017 Google Inc.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
# AUTOMATICALLY GENERATED: DO NOT EDIT.
|
|
||||||
import 'util/byte.grm' as b;
|
|
||||||
|
|
||||||
# Utilities for insertion and deletion.
|
|
||||||
|
|
||||||
func I[expr] {
|
|
||||||
return "" : expr;
|
|
||||||
}
|
|
||||||
|
|
||||||
func D[expr] {
|
|
||||||
return expr : "";
|
|
||||||
}
|
|
||||||
|
|
||||||
# Powers of base 10.
|
|
||||||
export POWERS =
|
|
||||||
"[E15]"
|
|
||||||
| "[E14]"
|
|
||||||
| "[E13]"
|
|
||||||
| "[E12]"
|
|
||||||
| "[E11]"
|
|
||||||
| "[E10]"
|
|
||||||
| "[E9]"
|
|
||||||
| "[E8]"
|
|
||||||
| "[E7]"
|
|
||||||
| "[E6]"
|
|
||||||
| "[E5]"
|
|
||||||
| "[E4]"
|
|
||||||
| "[E3]"
|
|
||||||
| "[E2]"
|
|
||||||
| "[E1]"
|
|
||||||
;
|
|
||||||
|
|
||||||
export SIGMA = b.kBytes | POWERS;
|
|
||||||
|
|
||||||
export SIGMA_STAR = SIGMA*;
|
|
||||||
|
|
||||||
export SIGMA_PLUS = SIGMA+;
|
|
||||||
|
|
||||||
################################################################################
|
|
||||||
# BEGIN LANGUAGE SPECIFIC DATA
|
|
||||||
revaluations =
|
|
||||||
("[E4]" : "[E1]")
|
|
||||||
| ("[E5]" : "[E2]")
|
|
||||||
| ("[E7]" : "[E1]")
|
|
||||||
| ("[E8]" : "[E2]")
|
|
||||||
;
|
|
||||||
|
|
||||||
Ms = "[E3]" | "[E6]" | "[E9]";
|
|
||||||
|
|
||||||
|
|
||||||
func Zero[expr] {
|
|
||||||
return expr : ("");
|
|
||||||
}
|
|
||||||
|
|
||||||
space = " ";
|
|
||||||
|
|
||||||
lexset3 = Optimize[
|
|
||||||
("1[E1]+1" : "одиннадцатая@")
|
|
||||||
| ("1[E1]+1" : "одиннадцати")
|
|
||||||
| ("1[E1]+1" : "одиннадцатого@")
|
|
||||||
| ("1[E1]+1" : "одиннадцатое@")
|
|
||||||
| ("1[E1]+1" : "одиннадцатой@")
|
|
||||||
| ("1[E1]+1" : "одиннадцатом@")
|
|
||||||
| ("1[E1]+1" : "одиннадцатому@")
|
|
||||||
| ("1[E1]+1" : "одиннадцатую@")
|
|
||||||
| ("1[E1]+1" : "одиннадцатые@")
|
|
||||||
| ("1[E1]+1" : "одиннадцатый@")
|
|
||||||
| ("1[E1]+1" : "одиннадцатым@")
|
|
||||||
| ("1[E1]+1" : "одиннадцатыми@")
|
|
||||||
| ("1[E1]+1" : "одиннадцатых@")
|
|
||||||
| ("1[E1]+1" : "одиннадцать")
|
|
||||||
| ("1[E1]+1" : "одиннадцатью")
|
|
||||||
| ("1[E1]+2" : "двенадцатая@")
|
|
||||||
| ("1[E1]+2" : "двенадцати")
|
|
||||||
| ("1[E1]+2" : "двенадцатого@")
|
|
||||||
| ("1[E1]+2" : "двенадцатое@")
|
|
||||||
| ("1[E1]+2" : "двенадцатой@")
|
|
||||||
| ("1[E1]+2" : "двенадцатом@")
|
|
||||||
| ("1[E1]+2" : "двенадцатому@")
|
|
||||||
| ("1[E1]+2" : "двенадцатую@")
|
|
||||||
| ("1[E1]+2" : "двенадцатые@")
|
|
||||||
| ("1[E1]+2" : "двенадцатый@")
|
|
||||||
| ("1[E1]+2" : "двенадцатым@")
|
|
||||||
| ("1[E1]+2" : "двенадцатыми@")
|
|
||||||
| ("1[E1]+2" : "двенадцатых@")
|
|
||||||
| ("1[E1]+2" : "двенадцать")
|
|
||||||
| ("1[E1]+2" : "двенадцатью")
|
|
||||||
| ("1[E1]+3" : "тринадцатая@")
|
|
||||||
| ("1[E1]+3" : "тринадцати")
|
|
||||||
| ("1[E1]+3" : "тринадцатого@")
|
|
||||||
| ("1[E1]+3" : "тринадцатое@")
|
|
||||||
| ("1[E1]+3" : "тринадцатой@")
|
|
||||||
| ("1[E1]+3" : "тринадцатом@")
|
|
||||||
| ("1[E1]+3" : "тринадцатому@")
|
|
||||||
| ("1[E1]+3" : "тринадцатую@")
|
|
||||||
| ("1[E1]+3" : "тринадцатые@")
|
|
||||||
| ("1[E1]+3" : "тринадцатый@")
|
|
||||||
| ("1[E1]+3" : "тринадцатым@")
|
|
||||||
| ("1[E1]+3" : "тринадцатыми@")
|
|
||||||
| ("1[E1]+3" : "тринадцатых@")
|
|
||||||
| ("1[E1]+3" : "тринадцать")
|
|
||||||
| ("1[E1]+3" : "тринадцатью")
|
|
||||||
| ("1[E1]+4" : "четырнадцатая@")
|
|
||||||
| ("1[E1]+4" : "четырнадцати")
|
|
||||||
| ("1[E1]+4" : "четырнадцатого@")
|
|
||||||
| ("1[E1]+4" : "четырнадцатое@")
|
|
||||||
| ("1[E1]+4" : "четырнадцатой@")
|
|
||||||
| ("1[E1]+4" : "четырнадцатом@")
|
|
||||||
| ("1[E1]+4" : "четырнадцатому@")
|
|
||||||
| ("1[E1]+4" : "четырнадцатую@")
|
|
||||||
| ("1[E1]+4" : "четырнадцатые@")
|
|
||||||
| ("1[E1]+4" : "четырнадцатый@")
|
|
||||||
| ("1[E1]+4" : "четырнадцатым@")
|
|
||||||
| ("1[E1]+4" : "четырнадцатыми@")
|
|
||||||
| ("1[E1]+4" : "четырнадцатых@")
|
|
||||||
| ("1[E1]+4" : "четырнадцать")
|
|
||||||
| ("1[E1]+4" : "четырнадцатью")
|
|
||||||
| ("1[E1]+5" : "пятнадцатая@")
|
|
||||||
| ("1[E1]+5" : "пятнадцати")
|
|
||||||
| ("1[E1]+5" : "пятнадцатого@")
|
|
||||||
| ("1[E1]+5" : "пятнадцатое@")
|
|
||||||
| ("1[E1]+5" : "пятнадцатой@")
|
|
||||||
| ("1[E1]+5" : "пятнадцатом@")
|
|
||||||
| ("1[E1]+5" : "пятнадцатому@")
|
|
||||||
| ("1[E1]+5" : "пятнадцатую@")
|
|
||||||
| ("1[E1]+5" : "пятнадцатые@")
|
|
||||||
| ("1[E1]+5" : "пятнадцатый@")
|
|
||||||
| ("1[E1]+5" : "пятнадцатым@")
|
|
||||||
| ("1[E1]+5" : "пятнадцатыми@")
|
|
||||||
| ("1[E1]+5" : "пятнадцатых@")
|
|
||||||
| ("1[E1]+5" : "пятнадцать")
|
|
||||||
| ("1[E1]+5" : "пятнадцатью")
|
|
||||||
| ("1[E1]+6" : "шестнадцатая@")
|
|
||||||
| ("1[E1]+6" : "шестнадцати")
|
|
||||||
| ("1[E1]+6" : "шестнадцатого@")
|
|
||||||
| ("1[E1]+6" : "шестнадцатое@")
|
|
||||||
| ("1[E1]+6" : "шестнадцатой@")
|
|
||||||
| ("1[E1]+6" : "шестнадцатом@")
|
|
||||||
| ("1[E1]+6" : "шестнадцатому@")
|
|
||||||
| ("1[E1]+6" : "шестнадцатую@")
|
|
||||||
| ("1[E1]+6" : "шестнадцатые@")
|
|
||||||
| ("1[E1]+6" : "шестнадцатый@")
|
|
||||||
| ("1[E1]+6" : "шестнадцатым@")
|
|
||||||
| ("1[E1]+6" : "шестнадцатыми@")
|
|
||||||
| ("1[E1]+6" : "шестнадцатых@")
|
|
||||||
| ("1[E1]+6" : "шестнадцать")
|
|
||||||
| ("1[E1]+6" : "шестнадцатью")
|
|
||||||
| ("1[E1]+7" : "семнадцатая@")
|
|
||||||
| ("1[E1]+7" : "семнадцати")
|
|
||||||
| ("1[E1]+7" : "семнадцатого@")
|
|
||||||
| ("1[E1]+7" : "семнадцатое@")
|
|
||||||
| ("1[E1]+7" : "семнадцатой@")
|
|
||||||
| ("1[E1]+7" : "семнадцатом@")
|
|
||||||
| ("1[E1]+7" : "семнадцатому@")
|
|
||||||
| ("1[E1]+7" : "семнадцатую@")
|
|
||||||
| ("1[E1]+7" : "семнадцатые@")
|
|
||||||
| ("1[E1]+7" : "семнадцатый@")
|
|
||||||
| ("1[E1]+7" : "семнадцатым@")
|
|
||||||
| ("1[E1]+7" : "семнадцатыми@")
|
|
||||||
| ("1[E1]+7" : "семнадцатых@")
|
|
||||||
| ("1[E1]+7" : "семнадцать")
|
|
||||||
| ("1[E1]+7" : "семнадцатью")
|
|
||||||
| ("1[E1]+8" : "восемнадцатая@")
|
|
||||||
| ("1[E1]+8" : "восемнадцати")
|
|
||||||
| ("1[E1]+8" : "восемнадцатого@")
|
|
||||||
| ("1[E1]+8" : "восемнадцатое@")
|
|
||||||
| ("1[E1]+8" : "восемнадцатой@")
|
|
||||||
| ("1[E1]+8" : "восемнадцатом@")
|
|
||||||
| ("1[E1]+8" : "восемнадцатому@")
|
|
||||||
| ("1[E1]+8" : "восемнадцатую@")
|
|
||||||
| ("1[E1]+8" : "восемнадцатые@")
|
|
||||||
| ("1[E1]+8" : "восемнадцатый@")
|
|
||||||
| ("1[E1]+8" : "восемнадцатым@")
|
|
||||||
| ("1[E1]+8" : "восемнадцатыми@")
|
|
||||||
| ("1[E1]+8" : "восемнадцатых@")
|
|
||||||
| ("1[E1]+8" : "восемнадцать")
|
|
||||||
| ("1[E1]+8" : "восемнадцатью")
|
|
||||||
| ("1[E1]+9" : "девятнадцатая@")
|
|
||||||
| ("1[E1]+9" : "девятнадцати")
|
|
||||||
| ("1[E1]+9" : "девятнадцатого@")
|
|
||||||
| ("1[E1]+9" : "девятнадцатое@")
|
|
||||||
| ("1[E1]+9" : "девятнадцатой@")
|
|
||||||
| ("1[E1]+9" : "девятнадцатом@")
|
|
||||||
| ("1[E1]+9" : "девятнадцатому@")
|
|
||||||
| ("1[E1]+9" : "девятнадцатую@")
|
|
||||||
| ("1[E1]+9" : "девятнадцатые@")
|
|
||||||
| ("1[E1]+9" : "девятнадцатый@")
|
|
||||||
| ("1[E1]+9" : "девятнадцатым@")
|
|
||||||
| ("1[E1]+9" : "девятнадцатыми@")
|
|
||||||
| ("1[E1]+9" : "девятнадцатых@")
|
|
||||||
| ("1[E1]+9" : "девятнадцать")
|
|
||||||
| ("1[E1]+9" : "девятнадцатью")]
|
|
||||||
;
|
|
||||||
|
|
||||||
lex3 = CDRewrite[lexset3 I[space], "", "", SIGMA_STAR];
|
|
||||||
|
|
||||||
lexset2 = Optimize[
|
|
||||||
("1[E1]" : "десятая@")
|
|
||||||
| ("1[E1]" : "десяти")
|
|
||||||
| ("1[E1]" : "десятого@")
|
|
||||||
| ("1[E1]" : "десятое@")
|
|
||||||
| ("1[E1]" : "десятой@")
|
|
||||||
| ("1[E1]" : "десятом@")
|
|
||||||
| ("1[E1]" : "десятому@")
|
|
||||||
| ("1[E1]" : "десятую@")
|
|
||||||
| ("1[E1]" : "десятые@")
|
|
||||||
| ("1[E1]" : "десятый@")
|
|
||||||
| ("1[E1]" : "десятым@")
|
|
||||||
| ("1[E1]" : "десятыми@")
|
|
||||||
| ("1[E1]" : "десятых@")
|
|
||||||
| ("1[E1]" : "десять")
|
|
||||||
| ("1[E1]" : "десятью")
|
|
||||||
| ("1[E2]" : "сотая@")
|
|
||||||
| ("1[E2]" : "сотого@")
|
|
||||||
| ("1[E2]" : "сотое@")
|
|
||||||
| ("1[E2]" : "сотой@")
|
|
||||||
| ("1[E2]" : "сотом@")
|
|
||||||
| ("1[E2]" : "сотому@")
|
|
||||||
| ("1[E2]" : "сотую@")
|
|
||||||
| ("1[E2]" : "сотые@")
|
|
||||||
| ("1[E2]" : "сотый@")
|
|
||||||
| ("1[E2]" : "сотым@")
|
|
||||||
| ("1[E2]" : "сотыми@")
|
|
||||||
| ("1[E2]" : "сотых@")
|
|
||||||
| ("1[E2]" : "ста")
|
|
||||||
| ("1[E2]" : "сто")
|
|
||||||
| ("1[E3]" : "тысячная@")
|
|
||||||
| ("1[E3]" : "тысячного@")
|
|
||||||
| ("1[E3]" : "тысячное@")
|
|
||||||
| ("1[E3]" : "тысячной@")
|
|
||||||
| ("1[E3]" : "тысячном@")
|
|
||||||
| ("1[E3]" : "тысячному@")
|
|
||||||
| ("1[E3]" : "тысячную@")
|
|
||||||
| ("1[E3]" : "тысячные@")
|
|
||||||
| ("1[E3]" : "тысячный@")
|
|
||||||
| ("1[E3]" : "тысячным@")
|
|
||||||
| ("1[E3]" : "тысячными@")
|
|
||||||
| ("1[E3]" : "тысячных@")
|
|
||||||
| ("1[E6]" : "миллионная@")
|
|
||||||
| ("1[E6]" : "миллионного@")
|
|
||||||
| ("1[E6]" : "миллионное@")
|
|
||||||
| ("1[E6]" : "миллионной@")
|
|
||||||
| ("1[E6]" : "миллионном@")
|
|
||||||
| ("1[E6]" : "миллионному@")
|
|
||||||
| ("1[E6]" : "миллионную@")
|
|
||||||
| ("1[E6]" : "миллионные@")
|
|
||||||
| ("1[E6]" : "миллионный@")
|
|
||||||
| ("1[E6]" : "миллионным@")
|
|
||||||
| ("1[E6]" : "миллионными@")
|
|
||||||
| ("1[E6]" : "миллионных@")
|
|
||||||
| ("1[E9]" : "миллиардная@")
|
|
||||||
| ("1[E9]" : "миллиардного@")
|
|
||||||
| ("1[E9]" : "миллиардное@")
|
|
||||||
| ("1[E9]" : "миллиардной@")
|
|
||||||
| ("1[E9]" : "миллиардном@")
|
|
||||||
| ("1[E9]" : "миллиардному@")
|
|
||||||
| ("1[E9]" : "миллиардную@")
|
|
||||||
| ("1[E9]" : "миллиардные@")
|
|
||||||
| ("1[E9]" : "миллиардный@")
|
|
||||||
| ("1[E9]" : "миллиардным@")
|
|
||||||
| ("1[E9]" : "миллиардными@")
|
|
||||||
| ("1[E9]" : "миллиардных@")
|
|
||||||
| ("2[E1]" : "двадцатая@")
|
|
||||||
| ("2[E1]" : "двадцати")
|
|
||||||
| ("2[E1]" : "двадцатого@")
|
|
||||||
| ("2[E1]" : "двадцатое@")
|
|
||||||
| ("2[E1]" : "двадцатой@")
|
|
||||||
| ("2[E1]" : "двадцатом@")
|
|
||||||
| ("2[E1]" : "двадцатому@")
|
|
||||||
| ("2[E1]" : "двадцатую@")
|
|
||||||
| ("2[E1]" : "двадцатые@")
|
|
||||||
| ("2[E1]" : "двадцатый@")
|
|
||||||
| ("2[E1]" : "двадцатым@")
|
|
||||||
| ("2[E1]" : "двадцатыми@")
|
|
||||||
| ("2[E1]" : "двадцатых@")
|
|
||||||
| ("2[E1]" : "двадцать")
|
|
||||||
| ("2[E1]" : "двадцатью")
|
|
||||||
| ("2[E2]" : "двести")
|
|
||||||
| ("2[E2]" : "двумстам")
|
|
||||||
| ("2[E2]" : "двумястами")
|
|
||||||
| ("2[E2]" : "двухсот")
|
|
||||||
| ("2[E2]" : "двухсотая@")
|
|
||||||
| ("2[E2]" : "двухсотого@")
|
|
||||||
| ("2[E2]" : "двухсотое@")
|
|
||||||
| ("2[E2]" : "двухсотой@")
|
|
||||||
| ("2[E2]" : "двухсотом@")
|
|
||||||
| ("2[E2]" : "двухсотому@")
|
|
||||||
| ("2[E2]" : "двухсотую@")
|
|
||||||
| ("2[E2]" : "двухсотые@")
|
|
||||||
| ("2[E2]" : "двухсотый@")
|
|
||||||
| ("2[E2]" : "двухсотым@")
|
|
||||||
| ("2[E2]" : "двухсотыми@")
|
|
||||||
| ("2[E2]" : "двухсотых@")
|
|
||||||
| ("2[E2]" : "двухстах")
|
|
||||||
| ("3[E1]" : "тридцатая@")
|
|
||||||
| ("3[E1]" : "тридцати")
|
|
||||||
| ("3[E1]" : "тридцатого@")
|
|
||||||
| ("3[E1]" : "тридцатое@")
|
|
||||||
| ("3[E1]" : "тридцатой@")
|
|
||||||
| ("3[E1]" : "тридцатом@")
|
|
||||||
| ("3[E1]" : "тридцатому@")
|
|
||||||
| ("3[E1]" : "тридцатую@")
|
|
||||||
| ("3[E1]" : "тридцатые@")
|
|
||||||
| ("3[E1]" : "тридцатый@")
|
|
||||||
| ("3[E1]" : "тридцатым@")
|
|
||||||
| ("3[E1]" : "тридцатыми@")
|
|
||||||
| ("3[E1]" : "тридцатых@")
|
|
||||||
| ("3[E1]" : "тридцать")
|
|
||||||
| ("3[E1]" : "тридцатью")
|
|
||||||
| ("3[E2]" : "тремстам")
|
|
||||||
| ("3[E2]" : "тремястами")
|
|
||||||
| ("3[E2]" : "трехсот")
|
|
||||||
| ("3[E2]" : "трехсотая@")
|
|
||||||
| ("3[E2]" : "трехсотого@")
|
|
||||||
| ("3[E2]" : "трехсотое@")
|
|
||||||
| ("3[E2]" : "трехсотой@")
|
|
||||||
| ("3[E2]" : "трехсотом@")
|
|
||||||
| ("3[E2]" : "трехсотому@")
|
|
||||||
| ("3[E2]" : "трехсотую@")
|
|
||||||
| ("3[E2]" : "трехсотые@")
|
|
||||||
| ("3[E2]" : "трехсотый@")
|
|
||||||
| ("3[E2]" : "трехсотым@")
|
|
||||||
| ("3[E2]" : "трехсотыми@")
|
|
||||||
| ("3[E2]" : "трехсотых@")
|
|
||||||
| ("3[E2]" : "трехстах")
|
|
||||||
| ("3[E2]" : "триста")
|
|
||||||
| ("4[E1]" : "сорок")
|
|
||||||
| ("4[E1]" : "сорока")
|
|
||||||
| ("4[E1]" : "сороковая@")
|
|
||||||
| ("4[E1]" : "сорокового@")
|
|
||||||
| ("4[E1]" : "сороковое@")
|
|
||||||
| ("4[E1]" : "сороковой@")
|
|
||||||
| ("4[E1]" : "сороковом@")
|
|
||||||
| ("4[E1]" : "сороковому@")
|
|
||||||
| ("4[E1]" : "сороковую@")
|
|
||||||
| ("4[E1]" : "сороковые@")
|
|
||||||
| ("4[E1]" : "сороковым@")
|
|
||||||
| ("4[E1]" : "сороковыми@")
|
|
||||||
| ("4[E1]" : "сороковых@")
|
|
||||||
| ("4[E2]" : "четыремстам")
|
|
||||||
| ("4[E2]" : "четыреста")
|
|
||||||
| ("4[E2]" : "четырехсот")
|
|
||||||
| ("4[E2]" : "четырехсотая@")
|
|
||||||
| ("4[E2]" : "четырехсотого@")
|
|
||||||
| ("4[E2]" : "четырехсотое@")
|
|
||||||
| ("4[E2]" : "четырехсотой@")
|
|
||||||
| ("4[E2]" : "четырехсотом@")
|
|
||||||
| ("4[E2]" : "четырехсотому@")
|
|
||||||
| ("4[E2]" : "четырехсотую@")
|
|
||||||
| ("4[E2]" : "четырехсотые@")
|
|
||||||
| ("4[E2]" : "четырехсотый@")
|
|
||||||
| ("4[E2]" : "четырехсотым@")
|
|
||||||
| ("4[E2]" : "четырехсотыми@")
|
|
||||||
| ("4[E2]" : "четырехсотых@")
|
|
||||||
| ("4[E2]" : "четырехстах")
|
|
||||||
| ("4[E2]" : "четырьмястами")
|
|
||||||
| ("5[E1]" : "пятидесятая@")
|
|
||||||
| ("5[E1]" : "пятидесяти")
|
|
||||||
| ("5[E1]" : "пятидесятого@")
|
|
||||||
| ("5[E1]" : "пятидесятое@")
|
|
||||||
| ("5[E1]" : "пятидесятой@")
|
|
||||||
| ("5[E1]" : "пятидесятом@")
|
|
||||||
| ("5[E1]" : "пятидесятому@")
|
|
||||||
| ("5[E1]" : "пятидесятую@")
|
|
||||||
| ("5[E1]" : "пятидесятые@")
|
|
||||||
| ("5[E1]" : "пятидесятый@")
|
|
||||||
| ("5[E1]" : "пятидесятым@")
|
|
||||||
| ("5[E1]" : "пятидесятыми@")
|
|
||||||
| ("5[E1]" : "пятидесятых@")
|
|
||||||
| ("5[E1]" : "пятьдесят")
|
|
||||||
| ("5[E1]" : "пятьюдесятью")
|
|
||||||
| ("5[E2]" : "пятисот")
|
|
||||||
| ("5[E2]" : "пятисотая@")
|
|
||||||
| ("5[E2]" : "пятисотого@")
|
|
||||||
| ("5[E2]" : "пятисотое@")
|
|
||||||
| ("5[E2]" : "пятисотой@")
|
|
||||||
| ("5[E2]" : "пятисотом@")
|
|
||||||
| ("5[E2]" : "пятисотому@")
|
|
||||||
| ("5[E2]" : "пятисотую@")
|
|
||||||
| ("5[E2]" : "пятисотые@")
|
|
||||||
| ("5[E2]" : "пятисотый@")
|
|
||||||
| ("5[E2]" : "пятисотым@")
|
|
||||||
| ("5[E2]" : "пятисотыми@")
|
|
||||||
| ("5[E2]" : "пятисотых@")
|
|
||||||
| ("5[E2]" : "пятистам")
|
|
||||||
| ("5[E2]" : "пятистах")
|
|
||||||
| ("5[E2]" : "пятьсот")
|
|
||||||
| ("5[E2]" : "пятьюстами")
|
|
||||||
| ("6[E1]" : "шестидесятая@")
|
|
||||||
| ("6[E1]" : "шестидесяти")
|
|
||||||
| ("6[E1]" : "шестидесятого@")
|
|
||||||
| ("6[E1]" : "шестидесятое@")
|
|
||||||
| ("6[E1]" : "шестидесятой@")
|
|
||||||
| ("6[E1]" : "шестидесятом@")
|
|
||||||
| ("6[E1]" : "шестидесятому@")
|
|
||||||
| ("6[E1]" : "шестидесятую@")
|
|
||||||
| ("6[E1]" : "шестидесятые@")
|
|
||||||
| ("6[E1]" : "шестидесятый@")
|
|
||||||
| ("6[E1]" : "шестидесятым@")
|
|
||||||
| ("6[E1]" : "шестидесятыми@")
|
|
||||||
| ("6[E1]" : "шестидесятых@")
|
|
||||||
| ("6[E1]" : "шестьдесят")
|
|
||||||
| ("6[E1]" : "шестьюдесятью")
|
|
||||||
| ("6[E2]" : "шестисот")
|
|
||||||
| ("6[E2]" : "шестисотая@")
|
|
||||||
| ("6[E2]" : "шестисотого@")
|
|
||||||
| ("6[E2]" : "шестисотое@")
|
|
||||||
| ("6[E2]" : "шестисотой@")
|
|
||||||
| ("6[E2]" : "шестисотом@")
|
|
||||||
| ("6[E2]" : "шестисотому@")
|
|
||||||
| ("6[E2]" : "шестисотую@")
|
|
||||||
| ("6[E2]" : "шестисотые@")
|
|
||||||
| ("6[E2]" : "шестисотый@")
|
|
||||||
| ("6[E2]" : "шестисотым@")
|
|
||||||
| ("6[E2]" : "шестисотыми@")
|
|
||||||
| ("6[E2]" : "шестисотых@")
|
|
||||||
| ("6[E2]" : "шестистам")
|
|
||||||
| ("6[E2]" : "шестистах")
|
|
||||||
| ("6[E2]" : "шестьсот")
|
|
||||||
| ("6[E2]" : "шестьюстами")
|
|
||||||
| ("7[E1]" : "семидесятая@")
|
|
||||||
| ("7[E1]" : "семидесяти")
|
|
||||||
| ("7[E1]" : "семидесятого@")
|
|
||||||
| ("7[E1]" : "семидесятое@")
|
|
||||||
| ("7[E1]" : "семидесятой@")
|
|
||||||
| ("7[E1]" : "семидесятом@")
|
|
||||||
| ("7[E1]" : "семидесятому@")
|
|
||||||
| ("7[E1]" : "семидесятую@")
|
|
||||||
| ("7[E1]" : "семидесятые@")
|
|
||||||
| ("7[E1]" : "семидесятый@")
|
|
||||||
| ("7[E1]" : "семидесятым@")
|
|
||||||
| ("7[E1]" : "семидесятыми@")
|
|
||||||
| ("7[E1]" : "семидесятых@")
|
|
||||||
| ("7[E1]" : "семьдесят")
|
|
||||||
| ("7[E1]" : "семьюдесятью")
|
|
||||||
| ("7[E2]" : "семисот")
|
|
||||||
| ("7[E2]" : "семисотая@")
|
|
||||||
| ("7[E2]" : "семисотого@")
|
|
||||||
| ("7[E2]" : "семисотое@")
|
|
||||||
| ("7[E2]" : "семисотой@")
|
|
||||||
| ("7[E2]" : "семисотом@")
|
|
||||||
| ("7[E2]" : "семисотому@")
|
|
||||||
| ("7[E2]" : "семисотую@")
|
|
||||||
| ("7[E2]" : "семисотые@")
|
|
||||||
| ("7[E2]" : "семисотый@")
|
|
||||||
| ("7[E2]" : "семисотым@")
|
|
||||||
| ("7[E2]" : "семисотыми@")
|
|
||||||
| ("7[E2]" : "семисотых@")
|
|
||||||
| ("7[E2]" : "семистам")
|
|
||||||
| ("7[E2]" : "семистах")
|
|
||||||
| ("7[E2]" : "семьсот")
|
|
||||||
| ("7[E2]" : "семьюстами")
|
|
||||||
| ("8[E1]" : "восемьдесят")
|
|
||||||
| ("8[E1]" : "восьмидесятая@")
|
|
||||||
| ("8[E1]" : "восьмидесяти")
|
|
||||||
| ("8[E1]" : "восьмидесятого@")
|
|
||||||
| ("8[E1]" : "восьмидесятое@")
|
|
||||||
| ("8[E1]" : "восьмидесятой@")
|
|
||||||
| ("8[E1]" : "восьмидесятом@")
|
|
||||||
| ("8[E1]" : "восьмидесятому@")
|
|
||||||
| ("8[E1]" : "восьмидесятую@")
|
|
||||||
| ("8[E1]" : "восьмидесятые@")
|
|
||||||
| ("8[E1]" : "восьмидесятый@")
|
|
||||||
| ("8[E1]" : "восьмидесятым@")
|
|
||||||
| ("8[E1]" : "восьмидесятыми@")
|
|
||||||
| ("8[E1]" : "восьмидесятых@")
|
|
||||||
| ("8[E1]" : "восьмьюдесятью")
|
|
||||||
| ("8[E2]" : "восемьсот")
|
|
||||||
| ("8[E2]" : "восемьюстами")
|
|
||||||
| ("8[E2]" : "восьмисот")
|
|
||||||
| ("8[E2]" : "восьмисотая@")
|
|
||||||
| ("8[E2]" : "восьмисотого@")
|
|
||||||
| ("8[E2]" : "восьмисотое@")
|
|
||||||
| ("8[E2]" : "восьмисотой@")
|
|
||||||
| ("8[E2]" : "восьмисотом@")
|
|
||||||
| ("8[E2]" : "восьмисотому@")
|
|
||||||
| ("8[E2]" : "восьмисотую@")
|
|
||||||
| ("8[E2]" : "восьмисотые@")
|
|
||||||
| ("8[E2]" : "восьмисотый@")
|
|
||||||
| ("8[E2]" : "восьмисотым@")
|
|
||||||
| ("8[E2]" : "восьмисотыми@")
|
|
||||||
| ("8[E2]" : "восьмисотых@")
|
|
||||||
| ("8[E2]" : "восьмистам")
|
|
||||||
| ("8[E2]" : "восьмистах")
|
|
||||||
| ("8[E2]" : "восьмьюстами")
|
|
||||||
| ("9[E1]" : "девяноста")
|
|
||||||
| ("9[E1]" : "девяностая@")
|
|
||||||
| ("9[E1]" : "девяносто")
|
|
||||||
| ("9[E1]" : "девяностого@")
|
|
||||||
| ("9[E1]" : "девяностое@")
|
|
||||||
| ("9[E1]" : "девяностой@")
|
|
||||||
| ("9[E1]" : "девяностом@")
|
|
||||||
| ("9[E1]" : "девяностому@")
|
|
||||||
| ("9[E1]" : "девяностую@")
|
|
||||||
| ("9[E1]" : "девяностые@")
|
|
||||||
| ("9[E1]" : "девяностый@")
|
|
||||||
| ("9[E1]" : "девяностым@")
|
|
||||||
| ("9[E1]" : "девяностыми@")
|
|
||||||
| ("9[E1]" : "девяностых@")
|
|
||||||
| ("9[E2]" : "девятисот")
|
|
||||||
| ("9[E2]" : "девятисотая@")
|
|
||||||
| ("9[E2]" : "девятисотого@")
|
|
||||||
| ("9[E2]" : "девятисотое@")
|
|
||||||
| ("9[E2]" : "девятисотой@")
|
|
||||||
| ("9[E2]" : "девятисотом@")
|
|
||||||
| ("9[E2]" : "девятисотому@")
|
|
||||||
| ("9[E2]" : "девятисотую@")
|
|
||||||
| ("9[E2]" : "девятисотые@")
|
|
||||||
| ("9[E2]" : "девятисотый@")
|
|
||||||
| ("9[E2]" : "девятисотым@")
|
|
||||||
| ("9[E2]" : "девятисотыми@")
|
|
||||||
| ("9[E2]" : "девятисотых@")
|
|
||||||
| ("9[E2]" : "девятистам")
|
|
||||||
| ("9[E2]" : "девятистах")
|
|
||||||
| ("9[E2]" : "девятьсот")
|
|
||||||
| ("9[E2]" : "девятьюстами")]
|
|
||||||
;
|
|
||||||
|
|
||||||
lex2 = CDRewrite[lexset2 I[space], "", "", SIGMA_STAR];
|
|
||||||
|
|
||||||
lexset1 = Optimize[
|
|
||||||
("+" : "")
|
|
||||||
| ("1" : "один")
|
|
||||||
| ("1" : "одна")
|
|
||||||
| ("1" : "одни")
|
|
||||||
| ("1" : "одним")
|
|
||||||
| ("1" : "одними")
|
|
||||||
| ("1" : "одних")
|
|
||||||
| ("1" : "одно")
|
|
||||||
| ("1" : "одного")
|
|
||||||
| ("1" : "одной")
|
|
||||||
| ("1" : "одном")
|
|
||||||
| ("1" : "одному")
|
|
||||||
| ("1" : "одною")
|
|
||||||
| ("1" : "одну")
|
|
||||||
| ("1" : "первая@")
|
|
||||||
| ("1" : "первого@")
|
|
||||||
| ("1" : "первое@")
|
|
||||||
| ("1" : "первой@")
|
|
||||||
| ("1" : "первом@")
|
|
||||||
| ("1" : "первому@")
|
|
||||||
| ("1" : "первую@")
|
|
||||||
| ("1" : "первые@")
|
|
||||||
| ("1" : "первый@")
|
|
||||||
| ("1" : "первым@")
|
|
||||||
| ("1" : "первыми@")
|
|
||||||
| ("1" : "первых@")
|
|
||||||
| ("2" : "вторая@")
|
|
||||||
| ("2" : "второго@")
|
|
||||||
| ("2" : "второе@")
|
|
||||||
| ("2" : "второй@")
|
|
||||||
| ("2" : "втором@")
|
|
||||||
| ("2" : "второму@")
|
|
||||||
| ("2" : "вторую@")
|
|
||||||
| ("2" : "вторые@")
|
|
||||||
| ("2" : "вторым@")
|
|
||||||
| ("2" : "вторыми@")
|
|
||||||
| ("2" : "вторых@")
|
|
||||||
| ("2" : "два")
|
|
||||||
| ("2" : "две")
|
|
||||||
| ("2" : "двум")
|
|
||||||
| ("2" : "двумя")
|
|
||||||
| ("2" : "двух")
|
|
||||||
| ("3" : "трем")
|
|
||||||
| ("3" : "тремя")
|
|
||||||
| ("3" : "третий@")
|
|
||||||
| ("3" : "третье@")
|
|
||||||
| ("3" : "третьего@")
|
|
||||||
| ("3" : "третьей@")
|
|
||||||
| ("3" : "третьем@")
|
|
||||||
| ("3" : "третьему@")
|
|
||||||
| ("3" : "третьи@")
|
|
||||||
| ("3" : "третьим@")
|
|
||||||
| ("3" : "третьими@")
|
|
||||||
| ("3" : "третьих@")
|
|
||||||
| ("3" : "третью@")
|
|
||||||
| ("3" : "третья@")
|
|
||||||
| ("3" : "трех")
|
|
||||||
| ("3" : "три")
|
|
||||||
| ("4" : "четвертая@")
|
|
||||||
| ("4" : "четвертого@")
|
|
||||||
| ("4" : "четвертое@")
|
|
||||||
| ("4" : "четвертой@")
|
|
||||||
| ("4" : "четвертом@")
|
|
||||||
| ("4" : "четвертому@")
|
|
||||||
| ("4" : "четвертую@")
|
|
||||||
| ("4" : "четвертые@")
|
|
||||||
| ("4" : "четвертый@")
|
|
||||||
| ("4" : "четвертым@")
|
|
||||||
| ("4" : "четвертыми@")
|
|
||||||
| ("4" : "четвертых@")
|
|
||||||
| ("4" : "четыре")
|
|
||||||
| ("4" : "четырем")
|
|
||||||
| ("4" : "четырех")
|
|
||||||
| ("4" : "четырьмя")
|
|
||||||
| ("5" : "пятая@")
|
|
||||||
| ("5" : "пяти")
|
|
||||||
| ("5" : "пятого@")
|
|
||||||
| ("5" : "пятое@")
|
|
||||||
| ("5" : "пятой@")
|
|
||||||
| ("5" : "пятом@")
|
|
||||||
| ("5" : "пятому@")
|
|
||||||
| ("5" : "пятую@")
|
|
||||||
| ("5" : "пятые@")
|
|
||||||
| ("5" : "пятый@")
|
|
||||||
| ("5" : "пятым@")
|
|
||||||
| ("5" : "пятыми@")
|
|
||||||
| ("5" : "пятых@")
|
|
||||||
| ("5" : "пять")
|
|
||||||
| ("5" : "пятью")
|
|
||||||
| ("6" : "шестая@")
|
|
||||||
| ("6" : "шести")
|
|
||||||
| ("6" : "шестого@")
|
|
||||||
| ("6" : "шестое@")
|
|
||||||
| ("6" : "шестой@")
|
|
||||||
| ("6" : "шестом@")
|
|
||||||
| ("6" : "шестому@")
|
|
||||||
| ("6" : "шестую@")
|
|
||||||
| ("6" : "шестые@")
|
|
||||||
| ("6" : "шестым@")
|
|
||||||
| ("6" : "шестыми@")
|
|
||||||
| ("6" : "шестых@")
|
|
||||||
| ("6" : "шесть")
|
|
||||||
| ("6" : "шестью")
|
|
||||||
| ("7" : "седьмая@")
|
|
||||||
| ("7" : "седьмого@")
|
|
||||||
| ("7" : "седьмое@")
|
|
||||||
| ("7" : "седьмой@")
|
|
||||||
| ("7" : "седьмом@")
|
|
||||||
| ("7" : "седьмому@")
|
|
||||||
| ("7" : "седьмую@")
|
|
||||||
| ("7" : "седьмые@")
|
|
||||||
| ("7" : "седьмым@")
|
|
||||||
| ("7" : "седьмыми@")
|
|
||||||
| ("7" : "седьмых@")
|
|
||||||
| ("7" : "семи")
|
|
||||||
| ("7" : "семь")
|
|
||||||
| ("7" : "семью")
|
|
||||||
| ("8" : "восемь")
|
|
||||||
| ("8" : "восьмая@")
|
|
||||||
| ("8" : "восьми")
|
|
||||||
| ("8" : "восьмого@")
|
|
||||||
| ("8" : "восьмое@")
|
|
||||||
| ("8" : "восьмой@")
|
|
||||||
| ("8" : "восьмом@")
|
|
||||||
| ("8" : "восьмому@")
|
|
||||||
| ("8" : "восьмую@")
|
|
||||||
| ("8" : "восьмые@")
|
|
||||||
| ("8" : "восьмым@")
|
|
||||||
| ("8" : "восьмыми@")
|
|
||||||
| ("8" : "восьмых@")
|
|
||||||
| ("8" : "восьмью")
|
|
||||||
| ("9" : "девятая@")
|
|
||||||
| ("9" : "девяти")
|
|
||||||
| ("9" : "девятого@")
|
|
||||||
| ("9" : "девятое@")
|
|
||||||
| ("9" : "девятой@")
|
|
||||||
| ("9" : "девятом@")
|
|
||||||
| ("9" : "девятому@")
|
|
||||||
| ("9" : "девятую@")
|
|
||||||
| ("9" : "девятые@")
|
|
||||||
| ("9" : "девятый@")
|
|
||||||
| ("9" : "девятым@")
|
|
||||||
| ("9" : "девятыми@")
|
|
||||||
| ("9" : "девятых@")
|
|
||||||
| ("9" : "девять")
|
|
||||||
| ("9" : "девятью")
|
|
||||||
| ("[E3]" : "тысяч")
|
|
||||||
| ("[E3]" : "тысяча")
|
|
||||||
| ("[E3]" : "тысячам")
|
|
||||||
| ("[E3]" : "тысячами")
|
|
||||||
| ("[E3]" : "тысячах")
|
|
||||||
| ("[E3]" : "тысяче")
|
|
||||||
| ("[E3]" : "тысячей")
|
|
||||||
| ("[E3]" : "тысячи")
|
|
||||||
| ("[E3]" : "тысячу")
|
|
||||||
| ("[E3]" : "тысячью")
|
|
||||||
| ("[E6]" : "миллион")
|
|
||||||
| ("[E6]" : "миллиона")
|
|
||||||
| ("[E6]" : "миллионам")
|
|
||||||
| ("[E6]" : "миллионами")
|
|
||||||
| ("[E6]" : "миллионах")
|
|
||||||
| ("[E6]" : "миллионе")
|
|
||||||
| ("[E6]" : "миллионов")
|
|
||||||
| ("[E6]" : "миллионом")
|
|
||||||
| ("[E6]" : "миллиону")
|
|
||||||
| ("[E6]" : "миллионы")
|
|
||||||
| ("[E9]" : "миллиард")
|
|
||||||
| ("[E9]" : "миллиарда")
|
|
||||||
| ("[E9]" : "миллиардам")
|
|
||||||
| ("[E9]" : "миллиардами")
|
|
||||||
| ("[E9]" : "миллиардах")
|
|
||||||
| ("[E9]" : "миллиарде")
|
|
||||||
| ("[E9]" : "миллиардов")
|
|
||||||
| ("[E9]" : "миллиардом")
|
|
||||||
| ("[E9]" : "миллиарду")
|
|
||||||
| ("[E9]" : "миллиарды")
|
|
||||||
| ("|0|" : "ноле")
|
|
||||||
| ("|0|" : "нолем")
|
|
||||||
| ("|0|" : "ноль")
|
|
||||||
| ("|0|" : "нолю")
|
|
||||||
| ("|0|" : "ноля")
|
|
||||||
| ("|0|" : "нуле")
|
|
||||||
| ("|0|" : "нулем")
|
|
||||||
| ("|0|" : "нуль")
|
|
||||||
| ("|0|" : "нулю")
|
|
||||||
| ("|0|" : "нуля")]
|
|
||||||
;
|
|
||||||
|
|
||||||
lex1 = CDRewrite[lexset1 I[space], "", "", SIGMA_STAR];
|
|
||||||
|
|
||||||
export LEX = Optimize[lex3 @ lex2 @ lex1];
|
|
||||||
|
|
||||||
export INDEPENDENT_EXPONENTS = "[E3]" | "[E6]" | "[E9]";
|
|
||||||
|
|
||||||
# END LANGUAGE SPECIFIC DATA
|
|
||||||
################################################################################
|
|
||||||
# Inserts a marker after the Ms.
|
|
||||||
export INSERT_BOUNDARY = CDRewrite["" : "%", Ms, "", SIGMA_STAR];
|
|
||||||
|
|
||||||
# Deletes all powers and "+".
|
|
||||||
export DELETE_POWERS = CDRewrite[D[POWERS | "+"], "", "", SIGMA_STAR];
|
|
||||||
|
|
||||||
# Deletes trailing zeros at the beginning of a number, so that "0003" does not
|
|
||||||
# get treated as an ordinary number.
|
|
||||||
export DELETE_INITIAL_ZEROS =
|
|
||||||
CDRewrite[("0" POWERS "+") : "", "[BOS]", "", SIGMA_STAR]
|
|
||||||
;
|
|
||||||
|
|
||||||
NonMs = Optimize[POWERS - Ms];
|
|
||||||
|
|
||||||
# Deletes (usually) zeros before a non-M. E.g., +0[E1] should be
|
|
||||||
# deleted
|
|
||||||
export DELETE_INTERMEDIATE_ZEROS1 =
|
|
||||||
CDRewrite[Zero["+0" NonMs], "", "", SIGMA_STAR]
|
|
||||||
;
|
|
||||||
|
|
||||||
# Deletes (usually) zeros before an M, if there is no non-zero element between
|
|
||||||
# that and the previous boundary. Thus, if after the result of the rule above we
|
|
||||||
# end up with "%+0[E3]", then that gets deleted. Also (really) deletes a final
|
|
||||||
# zero.
|
|
||||||
export DELETE_INTERMEDIATE_ZEROS2 = Optimize[
|
|
||||||
CDRewrite[Zero["%+0" Ms], "", "", SIGMA_STAR]
|
|
||||||
@ CDRewrite[D["+0"], "", "[EOS]", SIGMA_STAR]]
|
|
||||||
;
|
|
||||||
|
|
||||||
# Final clean up of stray zeros.
|
|
||||||
export DELETE_REMAINING_ZEROS = Optimize[
|
|
||||||
CDRewrite[Zero["+0"], "", "", SIGMA_STAR]
|
|
||||||
@ CDRewrite[Zero["0"], "", "", SIGMA_STAR]]
|
|
||||||
;
|
|
||||||
|
|
||||||
# Applies the revaluation map. For example in English, change [E4] to [E1] as a
|
|
||||||
# modifier of [E3]
|
|
||||||
export REVALUE = CDRewrite[revaluations, "", "", SIGMA_STAR];
|
|
||||||
|
|
||||||
# Deletes the various marks and powers in the input and output.
|
|
||||||
export DELETE_MARKS = CDRewrite[D["%" | "+" | POWERS], "", "", SIGMA_STAR];
|
|
||||||
|
|
||||||
export CLEAN_SPACES = Optimize[
|
|
||||||
CDRewrite[" "+ : " ", b.kNotSpace, b.kNotSpace, SIGMA_STAR]
|
|
||||||
@ CDRewrite[" "* : "", "[BOS]", "", SIGMA_STAR]
|
|
||||||
@ CDRewrite[" "* : "", "", "[EOS]", SIGMA_STAR]]
|
|
||||||
;
|
|
||||||
|
|
||||||
d = b.kDigit;
|
|
||||||
|
|
||||||
# Germanic inversion rule.
|
|
||||||
germanic =
|
|
||||||
(I["1+"] d "[E1]" D["+1"])
|
|
||||||
| (I["2+"] d "[E1]" D["+2"])
|
|
||||||
| (I["3+"] d "[E1]" D["+3"])
|
|
||||||
| (I["4+"] d "[E1]" D["+4"])
|
|
||||||
| (I["5+"] d "[E1]" D["+5"])
|
|
||||||
| (I["6+"] d "[E1]" D["+6"])
|
|
||||||
| (I["7+"] d "[E1]" D["+7"])
|
|
||||||
| (I["8+"] d "[E1]" D["+8"])
|
|
||||||
| (I["9+"] d "[E1]" D["+9"])
|
|
||||||
;
|
|
||||||
|
|
||||||
germanic_inversion =
|
|
||||||
CDRewrite[germanic, "", "", SIGMA_STAR, 'ltr', 'opt']
|
|
||||||
;
|
|
||||||
|
|
||||||
export GERMANIC_INVERSION = SIGMA_STAR;
|
|
||||||
export ORDINAL_RESTRICTION =
|
|
||||||
Optimize[((SIGMA - "@")* "@") @ CDRewrite[D["@"], "", "", SIGMA_STAR]]
|
|
||||||
;
|
|
||||||
nondigits = b.kBytes - b.kDigit;
|
|
||||||
export ORDINAL_SUFFIX = D[nondigits*];
|
|
|
@ -1,77 +0,0 @@
|
|||||||
# Copyright 2017 Google Inc.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
# This verbalizer is used whenever there is an LM symbol that consists of
|
|
||||||
# letters immediately followed by "{spelled}". This strips the "{spelled}"
|
|
||||||
# suffix.
|
|
||||||
|
|
||||||
import 'util/byte.grm' as b;
|
|
||||||
import 'ru/classifier/cyrillic.grm' as c;
|
|
||||||
import 'ru/verbalizer/lexical_map.grm' as l;
|
|
||||||
import 'ru/verbalizer/numbers.grm' as n;
|
|
||||||
|
|
||||||
digit = b.kDigit @ n.CARDINAL_NUMBERS;
|
|
||||||
|
|
||||||
char_set = (("a" | "A") : "letter-a")
|
|
||||||
| (("b" | "B") : "letter-b")
|
|
||||||
| (("c" | "C") : "letter-c")
|
|
||||||
| (("d" | "D") : "letter-d")
|
|
||||||
| (("e" | "E") : "letter-e")
|
|
||||||
| (("f" | "F") : "letter-f")
|
|
||||||
| (("g" | "G") : "letter-g")
|
|
||||||
| (("h" | "H") : "letter-h")
|
|
||||||
| (("i" | "I") : "letter-i")
|
|
||||||
| (("j" | "J") : "letter-j")
|
|
||||||
| (("k" | "K") : "letter-k")
|
|
||||||
| (("l" | "L") : "letter-l")
|
|
||||||
| (("m" | "M") : "letter-m")
|
|
||||||
| (("n" | "N") : "letter-n")
|
|
||||||
| (("o" | "O") : "letter-o")
|
|
||||||
| (("p" | "P") : "letter-p")
|
|
||||||
| (("q" | "Q") : "letter-q")
|
|
||||||
| (("r" | "R") : "letter-r")
|
|
||||||
| (("s" | "S") : "letter-s")
|
|
||||||
| (("t" | "T") : "letter-t")
|
|
||||||
| (("u" | "U") : "letter-u")
|
|
||||||
| (("v" | "V") : "letter-v")
|
|
||||||
| (("w" | "W") : "letter-w")
|
|
||||||
| (("x" | "X") : "letter-x")
|
|
||||||
| (("y" | "Y") : "letter-y")
|
|
||||||
| (("z" | "Z") : "letter-z")
|
|
||||||
| (digit)
|
|
||||||
| ("&" : "@@AND@@")
|
|
||||||
| ("." : "")
|
|
||||||
| ("-" : "")
|
|
||||||
| ("_" : "")
|
|
||||||
| ("/" : "")
|
|
||||||
| (n.I["letter-"] c.kCyrillicAlpha)
|
|
||||||
;
|
|
||||||
|
|
||||||
ins_space = "" : " ";
|
|
||||||
|
|
||||||
suffix = "{spelled}" : "";
|
|
||||||
|
|
||||||
spelled = Optimize[char_set (ins_space char_set)* suffix];
|
|
||||||
|
|
||||||
export SPELLED = Optimize[spelled @ l.LEXICAL_MAP];
|
|
||||||
|
|
||||||
sigma_star = b.kBytes*;
|
|
||||||
|
|
||||||
# Gets rid of the letter- prefix since in some cases we don't want it.
|
|
||||||
|
|
||||||
del_letter = CDRewrite[n.D["letter-"], "", "", sigma_star];
|
|
||||||
|
|
||||||
spelled_no_tag = Optimize[char_set (ins_space char_set)*];
|
|
||||||
|
|
||||||
export SPELLED_NO_LETTER = Optimize[spelled_no_tag @ del_letter];
|
|
@ -1,24 +0,0 @@
|
|||||||
# Copyright 2017 Google Inc.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
import 'ru/verbalizer/lexical_map.grm' as l;
|
|
||||||
|
|
||||||
punct =
|
|
||||||
("." : "@@PERIOD@@")
|
|
||||||
| ("," : "@@COMMA@@")
|
|
||||||
| ("!" : "@@EXCLAMATION_MARK@@")
|
|
||||||
| ("?" : "@@QUESTION_MARK@@")
|
|
||||||
;
|
|
||||||
|
|
||||||
export SPOKEN_PUNCT = Optimize[punct @ l.LEXICAL_MAP];
|
|
@ -1,108 +0,0 @@
|
|||||||
# Copyright 2017 Google Inc.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
import 'util/byte.grm' as b;
|
|
||||||
import 'ru/verbalizer/lexical_map.grm' as l;
|
|
||||||
import 'ru/verbalizer/numbers.grm' as n;
|
|
||||||
|
|
||||||
# Only handles 24-hour time with quarter-to, half-past and quarter-past.
|
|
||||||
|
|
||||||
increment_hour =
|
|
||||||
("0" : "1")
|
|
||||||
| ("1" : "2")
|
|
||||||
| ("2" : "3")
|
|
||||||
| ("3" : "4")
|
|
||||||
| ("4" : "5")
|
|
||||||
| ("5" : "6")
|
|
||||||
| ("6" : "7")
|
|
||||||
| ("7" : "8")
|
|
||||||
| ("8" : "9")
|
|
||||||
| ("9" : "10")
|
|
||||||
| ("10" : "11")
|
|
||||||
| ("11" : "12")
|
|
||||||
| ("12" : "1") # If someone uses 12, we assume 12-hour by default.
|
|
||||||
| ("13" : "14")
|
|
||||||
| ("14" : "15")
|
|
||||||
| ("15" : "16")
|
|
||||||
| ("16" : "17")
|
|
||||||
| ("17" : "18")
|
|
||||||
| ("18" : "19")
|
|
||||||
| ("19" : "20")
|
|
||||||
| ("20" : "21")
|
|
||||||
| ("21" : "22")
|
|
||||||
| ("22" : "23")
|
|
||||||
| ("23" : "12")
|
|
||||||
;
|
|
||||||
|
|
||||||
hours = Project[increment_hour, 'input'];
|
|
||||||
|
|
||||||
d = b.kDigit;
|
|
||||||
D = d - "0";
|
|
||||||
|
|
||||||
minutes09 = "0" D;
|
|
||||||
|
|
||||||
minutes = ("1" | "2" | "3" | "4" | "5") d;
|
|
||||||
|
|
||||||
__sep__ = ":";
|
|
||||||
sep_space = __sep__ : " ";
|
|
||||||
|
|
||||||
verbalize_hours = hours @ n.CARDINAL_NUMBERS;
|
|
||||||
|
|
||||||
verbalize_minutes =
|
|
||||||
("00" : "@@HOUR@@")
|
|
||||||
| (minutes09 @ (("0" : "@@TIME_ZERO@@") n.I[" "] n.CARDINAL_NUMBERS))
|
|
||||||
| (minutes @ n.CARDINAL_NUMBERS)
|
|
||||||
;
|
|
||||||
|
|
||||||
time_basic = Optimize[verbalize_hours sep_space verbalize_minutes];
|
|
||||||
|
|
||||||
# Special cases we handle right now.
|
|
||||||
# TODO: Need to allow for cases like
|
|
||||||
#
|
|
||||||
# half twelve (in the UK English sense)
|
|
||||||
# half twaalf (in the Dutch sense)
|
|
||||||
|
|
||||||
time_quarter_past =
|
|
||||||
n.I["@@TIME_QUARTER@@ @@TIME_AFTER@@ "]
|
|
||||||
verbalize_hours
|
|
||||||
n.D[__sep__ "15"];
|
|
||||||
|
|
||||||
time_half_past =
|
|
||||||
n.I["@@TIME_HALF@@ @@TIME_AFTER@@ "]
|
|
||||||
verbalize_hours
|
|
||||||
n.D[__sep__ "30"];
|
|
||||||
|
|
||||||
time_quarter_to =
|
|
||||||
n.I["@@TIME_QUARTER@@ @@TIME_BEFORE@@ "]
|
|
||||||
(increment_hour @ verbalize_hours)
|
|
||||||
n.D[__sep__ "45"];
|
|
||||||
|
|
||||||
time_extra = Optimize[
|
|
||||||
time_quarter_past | time_half_past | time_quarter_to]
|
|
||||||
;
|
|
||||||
|
|
||||||
# Basic time periods which most languages can be expected to have.
|
|
||||||
__am__ = "a.m." | "am" | "AM" | "утра";
|
|
||||||
__pm__ = "p.m." | "pm" | "PM" | "вечера";
|
|
||||||
|
|
||||||
period = (__am__ : "@@TIME_AM@@") | (__pm__ : "@@TIME_PM@@");
|
|
||||||
|
|
||||||
time_variants = time_basic | time_extra;
|
|
||||||
|
|
||||||
time = Optimize[
|
|
||||||
(period (" " | n.I[" "]))? time_variants
|
|
||||||
| time_variants ((" " | n.I[" "]) period)?]
|
|
||||||
;
|
|
||||||
|
|
||||||
export TIME = Optimize[time @ l.LEXICAL_MAP];
|
|
@ -1,68 +0,0 @@
|
|||||||
# Copyright 2017 Google Inc.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
# Rules for URLs and email addresses.
|
|
||||||
|
|
||||||
import 'util/byte.grm' as bytelib;
|
|
||||||
import 'ru/verbalizer/lexical_map.grm' as l;
|
|
||||||
|
|
||||||
ins_space = "" : " ";
|
|
||||||
dot = "." : "@@URL_DOT_EXPRESSION@@";
|
|
||||||
at = "@" : "@@AT@@";
|
|
||||||
|
|
||||||
url_suffix =
|
|
||||||
(".com" : dot ins_space "com") |
|
|
||||||
(".gov" : dot ins_space "gov") |
|
|
||||||
(".edu" : dot ins_space "e d u") |
|
|
||||||
(".org" : dot ins_space "org") |
|
|
||||||
(".net" : dot ins_space "net")
|
|
||||||
;
|
|
||||||
|
|
||||||
letter_string = (bytelib.kAlnum)* bytelib.kAlnum;
|
|
||||||
|
|
||||||
letter_string_dot =
|
|
||||||
((letter_string ins_space dot ins_space)* letter_string)
|
|
||||||
;
|
|
||||||
|
|
||||||
# Rules for URLs.
|
|
||||||
export URL = Optimize[
|
|
||||||
((letter_string_dot) (ins_space)
|
|
||||||
(url_suffix)) @ l.LEXICAL_MAP
|
|
||||||
];
|
|
||||||
|
|
||||||
# Rules for email addresses.
|
|
||||||
letter_by_letter = ((bytelib.kAlnum ins_space)* bytelib.kAlnum);
|
|
||||||
|
|
||||||
letter_by_letter_dot =
|
|
||||||
((letter_by_letter ins_space dot ins_space)*
|
|
||||||
letter_by_letter)
|
|
||||||
;
|
|
||||||
|
|
||||||
export EMAIL1 = Optimize[
|
|
||||||
((letter_by_letter) (ins_space)
|
|
||||||
(at) (ins_space)
|
|
||||||
(letter_by_letter_dot) (ins_space)
|
|
||||||
(url_suffix)) @ l.LEXICAL_MAP
|
|
||||||
];
|
|
||||||
|
|
||||||
export EMAIL2 = Optimize[
|
|
||||||
((letter_by_letter) (ins_space)
|
|
||||||
(at) (ins_space)
|
|
||||||
(letter_string_dot) (ins_space)
|
|
||||||
(url_suffix)) @ l.LEXICAL_MAP
|
|
||||||
];
|
|
||||||
|
|
||||||
export EMAILS = Optimize[
|
|
||||||
EMAIL1 | EMAIL2
|
|
||||||
];
|
|
@ -1,42 +0,0 @@
|
|||||||
# Copyright 2017 Google Inc.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
import 'util/util.grm' as util;
|
|
||||||
import 'ru/verbalizer/extra_numbers.grm' as e;
|
|
||||||
import 'ru/verbalizer/float.grm' as f;
|
|
||||||
import 'ru/verbalizer/math.grm' as ma;
|
|
||||||
import 'ru/verbalizer/miscellaneous.grm' as mi;
|
|
||||||
import 'ru/verbalizer/money.grm' as mo;
|
|
||||||
import 'ru/verbalizer/numbers.grm' as n;
|
|
||||||
import 'ru/verbalizer/numbers_plus.grm' as np;
|
|
||||||
import 'ru/verbalizer/spelled.grm' as s;
|
|
||||||
import 'ru/verbalizer/spoken_punct.grm' as sp;
|
|
||||||
import 'ru/verbalizer/time.grm' as t;
|
|
||||||
import 'ru/verbalizer/urls.grm' as u;
|
|
||||||
|
|
||||||
export VERBALIZER = Optimize[RmWeight[
|
|
||||||
( e.MIXED_NUMBERS
|
|
||||||
| e.DIGITS
|
|
||||||
| f.FLOAT
|
|
||||||
| ma.ARITHMETIC
|
|
||||||
| mi.MISCELLANEOUS
|
|
||||||
| mo.MONEY
|
|
||||||
| n.CARDINAL_NUMBERS
|
|
||||||
| n.ORDINAL_NUMBERS
|
|
||||||
| np.NUMBERS_PLUS
|
|
||||||
| s.SPELLED
|
|
||||||
| sp.SPOKEN_PUNCT
|
|
||||||
| t.TIME
|
|
||||||
| u.URL) @ util.CLEAN_SPACES
|
|
||||||
]];
|
|
@ -1,3 +0,0 @@
|
|||||||
# Language-universal grammar definitions
|
|
||||||
|
|
||||||
This directory contains various language-universal grammar definitions.
|
|
|
@ -1,126 +0,0 @@
|
|||||||
# Copyright 2017 Google Inc.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
# Specifies common ways of delimiting thousands in digit strings.
|
|
||||||
|
|
||||||
import 'util/byte.grm' as bytelib;
|
|
||||||
import 'util/util.grm' as util;
|
|
||||||
|
|
||||||
killcomma = "," : "";
|
|
||||||
dot2comma = "." : ",";
|
|
||||||
spaces2comma = " "+ : ",";
|
|
||||||
|
|
||||||
zero = "0";
|
|
||||||
|
|
||||||
# no_delimiter = zero | "[1-9][0-9]*";
|
|
||||||
export no_delimiter = zero | (util.d1to9 bytelib.kDigit*);
|
|
||||||
|
|
||||||
# delim_map_dot = ("[0-9]" | ("\." : ","))*;
|
|
||||||
delim_map_dot = (bytelib.kDigit | dot2comma)*;
|
|
||||||
|
|
||||||
# delim_map_space = ("[0-9]" | (" +" : ","))*;
|
|
||||||
delim_map_space = (bytelib.kDigit | spaces2comma)*;
|
|
||||||
|
|
||||||
## Western systems group thousands. Korean goes this way too.
|
|
||||||
|
|
||||||
# comma_thousands = zero | ("[1-9][0-9]?[0-9]?" (("," : "") "[0-9][0-9][0-9]")*);
|
|
||||||
export comma_thousands = zero | (util.d1to9 bytelib.kDigit{0,2} (killcomma bytelib.kDigit{3})*);
|
|
||||||
|
|
||||||
# ComposeFst: 1st argument cannot match on output labels and 2nd argument
|
|
||||||
# cannot match on input labels (sort?).
|
|
||||||
export dot_thousands = delim_map_dot @ comma_thousands;
|
|
||||||
|
|
||||||
# ComposeFst: 1st argument cannot match on output labels and 2nd argument
|
|
||||||
# cannot match on input labels (sort?).
|
|
||||||
export space_thousands = delim_map_space @ comma_thousands;
|
|
||||||
|
|
||||||
## Chinese prefers grouping by fours (by ten-thousands).
|
|
||||||
|
|
||||||
# chinese_comma =
|
|
||||||
# zero | ("[1-9][0-9]?[0-9]?[0-9]?" (("," : "") "[0-9][0-9][0-9][0-9]")*);
|
|
||||||
export chinese_comma = zero | (util.d1to9 (bytelib.kDigit{0,3}) (killcomma bytelib.kDigit{4})*);
|
|
||||||
|
|
||||||
## The Indian system is more complex because of the Stravinskian alternation
|
|
||||||
## between lakhs and crores.
|
|
||||||
##
|
|
||||||
## According to Wikipedia:
|
|
||||||
##
|
|
||||||
## Indian English Value
|
|
||||||
## One 1
|
|
||||||
## Ten 10
|
|
||||||
## Hundred 100
|
|
||||||
## Thousand 1,000
|
|
||||||
## Lakh 1,00,000
|
|
||||||
## Crore 1,00,00,000
|
|
||||||
## Arab 1,00,00,00,000
|
|
||||||
## Kharab 1,00,00,00,00,000
|
|
||||||
|
|
||||||
# indian_hundreds = "[1-9][0-9]?[0-9]?";
|
|
||||||
indian_hundreds = util.d1to9 bytelib.kDigit{0,2};
|
|
||||||
|
|
||||||
## Up to 99,999.
|
|
||||||
|
|
||||||
# indian_comma_thousands = "[1-9][0-9]?" ("," : "") "[0-9][0-9][0-9]";
|
|
||||||
indian_comma_thousands = util.d1to9 bytelib.kDigit? killcomma bytelib.kDigit{3};
|
|
||||||
|
|
||||||
## Up to 99,99,999.
|
|
||||||
|
|
||||||
# indian_comma_lakhs = "[1-9][0-9]?" ("," : "") "[0-9][0-9]" ("," : "") "[0-9][0-9][0-9]";
|
|
||||||
indian_comma_lakhs = util.d1to9 bytelib.kDigit? killcomma bytelib.kDigit{2} killcomma bytelib.kDigit{3};
|
|
||||||
|
|
||||||
## Up to 999,99,99,999
|
|
||||||
|
|
||||||
indian_comma_crores =
|
|
||||||
util.d1to9 bytelib.kDigit? bytelib.kDigit? killcomma
|
|
||||||
(bytelib.kDigit{2} killcomma)?
|
|
||||||
bytelib.kDigit{2} killcomma
|
|
||||||
bytelib.kDigit{3}
|
|
||||||
;
|
|
||||||
|
|
||||||
## Up to 99,999,99,99,999.
|
|
||||||
|
|
||||||
indian_comma_thousand_crores =
|
|
||||||
util.d1to9 bytelib.kDigit? killcomma
|
|
||||||
bytelib.kDigit{3} killcomma
|
|
||||||
bytelib.kDigit{2} killcomma
|
|
||||||
bytelib.kDigit{2} killcomma
|
|
||||||
bytelib.kDigit{3}
|
|
||||||
;
|
|
||||||
|
|
||||||
## Up to 999,99,999,99,99,999.
|
|
||||||
|
|
||||||
indian_comma_lakh_crores =
|
|
||||||
util.d1to9 bytelib.kDigit? bytelib.kDigit? killcomma
|
|
||||||
bytelib.kDigit{2} killcomma
|
|
||||||
bytelib.kDigit{3} killcomma
|
|
||||||
bytelib.kDigit{2} killcomma
|
|
||||||
bytelib.kDigit{2} killcomma
|
|
||||||
bytelib.kDigit{3}
|
|
||||||
;
|
|
||||||
|
|
||||||
export indian_comma =
|
|
||||||
zero
|
|
||||||
| indian_hundreds
|
|
||||||
| indian_comma_thousands
|
|
||||||
| indian_comma_lakhs
|
|
||||||
| indian_comma_crores
|
|
||||||
| indian_comma_thousand_crores
|
|
||||||
| indian_comma_lakh_crores
|
|
||||||
;
|
|
||||||
|
|
||||||
# Indian number system with dots.
|
|
||||||
export indian_dot_number = delim_map_dot @ indian_comma;
|
|
||||||
|
|
||||||
# Indian number system with spaces.
|
|
||||||
export indian_space_number = delim_map_space @ indian_comma;
|
|
@ -1,3 +0,0 @@
|
|||||||
# Utility grammar definitions
|
|
||||||
|
|
||||||
This directory contains various utility grammar definitions.
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue