Added rhythm prediction function, test=tts

3 years ago · 4fe6c0dc99
parent 8e348d66b9
commit 4fe6c0dc99
14 changed files with 83626 additions and 0 deletions
--- a/examples/other/rhy/000001-010000.txt
+++ b/examples/other/rhy/000001-010000.txt
--- a/examples/other/rhy/README.md
+++ b/examples/other/rhy/README.md
@ -0,0 +1,79 @@
 # Punctuation Restoration with IWSLT2012-Zh
 ## Get Started
 ### Data Preprocessing
 ```bash
 ./run.sh --stage 0 --stop-stage 0
 ```
 ### Model Training
 ```bash
 ./run.sh --stage 1 --stop-stage 1
 ```
 ### Testing
 ```bash
 ./run.sh --stage 2 --stop-stage 2
 ```
 ### Punctuation Restoration
 ```bash
 ./run.sh --stage 3 --stop-stage 3
 ```
 ## Pretrained Model
 The pretrained models can be downloaded here:
 [ernie_linear_p3_iwslt2012_zh_ckpt_0.1.1.zip](https://paddlespeech.bj.bcebos.com/text/ernie_linear_p3_iwslt2012_zh_ckpt_0.1.1.zip)
 [ernie-3.0-base.tar.gz](https://paddlespeech.bj.bcebos.com/punc_restore/ernie-3.0-base.tar.gz)
 [ernie-3.0-medium.tar.gz](https://paddlespeech.bj.bcebos.com/punc_restore/ernie-3.0-medium.tar.gz)
 [ernie-3.0-micro.tar.gz](https://paddlespeech.bj.bcebos.com/punc_restore/ernie-3.0-micro.tar.gz)
 [ernie-mini.tar.gz](https://paddlespeech.bj.bcebos.com/punc_restore/ernie-mini.tar.gz)
 [ernie-nano.tar.gz](https://paddlespeech.bj.bcebos.com/punc_restore/ernie-nano.tar.gz)
 [ernie-tiny.tar.gz](https://paddlespeech.bj.bcebos.com/punc_restore/ernie-tiny.tar.gz)
 ### Test Result
 - Ernie 1.0
    |       |COMMA  |  PERIOD | QUESTION | OVERALL|
    |:-----:|:-----:|:-----:|:-----:|:-----:|  
    |Precision  |0.510955  |0.526462  |0.820755  |0.619391|
    |Recall     |0.517433  |0.564179  |0.861386  |0.647666|
    |F1         |0.514173  |0.544669  |0.840580  |0.633141|
 - Ernie-tiny
    |       |COMMA  |  PERIOD | QUESTION | OVERALL|
    |:-----:|:-----:|:-----:|:-----:|:-----:|  
    |Precision  |0.733177  |0.721448  |0.754717  |0.736447|
    |Recall     |0.380740  |0.524646  |0.733945  |0.546443|
    |F1         |0.501204  |0.607506  |0.744186  |0.617632|
 - Ernie-3.0-base-zh
    |       |COMMA  |  PERIOD | QUESTION | OVERALL|
    |:-----:|:-----:|:-----:|:-----:|:-----:|  
    |Precision  |0.805947  |0.764160  |0.858491  |0.809532|
    |Recall     |0.399070  |0.567978  |0.850467  |0.605838|
    |F1         |0.533817  |0.651623  |0.854460  |0.679967|
 - Ernie-3.0-medium-zh
    |       |COMMA  |  PERIOD | QUESTION | OVERALL|
    |:-----:|:-----:|:-----:|:-----:|:-----:|  
    |Precision  |0.730829  |0.699164  |0.707547  |0.712514|
    |Recall     |0.388196  |0.533286  |0.797872  |0.573118|
    |F1         |0.507058  |0.605062  |0.750000  |0.620707|
 - Ernie-3.0-mini-zh
    |       |COMMA  |  PERIOD | QUESTION | OVERALL|
    |:-----:|:-----:|:-----:|:-----:|:-----:|  
    |Precision  |0.757433  |0.708449  |0.707547  |0.724477|
    |Recall     |0.355752  |0.506977  |0.735294  |0.532674|
    |F1         |0.484121  |0.591015  |0.721154  |0.598763|
 - Ernie-3.0-micro-zh
    |       |COMMA  |  PERIOD | QUESTION | OVERALL|
    |:-----:|:-----:|:-----:|:-----:|:-----:|  
    |Precision  |0.733959  |0.679666  |0.726415  |0.713347|
    |Recall     |0.332742  |0.483487  |0.712963  |0.509731|
    |F1         |0.457896  |0.565033  |0.719626  |0.580852|
 - Ernie-3.0-nano-zh
    |       |COMMA  |  PERIOD | QUESTION | OVERALL|
    |:-----:|:-----:|:-----:|:-----:|:-----:|  
    |Precision  |0.693271  |0.682451  |0.754717  |0.710146|
    |Recall     |0.327784  |0.491968  |0.666667  |0.495473|
    |F1         |0.445114  |0.571762  |0.707965  |0.574947|
--- a/examples/other/rhy/conf/default.yaml
+++ b/examples/other/rhy/conf/default.yaml
@ -0,0 +1,44 @@
 ###########################################################
 #                       DATA SETTING                      #
 ###########################################################
 dataset_type: Ernie
 train_path: data/rhy_predict/train.txt
 dev_path: data/rhy_predict/dev.txt
 test_path: data/rhy_predict/test.txt
 batch_size: 64
 num_workers: 2
 data_params:
    pretrained_token: ernie-1.0
    punc_path: data/rhy_predict/rhy_token   # file listing the label tokens, one per line
    seq_len: 100
 ###########################################################
 #                       MODEL SETTING                     #
 ###########################################################
 model_type: ErnieLinear
 model:
    pretrained_token: ernie-1.0
    # NOTE(review): presumably the 4 tokens in rhy_token plus one "no label" class - confirm.
    num_classes: 5
 ###########################################################
 #                     OPTIMIZER SETTING                   #
 ###########################################################
 optimizer_params:
    weight_decay: 1.0e-6               # weight decay coefficient.
 scheduler_params:
    learning_rate: 1.0e-5               # learning rate.
    gamma: 0.9999                          # scheduler gamma; must be in (0.0, 1.0), values closer to 1.0 work better.
 ###########################################################
 #                     TRAINING SETTING                    #
 ###########################################################
 max_epoch: 20
 # `num_snapshots` used to be listed here as 5 and again below as 10.
 # Duplicate keys are invalid YAML and last-wins loaders used 10, so only that value is kept.
 ###########################################################
 #                     OTHER SETTING                       #
 ###########################################################
 num_snapshots: 10                 # max number of snapshots to keep while training
 seed: 42                          # random seed for paddle, random, and np.random
--- a/examples/other/rhy/data/rhy_predict/rhy_token
+++ b/examples/other/rhy/data/rhy_predict/rhy_token
@ -0,0 +1,4 @@
 %
 `
 ~
 $
--- a/examples/other/rhy/label_train-set.txt
+++ b/examples/other/rhy/label_train-set.txt
--- a/examples/other/rhy/local/data.sh
+++ b/examples/other/rhy/local/data.sh
@ -0,0 +1,18 @@
 #!/bin/bash
 # Prepare the rhythm-prediction text data from the two label files.
 # Usage: ./local/data.sh <aishell_data> <biaobei_data> <processed_path>
 # Stop at the first failing command so that a broken preprocessing step is
 # not reported as success by the final `exit 0`.
 set -e
 if [ $# -lt 3 ]; then
    echo "Usage: $0 <aishell_data> <biaobei_data> <processed_path>" >&2
    exit 1
 fi
 aishell_data=$1
 biaobei_data=$2
 processed_path=$3
 # Order matters: pre_for_sp_biaobei.py opens the output files in 'w' mode,
 # pre_for_sp_aishell.py then appends ('a') to the same files.
 python3 ./local/pre_for_sp_biaobei.py \
    --data="${biaobei_data}" \
    --processed_path="${processed_path}"
 python3 ./local/pre_for_sp_aishell.py \
    --data="${aishell_data}" \
    --processed_path="${processed_path}"
 echo "Finish data preparation."
 exit 0
--- a/examples/other/rhy/local/pre_for_sp_aishell.py
+++ b/examples/other/rhy/local/pre_for_sp_aishell.py
@ -0,0 +1,50 @@
 import argparse
 import os
 import re
 # Maps each rhythm label to the single-character token that stands in for it.
 replace_ = {
    "#1": "%",
    "#2": "`",
    "#3": "~",
    "#4": "$",
 }
 def replace_rhy_with_punc(line):
    """Strip a trailing punctuation match from ``line`` and swap rhythm labels for tokens.
    Args:
        line: the text to convert.
    Returns:
        ``line`` after the ``re.sub`` below and after every key of ``replace_``
        has been replaced by its value.
    """
    # The original note says the pattern follows checkcheck_oov.py.
    # NOTE(review): `%*$` sits outside the character class, so the pattern only
    # matches one listed character followed by optional '%' at the end of the
    # line - confirm that this is the intended behaviour.
    cleaned = re.sub(r'[：、，；。？！,.:;"?!’《》【】<=>{}()（）#&@“”^_|…\\]%*$', '', line)
    for label, token in replace_.items():
        # str.replace returns the string unchanged when the label is absent.
        cleaned = cleaned.replace(label, token)
    return cleaned
 def pre_and_write(data, file):
    """Append the text field of every record to ``file``, one character per token.
    Args:
        data: iterable of raw label lines; the text is the third '|'-separated field.
        file: path of the output file. It is opened in append mode, so existing
            content is kept.
    Each output line is the stripped text with a space between all characters,
    followed by a space and a newline.
    """
    # Explicit UTF-8: the text is Chinese and must not depend on the locale.
    with open(file, 'a', encoding='utf-8') as out_f:
        for record in data:
            text = record.split('|')[2].strip()
            # The rhythm-label replacement is commented out in this script:
            # text = replace_rhy_with_punc(text)
            out_f.write(' '.join(text) + ' \n')
 def main():
    """Split the label file given by --data into train/test/dev files under --processed_path."""
    parser = argparse.ArgumentParser(
        description="Prepare rhythm-prediction text from the AISHELL label file.")
    parser.add_argument("--data", type=str, default="label_train-set.txt")
    parser.add_argument(
        "--processed_path", type=str, default="../data/rhy_predict")
    args = parser.parse_args()
    os.makedirs(args.processed_path, exist_ok=True)
    # Explicit UTF-8 so reading the Chinese labels does not depend on the locale.
    with open(args.data, encoding='utf-8') as label_f:
        # The first five lines are skipped (presumably a file header - confirm against the data).
        text = label_f.readlines()[5:]
    total = len(text)
    # 90% / 5% / 5% split; int() truncates, so a few trailing lines may end up in no split.
    sizes = [int(total * 0.9), int(total * 0.05), int(total * 0.05)]
    names = ['train.txt', 'test.txt', 'dev.txt']
    start = 0
    for size, name in zip(sizes, names):
        target = os.path.join(args.processed_path, name)
        pre_and_write(text[start:start + size], target)
        start += size
 if __name__ == "__main__":
    main()
--- a/examples/other/rhy/local/pre_for_sp_biaobei.py
+++ b/examples/other/rhy/local/pre_for_sp_biaobei.py
@ -0,0 +1,50 @@
 import argparse
 import os
 import re
 # Maps each rhythm label to the single-character token that stands in for it.
 replace_ = {
    "#1": "%",
    "#2": "`",
    "#3": "~",
    "#4": "$",
 }
 def replace_rhy_with_punc(line):
    """Replace the rhythm labels in ``line`` with their single-character tokens.
    Args:
        line: the text to convert.
    Returns:
        ``line`` after the ``re.sub`` below and after every key of ``replace_``
        has been replaced by its value.
    """
    # The original note refers to checkcheck_oov.py for the pattern.
    # NOTE(review): as written the pattern needs '*%' after the end-of-line
    # anchor, so it appears unable to match and the call leaves the text
    # unchanged - confirm which characters were meant to be removed.
    cleaned = re.sub(r'^$\*%', '', line)
    for label, token in replace_.items():
        # str.replace returns the string unchanged when the label is absent.
        cleaned = cleaned.replace(label, token)
    return cleaned
 def pre_and_write(data, file):
    """Write the converted text of every record to ``file``, one character per token.
    Args:
        data: iterable of raw label lines; the text is the second tab-separated field.
        file: path of the output file. It is opened in write mode, so any
            existing content is replaced.
    Each output line is the stripped text passed through ``replace_rhy_with_punc``,
    with a space between all characters, followed by a space and a newline.
    """
    # Explicit UTF-8: the text is Chinese and must not depend on the locale.
    with open(file, 'w', encoding='utf-8') as out_f:
        for record in data:
            text = record.split('\t')[1].strip()
            text = replace_rhy_with_punc(text)
            out_f.write(' '.join(text) + ' \n')
 def main():
    """Split the label file given by --data into train/test/dev files under --processed_path."""
    parser = argparse.ArgumentParser(
        description="Prepare rhythm-prediction text from the Biaobei label file.")
    parser.add_argument("--data", type=str, default="label_train-set.txt")
    parser.add_argument(
        "--processed_path", type=str, default="../data/rhy_predict")
    args = parser.parse_args()
    print(args.data, args.processed_path)
    os.makedirs(args.processed_path, exist_ok=True)
    # Explicit UTF-8 so reading the Chinese labels does not depend on the locale.
    with open(args.data, encoding='utf-8') as label_f:
        lines = label_f.readlines()
    # Keep every other line starting with the first one
    # (presumably the text lines alternate with another annotation line - confirm).
    text = lines[0::2]
    total = len(text)
    # 90% / 5% / 5% split; int() truncates, so a few trailing lines may end up in no split.
    sizes = [int(total * 0.9), int(total * 0.05), int(total * 0.05)]
    names = ['train.txt', 'test.txt', 'dev.txt']
    start = 0
    for size, name in zip(sizes, names):
        target = os.path.join(args.processed_path, name)
        pre_and_write(text[start:start + size], target)
        start += size
 if __name__ == "__main__":
    main()
--- a/examples/other/rhy/local/preprocess.py
+++ b/examples/other/rhy/local/preprocess.py
@ -0,0 +1,29 @@
 import argparse
 def process_sentence(line):
    """Return ``line`` with a single space inserted between all of its characters.
    Args:
        line: the input string; any trailing newline is treated like every other character.
    Returns:
        The space-separated string, or '' for an empty input.
    """
    # str.join replaces the manual character-by-character concatenation.
    return ' '.join(line)
 if __name__ == "__main__":
    # Command-line entry point: copy -input_file to -output_file with every
    # line passed through process_sentence, then report the number of lines.
    parser = argparse.ArgumentParser(description="Input filename")
    # Both paths are required; without them open() would be called with None.
    parser.add_argument('-input_file', required=True)
    parser.add_argument('-output_file', required=True)
    args = parser.parse_args()
    sentence_cnt = 0
    # Explicit UTF-8 so the text handling does not depend on the locale.
    with open(args.input_file, 'r', encoding='utf-8') as f:
        with open(args.output_file, 'w', encoding='utf-8') as write_f:
            for line in f:
                sentence_cnt += 1
                write_f.write(process_sentence(line))
    print('preprocess over')
    print('total sentences number:', sentence_cnt)
--- a/examples/other/rhy/local/rhy_predict.sh
+++ b/examples/other/rhy/local/rhy_predict.sh
@ -0,0 +1,12 @@
 #!/bin/bash
 # Run punc_restore.py from ${BIN_DIR} on one piece of text with a trained checkpoint.
 # Usage: ./local/rhy_predict.sh <config_path> <train_output_path> <ckpt_name> <text>
 # BIN_DIR is exported by path.sh; fail early instead of running "/punc_restore.py".
 : "${BIN_DIR:?BIN_DIR is not set, source path.sh first}"
 config_path=$1
 train_output_path=$2
 ckpt_name=$3
 text=$4
 # Quoted so that paths or text containing spaces stay a single argument.
 python3 "${BIN_DIR}/punc_restore.py" \
    --config="${config_path}" \
    --checkpoint="${train_output_path}/checkpoints/${ckpt_name}" \
    --text="${text}"
--- a/examples/other/rhy/local/test.sh
+++ b/examples/other/rhy/local/test.sh
@ -0,0 +1,11 @@
 #!/bin/bash
 # Run test.py from ${BIN_DIR} with a trained checkpoint.
 # Usage: ./local/test.sh <config_path> <train_output_path> <ckpt_name>
 # BIN_DIR is exported by path.sh; fail early instead of running "/test.py".
 : "${BIN_DIR:?BIN_DIR is not set, source path.sh first}"
 config_path=$1
 train_output_path=$2
 ckpt_name=$3
 # Quoted so that paths containing spaces stay a single argument.
 python3 "${BIN_DIR}/test.py" \
    --config="${config_path}" \
    --checkpoint="${train_output_path}/checkpoints/${ckpt_name}"
--- a/examples/other/rhy/local/train.sh
+++ b/examples/other/rhy/local/train.sh
@ -0,0 +1,9 @@
 #!/bin/bash
 # Run train.py from ${BIN_DIR} on a single GPU (--ngpu=1).
 # Usage: ./local/train.sh <config_path> <train_output_path>
 # BIN_DIR is exported by path.sh; fail early instead of running "/train.py".
 : "${BIN_DIR:?BIN_DIR is not set, source path.sh first}"
 config_path=$1
 train_output_path=$2
 # Quoted so that paths containing spaces stay a single argument.
 python3 "${BIN_DIR}/train.py" \
    --config="${config_path}" \
    --output-dir="${train_output_path}" \
    --ngpu=1
--- a/examples/other/rhy/path.sh
+++ b/examples/other/rhy/path.sh
@ -0,0 +1,14 @@
 #!/bin/bash
 # Environment setup for this example; meant to be sourced (run.sh does `source path.sh`).
 # MAIN_ROOT is derived from the current directory, so source it from the example directory.
 export MAIN_ROOT="${PWD}/../../../"
 export PATH="${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}"
 export LC_ALL=C
 # Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
 export PYTHONIOENCODING=UTF-8
 # Join the previous value only when it is non-empty: an empty entry in these
 # search paths stands for the current directory.
 export PYTHONPATH="${MAIN_ROOT}${PYTHONPATH:+:${PYTHONPATH}}"
 export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/lib/"
 MODEL=ernie_linear
 # Directory holding the train.py / test.py / punc_restore.py scripts used by local/*.sh.
 export BIN_DIR="${MAIN_ROOT}/paddlespeech/text/exps/${MODEL}"
--- a/examples/other/rhy/run.sh
+++ b/examples/other/rhy/run.sh
@ -0,0 +1,39 @@
 #!/bin/bash
 # Entry point of the example: stage 0 prepares data, stage 1 trains,
 # stage 2 tests a checkpoint and stage 3 runs prediction on ${text}.
 set -e
 source path.sh
 gpus=1
 # NOTE: with these defaults only stage 3 runs; pass --stage/--stop-stage to select others.
 stage=3
 stop_stage=3
 aishell_data=label_train-set.txt
 biaobei_data=000001-010000.txt
 processed_path=data/rhy_predict
 conf_path=conf/default.yaml
 train_output_path=exp/rhy
 ckpt_name=snapshot_iter_2600.pdz
 text=我们城市的复苏有赖于他强有力的政策。
 # with the following command, you can choose the stage range you want to run
 # such as `./run.sh --stage 0 --stop-stage 0`
 # this cannot be mixed with positional arguments `$1`, `$2` ...
 source "${MAIN_ROOT}/utils/parse_options.sh" || exit 1
 # All expansions below are quoted so that values containing spaces
 # (for example a --text override) reach the stage scripts as one argument.
 if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    # prepare data
    ./local/data.sh "${aishell_data}" "${biaobei_data}" "${processed_path}"
 fi
 if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    CUDA_VISIBLE_DEVICES="${gpus}" ./local/train.sh "${conf_path}" "${train_output_path}" || exit 1
 fi
 if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    # test the checkpoint selected by ckpt_name
    CUDA_VISIBLE_DEVICES="${gpus}" ./local/test.sh "${conf_path}" "${train_output_path}" "${ckpt_name}" || exit 1
 fi
 if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    # run prediction on ${text} with the checkpoint selected by ckpt_name
    CUDA_VISIBLE_DEVICES="${gpus}" ./local/rhy_predict.sh "${conf_path}" "${train_output_path}" "${ckpt_name}" "${text}" || exit 1
 fi