refactor g2p egs (#630)

* refactor g2p egs * add shebang; remove avg.sh from egs;
5 years ago · 1b373bfcd7
parent 075635d2b4
commit 1b373bfcd7
17 changed files with 44 additions and 133 deletions
--- a/examples/aishell/s0/local/avg.sh
+++ b/examples/aishell/s0/local/avg.sh
@ -1,23 +0,0 @@
 #! /usr/bin/env bash
 # Usage: avg.sh ckpt_dir avg_num
 #
 # Runs ${MAIN_ROOT}/utils/avg_model.py with --ckpt_dir ckpt_dir, --num avg_num
 # and --val_best, asking it to write ckpt_dir/avg_<avg_num>.pdparams.
 # NOTE(review): presumably this averages the avg_num best-validation
 # checkpoints -- confirm against avg_model.py.
 # MAIN_ROOT must already be set in the environment.
 # Exit status: 255 (bash's value for `exit -1`) on a wrong argument count,
 # 1 if avg_model.py fails, 0 otherwise.
 if [ "$#" -ne 2 ]; then
    echo "usage: ${0} ckpt_dir avg_num"
    exit -1
 fi
 ckpt_dir=${1}
 average_num=${2}
 decode_checkpoint="${ckpt_dir}/avg_${average_num}.pdparams"
 # Every expansion is quoted so a checkpoint path containing whitespace or
 # glob characters reaches avg_model.py as a single argument.
 if ! python3 -u "${MAIN_ROOT}/utils/avg_model.py" \
    --dst_model "${decode_checkpoint}" \
    --ckpt_dir "${ckpt_dir}" \
    --num "${average_num}" \
    --val_best; then
    echo "Failed in avg ckpt!"
    exit 1
 fi
 exit 0
--- a/examples/aishell/s1/local/test.sh
+++ b/examples/aishell/s1/local/test.sh
@ -15,6 +15,10 @@ fi
 # Positional arguments: $1 = config file path, $2 = checkpoint path prefix.
 config_path=$1
 ckpt_prefix=$2
 # Last path component of the checkpoint prefix. The original expanded the
 # misspelled, never-set variable ckpt_prefxi, so basename ran without an
 # operand and ckpt_name was always empty.
 ckpt_name=$(basename "${ckpt_prefix}")
 mkdir -p exp
 # download language model
 #bash local/download_lm_ch.sh
 #if [ $? -ne 0 ]; then
@ -25,11 +29,13 @@ ckpt_prefix=$2
 for type in attention ctc_greedy_search; do
    echo "decoding ${type}"
    batch_size=64
    output_dir=${ckpt_prefix}
    mkdir -p ${output_dir}
    python3 -u ${BIN_DIR}/test.py \
    --device ${device} \
    --nproc 1 \
    --config ${config_path} \
-    --result_file ${ckpt_prefix}.${type}.rsl \
+    --result_file ${output_dir}/${type}.rsl \
    --checkpoint_path ${ckpt_prefix} \
    --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size}
@ -42,11 +48,13 @@ done
 for type in ctc_prefix_beam_search attention_rescoring; do
    echo "decoding ${type}"
    batch_size=1
    output_dir=${ckpt_prefix}
    mkdir -p ${output_dir}
    python3 -u ${BIN_DIR}/test.py \
    --device ${device} \
    --nproc 1 \
    --config ${config_path} \
-    --result_file ${ckpt_prefix}.${type}.rsl \
+    --result_file ${output_dir}/${type}.rsl \
    --checkpoint_path ${ckpt_prefix} \
    --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size}
--- a/examples/chinese_g2p/.gitignore
+++ b/examples/chinese_g2p/.gitignore
@ -0,0 +1,2 @@
 data
 exp
--- a/examples/chinese_g2p/local/prepare_dataset.sh
+++ b/examples/chinese_g2p/local/prepare_dataset.sh
@ -1,7 +1,8 @@
-echo "Extracting Prosody Labeling"
+#!/bin/bash
 exp_dir="exp"
 data_dir="data"
 source ${MAIN_ROOT}/utils/parse_options.sh || exit -1
 archive=${data_dir}/"BZNSYP.rar"
--- a/examples/chinese_g2p/run.sh
+++ b/examples/chinese_g2p/run.sh
@ -1,22 +1,33 @@
 #!/usr/bin/env bash
 source path.sh
 stage=-1
 stop_stage=100
-exp_dir="exp"
+exp_dir=exp
-data_dir="data"
+data_dir=data
 source ${MAIN_ROOT}/utils/parse_options.sh || exit -1
 mkdir -p ${exp_dir}
 if [ $stage -le 0 ] && [ $stop_stage -ge 0 ];then
    echo "stage 0: Extracting Prosody Labeling"
    bash local/prepare_dataset.sh --exp-dir ${exp_dir} --data-dir ${data_dir}
 fi
 # convert transcription in chinese into pinyin with pypinyin or jieba+pypinyin
 filename="000001-010000.txt"
 echo "Processing transcriptions..."
-python3 local/extract_pinyin_label.py ${exp_dir}/${filename} ${exp_dir}/"pinyin_baker.py"
+if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
-python3 local/convert_transcription.py ${exp_dir}/${filename} ${exp_dir}/"result_pypinyin.txt"
+    echo "stage 1: Processing transcriptions..."
-python3 local/convert_transcription.py --use-jieba ${exp_dir}/${filename} ${exp_dir}/"result_pypinyin_with_jieba.txt"
+    python3 local/extract_pinyin_label.py ${exp_dir}/${filename} ${exp_dir}/ref.pinyin
    python3 local/convert_transcription.py ${exp_dir}/${filename} ${exp_dir}/trans.pinyin
    python3 local/convert_transcription.py --use-jieba ${exp_dir}/${filename} ${exp_dir}/trans.jieba.pinyin
 fi
 echo "done"
 exit 0
--- a/examples/librispeech/s0/local/avg.sh
+++ b/examples/librispeech/s0/local/avg.sh
@ -1,23 +0,0 @@
 #! /usr/bin/env bash
 # Usage: avg.sh ckpt_dir avg_num
 #
 # Runs ${MAIN_ROOT}/utils/avg_model.py with --ckpt_dir ckpt_dir, --num avg_num
 # and --val_best, asking it to write ckpt_dir/avg_<avg_num>.pdparams.
 # NOTE(review): presumably this averages the avg_num best-validation
 # checkpoints -- confirm against avg_model.py.
 # MAIN_ROOT must already be set in the environment.
 # Exit status: 255 (bash's value for `exit -1`) on a wrong argument count,
 # 1 if avg_model.py fails, 0 otherwise.
 if [ "$#" -ne 2 ]; then
    echo "usage: ${0} ckpt_dir avg_num"
    exit -1
 fi
 ckpt_dir=${1}
 average_num=${2}
 decode_checkpoint="${ckpt_dir}/avg_${average_num}.pdparams"
 # Every expansion is quoted so a checkpoint path containing whitespace or
 # glob characters reaches avg_model.py as a single argument.
 if ! python3 -u "${MAIN_ROOT}/utils/avg_model.py" \
    --dst_model "${decode_checkpoint}" \
    --ckpt_dir "${ckpt_dir}" \
    --num "${average_num}" \
    --val_best; then
    echo "Failed in avg ckpt!"
    exit 1
 fi
 exit 0
--- a/examples/librispeech/s1/conf/conformer.yaml
+++ b/examples/librispeech/s1/conf/conformer.yaml
@ -77,7 +77,7 @@ model:
 training:
  n_epoch: 120
  accum_grad: 8
-  global_grad_clip: 5.0
+  global_grad_clip: 3.0
  optim: adam
  optim_conf:
    lr: 0.004
--- a/examples/librispeech/s1/local/avg.sh
+++ b/examples/librispeech/s1/local/avg.sh
@ -1,23 +0,0 @@
 #! /usr/bin/env bash
 # Usage: avg.sh ckpt_dir avg_num
 #
 # Runs ${MAIN_ROOT}/utils/avg_model.py with --ckpt_dir ckpt_dir, --num avg_num
 # and --val_best, asking it to write ckpt_dir/avg_<avg_num>.pdparams.
 # NOTE(review): presumably this averages the avg_num best-validation
 # checkpoints -- confirm against avg_model.py.
 # MAIN_ROOT must already be set in the environment.
 # Exit status: 255 (bash's value for `exit -1`) on a wrong argument count,
 # 1 if avg_model.py fails, 0 otherwise.
 if [ "$#" -ne 2 ]; then
    echo "usage: ${0} ckpt_dir avg_num"
    exit -1
 fi
 ckpt_dir=${1}
 average_num=${2}
 decode_checkpoint="${ckpt_dir}/avg_${average_num}.pdparams"
 # Every expansion is quoted so a checkpoint path containing whitespace or
 # glob characters reaches avg_model.py as a single argument.
 if ! python3 -u "${MAIN_ROOT}/utils/avg_model.py" \
    --dst_model "${decode_checkpoint}" \
    --ckpt_dir "${ckpt_dir}" \
    --num "${average_num}" \
    --val_best; then
    echo "Failed in avg ckpt!"
    exit 1
 fi
 exit 0
--- a/examples/tiny/s0/local/avg.sh
+++ b/examples/tiny/s0/local/avg.sh
@ -1,23 +0,0 @@
 #! /usr/bin/env bash
 # Usage: avg.sh ckpt_dir avg_num
 #
 # Runs ${MAIN_ROOT}/utils/avg_model.py with --ckpt_dir ckpt_dir, --num avg_num
 # and --val_best, asking it to write ckpt_dir/avg_<avg_num>.pdparams.
 # NOTE(review): presumably this averages the avg_num best-validation
 # checkpoints -- confirm against avg_model.py.
 # MAIN_ROOT must already be set in the environment.
 # Exit status: 255 (bash's value for `exit -1`) on a wrong argument count,
 # 1 if avg_model.py fails, 0 otherwise.
 if [ "$#" -ne 2 ]; then
    echo "usage: ${0} ckpt_dir avg_num"
    exit -1
 fi
 ckpt_dir=${1}
 average_num=${2}
 decode_checkpoint="${ckpt_dir}/avg_${average_num}.pdparams"
 # Every expansion is quoted so a checkpoint path containing whitespace or
 # glob characters reaches avg_model.py as a single argument.
 if ! python3 -u "${MAIN_ROOT}/utils/avg_model.py" \
    --dst_model "${decode_checkpoint}" \
    --ckpt_dir "${ckpt_dir}" \
    --num "${average_num}" \
    --val_best; then
    echo "Failed in avg ckpt!"
    exit 1
 fi
 exit 0
--- a/examples/tiny/s1/local/avg.sh
+++ b/examples/tiny/s1/local/avg.sh
@ -1,23 +0,0 @@
 #! /usr/bin/env bash
 # Usage: avg.sh ckpt_dir avg_num
 #
 # Runs ${MAIN_ROOT}/utils/avg_model.py with --ckpt_dir ckpt_dir, --num avg_num
 # and --val_best, asking it to write ckpt_dir/avg_<avg_num>.pdparams.
 # NOTE(review): presumably this averages the avg_num best-validation
 # checkpoints -- confirm against avg_model.py.
 # MAIN_ROOT must already be set in the environment.
 # Exit status: 255 (bash's value for `exit -1`) on a wrong argument count,
 # 1 if avg_model.py fails, 0 otherwise.
 if [ "$#" -ne 2 ]; then
    echo "usage: ${0} ckpt_dir avg_num"
    exit -1
 fi
 ckpt_dir=${1}
 average_num=${2}
 decode_checkpoint="${ckpt_dir}/avg_${average_num}.pdparams"
 # Every expansion is quoted so a checkpoint path containing whitespace or
 # glob characters reaches avg_model.py as a single argument.
 if ! python3 -u "${MAIN_ROOT}/utils/avg_model.py" \
    --dst_model "${decode_checkpoint}" \
    --ckpt_dir "${ckpt_dir}" \
    --num "${average_num}" \
    --val_best; then
    echo "Failed in avg ckpt!"
    exit 1
 fi
 exit 0
--- a/examples/aishell/s1/local/avg.sh
+++ b/examples/aishell/s1/local/avg.sh
@ -9,7 +9,7 @@ ckpt_dir=${1}
 average_num=${2}
 decode_checkpoint=${ckpt_dir}/avg_${average_num}.pdparams
-python3 -u ${MAIN_ROOT}/utils/avg_model.py \
+avg_model.py \
 --dst_model ${decode_checkpoint} \
 --ckpt_dir ${ckpt_dir}  \
 --num ${average_num} \
--- a/utils/avg_model.py
+++ b/utils/avg_model.py
@ -1,3 +1,4 @@
 #!/usr/bin/env python3
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
--- a/utils/build_vocab.py
+++ b/utils/build_vocab.py
@ -1,3 +1,4 @@
 #!/usr/bin/env python3
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
--- a/utils/compute_mean_std.py
+++ b/utils/compute_mean_std.py
@ -1,3 +1,4 @@
 #!/usr/bin/env python3
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
--- a/utils/format_data.py
+++ b/utils/format_data.py
@ -1,3 +1,4 @@
 #!/usr/bin/env python3
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
--- a/utils/spm_decode
+++ b/utils/spm_decode
@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # Copyright (c) Facebook, Inc. and its affiliates.
 # All rights reserved.
 #
--- a/utils/spm_encode
+++ b/utils/spm_encode
@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # Copyright (c) Facebook, Inc. and its affiliates.
 # All rights reserved.
 #