From 6e7c71b26c2f8579ebb15570f1bc86ac6b0c7fa5 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Thu, 1 Jun 2023 10:54:50 +0000 Subject: [PATCH] refactor rhy --- examples/other/rhy/local/pre_for_sp_aishell.py | 3 ++- examples/other/rhy/local/pre_for_sp_csmsc.py | 3 ++- examples/other/rhy/run.sh | 6 ++++-- paddlespeech/text/exps/ernie_linear/train.py | 2 +- 4 files changed, 9 insertions(+), 5 deletions(-) mode change 100644 => 100755 examples/other/rhy/local/pre_for_sp_aishell.py mode change 100644 => 100755 examples/other/rhy/local/pre_for_sp_csmsc.py diff --git a/examples/other/rhy/local/pre_for_sp_aishell.py b/examples/other/rhy/local/pre_for_sp_aishell.py old mode 100644 new mode 100755 index a2a71668..ff0830a5 --- a/examples/other/rhy/local/pre_for_sp_aishell.py +++ b/examples/other/rhy/local/pre_for_sp_aishell.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 import argparse import os import re @@ -8,7 +9,7 @@ replace_ = {"#1": "%", "#2": "`", "#3": "~", "#4": "$"} def replace_rhy_with_punc(line): - # r'[:、,;。?!,.:;"?!”’《》【】<=>{}()()#&@“”^_|…\\]%*$', '', line) #参考checkcheck_oov.py, + # r'[:、,;。?!,.:;"?!”’《》【】<=>{}()()#&@“”^_|…\\]%*$', '', line) #参考check_oov.py, line = re.sub(r'[:、,;。?!,.:;"?!’《》【】<=>{}()()#&@“”^_|…\\]%*$', '', line) for r in replace_.keys(): if r in line: diff --git a/examples/other/rhy/local/pre_for_sp_csmsc.py b/examples/other/rhy/local/pre_for_sp_csmsc.py old mode 100644 new mode 100755 index 0a96092c..8b4f9e1f --- a/examples/other/rhy/local/pre_for_sp_csmsc.py +++ b/examples/other/rhy/local/pre_for_sp_csmsc.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 import argparse import os import re @@ -6,7 +7,7 @@ replace_ = {"#1": "%", "#2": "`", "#3": "~", "#4": "$"} def replace_rhy_with_punc(line): - # r'[:、,;。?!,.:;"?!”’《》【】<=>{}()()#&@“”^_|…\\]%*$', '', line) #参考checkcheck_oov.py, + # r'[:、,;。?!,.:;"?!”’《》【】<=>{}()()#&@“”^_|…\\]%*$', '', line) #参考check_oov.py, line = re.sub(r'^$\*%', '', line) for r in replace_.keys(): if r in line: diff --git a/examples/other/rhy/run.sh b/examples/other/rhy/run.sh index aed58152..d1f56586 100755 --- a/examples/other/rhy/run.sh +++ b/examples/other/rhy/run.sh @@ -6,9 +6,11 @@ gpus=0 stage=0 stop_stage=100 +data=data +mkdir -p $data + aishell_data=label_train-set.txt csmsc_data=000001-010000.txt -processed_path=data conf_path=conf/default.yaml train_output_path=exp/default @@ -23,7 +25,7 @@ source ${MAIN_ROOT}/utils/parse_options.sh || exit 1 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then # prepare data - ./local/data.sh ${aishell_data} ${csmsc_data} ${processed_path} + ./local/data.sh ${aishell_data} ${csmsc_data} ${data} fi if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then diff --git a/paddlespeech/text/exps/ernie_linear/train.py b/paddlespeech/text/exps/ernie_linear/train.py index 22c25e17..f6eab6fe 100644 --- a/paddlespeech/text/exps/ernie_linear/train.py +++ b/paddlespeech/text/exps/ernie_linear/train.py @@ -66,7 +66,7 @@ def train_sp(args, config): seed_everything(config.seed) print( - f"rank: {dist.get_rank()}, pid: {os.getpid()}, parent_pid: {os.getppid()}", + f"rank:{dist.get_rank()}, pid: {os.getpid()}, parent_pid: {os.getppid()}" ) # dataloader has been too verbose logging.getLogger("DataLoader").disabled = True