diff --git a/tests/unit/cli/aishell_test_prepare.py b/tests/unit/cli/aishell_test_prepare.py index 288de62a0..5088d7a48 100644 --- a/tests/unit/cli/aishell_test_prepare.py +++ b/tests/unit/cli/aishell_test_prepare.py @@ -55,6 +55,7 @@ args = parser.parse_args() def create_manifest(data_dir, manifest_path_prefix): print("Creating manifest %s ..." % manifest_path_prefix) json_lines = [] + reference_lines = [] transcript_path = os.path.join(data_dir, 'transcript', 'aishell_transcript_v0.8.txt') transcript_dict = {} @@ -88,6 +89,7 @@ def create_manifest(data_dir, manifest_path_prefix): duration = float(len(audio_data) / samplerate) text = transcript_dict[audio_id] json_lines.append(audio_path) + reference_lines.append(str(total_num+1) + "\t" + text) total_sec += duration total_text += len(text) @@ -98,6 +100,10 @@ def create_manifest(data_dir, manifest_path_prefix): for line in json_lines: fout.write(line + '\n') + with codecs.open(manifest_path + ".text", 'w', 'utf-8') as fout: + for line in reference_lines: + fout.write(line + '\n') + manifest_dir = os.path.dirname(manifest_path_prefix) def prepare_dataset(url, md5sum, target_dir, manifest_path=None): diff --git a/tests/unit/cli/calc_rtf_by_aishell.sh b/tests/unit/cli/calc_RTF_CER_by_aishell.sh similarity index 54% rename from tests/unit/cli/calc_rtf_by_aishell.sh rename to tests/unit/cli/calc_RTF_CER_by_aishell.sh index cee79160e..a5a1a77c1 100644 --- a/tests/unit/cli/calc_rtf_by_aishell.sh +++ b/tests/unit/cli/calc_RTF_CER_by_aishell.sh @@ -3,6 +3,10 @@ source path.sh stage=-1 stop_stage=100 +model_name=conformer_online_aishell +gpus=5 +log_file=res.log +res_file=res.rsl MAIN_ROOT=../../.. . ${MAIN_ROOT}/utils/parse_options.sh || exit -1; @@ -20,9 +24,16 @@ if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then echo "Prepare Aishell failed. Terminated." exit 1 fi - fi + if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then - cat data/manifest.test | paddlespeech asr --model conformer_online_aishell --device gpu --decode_method ctc_prefix_beam_search --rtf -v + export CUDA_VISIBLE_DEVICES=${gpus} + cat data/manifest.test | paddlespeech asr --model ${model_name} --device gpu --decode_method attention_rescoring --rtf -v &> ${log_file} +fi + +if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then + cat ${log_file} | grep "^[0-9]" > ${res_file} + python utils/compute-wer.py --char=1 --v=1 \ + data/manifest.test.text ${res_file} > ${res_file}.error fi diff --git a/tests/unit/cli/utils b/tests/unit/cli/utils new file mode 120000 index 000000000..973afe674 --- /dev/null +++ b/tests/unit/cli/utils @@ -0,0 +1 @@ +../../../utils \ No newline at end of file