Merge pull request #1931 from Jackwaterveg/cli_2

[CLI] add calc CER in cli
pull/1935/head
Hui Zhang 3 years ago committed by GitHub
commit 46645285f1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -55,6 +55,7 @@ args = parser.parse_args()
def create_manifest(data_dir, manifest_path_prefix): def create_manifest(data_dir, manifest_path_prefix):
print("Creating manifest %s ..." % manifest_path_prefix) print("Creating manifest %s ..." % manifest_path_prefix)
json_lines = [] json_lines = []
reference_lines = []
transcript_path = os.path.join(data_dir, 'transcript', transcript_path = os.path.join(data_dir, 'transcript',
'aishell_transcript_v0.8.txt') 'aishell_transcript_v0.8.txt')
transcript_dict = {} transcript_dict = {}
@@ -88,6 +89,7 @@ def create_manifest(data_dir, manifest_path_prefix):
duration = float(len(audio_data) / samplerate) duration = float(len(audio_data) / samplerate)
text = transcript_dict[audio_id] text = transcript_dict[audio_id]
json_lines.append(audio_path) json_lines.append(audio_path)
reference_lines.append(str(total_num+1) + "\t" + text)
total_sec += duration total_sec += duration
total_text += len(text) total_text += len(text)
@@ -98,6 +100,10 @@ def create_manifest(data_dir, manifest_path_prefix):
for line in json_lines: for line in json_lines:
fout.write(line + '\n') fout.write(line + '\n')
with codecs.open(manifest_path + ".text", 'w', 'utf-8') as fout:
for line in reference_lines:
fout.write(line + '\n')
manifest_dir = os.path.dirname(manifest_path_prefix) manifest_dir = os.path.dirname(manifest_path_prefix)
def prepare_dataset(url, md5sum, target_dir, manifest_path=None): def prepare_dataset(url, md5sum, target_dir, manifest_path=None):

@@ -3,6 +3,10 @@
source path.sh source path.sh
stage=-1 stage=-1
stop_stage=100 stop_stage=100
model_name=conformer_online_aishell
gpus=5
log_file=res.log
res_file=res.rsl
MAIN_ROOT=../../.. MAIN_ROOT=../../..
. ${MAIN_ROOT}/utils/parse_options.sh || exit -1; . ${MAIN_ROOT}/utils/parse_options.sh || exit -1;
@@ -20,9 +24,16 @@ if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
echo "Prepare Aishell failed. Terminated." echo "Prepare Aishell failed. Terminated."
exit 1 exit 1
fi fi
fi fi
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
cat data/manifest.test | paddlespeech asr --model conformer_online_aishell --device gpu --decode_method ctc_prefix_beam_search --rtf -v export CUDA_VISIBLE_DEVICES=${gpus}
cat data/manifest.test | paddlespeech asr --model ${model_name} --device gpu --decode_method attention_rescoring --rtf -v &> ${log_file}
fi
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
cat ${log_file} | grep "^[0-9]" > ${res_file}
python utils/compute-wer.py --char=1 --v=1 \
data/manifest.test.text ${res_file} > ${res_file}.error
fi fi

@@ -0,0 +1 @@
../../../utils
Loading…
Cancel
Save