diff --git a/examples/aishell/asr3/conf/wav2vec2ASR.yaml b/examples/aishell/asr3/conf/wav2vec2ASR.yaml index 48fe38d73..9bf74bcea 100755 --- a/examples/aishell/asr3/conf/wav2vec2ASR.yaml +++ b/examples/aishell/asr3/conf/wav2vec2ASR.yaml @@ -147,14 +147,12 @@ wav2vec2_optim_conf: lr: 0.0001 weight_decay: 0.0 - - -model_scheduler: newbobscheduler +model_scheduler: newbobscheduler model_scheduler_conf: improvement_threshold: 0.0025 annealing_factor: 0.8 patient: 0 -wav2vec2_scheduler: newbobscheduler +wav2vec2_scheduler: newbobscheduler wav2vec2_scheduler_conf: improvement_threshold: 0.0025 annealing_factor: 0.9 diff --git a/examples/aishell/asr3/local/test.sh b/examples/aishell/asr3/local/test.sh index 87396bf5b..9d4b84291 100755 --- a/examples/aishell/asr3/local/test.sh +++ b/examples/aishell/asr3/local/test.sh @@ -54,30 +54,30 @@ for type in ctc_greedy_search; do echo "decoding ${type} done." done -# for type in ctc_prefix_beam_search; do -# echo "decoding ${type}" -# batch_size=1 -# python3 -u ${BIN_DIR}/test.py \ -# --ngpu ${ngpu} \ -# --config ${config_path} \ -# --decode_cfg ${decode_config_path} \ -# --result_file ${ckpt_prefix}.${type}.rsl \ -# --checkpoint_path ${ckpt_prefix} \ -# --opts decode.decoding_method ${type} \ -# --opts decode.decode_batch_size ${batch_size} - -# if [ $? -ne 0 ]; then -# echo "Failed in evaluation!" -# exit 1 -# fi -# python3 utils/format_rsl.py \ -# --origin_hyp ${ckpt_prefix}.${type}.rsl \ -# --trans_hyp ${ckpt_prefix}.${type}.rsl.text - -# python3 utils/compute-wer.py --char=1 --v=1 \ -# data/manifest.test-clean.text ${ckpt_prefix}.${type}.rsl.text > ${ckpt_prefix}.${type}.error -# echo "decoding ${type} done." -# done +for type in ctc_prefix_beam_search; do + echo "decoding ${type}" + batch_size=1 + python3 -u ${BIN_DIR}/test.py \ + --ngpu ${ngpu} \ + --config ${config_path} \ + --decode_cfg ${decode_config_path} \ + --result_file ${ckpt_prefix}.${type}.rsl \ + --checkpoint_path ${ckpt_prefix} \ + --opts decode.decoding_method ${type} \ + --opts decode.decode_batch_size ${batch_size} + + if [ $? -ne 0 ]; then + echo "Failed in evaluation!" + exit 1 + fi + python3 utils/format_rsl.py \ + --origin_hyp ${ckpt_prefix}.${type}.rsl \ + --trans_hyp ${ckpt_prefix}.${type}.rsl.text + + python3 utils/compute-wer.py --char=1 --v=1 \ + data/manifest.test-clean.text ${ckpt_prefix}.${type}.rsl.text > ${ckpt_prefix}.${type}.error + echo "decoding ${type} done." +done echo "Finished" diff --git a/examples/aishell/asr3/run.sh b/examples/aishell/asr3/run.sh index 980d8211b..d6d78d3f7 100755 --- a/examples/aishell/asr3/run.sh +++ b/examples/aishell/asr3/run.sh @@ -11,7 +11,7 @@ conf_path=conf/wav2vec2ASR.yaml ips= #xx.xx.xx.xx,xx.xx.xx.xx decode_conf_path=conf/tuning/decode.yaml avg_num=1 -resume=20 # xx e.g. 30 +resume= # xx e.g. 30 export FLAGS_cudnn_deterministic=1 . ${MAIN_ROOT}/utils/parse_options.sh || exit 1; @@ -38,11 +38,11 @@ fi if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then # greedy search decoder - CUDA_VISIBLE_DEVICES=4 ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1 + CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1 fi if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then # test a single .wav file - CUDA_VISIBLE_DEVICES=${gpus} ./local/test_wav.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${audio_file} || exit -1 + CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${audio_file} || exit -1 fi diff --git a/paddlespeech/s2t/exps/wav2vec2/model.py b/paddlespeech/s2t/exps/wav2vec2/model.py index ce1a8be2c..e3c4dc86b 100755 --- a/paddlespeech/s2t/exps/wav2vec2/model.py +++ b/paddlespeech/s2t/exps/wav2vec2/model.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,14 +17,12 @@ import math import os import re import time -from collections import defaultdict from collections import OrderedDict from contextlib import nullcontext import jsonlines import numpy as np import paddle -import tqdm import transformers from hyperpyyaml import load_hyperpyyaml from paddle import distributed as dist @@ -142,7 +140,6 @@ class Wav2Vec2ASRTrainer(Trainer): self.avg_train_loss = 0.0 self.flag = False self.use_sb = True - self.epoch = 1 def update_average(self, batch_index, loss): """Update running average of the loss. diff --git a/paddlespeech/s2t/models/wav2vec2/io/data_pipeline.py b/paddlespeech/s2t/models/wav2vec2/io/data_pipeline.py index 7af87ee2f..53c050c8d 100755 --- a/paddlespeech/s2t/models/wav2vec2/io/data_pipeline.py +++ b/paddlespeech/s2t/models/wav2vec2/io/data_pipeline.py @@ -110,29 +110,6 @@ class GeneratorDynamicItem(DynamicItem): The main benefit is to be able to define the pipeline in a clear function, even if parts of the pipeline depend on others for their initialization. - Example - ------- - >>> lab2ind = {} - >>> def text_pipeline(text): - ... text = text.lower().strip() - ... text = "".join(c for c in text if c.isalpha() or c == " ") - ... words = text.split() - ... yield words - ... encoded = [lab2ind[word] for word in words] - ... yield encoded - >>> item = GeneratorDynamicItem( - ... func=text_pipeline, - ... takes=["text"], - ... provides=["words", "words_encoded"]) - >>> # First create the integer-encoding: - >>> ind = 1 - >>> for token in item("Is this it? - This is it."): - ... if token not in lab2ind: - ... lab2ind[token] = ind - ... ind += 1 - >>> # Now the integers can be encoded! - >>> item() - [1, 2, 3, 2, 1, 3] """ def __init__(self, *args, **kwargs): diff --git a/paddlespeech/s2t/models/wav2vec2/io/sampler.py b/paddlespeech/s2t/models/wav2vec2/io/sampler.py index 9795df2d6..c83998fff 100755 --- a/paddlespeech/s2t/models/wav2vec2/io/sampler.py +++ b/paddlespeech/s2t/models/wav2vec2/io/sampler.py @@ -112,8 +112,6 @@ class ReproducibleWeightedRandomSampler(WeightedRandomSampler): to use a value which has a good mix of 0 and 1 bits. epoch : int The epoch to start at. - - """ def __init__( diff --git a/paddlespeech/s2t/models/wav2vec2/processing/speech_augmentation.py b/paddlespeech/s2t/models/wav2vec2/processing/speech_augmentation.py index 614124c26..5482ed561 100644 --- a/paddlespeech/s2t/models/wav2vec2/processing/speech_augmentation.py +++ b/paddlespeech/s2t/models/wav2vec2/processing/speech_augmentation.py @@ -83,7 +83,6 @@ class SpeedPerturb(nn.Layer): "new_freq": self.orig_freq * speed // 100, } self.resamplers.append(Resample(**config)) - paddle.seed(2) def forward(self, waveform): """ @@ -465,8 +464,6 @@ class DropFreq(nn.Layer): low=self.drop_count_low, high=self.drop_count_high + 1, shape=(1, ), ) - ##对齐固定drop_count - # drop_count = paddle.to_tensor([2]) # Filter parameters filter_length = 101 @@ -481,8 +478,6 @@ class DropFreq(nn.Layer): drop_range = self.drop_freq_high - self.drop_freq_low drop_frequency = ( paddle.rand(drop_count) * drop_range + self.drop_freq_low) - #对齐固定drop_frequency - # drop_frequency = torch.tensor([0.8102, 0.7742]) # Subtract each frequency for frequency in drop_frequency: notch_kernel = notch_filter( @@ -752,8 +747,7 @@ class SpecAugment(paddle.nn.Layer): # compute center and corresponding window c = paddle.randint(window, time - window, (1, ))[0] w = paddle.randint(c - window, c + window, (1, ))[0] + 1 - # c = 5 - # w = 10 + left = paddle.nn.functional.interpolate( x[:, :, :c], (w, x.shape[3]), diff --git a/paddlespeech/s2t/models/wav2vec2/wav2vec2_ASR.py b/paddlespeech/s2t/models/wav2vec2/wav2vec2_ASR.py index 6c0088668..f91a41c32 100755 --- a/paddlespeech/s2t/models/wav2vec2/wav2vec2_ASR.py +++ b/paddlespeech/s2t/models/wav2vec2/wav2vec2_ASR.py @@ -88,7 +88,7 @@ class Wav2vec2ASR(nn.Layer): if decoding_method == 'ctc_prefix_beam_search' and batch_size > 1: logger.error( - f'decoding mode {decoding_method} must be running with batch_size == 1' + f"decoding mode {decoding_method} must be running with batch_size == 1" ) logger.error(f"current batch_size is {batch_size}") @@ -311,4 +311,4 @@ class Wav2vec2Base(nn.Layer): def forward(self, wav): out = self.wav2vec2(wav) - return + return out