From 2ae94bd277ec3e5e5c74ba25f66f20d7cf102007 Mon Sep 17 00:00:00 2001
From: tianhao zhang <15600919271@163.com>
Date: Wed, 12 Oct 2022 01:50:04 +0000
Subject: [PATCH] set freeze_wav2vec2=True, report running average loss and update README.md

---
 examples/librispeech/asr3/README.md         |  6 ++++++
 .../librispeech/asr3/conf/wav2vec2ASR.yaml  |  2 +-
 paddlespeech/s2t/exps/wav2vec2/model.py     | 24 +++++++++++++++++--
 3 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/examples/librispeech/asr3/README.md b/examples/librispeech/asr3/README.md
index bd96af86..f99beb33 100644
--- a/examples/librispeech/asr3/README.md
+++ b/examples/librispeech/asr3/README.md
@@ -88,6 +88,12 @@ data/
 |-- test.meta
 `-- train.meta
 ```
+
+Stage 0 also downloads the pre-trained [wav2vec2](https://paddlespeech.bj.bcebos.com/wav2vec/wav2vec2-large-960h-lv60-self.pdparams) model.
+```bash
+mkdir -p exp/wav2vec2
+wget -P exp/wav2vec2 https://paddlespeech.bj.bcebos.com/wav2vec/wav2vec2-large-960h-lv60-self.pdparams
+```
 ## Stage 1: Model Training
 If you want to train the model. you can use stage 1 in `run.sh`. The code is shown below.
 ```bash
diff --git a/examples/librispeech/asr3/conf/wav2vec2ASR.yaml b/examples/librispeech/asr3/conf/wav2vec2ASR.yaml
index 63f5d37c..b19881b7 100644
--- a/examples/librispeech/asr3/conf/wav2vec2ASR.yaml
+++ b/examples/librispeech/asr3/conf/wav2vec2ASR.yaml
@@ -1,7 +1,7 @@
 ############################################
 # Network Architecture                     #
 ############################################
-freeze_wav2vec2: False
+freeze_wav2vec2: True
 normalize_wav: True
 output_norm: True
 dnn_blocks: 2
diff --git a/paddlespeech/s2t/exps/wav2vec2/model.py b/paddlespeech/s2t/exps/wav2vec2/model.py
index d845d8c6..de4c895f 100644
--- a/paddlespeech/s2t/exps/wav2vec2/model.py
+++ b/paddlespeech/s2t/exps/wav2vec2/model.py
@@ -48,6 +48,24 @@ class Wav2Vec2ASRTrainer(Trainer):
         super().__init__(config, args)
         self.avg_train_loss = 0
 
+    def update_average(self, batch_index, loss, avg_loss):
+        """Update the running average of the loss.
+        Arguments
+        ---------
+        loss : paddle.Tensor
+            detached loss, a single float value.
+        avg_loss : float
+            current running average.
+        Returns
+        -------
+        avg_loss : float
+            the updated running average.
+        """
+        if paddle.isfinite(loss):
+            avg_loss -= avg_loss / (batch_index + 1)
+            avg_loss += float(loss) / (batch_index + 1)
+        return avg_loss
+
     def train_batch(self, batch_index, batch, msg):
         train_conf = self.config
         start = time.time()
@@ -59,11 +77,11 @@ class Wav2Vec2ASRTrainer(Trainer):
         wav = wav[:, :, 0]
         wav = self.speech_augmentation(wav, wavs_lens_rate)
         loss = self.model(wav, wavs_lens_rate, target, target_lens_rate)
-        # pring(wav, wavs_lens_rate, target, target_lens_rate)
 
         # loss div by `batch_size * accum_grad`
         loss /= train_conf.accum_grad
-        losses_np = {'loss': float(loss) * train_conf.accum_grad}
+        self.avg_train_loss = self.update_average(batch_index, loss,
+                                                   self.avg_train_loss)
 
         # loss backward
         if (batch_index + 1) % train_conf.accum_grad != 0:
@@ -87,6 +105,8 @@
             self.optimizer.clear_grad()
             self.lr_scheduler.step()
             self.iteration += 1
+
+        losses_np = {'loss': float(self.avg_train_loss) * train_conf.accum_grad}
         iteration_time = time.time() - start
         for k, v in losses_np.items():
             report(k, v)
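
The `update_average` helper added above is the standard incremental mean: for zero-based `batch_index` n, `avg -= avg/(n+1); avg += loss/(n+1)` is algebraically `(n*avg + loss)/(n+1)`. Below is a minimal, dependency-free sketch of the same update for illustration only; it uses plain floats and `math.isfinite` in place of `paddle.isfinite`, and the standalone function is hypothetical, not part of the patched module.

```python
import math

def update_average(batch_index: int, loss: float, avg_loss: float) -> float:
    """Incremental mean of per-batch losses; non-finite losses are skipped.

    For finite loss, equivalent to
    (batch_index * avg_loss + loss) / (batch_index + 1).
    """
    if math.isfinite(loss):
        avg_loss -= avg_loss / (batch_index + 1)
        avg_loss += loss / (batch_index + 1)
    return avg_loss

# Averaging three batch losses yields their arithmetic mean.
avg = 0.0
for i, batch_loss in enumerate([2.0, 4.0, 6.0]):
    avg = update_average(i, batch_loss, avg)
print(avg)  # 4.0
```

Because `train_batch` divides each loss by `train_conf.accum_grad` before averaging, the patch multiplies the reported average back by `accum_grad` when building `losses_np`, so the logged `loss` stays on the per-batch scale.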