From 7cef93a6f4a9a4d3f2e66a8ae4343b51221a08cd Mon Sep 17 00:00:00 2001
From: gongel
Date: Tue, 23 Nov 2021 07:53:47 +0000
Subject: [PATCH] refactor: update

---
 examples/ted_en_zh/t1/README.md               | 15 +++++++------
 ...nt_noam.yaml => transformer_mtl_noam.yaml} |  0
 .../t1/local/convert_torch_to_paddle.py       |  9 ++++++++
 .../ted_en_zh/t1/local/download_pretrain.sh   | 19 ++++++++++++++++
 .../t1/local/{train.sh => train_finetune.sh}  |  0
 examples/ted_en_zh/t1/run.sh                  | 22 +++++++++----------
 6 files changed, 47 insertions(+), 18 deletions(-)
 rename examples/ted_en_zh/t1/conf/{transformer_joint_noam.yaml => transformer_mtl_noam.yaml} (100%)
 create mode 100755 examples/ted_en_zh/t1/local/download_pretrain.sh
 rename examples/ted_en_zh/t1/local/{train.sh => train_finetune.sh} (100%)

diff --git a/examples/ted_en_zh/t1/README.md b/examples/ted_en_zh/t1/README.md
index 66a5dbec..e8aed53e 100644
--- a/examples/ted_en_zh/t1/README.md
+++ b/examples/ted_en_zh/t1/README.md
@@ -3,13 +3,14 @@
 
 ## Dataset
 
-| Data Subset | Duration in Seconds |
+| Data Subset | Duration in Frames |
 | --- | --- |
-| data/manifest.train | 0.942 ~ 60 |
-| data/manifest.dev | 1.151 ~ 39 |
-| data/manifest.test | 1.1 ~ 42.746 |
+| data/manifest.train | 94.2 ~ 6000 |
+| data/manifest.dev | 115.1 ~ 3900 |
+| data/manifest.test | 110 ~ 4274.6 |
 
 ## Transformer
-| Model | Params | Config | Char-BLEU |
-| --- | --- | --- | --- |
-| Transformer+ASR MTL | 50.26M | conf/transformer_joint_noam.yaml | 17.38 |
+| Model | Params | Config | Val loss | Char-BLEU |
+| --- | --- | --- | --- | --- |
+| FAT + Transformer+ASR MTL | 50.26M | conf/transformer_mtl_noam.yaml | 62.86 | 19.45 |
+| FAT + Transformer+ASR MTL with word reward | 50.26M | conf/transformer_mtl_noam.yaml | 62.86 | 20.80 |
diff --git a/examples/ted_en_zh/t1/conf/transformer_joint_noam.yaml b/examples/ted_en_zh/t1/conf/transformer_mtl_noam.yaml
similarity index 100%
rename from examples/ted_en_zh/t1/conf/transformer_joint_noam.yaml
rename to examples/ted_en_zh/t1/conf/transformer_mtl_noam.yaml
diff --git a/examples/ted_en_zh/t1/local/convert_torch_to_paddle.py b/examples/ted_en_zh/t1/local/convert_torch_to_paddle.py
index 93fa1595..4f4bfde8 100644
--- a/examples/ted_en_zh/t1/local/convert_torch_to_paddle.py
+++ b/examples/ted_en_zh/t1/local/convert_torch_to_paddle.py
@@ -27,6 +27,7 @@ def torch2paddle(args):
     torch_model = torch.load(args.torch_ckpt, map_location='cpu')
     cnt = 0
     for k, v in torch_model['model'].items():
+        # encoder.embed.* --> encoder.embed.*
         if k.startswith('encoder.embed'):
             if v.ndim == 2:
                 v = v.transpose(0, 1)
@@ -35,6 +36,10 @@
             logger.info(
                 f"Convert torch weight: {k} to paddlepaddle weight: {k}, shape is {v.shape}"
             )
+
+        # encoder.after_norm.* --> encoder.after_norm.*
+        # encoder.after_norm.* --> decoder.after_norm.*
+        # encoder.after_norm.* --> st_decoder.after_norm.*
         if k.startswith('encoder.after_norm'):
             paddle_model_dict[k] = v.numpy()
             cnt += 1
@@ -47,6 +52,10 @@
                 f"Convert torch weight: {k} to paddlepaddle weight: {'st_'+ k.replace('en','de')}, shape is {v.shape}"
             )
             cnt += 2
+
+        # encoder.encoders.* --> encoder.encoders.*
+        # encoder.encoders.* (last six layers) --> decoder.encoders.* (first six layers)
+        # encoder.encoders.* (last six layers) --> st_decoder.encoders.* (first six layers)
         if k.startswith('encoder.encoders'):
             if v.ndim == 2:
                 v = v.transpose(0, 1)
diff --git a/examples/ted_en_zh/t1/local/download_pretrain.sh b/examples/ted_en_zh/t1/local/download_pretrain.sh
new file mode 100755
index 00000000..1ff05ae3
--- /dev/null
+++ b/examples/ted_en_zh/t1/local/download_pretrain.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+# download pytorch weight
+wget https://paddlespeech.bj.bcebos.com/s2t/ted_en_zh/st1/snapshot.ep.98 --no-check-certificate
+
+# convert pytorch weight to paddlepaddle
+python local/convert_torch_to_paddle.py \
+--torch_ckpt snapshot.ep.98 \
+--paddle_ckpt paddle.98.pdparams
+
+# Or you can download converted weights
+# wget https://paddlespeech.bj.bcebos.com/s2t/ted_en_zh/st1/paddle.98.pdparams --no-check-certificate
+
+if [ $? -ne 0 ]; then
+    echo "Failed in downloading and converting!"
+    exit 1
+fi
+
+exit 0
\ No newline at end of file
diff --git a/examples/ted_en_zh/t1/local/train.sh b/examples/ted_en_zh/t1/local/train_finetune.sh
similarity index 100%
rename from examples/ted_en_zh/t1/local/train.sh
rename to examples/ted_en_zh/t1/local/train_finetune.sh
diff --git a/examples/ted_en_zh/t1/run.sh b/examples/ted_en_zh/t1/run.sh
index ddb155ad..f8adf4f6 100755
--- a/examples/ted_en_zh/t1/run.sh
+++ b/examples/ted_en_zh/t1/run.sh
@@ -4,8 +4,8 @@ source path.sh
 
 gpus=0,1,2,3
 stage=1
-stop_stage=100
-conf_path=conf/transformer_joint_noam.yaml
+stop_stage=4
+conf_path=conf/transformer_mtl_noam.yaml
 ckpt_path=paddle.98
 avg_num=5
 data_path=./TED_EnZh # path to unzipped data
@@ -22,21 +22,21 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
 fi
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
-    # train model, all `ckpt` under `exp` dir
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ckpt_path}
+    # download pretrained
+    bash ./local/download_pretrain.sh || exit -1
 fi
 
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
-    # avg n best model
-    avg.sh best exp/${ckpt}/checkpoints ${avg_num}
+    # train model, all `ckpt` under `exp` dir
+    CUDA_VISIBLE_DEVICES=${gpus} ./local/train_finetune.sh ${conf_path} ${ckpt} ${ckpt_path}
 fi
 
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
-    # test ckpt avg_n
-    CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
+    # avg n best model
+    avg.sh best exp/${ckpt}/checkpoints ${avg_num}
 fi
 
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
-    # export ckpt avg_n
-    CUDA_VISIBLE_DEVICES= ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit
-fi
+    # test ckpt avg_n
+    CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
+fi
\ No newline at end of file
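For reference, the key mapping that the new comments in local/convert_torch_to_paddle.py describe can be sketched as follows. This is a minimal illustration under stated assumptions, not the project's script: the 12-layer encoder size, the helper names map_torch_key / torch2paddle_sketch, and the blanket transpose of every 2-D tensor are assumptions, whereas the actual script only transposes encoder.embed.* and encoder.encoders.* weights and logs each converted key.

```python
# Illustrative sketch of the weight renaming described by the patch comments.
# Assumption: encoder has 12 layers, so the "last six" start at index 6.
import re

import paddle
import torch


def map_torch_key(k: str, num_encoder_layers: int = 12):
    """Return the paddle keys that the torch key `k` should populate."""
    targets = [k]  # encoder.* --> encoder.*
    if k.startswith('encoder.after_norm'):
        # shared final norm is also copied to decoder and st_decoder
        targets += [k.replace('encoder', 'decoder', 1),
                    k.replace('encoder', 'st_decoder', 1)]
    elif k.startswith('encoder.encoders'):
        layer = int(re.match(r'encoder\.encoders\.(\d+)\.', k).group(1))
        first_mapped = num_encoder_layers - 6
        if layer >= first_mapped:
            # last six encoder layers --> first six decoder/st_decoder layers
            suffix = k.split('.', 3)[3]  # part after "encoder.encoders.<i>."
            new_i = layer - first_mapped
            targets += [f'decoder.encoders.{new_i}.{suffix}',
                        f'st_decoder.encoders.{new_i}.{suffix}']
    return targets


def torch2paddle_sketch(torch_ckpt: str, paddle_ckpt: str):
    torch_model = torch.load(torch_ckpt, map_location='cpu')
    paddle_state = {}
    for k, v in torch_model['model'].items():
        if v.ndim == 2:
            # torch Linear stores (out, in); paddle expects the transpose
            v = v.transpose(0, 1)
        for new_k in map_torch_key(k):
            paddle_state[new_k] = v.numpy()
    paddle.save(paddle_state, paddle_ckpt)
```

Duplicating the shared after_norm and the last six encoder layers into decoder.* and st_decoder.* presumably lets both the ASR and ST decoders start from pretrained weights before fine-tuning with local/train_finetune.sh in stage 2 of run.sh.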