diff --git a/examples/ted_en_zh/t0/conf/transformer_joint_noam.yaml b/examples/ted_en_zh/t0/conf/transformer_joint_noam.yaml
index f8dc383f0..9c1ac91a9 100644
--- a/examples/ted_en_zh/t0/conf/transformer_joint_noam.yaml
+++ b/examples/ted_en_zh/t0/conf/transformer_joint_noam.yaml
@@ -99,6 +99,7 @@ decoding:
   alpha: 2.5
   beta: 0.3
   beam_size: 10
+  word_reward: 0.7
   cutoff_prob: 1.0
   cutoff_top_n: 0
   num_proc_bsearch: 8
diff --git a/paddlespeech/s2t/exps/u2_st/model.py b/paddlespeech/s2t/exps/u2_st/model.py
index 91390afe5..20bb31f53 100644
--- a/paddlespeech/s2t/exps/u2_st/model.py
+++ b/paddlespeech/s2t/exps/u2_st/model.py
@@ -441,10 +441,7 @@ class U2STTester(U2STTrainer):
             "".join(chr(t) for t in text[:text_len])
             for text, text_len in zip(texts, texts_len)
         ]
-        # from IPython import embed
-        # import os
-        # embed()
-        # os._exit(0)
+
         hyps = self.model.decode(
             audio,
             audio_len,
@@ -458,6 +455,7 @@ class U2STTester(U2STTrainer):
             cutoff_top_n=cfg.cutoff_top_n,
             num_processes=cfg.num_proc_bsearch,
             ctc_weight=cfg.ctc_weight,
+            word_reward=cfg.word_reward,
             decoding_chunk_size=cfg.decoding_chunk_size,
             num_decoding_left_chunks=cfg.num_decoding_left_chunks,
             simulate_streaming=cfg.simulate_streaming)
diff --git a/paddlespeech/s2t/models/u2_st/u2_st.py b/paddlespeech/s2t/models/u2_st/u2_st.py
index 76c50150e..a83e67078 100644
--- a/paddlespeech/s2t/models/u2_st/u2_st.py
+++ b/paddlespeech/s2t/models/u2_st/u2_st.py
@@ -315,6 +315,7 @@ class U2STBaseModel(nn.Layer):
             speech: paddle.Tensor,
             speech_lengths: paddle.Tensor,
             beam_size: int=10,
+            word_reward: float=0.0,
             decoding_chunk_size: int=-1,
             num_decoding_left_chunks: int=-1,
             simulate_streaming: bool=False, ) -> paddle.Tensor:
@@ -378,6 +379,7 @@ class U2STBaseModel(nn.Layer):
             # 2.2 First beam prune: select topk best prob at current time
             top_k_logp, top_k_index = logp.topk(beam_size)  # (B*N, N)
+            top_k_logp += word_reward
             top_k_logp = mask_finished_scores(top_k_logp, end_flag)
             top_k_index = mask_finished_preds(top_k_index, end_flag, self.eos)
@@ -528,6 +530,7 @@ class U2STBaseModel(nn.Layer):
                cutoff_top_n: int,
                num_processes: int,
                ctc_weight: float=0.0,
+               word_reward: float=0.0,
                decoding_chunk_size: int=-1,
                num_decoding_left_chunks: int=-1,
                simulate_streaming: bool=False):
@@ -569,6 +572,7 @@ class U2STBaseModel(nn.Layer):
                 feats,
                 feats_lengths,
                 beam_size=beam_size,
+                word_reward=word_reward,
                 decoding_chunk_size=decoding_chunk_size,
                 num_decoding_left_chunks=num_decoding_left_chunks,
                 simulate_streaming=simulate_streaming)
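
The new `word_reward` is added to `top_k_logp` at every decoding step, so each emitted token receives a constant score bonus. This offsets beam search's built-in bias toward short hypotheses, since every extra token otherwise only adds a negative log-probability to the cumulative score. The standalone sketch below (not part of the patch; hypothesis scores are made up) shows how a per-token reward of 0.7, the value set in the YAML above, can let a longer hypothesis outrank a shorter one.

# Standalone illustration: beam-search scores are sums of per-token
# log-probs, so shorter hypotheses tend to win. A constant reward per
# emitted token (as the patched translate() applies via
# `top_k_logp += word_reward`) counteracts that bias.

def hyp_score(step_logps, word_reward=0.0):
    # Cumulative score: log-prob of each token plus the per-token reward.
    return sum(lp + word_reward for lp in step_logps)

short_hyp = [-0.5, -0.6]               # 2 tokens
long_hyp = [-0.6, -0.5, -0.6, -0.5]    # 4 tokens

for reward in (0.0, 0.7):
    s = hyp_score(short_hyp, reward)
    l = hyp_score(long_hyp, reward)
    winner = "short" if s > l else "long"
    print(f"word_reward={reward}: short={s:.2f} long={l:.2f} -> {winner} wins")

With word_reward=0.0 the short hypothesis wins purely because it has fewer negative terms; with word_reward=0.7 the longer hypothesis overtakes it.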