# PaddleSpeech/examples/other/1xt2x/src_deepspeech2x/test_model.py

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains DeepSpeech2 and DeepSpeech2Online model."""
import time
from collections import defaultdict
from contextlib import nullcontext

import numpy as np
import paddle
from paddle import distributed as dist
from paddle.io import DataLoader
from src_deepspeech2x.models.ds2 import DeepSpeech2InferModel
from src_deepspeech2x.models.ds2 import DeepSpeech2Model

from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
from paddlespeech.s2t.io.collator import SpeechCollator
from paddlespeech.s2t.io.dataset import ManifestDataset
from paddlespeech.s2t.io.sampler import SortagradBatchSampler
from paddlespeech.s2t.io.sampler import SortagradDistributedBatchSampler
from paddlespeech.s2t.models.ds2_online import DeepSpeech2InferModelOnline
from paddlespeech.s2t.models.ds2_online import DeepSpeech2ModelOnline
from paddlespeech.s2t.training.gradclip import ClipGradByGlobalNormWithLog
from paddlespeech.s2t.training.trainer import Trainer
from paddlespeech.s2t.utils import error_rate
from paddlespeech.s2t.utils import layer_tools
from paddlespeech.s2t.utils import mp_tools
from paddlespeech.s2t.utils.log import Log

logger = Log(__name__).getlog()


class DeepSpeech2Trainer(Trainer):
    def __init__(self, config, args):
        """Initialize the trainer; all setup is delegated to the base ``Trainer``.

        Args:
            config: Configuration object, forwarded unchanged to the base class.
            args: Run arguments object, forwarded unchanged to the base class.
        """
        super().__init__(config, args)

    def train_batch(self, batch_index, batch_data, msg):
        """Run forward/backward on one batch and step the optimizer when due.

        Gradients are accumulated over ``config.accum_grad`` consecutive
        batches. The optimizer is stepped, its gradients cleared and
        ``self.iteration`` incremented only when
        ``(batch_index + 1) % config.accum_grad == 0``.

        Args:
            batch_index: Index of the batch; used only to decide whether this
                call is an accumulation step or an update step.
            batch_data: ``(utt, audio, audio_len, text, text_len)``; ``utt``
                is not used here.
            msg: Log-line prefix. Timing, batch size, accumulation factor and
                the loss are appended to it before it is logged.
        """
        train_conf = self.config
        start = time.time()

        # forward
        _, audio, audio_len, text, text_len = batch_data
        loss = self.model(audio, audio_len, text, text_len)
        losses_np = {
            'train_loss': float(loss),
        }

        is_update_step = (batch_index + 1) % train_conf.accum_grad == 0

        # loss backward
        if is_update_step:
            # Used for single gpu training and DDP gradient synchronization
            # processes.
            context = nullcontext
        else:
            # Disable gradient synchronizations across DDP processes.
            # Within this context, gradients will be accumulated on module
            # variables, which will later be synchronized.
            # `no_sync` is provided by the paddle.DataParallel wrapper, which
            # setup_model only applies when `self.parallel` is set; a model
            # without it needs no synchronization control, so fall back to a
            # no-op context instead of raising AttributeError.
            context = getattr(self.model, 'no_sync', nullcontext)

        with context():
            loss.backward()
            layer_tools.print_grads(self.model, print_func=None)

        # optimizer step
        if is_update_step:
            self.optimizer.step()
            self.optimizer.clear_grad()
            self.iteration += 1

        iteration_time = time.time() - start

        msg += "train time: {:>.3f}s, ".format(iteration_time)
        msg += "batch size: {}, ".format(self.config.batch_size)
        msg += "accum: {}, ".format(train_conf.accum_grad)
        msg += ', '.join('{}: {:>.6f}'.format(k, v)
                         for k, v in losses_np.items())
        logger.info(msg)

        if dist.get_rank() == 0 and self.visualizer:
            for k, v in losses_np.items():
                # `step -1` since we update `step` after optimizer.step().
                self.visualizer.add_scalar("train/{}".format(k), v,
                                           self.iteration - 1)

    @paddle.no_grad()
    def valid(self):
        """Evaluate the model on the validation loader.

        The model is switched to eval mode. Batches whose loss is not finite
        are excluded from every statistic. A progress line is logged every
        ``config.log_interval`` batches and a summary line at the end.

        Returns:
            tuple: ``(total_loss, num_seen_utts)``. ``total_loss`` is the sum
            over finite batches of ``loss * batch_size``. ``num_seen_utts`` is
            the number of utterances in those batches plus one, because the
            counter starts at 1.
        """
        logger.info(f"Valid Total Examples: {len(self.valid_loader.dataset)}")
        self.model.eval()

        loss_history = defaultdict(list)
        # The counter starts at 1, not 0, so the averages below can never
        # divide by zero even when every batch is skipped.
        seen_utts = 1
        loss_sum = 0.0
        for batch_no, batch in enumerate(self.valid_loader, start=1):
            _, audio, audio_len, text, text_len = batch
            loss = self.model(audio, audio_len, text, text_len)
            if paddle.isfinite(loss):
                batch_utts = batch[1].shape[0]
                loss_value = float(loss)
                seen_utts += batch_utts
                loss_sum += loss_value * batch_utts
                loss_history['val_loss'].append(loss_value)

            if batch_no % self.config.log_interval != 0:
                continue

            summary = {
                name: np.mean(values)
                for name, values in loss_history.items()
            }
            summary['val_history_loss'] = loss_sum / seen_utts

            # logging
            pieces = [
                f"Valid: Rank: {dist.get_rank()}, ",
                "epoch: {}, ".format(self.epoch),
                "step: {}, ".format(self.iteration),
                "batch : {}/{}, ".format(batch_no, len(self.valid_loader)),
                ', '.join('{}: {:>.6f}'.format(name, value)
                          for name, value in summary.items()),
            ]
            logger.info(''.join(pieces))

        logger.info('Rank {} Val info val_loss {}'.format(
            dist.get_rank(), loss_sum / seen_utts))
        return loss_sum, seen_utts

    def setup_model(self):
        """Build the model, LR scheduler and optimizer and store them on self.

        ``feat_size`` and ``dict_size`` are filled in on a clone of
        ``self.config`` from the train loader's collate function. The offline
        or online DeepSpeech2 model is created from that clone and wrapped in
        ``paddle.DataParallel`` when ``self.parallel`` is set. The optimizer
        is Adam with an exponentially decaying learning rate, L2 weight decay
        and global-norm gradient clipping.

        Sets ``self.model``, ``self.optimizer`` and ``self.lr_scheduler``.

        Raises:
            Exception: If ``self.args.model_type`` is neither ``'offline'``
                nor ``'online'``.
        """
        model_conf = self.config.clone()
        model_conf.defrost()
        train_collator = self.train_loader.collate_fn
        model_conf.feat_size = train_collator.feature_size
        # dict_size is the length of vocab_list rather than
        # collate_fn.vocab_size -- presumably because setup_dataloader strips
        # "<eos>" from vocab_list; TODO confirm.
        model_conf.dict_size = len(train_collator.vocab_list)
        model_conf.freeze()

        model_classes = {
            'offline': DeepSpeech2Model,
            'online': DeepSpeech2ModelOnline,
        }
        model_cls = model_classes.get(self.args.model_type)
        if model_cls is None:
            raise Exception("wrong model type")

        network = model_cls.from_config(model_conf)
        if self.parallel:
            network = paddle.DataParallel(network)

        logger.info(f"{network}")
        layer_tools.print_params(network, logger.info)

        clipper = ClipGradByGlobalNormWithLog(model_conf.global_grad_clip)
        scheduler = paddle.optimizer.lr.ExponentialDecay(
            learning_rate=model_conf.lr,
            gamma=model_conf.lr_decay,
            verbose=True)
        adam = paddle.optimizer.Adam(
            learning_rate=scheduler,
            parameters=network.parameters(),
            weight_decay=paddle.regularizer.L2Decay(model_conf.weight_decay),
            grad_clip=clipper)

        self.model = network
        self.optimizer = adam
        self.lr_scheduler = scheduler
        logger.info("Setup model/optimizer/lr_scheduler!")

    def setup_dataloader(self):
        """Create the train, validation and test data loaders.

        Works on a clone of ``self.config``. The train loader uses a
        sortagrad batch sampler (the distributed variant when
        ``self.parallel`` is set). The validation and test loaders iterate in
        order without dropping the last batch. The validation and test
        collators are built with ``augmentation_config`` set to ``""``, and
        the test collator additionally with ``keep_transcription_text`` set
        to ``True``. Finally ``"<eos>"`` is removed from every collator's
        ``vocab_list`` when present.

        Sets ``self.train_loader``, ``self.valid_loader`` and
        ``self.test_loader``.
        """
        loader_conf = self.config.clone()
        loader_conf.defrost()
        loader_conf.keep_transcription_text = False

        # Each dataset is built right after `manifest` is pointed at its split.
        datasets = []
        for manifest_key in ('train_manifest', 'dev_manifest',
                             'test_manifest'):
            loader_conf.manifest = getattr(loader_conf, manifest_key)
            datasets.append(ManifestDataset.from_config(loader_conf))
        train_dataset, dev_dataset, test_dataset = datasets

        sampler_options = dict(
            batch_size=loader_conf.batch_size,
            shuffle=True,
            drop_last=True,
            sortagrad=loader_conf.sortagrad,
            shuffle_method=loader_conf.shuffle_method)
        if self.parallel:
            train_sampler = SortagradDistributedBatchSampler(
                train_dataset, num_replicas=None, rank=None, **sampler_options)
        else:
            train_sampler = SortagradBatchSampler(train_dataset,
                                                  **sampler_options)

        # The collators are created in this order on purpose: each one sees
        # the config as mutated so far.
        train_collator = SpeechCollator.from_config(loader_conf)

        loader_conf.augmentation_config = ""
        dev_collator = SpeechCollator.from_config(loader_conf)

        loader_conf.keep_transcription_text = True
        loader_conf.augmentation_config = ""
        test_collator = SpeechCollator.from_config(loader_conf)

        self.train_loader = DataLoader(
            train_dataset,
            batch_sampler=train_sampler,
            collate_fn=train_collator,
            num_workers=loader_conf.num_workers)
        self.valid_loader = DataLoader(
            dev_dataset,
            batch_size=loader_conf.batch_size,
            shuffle=False,
            drop_last=False,
            collate_fn=dev_collator)
        self.test_loader = DataLoader(
            test_dataset,
            batch_size=loader_conf.decode.decode_batch_size,
            shuffle=False,
            drop_last=False,
            collate_fn=test_collator)

        for loader in (self.test_loader, self.valid_loader,
                       self.train_loader):
            vocab = loader.collate_fn.vocab_list
            if "<eos>" in vocab:
                vocab.remove("<eos>")
        logger.info("Setup train/valid/test  Dataloader!")


class DeepSpeech2Tester(DeepSpeech2Trainer):
    def __init__(self, config, args):
        """Create the tester and the text featurizer used for detokenizing.

        The featurizer is built from ``config.unit_type`` with no vocabulary
        and stored as ``self._text_featurizer`` before the base initializer
        runs.

        Args:
            config: Configuration object; must provide ``unit_type``.
            args: Run arguments object, forwarded to the base class.
        """
        featurizer = TextFeaturizer(unit_type=config.unit_type, vocab=None)
        self._text_featurizer = featurizer
        super().__init__(config, args)

    def ordid2token(self, texts, texts_len):
        """Turn padded sequences of code points (``ord`` ids) into strings.

        Args:
            texts: Iterable of code-point sequences, one per utterance; each
                sequence must support slicing.
            texts_len: Iterable of lengths aligned with ``texts``; each item
                must provide ``.numpy().item()`` yielding the number of valid
                leading entries.

        Returns:
            list[str]: One string per utterance, built by applying ``chr`` to
            the first ``n`` entries of the matching sequence.
        """
        return [
            ''.join(map(chr, code_points[:length.numpy().item()]))
            for code_points, length in zip(texts, texts_len)
        ]

    def compute_metrics(self,
                        utts,
                        audio,
                        audio_len,
                        texts,
                        texts_len,
                        fout=None):
        """Decode one batch and accumulate its error statistics.

        Character-level functions are used when
        ``config.decode.error_rate_type`` is ``'cer'``; word-level functions
        otherwise. Every reference/hypothesis pair and its error rate are
        logged.

        Args:
            utts: Utterance ids, aligned with the batch.
            audio: Batch of audio features passed to the decoder.
            audio_len: Lengths matching ``audio``.
            texts: Reference transcripts as code-point sequences.
            texts_len: Valid length of every entry of ``texts``.
            fout: Optional writable text stream; when truthy, one
                ``"<utt> <hypothesis>"`` line is written per utterance.

        Returns:
            dict: ``errors_sum``, ``len_refs``, ``num_ins``, ``error_rate``
            (``errors_sum / len_refs``) and ``error_rate_type``.
        """
        decode_conf = self.config.decode
        metric_name = decode_conf.error_rate_type
        if metric_name == 'cer':
            count_errors, rate_of = error_rate.char_errors, error_rate.cer
        else:
            count_errors, rate_of = error_rate.word_errors, error_rate.wer

        vocab = self.test_loader.collate_fn.vocab_list
        references = self.ordid2token(texts, texts_len)
        hypotheses = self.compute_result_transcripts(audio, audio_len, vocab,
                                                     decode_conf)

        total_errors, total_ref_len, n_utts = 0.0, 0, 0
        for utt_id, reference, hypothesis in zip(utts, references,
                                                 hypotheses):
            n_errors, ref_len = count_errors(reference, hypothesis)
            total_errors += n_errors
            total_ref_len += ref_len
            n_utts += 1
            if fout:
                fout.write(utt_id + " " + hypothesis + "\n")
            logger.info("\nTarget Transcription: %s\nOutput Transcription: %s" %
                        (reference, hypothesis))
            logger.info("Current error rate [%s] = %f" %
                        (metric_name, rate_of(reference, hypothesis)))

        return {
            'errors_sum': total_errors,
            'len_refs': total_ref_len,
            'num_ins': n_utts,
            'error_rate': total_errors / total_ref_len,
            'error_rate_type': metric_name,
        }

    def compute_result_transcripts(self, audio, audio_len, vocab_list, cfg):
        """Decode a batch with the model and detokenize every hypothesis.

        Args:
            audio: Batch of audio features passed to ``self.model.decode``.
            audio_len: Lengths matching ``audio``.
            vocab_list: Vocabulary passed to the decoder.
            cfg: Decoding options; must provide ``decoding_method``,
                ``lang_model_path``, ``alpha``, ``beta``, ``beam_size``,
                ``cutoff_prob``, ``cutoff_top_n`` and ``num_proc_bsearch``.

        Returns:
            list: The decoder outputs, each passed through
            ``self._text_featurizer.detokenize``.
        """
        decode_options = dict(
            decoding_method=cfg.decoding_method,
            lang_model_path=cfg.lang_model_path,
            beam_alpha=cfg.alpha,
            beam_beta=cfg.beta,
            beam_size=cfg.beam_size,
            cutoff_prob=cfg.cutoff_prob,
            cutoff_top_n=cfg.cutoff_top_n,
            num_processes=cfg.num_proc_bsearch)
        hypotheses = self.model.decode(audio, audio_len, vocab_list,
                                       **decode_options)
        detokenize = self._text_featurizer.detokenize
        return [detokenize(hypothesis) for hypothesis in hypotheses]

    @mp_tools.rank_zero_only
    @paddle.no_grad()
    def test(self):
        """Decode the whole test set, write the results and log error rates.

        The model is switched to eval mode. For every batch,
        ``compute_metrics`` writes one ``"<utt> <hypothesis>"`` line per
        utterance to ``self.args.result_file`` and returns the batch
        statistics, which are accumulated here. The running error rate is
        logged after each batch and the final one after the loop.

        When no reference tokens were evaluated (for example an empty test
        loader) the error rate is reported as ``nan`` instead of raising
        ``ZeroDivisionError``.
        """
        logger.info(f"Test Total Examples: {len(self.test_loader.dataset)}")
        self.model.eval()
        error_rate_type = None
        errors_sum, len_refs, num_ins = 0.0, 0, 0
        # Transcripts may contain non-ASCII characters, so the file is written
        # as UTF-8 rather than in the platform's default encoding.
        with open(self.args.result_file, 'w', encoding='utf-8') as fout:
            for batch in self.test_loader:
                utts, audio, audio_len, texts, texts_len = batch
                metrics = self.compute_metrics(utts, audio, audio_len, texts,
                                               texts_len, fout)
                errors_sum += metrics['errors_sum']
                len_refs += metrics['len_refs']
                num_ins += metrics['num_ins']
                error_rate_type = metrics['error_rate_type']
                running_rate = (errors_sum / len_refs
                                if len_refs else float('nan'))
                logger.info("Error rate [%s] (%d/?) = %f" %
                            (error_rate_type, num_ins, running_rate))

        # The rate is undefined when nothing was evaluated.
        final_rate = errors_sum / len_refs if len_refs else float('nan')

        # logging
        msg = "Test: "
        msg += "epoch: {}, ".format(self.epoch)
        msg += "step: {}, ".format(self.iteration)
        msg += "Final error rate [%s] (%d/%d) = %f" % (
            error_rate_type, num_ins, num_ins, final_rate)
        logger.info(msg)

    def run_test(self):
        """Call ``resume_or_scratch()`` and then ``test()``.

        Raises:
            SystemExit: With code ``-1`` when ``test()`` is interrupted by a
                ``KeyboardInterrupt``.
        """
        self.resume_or_scratch()
        try:
            self.test()
        except KeyboardInterrupt:
            # The `exit` builtin is injected by the `site` module for
            # interactive use and may be absent (e.g. `python -S`); raising
            # SystemExit directly has the same effect without relying on it.
            raise SystemExit(-1)

    def export(self):
        """Export the pretrained inference model as a static graph.

        The offline or online inference model is loaded with
        ``from_pretrained`` from ``self.args.checkpoint_path``, switched to
        eval mode and exported. The generated forward code is logged and the
        static model is saved to ``self.args.export_path`` with
        ``paddle.jit.save``.

        Raises:
            Exception: If ``self.args.model_type`` is neither ``'offline'``
                nor ``'online'``.
        """
        if self.args.model_type == 'offline':
            infer_model = DeepSpeech2InferModel.from_pretrained(
                self.test_loader, self.config, self.args.checkpoint_path)
        elif self.args.model_type == 'online':
            infer_model = DeepSpeech2InferModelOnline.from_pretrained(
                self.test_loader, self.config, self.args.checkpoint_path)
        else:
            raise Exception("wrong model type")

        infer_model.eval()
        static_model = infer_model.export()
        logger.info(f"Export code: {static_model.forward.code}")
        paddle.jit.save(static_model, self.args.export_path)

    def run_export(self):
        """Call ``export()``.

        Raises:
            SystemExit: With code ``-1`` when ``export()`` is interrupted by
                a ``KeyboardInterrupt``.
        """
        try:
            self.export()
        except KeyboardInterrupt:
            # The `exit` builtin is injected by the `site` module for
            # interactive use and may be absent (e.g. `python -S`); raising
            # SystemExit directly has the same effect without relying on it.
            raise SystemExit(-1)
change the code format to 2.x style 3 years ago			`# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
			`"""Contains DeepSpeech2 and DeepSpeech2Online model."""`
			`import time`
			`from collections import defaultdict`
			`from contextlib import nullcontext`

			`import numpy as np`
			`import paddle`
			`from paddle import distributed as dist`
			`from paddle.io import DataLoader`
fix the bug: read space as unk 3 years ago			`from src_deepspeech2x.models.ds2 import DeepSpeech2InferModel`
			`from src_deepspeech2x.models.ds2 import DeepSpeech2Model`
change the code format to 2.x style 3 years ago
merge deepspeech, parakeet and text_processing into paddlespeech 3 years ago			`from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer`
			`from paddlespeech.s2t.io.collator import SpeechCollator`
			`from paddlespeech.s2t.io.dataset import ManifestDataset`
			`from paddlespeech.s2t.io.sampler import SortagradBatchSampler`
			`from paddlespeech.s2t.io.sampler import SortagradDistributedBatchSampler`
			`from paddlespeech.s2t.models.ds2_online import DeepSpeech2InferModelOnline`
			`from paddlespeech.s2t.models.ds2_online import DeepSpeech2ModelOnline`
			`from paddlespeech.s2t.training.gradclip import ClipGradByGlobalNormWithLog`
			`from paddlespeech.s2t.training.trainer import Trainer`
			`from paddlespeech.s2t.utils import error_rate`
			`from paddlespeech.s2t.utils import layer_tools`
			`from paddlespeech.s2t.utils import mp_tools`
			`from paddlespeech.s2t.utils.log import Log`
change the code format to 2.x style 3 years ago
			`logger = Log(__name__).getlog()`


			`class DeepSpeech2Trainer(Trainer):`
			`def __init__(self, config, args):`
			`super().__init__(config, args)`

			`def train_batch(self, batch_index, batch_data, msg):`
change all recipes 3 years ago			`train_conf = self.config`
change the code format to 2.x style 3 years ago			`start = time.time()`

			`# forward`
			`utt, audio, audio_len, text, text_len = batch_data`
			`loss = self.model(audio, audio_len, text, text_len)`
			`losses_np = {`
			`'train_loss': float(loss),`
			`}`

			`# loss backward`
			`if (batch_index + 1) % train_conf.accum_grad != 0:`
			`# Disable gradient synchronizations across DDP processes.`
			`# Within this context, gradients will be accumulated on module`
			`# variables, which will later be synchronized.`
			`context = self.model.no_sync`
			`else:`
			`# Used for single gpu training and DDP gradient synchronization`
			`# processes.`
			`context = nullcontext`

			`with context():`
			`loss.backward()`
			`layer_tools.print_grads(self.model, print_func=None)`

			`# optimizer step`
			`if (batch_index + 1) % train_conf.accum_grad == 0:`
			`self.optimizer.step()`
			`self.optimizer.clear_grad()`
			`self.iteration += 1`

			`iteration_time = time.time() - start`

			`msg += "train time: {:>.3f}s, ".format(iteration_time)`
change all recipes 3 years ago			`msg += "batch size: {}, ".format(self.config.batch_size)`
change the code format to 2.x style 3 years ago			`msg += "accum: {}, ".format(train_conf.accum_grad)`
			`msg += ', '.join('{}: {:>.6f}'.format(k, v)`
			`for k, v in losses_np.items())`
			`logger.info(msg)`

			`if dist.get_rank() == 0 and self.visualizer:`
			`for k, v in losses_np.items():`
			# `step -1` since we update `step` after optimizer.step().
			`self.visualizer.add_scalar("train/{}".format(k), v,`
			`self.iteration - 1)`

			`@paddle.no_grad()`
			`def valid(self):`
			`logger.info(f"Valid Total Examples: {len(self.valid_loader.dataset)}")`
			`self.model.eval()`
			`valid_losses = defaultdict(list)`
			`num_seen_utts = 1`
			`total_loss = 0.0`
			`for i, batch in enumerate(self.valid_loader):`
			`utt, audio, audio_len, text, text_len = batch`
			`loss = self.model(audio, audio_len, text, text_len)`
			`if paddle.isfinite(loss):`
			`num_utts = batch[1].shape[0]`
			`num_seen_utts += num_utts`
			`total_loss += float(loss) * num_utts`
			`valid_losses['val_loss'].append(float(loss))`

change all recipes 3 years ago			`if (i + 1) % self.config.log_interval == 0:`
change the code format to 2.x style 3 years ago			`valid_dump = {k: np.mean(v) for k, v in valid_losses.items()}`
			`valid_dump['val_history_loss'] = total_loss / num_seen_utts`

			`# logging`
			`msg = f"Valid: Rank: {dist.get_rank()}, "`
			`msg += "epoch: {}, ".format(self.epoch)`
			`msg += "step: {}, ".format(self.iteration)`
			`msg += "batch : {}/{}, ".format(i + 1, len(self.valid_loader))`
			`msg += ', '.join('{}: {:>.6f}'.format(k, v)`
			`for k, v in valid_dump.items())`
			`logger.info(msg)`

			`logger.info('Rank {} Val info val_loss {}'.format(`
			`dist.get_rank(), total_loss / num_seen_utts))`
			`return total_loss, num_seen_utts`

			`def setup_model(self):`
			`config = self.config.clone()`
			`config.defrost()`
change all recipes 3 years ago			`config.feat_size = self.train_loader.collate_fn.feature_size`
			`#config.dict_size = self.train_loader.collate_fn.vocab_size`
			`config.dict_size = len(self.train_loader.collate_fn.vocab_list)`
change the code format to 2.x style 3 years ago			`config.freeze()`

			`if self.args.model_type == 'offline':`
change all recipes 3 years ago			`model = DeepSpeech2Model.from_config(config)`
change the code format to 2.x style 3 years ago			`elif self.args.model_type == 'online':`
change all recipes 3 years ago			`model = DeepSpeech2ModelOnline.from_config(config)`
change the code format to 2.x style 3 years ago			`else:`
			`raise Exception("wrong model type")`
			`if self.parallel:`
			`model = paddle.DataParallel(model)`

			`logger.info(f"{model}")`
			`layer_tools.print_params(model, logger.info)`

change all recipes 3 years ago			`grad_clip = ClipGradByGlobalNormWithLog(config.global_grad_clip)`
change the code format to 2.x style 3 years ago			`lr_scheduler = paddle.optimizer.lr.ExponentialDecay(`
change all recipes 3 years ago			`learning_rate=config.lr, gamma=config.lr_decay, verbose=True)`
change the code format to 2.x style 3 years ago			`optimizer = paddle.optimizer.Adam(`
			`learning_rate=lr_scheduler,`
			`parameters=model.parameters(),`
change all recipes 3 years ago			`weight_decay=paddle.regularizer.L2Decay(config.weight_decay),`
change the code format to 2.x style 3 years ago			`grad_clip=grad_clip)`

			`self.model = model`
			`self.optimizer = optimizer`
			`self.lr_scheduler = lr_scheduler`
			`logger.info("Setup model/optimizer/lr_scheduler!")`

			`def setup_dataloader(self):`
			`config = self.config.clone()`
			`config.defrost()`
change all recipes 3 years ago			`config.keep_transcription_text = False`
change the code format to 2.x style 3 years ago
change all recipes 3 years ago			`config.manifest = config.train_manifest`
change the code format to 2.x style 3 years ago			`train_dataset = ManifestDataset.from_config(config)`

change all recipes 3 years ago			`config.manifest = config.dev_manifest`
change the code format to 2.x style 3 years ago			`dev_dataset = ManifestDataset.from_config(config)`

change all recipes 3 years ago			`config.manifest = config.test_manifest`
change the code format to 2.x style 3 years ago			`test_dataset = ManifestDataset.from_config(config)`

			`if self.parallel:`
			`batch_sampler = SortagradDistributedBatchSampler(`
			`train_dataset,`
change all recipes 3 years ago			`batch_size=config.batch_size,`
change the code format to 2.x style 3 years ago			`num_replicas=None,`
			`rank=None,`
			`shuffle=True,`
			`drop_last=True,`
change all recipes 3 years ago			`sortagrad=config.sortagrad,`
			`shuffle_method=config.shuffle_method)`
change the code format to 2.x style 3 years ago			`else:`
			`batch_sampler = SortagradBatchSampler(`
			`train_dataset,`
			`shuffle=True,`
change all recipes 3 years ago			`batch_size=config.batch_size,`
change the code format to 2.x style 3 years ago			`drop_last=True,`
change all recipes 3 years ago			`sortagrad=config.sortagrad,`
			`shuffle_method=config.shuffle_method)`
change the code format to 2.x style 3 years ago
			`collate_fn_train = SpeechCollator.from_config(config)`

change all recipes 3 years ago			`config.augmentation_config = ""`
change the code format to 2.x style 3 years ago			`collate_fn_dev = SpeechCollator.from_config(config)`

change all recipes 3 years ago			`config.keep_transcription_text = True`
			`config.augmentation_config = ""`
change the code format to 2.x style 3 years ago			`collate_fn_test = SpeechCollator.from_config(config)`

			`self.train_loader = DataLoader(`
			`train_dataset,`
			`batch_sampler=batch_sampler,`
			`collate_fn=collate_fn_train,`
change all recipes 3 years ago			`num_workers=config.num_workers)`
change the code format to 2.x style 3 years ago			`self.valid_loader = DataLoader(`
			`dev_dataset,`
change all recipes 3 years ago			`batch_size=config.batch_size,`
change the code format to 2.x style 3 years ago			`shuffle=False,`
			`drop_last=False,`
			`collate_fn=collate_fn_dev)`
			`self.test_loader = DataLoader(`
			`test_dataset,`
change all recipes 3 years ago			`batch_size=config.decode.decode_batch_size,`
change the code format to 2.x style 3 years ago			`shuffle=False,`
			`drop_last=False,`
			`collate_fn=collate_fn_test)`
			`if "<eos>" in self.test_loader.collate_fn.vocab_list:`
			`self.test_loader.collate_fn.vocab_list.remove("<eos>")`
			`if "<eos>" in self.valid_loader.collate_fn.vocab_list:`
			`self.valid_loader.collate_fn.vocab_list.remove("<eos>")`
			`if "<eos>" in self.train_loader.collate_fn.vocab_list:`
			`self.train_loader.collate_fn.vocab_list.remove("<eos>")`
			`logger.info("Setup train/valid/test Dataloader!")`


			`class DeepSpeech2Tester(DeepSpeech2Trainer):`
			`def __init__(self, config, args):`
optimize the 1xt2x 3 years ago
			`self._text_featurizer = TextFeaturizer(`
change all recipes 3 years ago			`unit_type=config.unit_type, vocab=None)`
change the code format to 2.x style 3 years ago			`super().__init__(config, args)`

			`def ordid2token(self, texts, texts_len):`
			`""" ord() id to chr() chr """`
			`trans = []`
			`for text, n in zip(texts, texts_len):`
			`n = n.numpy().item()`
			`ids = text[:n]`
			`trans.append(''.join([chr(i) for i in ids]))`
			`return trans`

			`def compute_metrics(self,`
			`utts,`
			`audio,`
			`audio_len,`
			`texts,`
			`texts_len,`
			`fout=None):`
change all recipes 3 years ago			`cfg = self.config.decode`
change the code format to 2.x style 3 years ago			`errors_sum, len_refs, num_ins = 0.0, 0, 0`
			`errors_func = error_rate.char_errors if cfg.error_rate_type == 'cer' else error_rate.word_errors`
			`error_rate_func = error_rate.cer if cfg.error_rate_type == 'cer' else error_rate.wer`

			`vocab_list = self.test_loader.collate_fn.vocab_list`

			`target_transcripts = self.ordid2token(texts, texts_len)`

			`result_transcripts = self.compute_result_transcripts(audio, audio_len,`
			`vocab_list, cfg)`
			`for utt, target, result in zip(utts, target_transcripts,`
			`result_transcripts):`
			`errors, len_ref = errors_func(target, result)`
			`errors_sum += errors`
			`len_refs += len_ref`
			`num_ins += 1`
			`if fout:`
			`fout.write(utt + " " + result + "\n")`
			`logger.info("\nTarget Transcription: %s\nOutput Transcription: %s" %`
			`(target, result))`
			`logger.info("Current error rate [%s] = %f" %`
			`(cfg.error_rate_type, error_rate_func(target, result)))`

			`return dict(`
			`errors_sum=errors_sum,`
			`len_refs=len_refs,`
			`num_ins=num_ins,`
			`error_rate=errors_sum / len_refs,`
			`error_rate_type=cfg.error_rate_type)`

			`def compute_result_transcripts(self, audio, audio_len, vocab_list, cfg):`
			`result_transcripts = self.model.decode(`
			`audio,`
			`audio_len,`
			`vocab_list,`
			`decoding_method=cfg.decoding_method,`
			`lang_model_path=cfg.lang_model_path,`
			`beam_alpha=cfg.alpha,`
			`beam_beta=cfg.beta,`
			`beam_size=cfg.beam_size,`
			`cutoff_prob=cfg.cutoff_prob,`
			`cutoff_top_n=cfg.cutoff_top_n,`
			`num_processes=cfg.num_proc_bsearch)`
optimize the 1xt2x 3 years ago			`result_transcripts = [`
			`self._text_featurizer.detokenize(item)`
			`for item in result_transcripts`
			`]`
change the code format to 2.x style 3 years ago			`return result_transcripts`

			`@mp_tools.rank_zero_only`
			`@paddle.no_grad()`
			`def test(self):`
			`logger.info(f"Test Total Examples: {len(self.test_loader.dataset)}")`
			`self.model.eval()`
			`cfg = self.config`
			`error_rate_type = None`
			`errors_sum, len_refs, num_ins = 0.0, 0, 0`
			`with open(self.args.result_file, 'w') as fout:`
			`for i, batch in enumerate(self.test_loader):`
			`utts, audio, audio_len, texts, texts_len = batch`
			`metrics = self.compute_metrics(utts, audio, audio_len, texts,`
			`texts_len, fout)`
			`errors_sum += metrics['errors_sum']`
			`len_refs += metrics['len_refs']`
			`num_ins += metrics['num_ins']`
			`error_rate_type = metrics['error_rate_type']`
			`logger.info("Error rate [%s] (%d/?) = %f" %`
			`(error_rate_type, num_ins, errors_sum / len_refs))`

			`# logging`
			`msg = "Test: "`
			`msg += "epoch: {}, ".format(self.epoch)`
			`msg += "step: {}, ".format(self.iteration)`
			`msg += "Final error rate [%s] (%d/%d) = %f" % (`
			`error_rate_type, num_ins, num_ins, errors_sum / len_refs)`
			`logger.info(msg)`

			`def run_test(self):`
			`self.resume_or_scratch()`
			`try:`
			`self.test()`
			`except KeyboardInterrupt:`
			`exit(-1)`

			`def export(self):`
			`if self.args.model_type == 'offline':`
			`infer_model = DeepSpeech2InferModel.from_pretrained(`
			`self.test_loader, self.config, self.args.checkpoint_path)`
			`elif self.args.model_type == 'online':`
			`infer_model = DeepSpeech2InferModelOnline.from_pretrained(`
			`self.test_loader, self.config, self.args.checkpoint_path)`
			`else:`
			`raise Exception("wrong model type")`

			`infer_model.eval()`
			`feat_dim = self.test_loader.collate_fn.feature_size`
			`static_model = infer_model.export()`
			`logger.info(f"Export code: {static_model.forward.code}")`
			`paddle.jit.save(static_model, self.args.export_path)`

			`def run_export(self):`
			`try:`
			`self.export()`
			`except KeyboardInterrupt:`
			`exit(-1)`