Merge pull request #1054 from zh794390558/visual

[asr] using visualdl, jsonlines read manifest
3 years ago · 3e19978194
parent 0892536a8e 39228864bb
commit 3e19978194
21 changed files with 90 additions and 77 deletions
--- a/examples/aishell/asr1/READEME.md
+++ b/examples/aishell/asr1/READEME.md
@ -339,6 +339,3 @@ You need to prepare an audio file, please confirm the sample rate of the audio i
 ```bash
 CUDA_VISIBLE_DEVICES= ./local/test_hub.sh conf/transformer.yaml exp/transformer/checkpoints/avg_20 data/test_audio.wav
 ```
--- a/paddlespeech/s2t/exps/u2/model.py
+++ b/paddlespeech/s2t/exps/u2/model.py
@ -128,8 +128,9 @@ class U2Trainer(Trainer):
            if dist.get_rank() == 0 and self.visualizer:
                losses_np_v = losses_np.copy()
                losses_np_v.update({"lr": self.lr_scheduler()})
-                self.visualizer.add_scalars("step", losses_np_v,
+                for key, val in losses_np_v.items():
-                                            self.iteration - 1)
+                    self.visualizer.add_scalar(
                        tag='train/' + key, value=val, step=self.iteration - 1)
    @paddle.no_grad()
    def valid(self):
@ -237,9 +238,10 @@ class U2Trainer(Trainer):
            logger.info(
                'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss))
            if self.visualizer:
-                self.visualizer.add_scalars(
+                self.visualizer.add_scalar(
-                    'epoch', {'cv_loss': cv_loss,
+                    tag='eval/cv_loss', value=cv_loss, step=self.epoch)
-                              'lr': self.lr_scheduler()}, self.epoch)
+                self.visualizer.add_scalar(
                    tag='eval/lr', value=self.lr_scheduler(), step=self.epoch)
            self.save(tag=self.epoch, infos={'val_loss': cv_loss})
            self.new_epoch()
--- a/paddlespeech/s2t/exps/u2_kaldi/model.py
+++ b/paddlespeech/s2t/exps/u2_kaldi/model.py
@ -131,8 +131,9 @@ class U2Trainer(Trainer):
            if dist.get_rank() == 0 and self.visualizer:
                losses_np_v = losses_np.copy()
                losses_np_v.update({"lr": self.lr_scheduler()})
-                self.visualizer.add_scalars("step", losses_np_v,
+                for key, val in losses_np_v.items():
-                                            self.iteration - 1)
+                    self.visualizer.add_scalar(
                        tag="train/" + key, value=val, step=self.iteration - 1)
    @paddle.no_grad()
    def valid(self):
@ -222,9 +223,11 @@ class U2Trainer(Trainer):
            logger.info(
                'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss))
            if self.visualizer:
-                self.visualizer.add_scalars(
+                self.visualizer.add_scalar(
-                    'epoch', {'cv_loss': cv_loss,
+                    tag='eval/cv_loss', value=cv_loss, step=self.epoch)
-                              'lr': self.lr_scheduler()}, self.epoch)
+                self.visualizer.add_scalar(
                    tag='eval/lr', value=self.lr_scheduler(), step=self.epoch)
            self.save(tag=self.epoch, infos={'val_loss': cv_loss})
            self.new_epoch()
--- a/paddlespeech/s2t/exps/u2_st/model.py
+++ b/paddlespeech/s2t/exps/u2_st/model.py
@ -138,8 +138,9 @@ class U2STTrainer(Trainer):
            if dist.get_rank() == 0 and self.visualizer:
                losses_np_v = losses_np.copy()
                losses_np_v.update({"lr": self.lr_scheduler()})
-                self.visualizer.add_scalars("step", losses_np_v,
+                for key, val in losses_np_v.items():
-                                            self.iteration - 1)
+                    self.visualizer.add_scalar(
                        tag="train/" + key, value=val, step=self.iteration - 1)
    @paddle.no_grad()
    def valid(self):
@ -235,9 +236,11 @@ class U2STTrainer(Trainer):
            logger.info(
                'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss))
            if self.visualizer:
-                self.visualizer.add_scalars(
+                self.visualizer.add_scalar(
-                    'epoch', {'cv_loss': cv_loss,
+                    tag='eval/cv_loss', value=cv_loss, step=self.epoch)
-                              'lr': self.lr_scheduler()}, self.epoch)
+                self.visualizer.add_scalar(
                    tag='eval/lr', value=self.lr_scheduler(), step=self.epoch)
            self.save(tag=self.epoch, infos={'val_loss': cv_loss})
            self.new_epoch()
--- a/paddlespeech/s2t/frontend/augmentor/impulse_response.py
+++ b/paddlespeech/s2t/frontend/augmentor/impulse_response.py
@ -12,9 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Contains the impulse response augmentation model."""
 import jsonlines
 from paddlespeech.s2t.frontend.audio import AudioSegment
 from paddlespeech.s2t.frontend.augmentor.base import AugmentorBase
 from paddlespeech.s2t.frontend.utility import read_manifest
 class ImpulseResponseAugmentor(AugmentorBase):
@ -28,7 +29,8 @@ class ImpulseResponseAugmentor(AugmentorBase):
    def __init__(self, rng, impulse_manifest_path):
        self._rng = rng
-        self._impulse_manifest = read_manifest(impulse_manifest_path)
+        with jsonlines.open(impulse_manifest_path, 'r') as reader:
            self._impulse_manifest = list(reader)
    def __call__(self, x, uttid=None, train=True):
        if not train:
--- a/paddlespeech/s2t/frontend/augmentor/noise_perturb.py
+++ b/paddlespeech/s2t/frontend/augmentor/noise_perturb.py
@ -12,9 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Contains the noise perturb augmentation model."""
 import jsonlines
 from paddlespeech.s2t.frontend.audio import AudioSegment
 from paddlespeech.s2t.frontend.augmentor.base import AugmentorBase
 from paddlespeech.s2t.frontend.utility import read_manifest
 class NoisePerturbAugmentor(AugmentorBase):
@ -34,7 +35,8 @@ class NoisePerturbAugmentor(AugmentorBase):
        self._min_snr_dB = min_snr_dB
        self._max_snr_dB = max_snr_dB
        self._rng = rng
-        self._noise_manifest = read_manifest(manifest_path=noise_manifest_path)
+        with jsonlines.open(noise_manifest_path, 'r') as reader:
            self._noise_manifest = list(reader)
    def __call__(self, x, uttid=None, train=True):
        if not train:
--- a/paddlespeech/s2t/frontend/normalizer.py
+++ b/paddlespeech/s2t/frontend/normalizer.py
@ -14,6 +14,7 @@
 """Contains feature normalizers."""
 import json
 import jsonlines
 import numpy as np
 import paddle
 from paddle.io import DataLoader
@ -21,7 +22,6 @@ from paddle.io import Dataset
 from paddlespeech.s2t.frontend.audio import AudioSegment
 from paddlespeech.s2t.frontend.utility import load_cmvn
 from paddlespeech.s2t.frontend.utility import read_manifest
 from paddlespeech.s2t.utils.log import Log
 __all__ = ["FeatureNormalizer"]
@ -61,7 +61,10 @@ class CollateFunc(object):
 class AudioDataset(Dataset):
    def __init__(self, manifest_path, num_samples=-1, rng=None, random_seed=0):
        self._rng = rng if rng else np.random.RandomState(random_seed)
-        manifest = read_manifest(manifest_path)
+
        with jsonlines.open(manifest_path, 'r') as reader:
            manifest = list(reader)
        if num_samples == -1:
            sampled_manifest = manifest
        else:
--- a/paddlespeech/s2t/frontend/utility.py
+++ b/paddlespeech/s2t/frontend/utility.py
@ -98,7 +98,6 @@ def read_manifest(
    Returns:
        List[dict]: Manifest parsing results.
    """
    manifest = []
    with jsonlines.open(manifest_path, 'r') as reader:
        for json_data in reader:
--- a/paddlespeech/s2t/io/dataloader.py
+++ b/paddlespeech/s2t/io/dataloader.py
@ -16,10 +16,10 @@ from typing import Dict
 from typing import List
 from typing import Text
 import jsonlines
 import numpy as np
 from paddle.io import DataLoader
 from paddlespeech.s2t.frontend.utility import read_manifest
 from paddlespeech.s2t.io.batchfy import make_batchset
 from paddlespeech.s2t.io.converter import CustomConverter
 from paddlespeech.s2t.io.dataset import TransformDataset
@ -91,7 +91,9 @@ class BatchDataLoader():
        self.n_iter_processes = n_iter_processes
        # read json data
-        self.data_json = read_manifest(json_file)
+        with jsonlines.open(json_file, 'r') as reader:
            self.data_json = list(reader)
        self.feat_dim, self.vocab_size = feat_dim_and_vocab_size(
            self.data_json, mode='asr')
--- a/paddlespeech/s2t/io/dataset.py
+++ b/paddlespeech/s2t/io/dataset.py
@ -15,6 +15,7 @@
 # Modified from wenet(https://github.com/wenet-e2e/wenet)
 from typing import Optional
 import jsonlines
 from paddle.io import Dataset
 from yacs.config import CfgNode
@ -184,7 +185,8 @@ class AudioDataset(Dataset):
        """
        assert batch_type in ['static', 'dynamic']
        # read manifest
-        data = read_manifest(data_file)
+        with jsonlines.open(data_file, 'r') as reader:
            data = list(reader)
        if sort:
            data = sorted(data, key=lambda x: x["feat_shape"][0])
        if raw_wav:
--- a/paddlespeech/s2t/io/sampler.py
+++ b/paddlespeech/s2t/io/sampler.py
@ -51,7 +51,7 @@ def _batch_shuffle(indices, batch_size, epoch, clipped=False):
    """
    rng = np.random.RandomState(epoch)
    shift_len = rng.randint(0, batch_size - 1)
-    batch_indices = list(zip(*[iter(indices[shift_len:])] * batch_size))
+    batch_indices = list(zip(* [iter(indices[shift_len:])] * batch_size))
    rng.shuffle(batch_indices)
    batch_indices = [item for batch in batch_indices for item in batch]
    assert clipped is False
--- a/paddlespeech/s2t/training/trainer.py
+++ b/paddlespeech/s2t/training/trainer.py
@ -19,7 +19,7 @@ from pathlib import Path
 import paddle
 from paddle import distributed as dist
-from tensorboardX import SummaryWriter
+from visualdl import LogWriter
 from paddlespeech.s2t.training.reporter import ObsScope
 from paddlespeech.s2t.training.reporter import report
@ -309,9 +309,10 @@ class Trainer():
            logger.info(
                'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss))
            if self.visualizer:
-                self.visualizer.add_scalars(
+                self.visualizer.add_scalar(
-                    'epoch', {'cv_loss': cv_loss,
+                    tag='eval/cv_loss', value=cv_loss, step=self.epoch)
-                              'lr': self.lr_scheduler()}, self.epoch)
+                self.visualizer.add_scalar(
                    tag='eval/lr', value=self.lr_scheduler(), step=self.epoch)
            # after epoch
            self.save(tag=self.epoch, infos={'val_loss': cv_loss})
@ -427,7 +428,7 @@ class Trainer():
        unexpected behaviors.
        """
        # visualizer
-        visualizer = SummaryWriter(logdir=str(self.visual_dir))
+        visualizer = LogWriter(logdir=str(self.visual_dir))
        self.visualizer = visualizer
    @mp_tools.rank_zero_only
--- a/paddlespeech/s2t/utils/socket_server.py
+++ b/paddlespeech/s2t/utils/socket_server.py
@ -21,7 +21,7 @@ import wave
 from time import gmtime
 from time import strftime
-from paddlespeech.s2t.frontend.utility import read_manifest
+import jsonlines
 __all__ = ["socket_send", "warm_up_test", "AsrTCPServer", "AsrRequestHandler"]
@ -44,7 +44,8 @@ def warm_up_test(audio_process_handler,
                 num_test_cases,
                 random_seed=0):
    """Warming-up test."""
-    manifest = read_manifest(manifest_path)
+    with jsonlines.open(manifest_path) as reader:
        manifest = list(reader)
    rng = random.Random(random_seed)
    samples = rng.sample(manifest, num_test_cases)
    for idx, sample in enumerate(samples):
--- a/paddlespeech/text/speechtask/punctuation_restoration/training/trainer.py
+++ b/paddlespeech/text/speechtask/punctuation_restoration/training/trainer.py
@ -34,7 +34,7 @@ from speechtask.punctuation_restoration.model.lstm import RnnLm
 from speechtask.punctuation_restoration.utils import layer_tools
 from speechtask.punctuation_restoration.utils import mp_tools
 from speechtask.punctuation_restoration.utils.checkpoint import Checkpoint
-from tensorboardX import SummaryWriter
+from visualdl import LogWriter
 __all__ = ["Trainer", "Tester"]
@ -252,10 +252,10 @@ class Trainer():
            self.logger.info("Epoch {} Val info val_loss {}, F1_score {}".
                             format(self.epoch, total_loss, F1_score))
            if self.visualizer:
-                self.visualizer.add_scalars("epoch", {
+                self.visualizer.add_scalar(
-                    "total_loss": total_loss,
+                    tag='eval/cv_loss', value=cv_loss, step=self.epoch)
-                    "lr": self.lr_scheduler()
+                self.visualizer.add_scalar(
-                }, self.epoch)
+                    tag='eval/lr', value=self.lr_scheduler(), step=self.epoch)
            self.save(
                tag=self.epoch, infos={"val_loss": total_loss,
@ -341,7 +341,7 @@ class Trainer():
        unexpected behaviors.
        """
        # visualizer
-        visualizer = SummaryWriter(logdir=str(self.output_dir))
+        visualizer = LogWriter(logdir=str(self.output_dir))
        self.visualizer = visualizer
    @mp_tools.rank_zero_only
--- a/requirements.txt
+++ b/requirements.txt
@ -40,7 +40,6 @@ snakeviz
 soundfile~=0.10
 sox
 soxbindings
 tensorboardX
 textgrid
 timer
 tqdm
--- a/utils/build_vocab.py
+++ b/utils/build_vocab.py
@ -21,9 +21,10 @@ import os
 import tempfile
 from collections import Counter
 import jsonlines
 from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
 from paddlespeech.s2t.frontend.utility import BLANK
 from paddlespeech.s2t.frontend.utility import read_manifest
 from paddlespeech.s2t.frontend.utility import SOS
 from paddlespeech.s2t.frontend.utility import SPACE
 from paddlespeech.s2t.frontend.utility import UNK
@ -59,13 +60,21 @@ args = parser.parse_args()
 def count_manifest(counter, text_feature, manifest_path):
-    manifest_jsons = read_manifest(manifest_path)
+    manifest_jsons = []
    with jsonlines.open(manifest_path, 'r') as reader:
        for json_data in reader:
            manifest_jsons.append(json_data)
    for line_json in manifest_jsons:
        line = text_feature.tokenize(line_json['text'], replace_space=False)
        counter.update(line)
 def dump_text_manifest(fileobj, manifest_path, key='text'):
-    manifest_jsons = read_manifest(manifest_path)
+    manifest_jsons = []
    with jsonlines.open(manifest_path, 'r') as reader:
        for json_data in reader:
            manifest_jsons.append(json_data)
    for line_json in manifest_jsons:
        fileobj.write(line_json[key] + "\n")
--- a/utils/dump_manifest.py
+++ b/utils/dump_manifest.py
@ -17,7 +17,7 @@ import argparse
 from pathlib import Path
 from typing import Union
-from paddlespeech.s2t.frontend.utility import read_manifest
+import jsonlines
 key_whitelist = set(['feat', 'text', 'syllable', 'phone'])
 filename = {
@ -32,7 +32,10 @@ def dump_manifest(manifest_path, output_dir: Union[str, Path]):
    output_dir = Path(output_dir).expanduser()
    manifest_path = Path(manifest_path).expanduser()
-    manifest_jsons = read_manifest(manifest_path)
+
    with jsonlines.open(str(manifest_path), 'r') as reader:
        manifest_jsons = list(reader)
    first_line = manifest_jsons[0]
    file_map = {}
--- a/utils/format_data.py
+++ b/utils/format_data.py
@ -17,9 +17,10 @@ import argparse
 import functools
 import json
 import jsonlines
 from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
 from paddlespeech.s2t.frontend.utility import load_cmvn
 from paddlespeech.s2t.frontend.utility import read_manifest
 from paddlespeech.s2t.io.utility import feat_type
 from paddlespeech.s2t.utils.utility import add_arguments
 from paddlespeech.s2t.utils.utility import print_arguments
@ -71,7 +72,9 @@ def main():
    # }
    count = 0
    for manifest_path in args.manifest_paths:
-        manifest_jsons = read_manifest(manifest_path)
+        with jsonlines.open(str(manifest_path), 'r') as reader:
            manifest_jsons = list(reader)
        for line_json in manifest_jsons:
            output_json = {
                "input": [],
--- a/utils/format_triplet_data.py
+++ b/utils/format_triplet_data.py
@ -17,9 +17,10 @@ import argparse
 import functools
 import json
 import jsonlines
 from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
 from paddlespeech.s2t.frontend.utility import load_cmvn
 from paddlespeech.s2t.frontend.utility import read_manifest
 from paddlespeech.s2t.io.utility import feat_type
 from paddlespeech.s2t.utils.utility import add_arguments
 from paddlespeech.s2t.utils.utility import print_arguments
@ -63,7 +64,8 @@ def main():
    count = 0
    for manifest_path in args.manifest_paths:
-        manifest_jsons = read_manifest(manifest_path)
+        with jsonlines.open(str(manifest_path), 'r') as reader:
            manifest_jsons = list(reader)
        for line_json in manifest_jsons:
            # text: translation text, text1: transcript text.
            # Currently only support joint-vocab, will add separate vocabs setting.
--- a/utils/manifest_key_value.py
+++ b/utils/manifest_key_value.py
@ -4,9 +4,10 @@ import argparse
 import functools
 from pathlib import Path
 import jsonlines
 from utils.utility import add_arguments
 from utils.utility import print_arguments
 from utils.utility import read_manifest
 def main(args):
@ -19,7 +20,8 @@ def main(args):
    dur_scp = outdir / 'duration'
    text_scp = outdir / 'text'
-    manifest_jsons = read_manifest(args.manifest_path)
+    with jsonlines.open(args.manifest_path, 'r') as reader:
        manifest_jsons = list(reader)
    with wav_scp.open('w') as fwav, dur_scp.open('w') as fdur, text_scp.open(
            'w') as ftxt:
--- a/utils/utility.py
+++ b/utils/utility.py
@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import hashlib
 import json
 import os
 import sys
 import tarfile
@ -22,31 +21,10 @@ from typing import Text
 __all__ = [
    "check_md5sum", "getfile_insensitive", "download_multi", "download",
    "unpack", "unzip", "md5file", "print_arguments", "add_arguments",
-    "read_manifest", "get_commandline_args"
+    "get_commandline_args"
 ]
 def read_manifest(manifest_path):
    """Load and parse manifest file.
    Args:
        manifest_path ([type]): Manifest file to load and parse.
    Raises:
        IOError: If failed to parse the manifest.
    Returns:
        List[dict]: Manifest parsing results.
    """
    manifest = []
    for json_line in open(manifest_path, 'r'):
        try:
            json_data = json.loads(json_line)
        except Exception as e:
            raise IOError("Error reading manifest: %s" % str(e))
    return manifest
 def get_commandline_args():
    extra_chars = [
        " ",