Merge pull request #1054 from zh794390558/visual

[asr] using visualdl, jsonlines read manifest
Hui Zhang, committed 3 years ago (via GitHub)
commit 3e19978194

@@ -339,6 +339,3 @@ You need to prepare an audio file, please confirm the sample rate of the audio i
```bash
CUDA_VISIBLE_DEVICES= ./local/test_hub.sh conf/transformer.yaml exp/transformer/checkpoints/avg_20 data/test_audio.wav
```

@@ -128,8 +128,9 @@ class U2Trainer(Trainer):
        if dist.get_rank() == 0 and self.visualizer:
            losses_np_v = losses_np.copy()
            losses_np_v.update({"lr": self.lr_scheduler()})
-            self.visualizer.add_scalars("step", losses_np_v,
-                                        self.iteration - 1)
+            for key, val in losses_np_v.items():
+                self.visualizer.add_scalar(
+                    tag='train/' + key, value=val, step=self.iteration - 1)

    @paddle.no_grad()
    def valid(self):
@@ -237,9 +238,10 @@ class U2Trainer(Trainer):
            logger.info(
                'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss))
            if self.visualizer:
-                self.visualizer.add_scalars(
-                    'epoch', {'cv_loss': cv_loss,
-                              'lr': self.lr_scheduler()}, self.epoch)
+                self.visualizer.add_scalar(
+                    tag='eval/cv_loss', value=cv_loss, step=self.epoch)
+                self.visualizer.add_scalar(
+                    tag='eval/lr', value=self.lr_scheduler(), step=self.epoch)

            self.save(tag=self.epoch, infos={'val_loss': cv_loss})
            self.new_epoch()
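
The two hunks above show the pattern that recurs throughout this commit: the grouped tensorboardX call `add_scalars("step", dict, step)` is split into one `add_scalar(tag=..., value=..., step=...)` call per key on VisualDL's `LogWriter`, with tags namespaced under `train/` and `eval/`. A minimal standalone sketch of the new per-scalar pattern; the `exp/log` directory and the loss values are made-up examples, not taken from the PR:

```python
# Minimal sketch, assuming a made-up logdir and loss dict; mirrors the loop above.
from visualdl import LogWriter

losses_np_v = {"loss": 12.3, "att_loss": 8.1, "ctc_loss": 4.2, "lr": 2e-3}
step = 100

with LogWriter(logdir="exp/log") as writer:
    for key, val in losses_np_v.items():
        # one scalar series per key, grouped under the "train/" namespace
        writer.add_scalar(tag="train/" + key, value=val, step=step)
```

The resulting logs can then be browsed with the VisualDL board, e.g. `visualdl --logdir exp/log`.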

@@ -131,8 +131,9 @@ class U2Trainer(Trainer):
        if dist.get_rank() == 0 and self.visualizer:
            losses_np_v = losses_np.copy()
            losses_np_v.update({"lr": self.lr_scheduler()})
-            self.visualizer.add_scalars("step", losses_np_v,
-                                        self.iteration - 1)
+            for key, val in losses_np_v.items():
+                self.visualizer.add_scalar(
+                    tag="train/" + key, value=val, step=self.iteration - 1)

    @paddle.no_grad()
    def valid(self):
@@ -222,9 +223,11 @@ class U2Trainer(Trainer):
            logger.info(
                'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss))
            if self.visualizer:
-                self.visualizer.add_scalars(
-                    'epoch', {'cv_loss': cv_loss,
-                              'lr': self.lr_scheduler()}, self.epoch)
+                self.visualizer.add_scalar(
+                    tag='eval/cv_loss', value=cv_loss, step=self.epoch)
+                self.visualizer.add_scalar(
+                    tag='eval/lr', value=self.lr_scheduler(), step=self.epoch)

            self.save(tag=self.epoch, infos={'val_loss': cv_loss})
            self.new_epoch()

@@ -138,8 +138,9 @@ class U2STTrainer(Trainer):
        if dist.get_rank() == 0 and self.visualizer:
            losses_np_v = losses_np.copy()
            losses_np_v.update({"lr": self.lr_scheduler()})
-            self.visualizer.add_scalars("step", losses_np_v,
-                                        self.iteration - 1)
+            for key, val in losses_np_v.items():
+                self.visualizer.add_scalar(
+                    tag="train/" + key, value=val, step=self.iteration - 1)

    @paddle.no_grad()
    def valid(self):
@@ -235,9 +236,11 @@ class U2STTrainer(Trainer):
            logger.info(
                'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss))
            if self.visualizer:
-                self.visualizer.add_scalars(
-                    'epoch', {'cv_loss': cv_loss,
-                              'lr': self.lr_scheduler()}, self.epoch)
+                self.visualizer.add_scalar(
+                    tag='eval/cv_loss', value=cv_loss, step=self.epoch)
+                self.visualizer.add_scalar(
+                    tag='eval/lr', value=self.lr_scheduler(), step=self.epoch)

            self.save(tag=self.epoch, infos={'val_loss': cv_loss})
            self.new_epoch()

@@ -12,9 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the impulse response augmentation model."""
+import jsonlines
from paddlespeech.s2t.frontend.audio import AudioSegment
from paddlespeech.s2t.frontend.augmentor.base import AugmentorBase
-from paddlespeech.s2t.frontend.utility import read_manifest


class ImpulseResponseAugmentor(AugmentorBase):

@@ -28,7 +29,8 @@ class ImpulseResponseAugmentor(AugmentorBase):
    def __init__(self, rng, impulse_manifest_path):
        self._rng = rng
-        self._impulse_manifest = read_manifest(impulse_manifest_path)
+        with jsonlines.open(impulse_manifest_path, 'r') as reader:
+            self._impulse_manifest = list(reader)

    def __call__(self, x, uttid=None, train=True):
        if not train:
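
The same manifest change is applied file by file from here on: every `read_manifest(...)` call becomes a direct read of the JSON Lines manifest through the `jsonlines` package. A self-contained sketch of that pattern follows; the file name and records are invented for illustration, while a real manifest has one JSON object per line with fields such as `feat` and `text`:

```python
# Sketch with made-up records; shows the jsonlines round trip used in this PR.
import jsonlines

records = [
    {"utt": "utt_001", "feat": "wavs/utt_001.wav", "text": "hello"},
    {"utt": "utt_002", "feat": "wavs/utt_002.wav", "text": "world"},
]

# write a tiny JSON Lines manifest (one JSON object per line)
with jsonlines.open("manifest.example", mode="w") as writer:
    writer.write_all(records)

# the replacement pattern: open the manifest and materialize it as a list of dicts
with jsonlines.open("manifest.example", "r") as reader:
    manifest = list(reader)

assert manifest == records
```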

@@ -12,9 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the noise perturb augmentation model."""
+import jsonlines
from paddlespeech.s2t.frontend.audio import AudioSegment
from paddlespeech.s2t.frontend.augmentor.base import AugmentorBase
-from paddlespeech.s2t.frontend.utility import read_manifest


class NoisePerturbAugmentor(AugmentorBase):

@@ -34,7 +35,8 @@ class NoisePerturbAugmentor(AugmentorBase):
        self._min_snr_dB = min_snr_dB
        self._max_snr_dB = max_snr_dB
        self._rng = rng
-        self._noise_manifest = read_manifest(manifest_path=noise_manifest_path)
+        with jsonlines.open(noise_manifest_path, 'r') as reader:
+            self._noise_manifest = list(reader)

    def __call__(self, x, uttid=None, train=True):
        if not train:

@@ -14,6 +14,7 @@
"""Contains feature normalizers."""
import json
+import jsonlines
import numpy as np
import paddle
from paddle.io import DataLoader

@@ -21,7 +22,6 @@ from paddle.io import Dataset
from paddlespeech.s2t.frontend.audio import AudioSegment
from paddlespeech.s2t.frontend.utility import load_cmvn
-from paddlespeech.s2t.frontend.utility import read_manifest
from paddlespeech.s2t.utils.log import Log

__all__ = ["FeatureNormalizer"]

@@ -61,7 +61,10 @@ class CollateFunc(object):
class AudioDataset(Dataset):
    def __init__(self, manifest_path, num_samples=-1, rng=None, random_seed=0):
        self._rng = rng if rng else np.random.RandomState(random_seed)
-        manifest = read_manifest(manifest_path)
+        with jsonlines.open(manifest_path, 'r') as reader:
+            manifest = list(reader)
        if num_samples == -1:
            sampled_manifest = manifest
        else:

@@ -98,7 +98,6 @@ def read_manifest(
    Returns:
        List[dict]: Manifest parsing results.
    """
-    manifest = []
    with jsonlines.open(manifest_path, 'r') as reader:
        for json_data in reader:

@@ -16,10 +16,10 @@ from typing import Dict
from typing import List
from typing import Text

+import jsonlines
import numpy as np
from paddle.io import DataLoader

-from paddlespeech.s2t.frontend.utility import read_manifest
from paddlespeech.s2t.io.batchfy import make_batchset
from paddlespeech.s2t.io.converter import CustomConverter
from paddlespeech.s2t.io.dataset import TransformDataset

@@ -91,7 +91,9 @@ class BatchDataLoader():
        self.n_iter_processes = n_iter_processes

        # read json data
-        self.data_json = read_manifest(json_file)
+        with jsonlines.open(json_file, 'r') as reader:
+            self.data_json = list(reader)
        self.feat_dim, self.vocab_size = feat_dim_and_vocab_size(
            self.data_json, mode='asr')

@@ -15,6 +15,7 @@
# Modified from wenet(https://github.com/wenet-e2e/wenet)
from typing import Optional

+import jsonlines
from paddle.io import Dataset
from yacs.config import CfgNode

@@ -184,7 +185,8 @@ class AudioDataset(Dataset):
        """
        assert batch_type in ['static', 'dynamic']
        # read manifest
-        data = read_manifest(data_file)
+        with jsonlines.open(data_file, 'r') as reader:
+            data = list(reader)
        if sort:
            data = sorted(data, key=lambda x: x["feat_shape"][0])
        if raw_wav:

@@ -51,7 +51,7 @@ def _batch_shuffle(indices, batch_size, epoch, clipped=False):
    """
    rng = np.random.RandomState(epoch)
    shift_len = rng.randint(0, batch_size - 1)
-    batch_indices = list(zip(*[iter(indices[shift_len:])] * batch_size))
+    batch_indices = list(zip(* [iter(indices[shift_len:])] * batch_size))
    rng.shuffle(batch_indices)
    batch_indices = [item for batch in batch_indices for item in batch]
    assert clipped is False
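
For readers unfamiliar with the expression this hunk only reformats: `zip(*[iter(seq)] * batch_size)` repeats a single iterator `batch_size` times, so `zip` pulls consecutive items into fixed-size tuples and silently drops an incomplete tail. A quick illustration, not part of the PR:

```python
# The same iterator object is repeated, so zip() consumes it in lockstep,
# yielding consecutive chunks of batch_size items; the leftover tail (8, 9) is dropped.
indices = list(range(10))
batch_size = 4

batches = list(zip(*[iter(indices)] * batch_size))
print(batches)  # [(0, 1, 2, 3), (4, 5, 6, 7)]
```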

@@ -19,7 +19,7 @@ from pathlib import Path
import paddle
from paddle import distributed as dist
-from tensorboardX import SummaryWriter
+from visualdl import LogWriter

from paddlespeech.s2t.training.reporter import ObsScope
from paddlespeech.s2t.training.reporter import report

@@ -309,9 +309,10 @@ class Trainer():
            logger.info(
                'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss))
            if self.visualizer:
-                self.visualizer.add_scalars(
-                    'epoch', {'cv_loss': cv_loss,
-                              'lr': self.lr_scheduler()}, self.epoch)
+                self.visualizer.add_scalar(
+                    tag='eval/cv_loss', value=cv_loss, step=self.epoch)
+                self.visualizer.add_scalar(
+                    tag='eval/lr', value=self.lr_scheduler(), step=self.epoch)

            # after epoch
            self.save(tag=self.epoch, infos={'val_loss': cv_loss})

@@ -427,7 +428,7 @@ class Trainer():
        unexpected behaviors.
        """
        # visualizer
-        visualizer = SummaryWriter(logdir=str(self.visual_dir))
+        visualizer = LogWriter(logdir=str(self.visual_dir))
        self.visualizer = visualizer

    @mp_tools.rank_zero_only
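
For the long-lived writer the `Trainer` keeps, `LogWriter` is used as a drop-in for the scalar logging previously taken from tensorboardX's `SummaryWriter`: construct it with a `logdir`, call `add_scalar` during training, and close it on teardown. A hedged sketch of that lifecycle; the `exp/visual` directory and the fake loss stand in for `self.visual_dir` and the real metrics, and are not code from the PR:

```python
# Sketch of the create/log/close lifecycle; paths and values are made up.
from visualdl import LogWriter

visualizer = LogWriter(logdir="exp/visual")   # was: SummaryWriter(logdir=...)
try:
    for epoch in range(3):
        cv_loss = 1.0 / (epoch + 1)           # placeholder validation loss
        visualizer.add_scalar(tag="eval/cv_loss", value=cv_loss, step=epoch)
finally:
    visualizer.close()
```

Splitting the old grouped `add_scalars('epoch', {...}, step)` into per-tag `add_scalar` calls appears to be a consequence of this swap, since the diff consistently avoids any grouped-scalar API on `LogWriter`.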

@@ -21,7 +21,7 @@ import wave
from time import gmtime
from time import strftime

-from paddlespeech.s2t.frontend.utility import read_manifest
+import jsonlines

__all__ = ["socket_send", "warm_up_test", "AsrTCPServer", "AsrRequestHandler"]

@@ -44,7 +44,8 @@ def warm_up_test(audio_process_handler,
                 num_test_cases,
                 random_seed=0):
    """Warming-up test."""
-    manifest = read_manifest(manifest_path)
+    with jsonlines.open(manifest_path) as reader:
+        manifest = list(reader)
    rng = random.Random(random_seed)
    samples = rng.sample(manifest, num_test_cases)
    for idx, sample in enumerate(samples):

@@ -34,7 +34,7 @@ from speechtask.punctuation_restoration.model.lstm import RnnLm
from speechtask.punctuation_restoration.utils import layer_tools
from speechtask.punctuation_restoration.utils import mp_tools
from speechtask.punctuation_restoration.utils.checkpoint import Checkpoint
-from tensorboardX import SummaryWriter
+from visualdl import LogWriter

__all__ = ["Trainer", "Tester"]

@@ -252,10 +252,10 @@ class Trainer():
            self.logger.info("Epoch {} Val info val_loss {}, F1_score {}".
                             format(self.epoch, total_loss, F1_score))
            if self.visualizer:
-                self.visualizer.add_scalars("epoch", {
-                    "total_loss": total_loss,
-                    "lr": self.lr_scheduler()
-                }, self.epoch)
+                self.visualizer.add_scalar(
+                    tag='eval/cv_loss', value=cv_loss, step=self.epoch)
+                self.visualizer.add_scalar(
+                    tag='eval/lr', value=self.lr_scheduler(), step=self.epoch)
            self.save(
                tag=self.epoch, infos={"val_loss": total_loss,

@@ -341,7 +341,7 @@ class Trainer():
        unexpected behaviors.
        """
        # visualizer
-        visualizer = SummaryWriter(logdir=str(self.output_dir))
+        visualizer = LogWriter(logdir=str(self.output_dir))
        self.visualizer = visualizer

    @mp_tools.rank_zero_only

@@ -40,7 +40,6 @@ snakeviz
soundfile~=0.10
sox
soxbindings
-tensorboardX
textgrid
timer
tqdm

@@ -21,9 +21,10 @@ import os
import tempfile
from collections import Counter

+import jsonlines
from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
from paddlespeech.s2t.frontend.utility import BLANK
-from paddlespeech.s2t.frontend.utility import read_manifest
from paddlespeech.s2t.frontend.utility import SOS
from paddlespeech.s2t.frontend.utility import SPACE
from paddlespeech.s2t.frontend.utility import UNK

@@ -59,13 +60,21 @@ args = parser.parse_args()

def count_manifest(counter, text_feature, manifest_path):
-    manifest_jsons = read_manifest(manifest_path)
+    manifest_jsons = []
+    with jsonlines.open(manifest_path, 'r') as reader:
+        for json_data in reader:
+            manifest_jsons.append(json_data)
+
    for line_json in manifest_jsons:
        line = text_feature.tokenize(line_json['text'], replace_space=False)
        counter.update(line)


def dump_text_manifest(fileobj, manifest_path, key='text'):
-    manifest_jsons = read_manifest(manifest_path)
+    manifest_jsons = []
+    with jsonlines.open(manifest_path, 'r') as reader:
+        for json_data in reader:
+            manifest_jsons.append(json_data)
+
    for line_json in manifest_jsons:
        fileobj.write(line_json[key] + "\n")

@@ -17,7 +17,7 @@ import argparse
from pathlib import Path
from typing import Union

-from paddlespeech.s2t.frontend.utility import read_manifest
+import jsonlines

key_whitelist = set(['feat', 'text', 'syllable', 'phone'])
filename = {

@@ -32,7 +32,10 @@ def dump_manifest(manifest_path, output_dir: Union[str, Path]):
    output_dir = Path(output_dir).expanduser()
    manifest_path = Path(manifest_path).expanduser()
-    manifest_jsons = read_manifest(manifest_path)
+    with jsonlines.open(str(manifest_path), 'r') as reader:
+        manifest_jsons = list(reader)

    first_line = manifest_jsons[0]
    file_map = {}

@@ -17,9 +17,10 @@ import argparse
import functools
import json

+import jsonlines
from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
from paddlespeech.s2t.frontend.utility import load_cmvn
-from paddlespeech.s2t.frontend.utility import read_manifest
from paddlespeech.s2t.io.utility import feat_type
from paddlespeech.s2t.utils.utility import add_arguments
from paddlespeech.s2t.utils.utility import print_arguments

@@ -71,7 +72,9 @@ def main():
    # }
    count = 0
    for manifest_path in args.manifest_paths:
-        manifest_jsons = read_manifest(manifest_path)
+        with jsonlines.open(str(manifest_path), 'r') as reader:
+            manifest_jsons = list(reader)
        for line_json in manifest_jsons:
            output_json = {
                "input": [],

@@ -17,9 +17,10 @@ import argparse
import functools
import json

+import jsonlines
from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
from paddlespeech.s2t.frontend.utility import load_cmvn
-from paddlespeech.s2t.frontend.utility import read_manifest
from paddlespeech.s2t.io.utility import feat_type
from paddlespeech.s2t.utils.utility import add_arguments
from paddlespeech.s2t.utils.utility import print_arguments

@@ -63,7 +64,8 @@ def main():
    count = 0
    for manifest_path in args.manifest_paths:
-        manifest_jsons = read_manifest(manifest_path)
+        with jsonlines.open(str(manifest_path), 'r') as reader:
+            manifest_jsons = list(reader)
        for line_json in manifest_jsons:
            # text: translation text, text1: transcript text.
            # Currently only support joint-vocab, will add separate vocabs setting.

@@ -4,9 +4,10 @@ import argparse
import functools
from pathlib import Path

+import jsonlines
from utils.utility import add_arguments
from utils.utility import print_arguments
-from utils.utility import read_manifest


def main(args):

@@ -19,7 +20,8 @@ def main(args):
    dur_scp = outdir / 'duration'
    text_scp = outdir / 'text'

-    manifest_jsons = read_manifest(args.manifest_path)
+    with jsonlines.open(args.manifest_path, 'r') as reader:
+        manifest_jsons = list(reader)

    with wav_scp.open('w') as fwav, dur_scp.open('w') as fdur, text_scp.open(
            'w') as ftxt:

@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import hashlib
-import json
import os
import sys
import tarfile

@@ -22,31 +21,10 @@ from typing import Text
__all__ = [
    "check_md5sum", "getfile_insensitive", "download_multi", "download",
    "unpack", "unzip", "md5file", "print_arguments", "add_arguments",
-    "read_manifest", "get_commandline_args"
+    "get_commandline_args"
]


-def read_manifest(manifest_path):
-    """Load and parse manifest file.
-
-    Args:
-        manifest_path ([type]): Manifest file to load and parse.
-
-    Raises:
-        IOError: If failed to parse the manifest.
-
-    Returns:
-        List[dict]: Manifest parsing results.
-    """
-    manifest = []
-    for json_line in open(manifest_path, 'r'):
-        try:
-            json_data = json.loads(json_line)
-        except Exception as e:
-            raise IOError("Error reading manifest: %s" % str(e))
-        manifest.append(json_data)
-    return manifest
-
-
def get_commandline_args():
    extra_chars = [
        " ",