From c47bbefa6458cb43952a618bf166322c934a9174 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Wed, 3 Feb 2021 06:58:50 +0000 Subject: [PATCH] support py3 --- README.md | 109 ++++++++++-------- data/aishell/aishell.py | 3 - data/librispeech/librispeech.py | 3 - data/noise/chime3_background.py | 3 - data/voxforge/voxforge.py | 3 - data_utils/audio.py | 13 +-- data_utils/augmentor/augmentation.py | 3 - data_utils/augmentor/base.py | 3 - data_utils/augmentor/impulse_response.py | 5 +- data_utils/augmentor/noise_perturb.py | 5 +- .../online_bayesian_normalization.py | 3 - data_utils/augmentor/resample.py | 3 - data_utils/augmentor/shift_perturb.py | 3 - data_utils/augmentor/speed_perturb.py | 3 - data_utils/augmentor/volume_perturb.py | 3 - data_utils/data.py | 17 ++- data_utils/featurizer/audio_featurizer.py | 3 - data_utils/featurizer/speech_featurizer.py | 5 +- data_utils/featurizer/text_featurizer.py | 7 +- data_utils/normalizer.py | 9 +- data_utils/speech.py | 17 ++- data_utils/utility.py | 6 +- decoders/decoders_deprecated.py | 3 - decoders/scorer_deprecated.py | 7 +- decoders/swig/_init_paths.py | 3 - decoders/swig/setup.py | 3 - decoders/swig/setup.sh | 2 +- decoders/swig_wrapper.py | 9 +- decoders/tests/test_decoders.py | 3 - deploy/_init_paths.py | 3 - deploy/demo_client.py | 2 +- examples/aishell/run_data.sh | 6 +- examples/aishell/run_infer.sh | 2 +- examples/aishell/run_infer_golden.sh | 2 +- examples/aishell/run_test.sh | 2 +- examples/aishell/run_test_golden.sh | 2 +- examples/aishell/run_train.sh | 2 +- examples/baidu_en8k/run_infer_golden.sh | 2 +- examples/baidu_en8k/run_test_golden.sh | 2 +- examples/deploy_demo/run_demo_client.sh | 2 +- .../deploy_demo/run_english_demo_server.sh | 2 +- examples/librispeech/run_data.sh | 6 +- examples/librispeech/run_infer.sh | 2 +- examples/librispeech/run_infer_golden.sh | 2 +- examples/librispeech/run_test.sh | 2 +- examples/librispeech/run_test_golden.sh | 2 +- examples/librispeech/run_train.sh | 2 +- 
examples/librispeech/run_tune.sh | 2 +- examples/tiny/run_data.sh | 6 +- examples/tiny/run_infer.sh | 2 +- examples/tiny/run_infer_golden.sh | 2 +- examples/tiny/run_test.sh | 2 +- examples/tiny/run_test_golden.sh | 2 +- examples/tiny/run_train.sh | 2 +- examples/tiny/run_tune.sh | 2 +- infer.py | 8 +- model_utils/model.py | 10 +- model_utils/network.py | 3 - setup.sh | 6 +- test.py | 5 +- tools/_init_paths.py | 3 - tools/build_vocab.py | 3 - tools/compute_mean_std.py | 3 - tools/tune.py | 5 +- train.py | 3 - utils/error_rate.py | 28 ++--- utils/tests/test_error_rate.py | 4 - utils/utility.py | 5 +- 68 files changed, 145 insertions(+), 265 deletions(-) diff --git a/README.md b/README.md index 3d8b9e600..e08e2ac4a 100644 --- a/README.md +++ b/README.md @@ -26,20 +26,20 @@ To avoid the trouble of environment setup, [running in Docker container](#running-in-docker-container) is highly recommended. Otherwise follow the guidelines below to install the dependencies manually. ### Prerequisites -- Python 2.7 only supported +- Python >= 3.5 - PaddlePaddle 1.8.0 or later (please refer to the [Installation Guide](https://www.paddlepaddle.org.cn/documentation/docs/en/beginners_guide/index_en.html)) ### Setup - Make sure these libraries or tools installed: `pkg-config`, `flac`, `ogg`, `vorbis`, `boost` and `swig`, e.g. 
installing them via `apt-get`: ```bash -sudo apt-get install -y pkg-config libflac-dev libogg-dev libvorbis-dev libboost-dev swig python-dev +sudo apt-get install -y pkg-config libflac-dev libogg-dev libvorbis-dev libboost-dev swig python3-dev ``` or, installing them via `yum`: ```bash -sudo yum install pkgconfig libogg-devel libvorbis-devel boost-devel python-devel +sudo yum install pkgconfig libogg-devel libvorbis-devel boost-devel python3-devel wget https://ftp.osuosl.org/pub/xiph/releases/flac/flac-1.3.1.tar.xz xz -d flac-1.3.1.tar.xz tar -xvf flac-1.3.1.tar @@ -57,6 +57,39 @@ cd DeepSpeech sh setup.sh ``` +### Running in Docker Container + +Docker is an open source tool to build, ship, and run distributed applications in an isolated environment. A Docker image for this project has been provided in [hub.docker.com](https://hub.docker.com) with all the dependencies installed, including the pre-built PaddlePaddle, CTC decoders, and other necessary Python and third-party packages. This Docker image requires the support of NVIDIA GPU, so please make sure that a GPU is available and that [nvidia-docker](https://github.com/NVIDIA/nvidia-docker) has been installed. + +Take several steps to launch the Docker image: + +- Download the Docker image + +```bash +nvidia-docker pull hub.baidubce.com/paddlepaddle/deep_speech_fluid:latest-gpu +``` + +- Clone this repository + +``` +git clone https://github.com/PaddlePaddle/DeepSpeech.git +``` + +- Run the Docker image + +```bash +sudo nvidia-docker run -it -v $(pwd)/DeepSpeech:/DeepSpeech hub.baidubce.com/paddlepaddle/deep_speech_fluid:latest-gpu /bin/bash +``` +Now continue with the [Getting Started](#getting-started) section below; you can execute training, inference and hyper-parameters tuning in the same way inside the Docker container. 
+ + +- Install PaddlePaddle + +For example, for CUDA 10.1, CuDNN7.5: +```bash +python3 -m pip install paddlepaddle-gpu==1.8.0.post107 +``` + ## Getting Started Several shell scripts provided in `./examples` will help us to quickly give it a try, for most major modules, including data preparation, model training, case inference and model evaluation, with a few public dataset (e.g. [LibriSpeech](http://www.openslr.org/12/), [Aishell](http://www.openslr.org/33)). Reading these examples will also help you to understand how to make it work with your own data. @@ -132,7 +165,7 @@ For how to generate such manifest files, please refer to `data/librispeech/libri To perform z-score normalization (zero-mean, unit stddev) upon audio features, we have to estimate in advance the mean and standard deviation of the features, with some training samples: ```bash -python tools/compute_mean_std.py \ +python3 tools/compute_mean_std.py \ --num_samples 2000 \ --specgram_type linear \ --manifest_path data/librispeech/manifest.train \ @@ -147,7 +180,7 @@ It will compute the mean and standard deviatio of power spectrum feature with 20 A vocabulary of possible characters is required to convert the transcription into a list of token indices for training, and in decoding, to convert from a list of indices back to text again. Such a character-based vocabulary can be built with `tools/build_vocab.py`. 
```bash -python tools/build_vocab.py \ +python3 tools/build_vocab.py \ --count_threshold 0 \ --vocab_path data/librispeech/eng_vocab.txt \ --manifest_paths data/librispeech/manifest.train @@ -160,9 +193,9 @@ It will write a vocabuary file `data/librispeeech/eng_vocab.txt` with all transc For more help on arguments: ```bash -python data/librispeech/librispeech.py --help -python tools/compute_mean_std.py --help -python tools/build_vocab.py --help +python3 data/librispeech/librispeech.py --help +python3 tools/compute_mean_std.py --help +python3 tools/build_vocab.py --help ``` ## Training a model @@ -172,26 +205,26 @@ python tools/build_vocab.py --help - Start training from scratch with 8 GPUs: ``` - CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python train.py + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 train.py ``` - Start training from scratch with CPUs: ``` - python train.py --use_gpu False + python3 train.py --use_gpu False ``` - Resume training from a checkpoint: ``` CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ - python train.py \ + python3 train.py \ --init_from_pretrained_model CHECKPOINT_PATH_TO_RESUME_FROM ``` For more help on arguments: ```bash -python train.py --help +python3 train.py --help ``` or refer to `example/librispeech/run_train.sh`. @@ -273,13 +306,13 @@ An inference module caller `infer.py` is provided to infer, decode and visualize - Inference with GPU: ```bash - CUDA_VISIBLE_DEVICES=0 python infer.py + CUDA_VISIBLE_DEVICES=0 python3 infer.py ``` - Inference with CPUs: ```bash - python infer.py --use_gpu False + python3 infer.py --use_gpu False ``` We provide two types of CTC decoders: *CTC greedy decoder* and *CTC beam search decoder*. The *CTC greedy decoder* is an implementation of the simple best-path decoding algorithm, selecting at each timestep the most likely token, thus being greedy and locally optimal. 
The [*CTC beam search decoder*](https://arxiv.org/abs/1408.2873) otherwise utilizes a heuristic breadth-first graph search for reaching a near global optimality; it also requires a pre-trained KenLM language model for better scoring and ranking. The decoder type can be set with argument `--decoding_method`. @@ -287,7 +320,7 @@ We provide two types of CTC decoders: *CTC greedy decoder* and *CTC beam search For more help on arguments: ``` -python infer.py --help +python3 infer.py --help ``` or refer to `example/librispeech/run_infer.sh`. @@ -298,13 +331,13 @@ To evaluate a model's performance quantitatively, please run: - Evaluation with GPUs: ```bash - CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python test.py + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 test.py ``` - Evaluation with CPUs: ```bash - python test.py --use_gpu False + python3 test.py --use_gpu False ``` The error rate (default: word error rate; can be set with `--error_rate_type`) will be printed. @@ -312,7 +345,7 @@ The error rate (default: word error rate; can be set with `--error_rate_type`) w For more help on arguments: ```bash -python test.py --help +python3 test.py --help ``` or refer to `example/librispeech/run_test.sh`. @@ -326,7 +359,7 @@ The hyper-parameters $\alpha$ (language model weight) and $\beta$ (word insertio ```bash CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ - python tools/tune.py \ + python3 tools/tune.py \ --alpha_from 1.0 \ --alpha_to 3.2 \ --num_alphas 45 \ @@ -338,7 +371,7 @@ The hyper-parameters $\alpha$ (language model weight) and $\beta$ (word insertio - Tuning with CPU: ```bash - python tools/tune.py --use_gpu False + python3 tools/tune.py --use_gpu False ``` The grid search will print the WER (word error rate) or CER (character error rate) at each point in the hyper-parameters space, and draw the error surface optionally. A proper hyper-parameters range should include the global minima of the error surface for WER/CER, as illustrated in the following figure. 
@@ -352,36 +385,10 @@ Usually, as the figure shows, the variation of language model weight ($\alpha$) After tuning, you can reset $\alpha$ and $\beta$ in the inference and evaluation modules to see if they really help improve the ASR performance. For more help ```bash -python tune.py --help +python3 tools/tune.py --help ``` or refer to `example/librispeech/run_tune.sh`. -## Running in Docker Container - -Docker is an open source tool to build, ship, and run distributed applications in an isolated environment. A Docker image for this project has been provided in [hub.docker.com](https://hub.docker.com) with all the dependencies installed, including the pre-built PaddlePaddle, CTC decoders, and other necessary Python and third-party packages. This Docker image requires the support of NVIDIA GPU, so please make sure its availiability and the [nvidia-docker](https://github.com/NVIDIA/nvidia-docker) has been installed. - -Take several steps to launch the Docker image: - -- Download the Docker image - -```bash -nvidia-docker pull hub.baidubce.com/paddlepaddle/deep_speech_fluid:latest-gpu -``` - -- Clone this repository - -``` -git clone https://github.com/PaddlePaddle/DeepSpeech.git -``` - -- Run the Docker image - -```bash -sudo nvidia-docker run -it -v $(pwd)/DeepSpeech:/DeepSpeech hub.baidubce.com/paddlepaddle/deep_speech_fluid:latest-gpu /bin/bash -``` -Now go back and start from the [Getting Started](#getting-started) section, you can execute training, inference and hyper-parameters tuning similarly in the Docker container. - - ## Training for Mandarin Language The key steps of training for Mandarin language are same to that of English language and we have also provided an example for Mandarin training with Aishell in ```examples/aishell```. As mentioned above, please execute ```sh run_data.sh```, ```sh run_train.sh```, ```sh run_test.sh``` and ```sh run_infer.sh``` to do data preparation, training, testing and inference correspondingly. 
We have also prepared a pre-trained model (downloaded by ./models/aishell/download_model.sh) for users to try with ```sh run_infer_golden.sh``` and ```sh run_test_golden.sh```. Notice that, different from English LM, the Mandarin LM is character-based and please run ```tools/tune.py``` to find an optimal setting. @@ -394,7 +401,7 @@ To start the demo's server, please run this in one console: ```bash CUDA_VISIBLE_DEVICES=0 \ -python deploy/demo_server.py \ +python3 deploy/demo_server.py \ --host_ip localhost \ --host_port 8086 ``` @@ -413,7 +420,7 @@ Then to start the client, please run this in another console: ```bash CUDA_VISIBLE_DEVICES=0 \ -python -u deploy/demo_client.py \ +python3 -u deploy/demo_client.py \ --host_ip 'localhost' \ --host_port 8086 ``` @@ -427,8 +434,8 @@ Please also refer to `examples/deploy_demo/run_english_demo_server.sh`, which wi For more help on arguments: ```bash -python deploy/demo_server.py --help -python deploy/demo_client.py --help +python3 deploy/demo_server.py --help +python3 deploy/demo_client.py --help ``` ## Released Models diff --git a/data/aishell/aishell.py b/data/aishell/aishell.py index e81d20853..6290712f2 100644 --- a/data/aishell/aishell.py +++ b/data/aishell/aishell.py @@ -5,9 +5,6 @@ Manifest file is a json-format file with each line containing the meta data (i.e. audio filepath, transcript and audio duration) of each audio file in the data set. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import os import codecs diff --git a/data/librispeech/librispeech.py b/data/librispeech/librispeech.py index 770b71108..8a136453b 100644 --- a/data/librispeech/librispeech.py +++ b/data/librispeech/librispeech.py @@ -5,9 +5,6 @@ Manifest file is a json-format file with each line containing the meta data (i.e. audio filepath, transcript and audio duration) of each audio file in the data set. 
""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import distutils.util import os diff --git a/data/noise/chime3_background.py b/data/noise/chime3_background.py index 1aa7f8df8..30a2e14e7 100644 --- a/data/noise/chime3_background.py +++ b/data/noise/chime3_background.py @@ -5,9 +5,6 @@ Manifest file is a json-format file with each line containing the meta data (i.e. audio filepath, transcript and audio duration) of each audio file in the data set. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import distutils.util import os diff --git a/data/voxforge/voxforge.py b/data/voxforge/voxforge.py index b7cc38bdb..8478166fb 100644 --- a/data/voxforge/voxforge.py +++ b/data/voxforge/voxforge.py @@ -5,9 +5,6 @@ Manifest file is a json-format file with each line containing the meta data (i.e. audio filepath, transcript and audio duration) of each audio file in the data set. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import os import codecs diff --git a/data_utils/audio.py b/data_utils/audio.py index e0feb21f3..94e2b52fe 100644 --- a/data_utils/audio.py +++ b/data_utils/audio.py @@ -1,7 +1,4 @@ """Contains the audio segment class.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import numpy as np import io @@ -62,11 +59,11 @@ class AudioSegment(object): """Create audio segment from audio file. :param filepath: Filepath or file object to audio file. - :type filepath: basestring|file + :type filepath: str|file :return: Audio segment instance. 
:rtype: AudioSegment """ - if isinstance(file, basestring) and re.findall(r".seqbin_\d+$", file): + if isinstance(file, str) and re.findall(r".seqbin_\d+$", file): return cls.from_sequence_file(file) else: samples, sample_rate = soundfile.read(file, dtype='float32') @@ -78,7 +75,7 @@ class AudioSegment(object): the entire file into the memory which can be incredibly wasteful. :param file: Input audio filepath or file object. - :type file: basestring|file + :type file: str|file :param start: Start time in seconds. If start is negative, it wraps around from the end. If not provided, this function reads from the very beginning. @@ -143,7 +140,7 @@ class AudioSegment(object): sequence file (starting from 1). :param filepath: Filepath of sequence file. - :type filepath: basestring + :type filepath: str :return: Audio segment instance. :rtype: AudioSegment """ @@ -236,7 +233,7 @@ class AudioSegment(object): :param filepath: WAV filepath or file object to save the audio segment. - :type filepath: basestring|file + :type filepath: str|file :param dtype: Subtype for audio file. Options: 'int16', 'int32', 'float32', 'float64'. Default is 'float32'. 
:type dtype: str diff --git a/data_utils/augmentor/augmentation.py b/data_utils/augmentor/augmentation.py index 5c30b627e..37d6bc1dd 100644 --- a/data_utils/augmentor/augmentation.py +++ b/data_utils/augmentor/augmentation.py @@ -1,7 +1,4 @@ """Contains the data augmentation pipeline.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import json import random diff --git a/data_utils/augmentor/base.py b/data_utils/augmentor/base.py index a323165aa..adeda6465 100644 --- a/data_utils/augmentor/base.py +++ b/data_utils/augmentor/base.py @@ -1,7 +1,4 @@ """Contains the abstract base class for augmentation models.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function from abc import ABCMeta, abstractmethod diff --git a/data_utils/augmentor/impulse_response.py b/data_utils/augmentor/impulse_response.py index 536b4d6a4..937e578bd 100644 --- a/data_utils/augmentor/impulse_response.py +++ b/data_utils/augmentor/impulse_response.py @@ -1,7 +1,4 @@ """Contains the impulse response augmentation model.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function from data_utils.augmentor.base import AugmentorBase from data_utils.utility import read_manifest @@ -14,7 +11,7 @@ class ImpulseResponseAugmentor(AugmentorBase): :param rng: Random generator object. :type rng: random.Random :param impulse_manifest_path: Manifest path for impulse audio data. 
- :type impulse_manifest_path: basestring + :type impulse_manifest_path: str """ def __init__(self, rng, impulse_manifest_path): diff --git a/data_utils/augmentor/noise_perturb.py b/data_utils/augmentor/noise_perturb.py index 96e0ff4de..952ca3fb9 100644 --- a/data_utils/augmentor/noise_perturb.py +++ b/data_utils/augmentor/noise_perturb.py @@ -1,7 +1,4 @@ """Contains the noise perturb augmentation model.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function from data_utils.augmentor.base import AugmentorBase from data_utils.utility import read_manifest @@ -18,7 +15,7 @@ class NoisePerturbAugmentor(AugmentorBase): :param max_snr_dB: Maximal signal noise ratio, in decibels. :type max_snr_dB: float :param noise_manifest_path: Manifest path for noise audio data. - :type noise_manifest_path: basestring + :type noise_manifest_path: str """ def __init__(self, rng, min_snr_dB, max_snr_dB, noise_manifest_path): diff --git a/data_utils/augmentor/online_bayesian_normalization.py b/data_utils/augmentor/online_bayesian_normalization.py index e488ac7d6..deba9a77a 100644 --- a/data_utils/augmentor/online_bayesian_normalization.py +++ b/data_utils/augmentor/online_bayesian_normalization.py @@ -1,7 +1,4 @@ """Contain the online bayesian normalization augmentation model.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function from data_utils.augmentor.base import AugmentorBase diff --git a/data_utils/augmentor/resample.py b/data_utils/augmentor/resample.py index 8df17f3a8..e26d06b46 100644 --- a/data_utils/augmentor/resample.py +++ b/data_utils/augmentor/resample.py @@ -1,7 +1,4 @@ """Contain the resample augmentation model.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function from data_utils.augmentor.base import AugmentorBase diff --git a/data_utils/augmentor/shift_perturb.py 
b/data_utils/augmentor/shift_perturb.py index c4cbe3e17..a9b732161 100644 --- a/data_utils/augmentor/shift_perturb.py +++ b/data_utils/augmentor/shift_perturb.py @@ -1,7 +1,4 @@ """Contains the volume perturb augmentation model.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function from data_utils.augmentor.base import AugmentorBase diff --git a/data_utils/augmentor/speed_perturb.py b/data_utils/augmentor/speed_perturb.py index cc5738bd1..78dfd0c65 100644 --- a/data_utils/augmentor/speed_perturb.py +++ b/data_utils/augmentor/speed_perturb.py @@ -1,7 +1,4 @@ """Contain the speech perturbation augmentation model.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function from data_utils.augmentor.base import AugmentorBase diff --git a/data_utils/augmentor/volume_perturb.py b/data_utils/augmentor/volume_perturb.py index 758676d55..b1d8c190b 100644 --- a/data_utils/augmentor/volume_perturb.py +++ b/data_utils/augmentor/volume_perturb.py @@ -1,7 +1,4 @@ """Contains the volume perturb augmentation model.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function from data_utils.augmentor.base import AugmentorBase diff --git a/data_utils/data.py b/data_utils/data.py index 4a5224efa..ed65a0947 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -1,9 +1,6 @@ """Contains data generator for orgnaizing various audio data preprocessing pipeline and offering data reader interface of PaddlePaddle requirements. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import random import tarfile @@ -25,9 +22,9 @@ class DataGenerator(object): :param vocab_filepath: Vocabulary filepath for indexing tokenized transcripts. 
- :type vocab_filepath: basestring + :type vocab_filepath: str :param mean_std_filepath: File containing the pre-computed mean and stddev. - :type mean_std_filepath: None|basestring + :type mean_std_filepath: None|str :param augmentation_config: Augmentation configuration in json string. Details see AugmentationPipeline.__doc__. :type augmentation_config: str @@ -104,14 +101,14 @@ class DataGenerator(object): """Load, augment, featurize and normalize for speech data. :param audio_file: Filepath or file object of audio file. - :type audio_file: basestring | file + :type audio_file: str | file :param transcript: Transcription text. - :type transcript: basestring + :type transcript: str :return: Tuple of audio feature tensor and data of transcription part, where transcription part could be token ids or text. :rtype: tuple of (2darray, list) """ - if isinstance(audio_file, basestring) and audio_file.startswith('tar:'): + if isinstance(audio_file, str) and audio_file.startswith('tar:'): speech_segment = SpeechSegment.from_file( self._subfile_from_tar(audio_file), transcript) else: @@ -137,7 +134,7 @@ class DataGenerator(object): same shape, or a user-defined shape. :param manifest_path: Filepath of manifest for audio files. - :type manifest_path: basestring + :type manifest_path: str :param batch_size: Number of instances in a batch. 
:type batch_size: int :param padding_to: If set -1, the maximun shape in the batch @@ -361,7 +358,7 @@ class DataGenerator(object): """ manifest.sort(key=lambda x: x["duration"]) shift_len = self._rng.randint(0, batch_size - 1) - batch_manifest = zip(*[iter(manifest[shift_len:])] * batch_size) + batch_manifest = list(zip(*[iter(manifest[shift_len:])] * batch_size)) self._rng.shuffle(batch_manifest) batch_manifest = [item for batch in batch_manifest for item in batch] if not clipped: diff --git a/data_utils/featurizer/audio_featurizer.py b/data_utils/featurizer/audio_featurizer.py index 0a54701bf..c5f20a16b 100644 --- a/data_utils/featurizer/audio_featurizer.py +++ b/data_utils/featurizer/audio_featurizer.py @@ -1,7 +1,4 @@ """Contains the audio featurizer class.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import numpy as np from data_utils.utility import read_manifest diff --git a/data_utils/featurizer/speech_featurizer.py b/data_utils/featurizer/speech_featurizer.py index 4555dc31d..a4484ae44 100644 --- a/data_utils/featurizer/speech_featurizer.py +++ b/data_utils/featurizer/speech_featurizer.py @@ -1,7 +1,4 @@ """Contains the speech featurizer class.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function from data_utils.featurizer.audio_featurizer import AudioFeaturizer from data_utils.featurizer.text_featurizer import TextFeaturizer @@ -18,7 +15,7 @@ class SpeechFeaturizer(object): :param vocab_filepath: Filepath to load vocabulary for token indices conversion. - :type specgram_type: basestring + :type specgram_type: str :param specgram_type: Specgram feature type. Options: 'linear', 'mfcc'. :type specgram_type: str :param stride_ms: Striding size (in milliseconds) for generating frames. 
diff --git a/data_utils/featurizer/text_featurizer.py b/data_utils/featurizer/text_featurizer.py index 89202163c..0c48b38e0 100644 --- a/data_utils/featurizer/text_featurizer.py +++ b/data_utils/featurizer/text_featurizer.py @@ -1,7 +1,4 @@ """Contains the text featurizer class.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import os import codecs @@ -16,7 +13,7 @@ class TextFeaturizer(object): :param vocab_filepath: Filepath to load vocabulary for token indices conversion. - :type specgram_type: basestring + :type specgram_type: str """ def __init__(self, vocab_filepath): @@ -28,7 +25,7 @@ class TextFeaturizer(object): that the token indexing order follows the given vocabulary file. :param text: Text to process. - :type text: basestring + :type text: str :return: List of char-level token indices. :rtype: list """ diff --git a/data_utils/normalizer.py b/data_utils/normalizer.py index 7c2e05c9d..378714cc0 100644 --- a/data_utils/normalizer.py +++ b/data_utils/normalizer.py @@ -1,7 +1,4 @@ """Contains feature normalizers.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import numpy as np import random @@ -18,9 +15,9 @@ class FeatureNormalizer(object): should be given for on-the-fly mean and stddev computing. :param mean_std_filepath: File containing the pre-computed mean and stddev. - :type mean_std_filepath: None|basestring + :type mean_std_filepath: None|str :param manifest_path: Manifest of instances for computing mean and stddev. - :type meanifest_path: None|basestring + :type meanifest_path: None|str :param featurize_func: Function to extract features. It should be callable with ``featurize_func(audio_segment)``. :type featurize_func: None|callable @@ -63,7 +60,7 @@ class FeatureNormalizer(object): """Write the mean and stddev to the file. :param filepath: File to write mean and stddev. 
- :type filepath: basestring + :type filepath: str """ np.savez(filepath, mean=self._mean, std=self._std) diff --git a/data_utils/speech.py b/data_utils/speech.py index e64109dc8..3ea6ec296 100644 --- a/data_utils/speech.py +++ b/data_utils/speech.py @@ -1,7 +1,4 @@ """Contains the speech segment class.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import numpy as np from data_utils.audio import AudioSegment @@ -16,7 +13,7 @@ class SpeechSegment(AudioSegment): :param sample_rate: Audio sample rate. :type sample_rate: int :param transcript: Transcript text for the speech. - :type transript: basestring + :type transript: str :raises TypeError: If the sample data type is not float or int. """ @@ -42,9 +39,9 @@ class SpeechSegment(AudioSegment): """Create speech segment from audio file and corresponding transcript. :param filepath: Filepath or file object to audio file. - :type filepath: basestring|file + :type filepath: str|file :param transcript: Transcript text for the speech. - :type transript: basestring + :type transript: str :return: Speech segment instance. :rtype: SpeechSegment """ @@ -59,7 +56,7 @@ class SpeechSegment(AudioSegment): :param bytes: Byte string containing audio samples. :type bytes: str :param transcript: Transcript text for the speech. - :type transript: basestring + :type transript: str :return: Speech segment instance. :rtype: Speech Segment """ @@ -100,7 +97,7 @@ class SpeechSegment(AudioSegment): the entire file into the memory which can be incredibly wasteful. :param filepath: Filepath or file object to audio file. - :type filepath: basestring|file + :type filepath: str|file :param start: Start time in seconds. If start is negative, it wraps around from the end. If not provided, this function reads from the very beginning. @@ -111,7 +108,7 @@ class SpeechSegment(AudioSegment): :type end: float :param transcript: Transcript text for the speech. 
if not provided, the defaults is an empty string. - :type transript: basestring + :type transript: str :return: SpeechSegment instance of the specified slice of the input speech file. :rtype: SpeechSegment @@ -139,6 +136,6 @@ class SpeechSegment(AudioSegment): """Return the transcript text. :return: Transcript text for the speech. - :rtype: basestring + :rtype: str """ return self._transcript diff --git a/data_utils/utility.py b/data_utils/utility.py index 7143f7ded..2377bc221 100644 --- a/data_utils/utility.py +++ b/data_utils/utility.py @@ -1,14 +1,10 @@ """Contains data helper functions.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import json import codecs import os import tarfile import time -from Queue import Queue from threading import Thread from multiprocessing import Process, Manager, Value from paddle.dataset.common import md5file @@ -21,7 +17,7 @@ def read_manifest(manifest_path, max_duration=float('inf'), min_duration=0.0): filtered out. :param manifest_path: Manifest file to load and parse. - :type manifest_path: basestring + :type manifest_path: str :param max_duration: Maximal duration in seconds for instance filter. :type max_duration: float :param min_duration: Minimal duration in seconds for instance filter. 
diff --git a/decoders/decoders_deprecated.py b/decoders/decoders_deprecated.py index b9248b58b..4298e0e1e 100644 --- a/decoders/decoders_deprecated.py +++ b/decoders/decoders_deprecated.py @@ -1,7 +1,4 @@ """Contains various CTC decoders.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function from itertools import groupby import numpy as np diff --git a/decoders/scorer_deprecated.py b/decoders/scorer_deprecated.py index c6a661030..266df559b 100644 --- a/decoders/scorer_deprecated.py +++ b/decoders/scorer_deprecated.py @@ -1,7 +1,4 @@ """External Scorer for Beam Search Decoder.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import os import kenlm @@ -20,7 +17,7 @@ class Scorer(object): count when beta = 0. :type beta: float :model_path: Path to load language model. - :type model_path: basestring + :type model_path: str """ def __init__(self, alpha, beta, model_path): @@ -53,7 +50,7 @@ class Scorer(object): and return the final one. :param sentence: The input sentence for evalutation - :type sentence: basestring + :type sentence: str :param log: Whether return the score in log representation. :type log: bool :return: Evaluation score, in the decimal or log. 
diff --git a/decoders/swig/_init_paths.py b/decoders/swig/_init_paths.py index ddabb535b..3bb2fd197 100644 --- a/decoders/swig/_init_paths.py +++ b/decoders/swig/_init_paths.py @@ -1,7 +1,4 @@ """Set up paths for DS2""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import os.path import sys diff --git a/decoders/swig/setup.py b/decoders/swig/setup.py index a4bb2e9da..c5a7c4ca6 100644 --- a/decoders/swig/setup.py +++ b/decoders/swig/setup.py @@ -1,7 +1,4 @@ """Script to build and install decoder package.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function from setuptools import setup, Extension, distutils import glob diff --git a/decoders/swig/setup.sh b/decoders/swig/setup.sh index eb5937186..73fa7aea7 100644 --- a/decoders/swig/setup.sh +++ b/decoders/swig/setup.sh @@ -21,4 +21,4 @@ if [ ! -d ThreadPool ]; then fi echo "Install decoders ..." -python setup.py install --num_processes 4 +python3 setup.py install --num_processes 4 diff --git a/decoders/swig_wrapper.py b/decoders/swig_wrapper.py index 21aed03c1..49215722d 100644 --- a/decoders/swig_wrapper.py +++ b/decoders/swig_wrapper.py @@ -1,7 +1,4 @@ """Wrapper for various CTC decoders in SWIG.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import swig_decoders @@ -16,7 +13,7 @@ class Scorer(swig_decoders.Scorer): count when beta = 0. :type beta: float :model_path: Path to load language model. - :type model_path: basestring + :type model_path: str """ def __init__(self, alpha, beta, model_path, vocabulary): @@ -33,7 +30,7 @@ def ctc_greedy_decoder(probs_seq, vocabulary): :param vocabulary: Vocabulary list. :type vocabulary: list :return: Decoding result string. 
- :rtype: basestring + :rtype: str """ result = swig_decoders.ctc_greedy_decoder(probs_seq.tolist(), vocabulary) return result.decode('utf-8') @@ -118,7 +115,7 @@ def ctc_beam_search_decoder_batch(probs_split, probs_split, vocabulary, beam_size, num_processes, cutoff_prob, cutoff_top_n, ext_scoring_func) batch_beam_results = [ - [(res[0], res[1].decode("utf-8")) for res in beam_results] + [(res[0], res[1]) for res in beam_results] for beam_results in batch_beam_results ] return batch_beam_results diff --git a/decoders/tests/test_decoders.py b/decoders/tests/test_decoders.py index d522b5efa..878417432 100644 --- a/decoders/tests/test_decoders.py +++ b/decoders/tests/test_decoders.py @@ -1,7 +1,4 @@ """Test decoders.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import unittest from decoders import decoders_deprecated as decoder diff --git a/deploy/_init_paths.py b/deploy/_init_paths.py index ddabb535b..3bb2fd197 100644 --- a/deploy/_init_paths.py +++ b/deploy/_init_paths.py @@ -1,7 +1,4 @@ """Set up paths for DS2""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import os.path import sys diff --git a/deploy/demo_client.py b/deploy/demo_client.py index 7f8869462..45a9d319a 100644 --- a/deploy/demo_client.py +++ b/deploy/demo_client.py @@ -56,7 +56,7 @@ def callback(in_data, frame_count, time_info, status): print('Speech[length=%d] Sent.' % len(sent)) # Receive data from the server and shut down received = sock.recv(1024) - print "Recognition Results: {}".format(received) + print("Recognition Results: {}".format(received)) sock.close() data_list = [] enable_trigger_record = True diff --git a/examples/aishell/run_data.sh b/examples/aishell/run_data.sh index 93ea6c291..fc002fcd2 100644 --- a/examples/aishell/run_data.sh +++ b/examples/aishell/run_data.sh @@ -3,7 +3,7 @@ cd ../.. 
> /dev/null # download data, generate manifests -PYTHONPATH=.:$PYTHONPATH python data/aishell/aishell.py \ +PYTHONPATH=.:$PYTHONPATH python3 data/aishell/aishell.py \ --manifest_prefix='data/aishell/manifest' \ --target_dir='./dataset/aishell' @@ -14,7 +14,7 @@ fi # build vocabulary -python tools/build_vocab.py \ +python3 tools/build_vocab.py \ --count_threshold=0 \ --vocab_path='data/aishell/vocab.txt' \ --manifest_paths 'data/aishell/manifest.train' 'data/aishell/manifest.dev' @@ -26,7 +26,7 @@ fi # compute mean and stddev for normalizer -python tools/compute_mean_std.py \ +python3 tools/compute_mean_std.py \ --manifest_path='data/aishell/manifest.train' \ --num_samples=2000 \ --specgram_type='linear' \ diff --git a/examples/aishell/run_infer.sh b/examples/aishell/run_infer.sh index 6683b770c..ef382e310 100644 --- a/examples/aishell/run_infer.sh +++ b/examples/aishell/run_infer.sh @@ -13,7 +13,7 @@ cd - > /dev/null # infer CUDA_VISIBLE_DEVICES=0 \ -python -u infer.py \ +python3 -u infer.py \ --num_samples=10 \ --beam_size=300 \ --num_proc_bsearch=8 \ diff --git a/examples/aishell/run_infer_golden.sh b/examples/aishell/run_infer_golden.sh index 56d3365d9..dabdc0c68 100644 --- a/examples/aishell/run_infer_golden.sh +++ b/examples/aishell/run_infer_golden.sh @@ -22,7 +22,7 @@ cd - > /dev/null # infer CUDA_VISIBLE_DEVICES=0 \ -python -u infer.py \ +python3 -u infer.py \ --num_samples=10 \ --beam_size=300 \ --num_proc_bsearch=8 \ diff --git a/examples/aishell/run_test.sh b/examples/aishell/run_test.sh index 2867444be..b56d164b9 100644 --- a/examples/aishell/run_test.sh +++ b/examples/aishell/run_test.sh @@ -13,7 +13,7 @@ cd - > /dev/null # evaluate model CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ -python -u test.py \ +python3 -u test.py \ --batch_size=128 \ --beam_size=300 \ --num_proc_bsearch=8 \ diff --git a/examples/aishell/run_test_golden.sh b/examples/aishell/run_test_golden.sh index 799f382f5..2f79cc647 100644 --- a/examples/aishell/run_test_golden.sh +++ 
b/examples/aishell/run_test_golden.sh @@ -22,7 +22,7 @@ cd - > /dev/null # evaluate model CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ -python -u test.py \ +python3 -u test.py \ --batch_size=128 \ --beam_size=300 \ --num_proc_bsearch=8 \ diff --git a/examples/aishell/run_train.sh b/examples/aishell/run_train.sh index 335473fcf..889e19047 100644 --- a/examples/aishell/run_train.sh +++ b/examples/aishell/run_train.sh @@ -6,7 +6,7 @@ cd ../.. > /dev/null # if you wish to resume from an exists model, uncomment --init_from_pretrained_model export FLAGS_sync_nccl_allreduce=0 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ -python -u train.py \ +python3 -u train.py \ --batch_size=64 \ --num_epoch=50 \ --num_conv_layers=2 \ diff --git a/examples/baidu_en8k/run_infer_golden.sh b/examples/baidu_en8k/run_infer_golden.sh index 2f3f0acf7..85fea5f47 100644 --- a/examples/baidu_en8k/run_infer_golden.sh +++ b/examples/baidu_en8k/run_infer_golden.sh @@ -22,7 +22,7 @@ cd - > /dev/null # infer CUDA_VISIBLE_DEVICES=0 \ -python -u infer.py \ +python3 -u infer.py \ --num_samples=10 \ --beam_size=500 \ --num_proc_bsearch=5 \ diff --git a/examples/baidu_en8k/run_test_golden.sh b/examples/baidu_en8k/run_test_golden.sh index 612e71a01..1ed633624 100644 --- a/examples/baidu_en8k/run_test_golden.sh +++ b/examples/baidu_en8k/run_test_golden.sh @@ -22,7 +22,7 @@ cd - > /dev/null # evaluate model CUDA_VISIBLE_DEVICES=0,1,2,3 \ -python -u test.py \ +python3 -u test.py \ --batch_size=128 \ --beam_size=500 \ --num_proc_bsearch=8 \ diff --git a/examples/deploy_demo/run_demo_client.sh b/examples/deploy_demo/run_demo_client.sh index 6ae4ddcab..7c2609511 100644 --- a/examples/deploy_demo/run_demo_client.sh +++ b/examples/deploy_demo/run_demo_client.sh @@ -4,7 +4,7 @@ cd ../.. 
> /dev/null # start demo client CUDA_VISIBLE_DEVICES=0 \ -python -u deploy/demo_client.py \ +python3 -u deploy/demo_client.py \ --host_ip='localhost' \ --host_port=8086 \ diff --git a/examples/deploy_demo/run_english_demo_server.sh b/examples/deploy_demo/run_english_demo_server.sh index d67559f33..5d86ab6e1 100644 --- a/examples/deploy_demo/run_english_demo_server.sh +++ b/examples/deploy_demo/run_english_demo_server.sh @@ -23,7 +23,7 @@ cd - > /dev/null # start demo server CUDA_VISIBLE_DEVICES=0 \ -python -u deploy/demo_server.py \ +python3 -u deploy/demo_server.py \ --host_ip='localhost' \ --host_port=8086 \ --num_conv_layers=2 \ diff --git a/examples/librispeech/run_data.sh b/examples/librispeech/run_data.sh index e4db1ac9b..788b415de 100644 --- a/examples/librispeech/run_data.sh +++ b/examples/librispeech/run_data.sh @@ -3,7 +3,7 @@ cd ../.. > /dev/null # download data, generate manifests -PYTHONPATH=.:$PYTHONPATH python data/librispeech/librispeech.py \ +PYTHONPATH=.:$PYTHONPATH python3 data/librispeech/librispeech.py \ --manifest_prefix='data/librispeech/manifest' \ --target_dir='./dataset/librispeech' \ --full_download='True' @@ -17,7 +17,7 @@ cat data/librispeech/manifest.train-* | shuf > data/librispeech/manifest.train # build vocabulary -python tools/build_vocab.py \ +python3 tools/build_vocab.py \ --count_threshold=0 \ --vocab_path='data/librispeech/vocab.txt' \ --manifest_paths='data/librispeech/manifest.train' @@ -29,7 +29,7 @@ fi # compute mean and stddev for normalizer -python tools/compute_mean_std.py \ +python3 tools/compute_mean_std.py \ --manifest_path='data/librispeech/manifest.train' \ --num_samples=2000 \ --specgram_type='linear' \ diff --git a/examples/librispeech/run_infer.sh b/examples/librispeech/run_infer.sh index 91d8ff2eb..2452b45e3 100644 --- a/examples/librispeech/run_infer.sh +++ b/examples/librispeech/run_infer.sh @@ -13,7 +13,7 @@ cd - > /dev/null # infer CUDA_VISIBLE_DEVICES=0 \ -python -u infer.py \ +python3 -u infer.py \ 
--num_samples=10 \ --beam_size=500 \ --num_proc_bsearch=8 \ diff --git a/examples/librispeech/run_infer_golden.sh b/examples/librispeech/run_infer_golden.sh index eb8121294..6004c6af6 100644 --- a/examples/librispeech/run_infer_golden.sh +++ b/examples/librispeech/run_infer_golden.sh @@ -22,7 +22,7 @@ cd - > /dev/null # infer CUDA_VISIBLE_DEVICES=0 \ -python -u infer.py \ +python3 -u infer.py \ --num_samples=10 \ --beam_size=500 \ --num_proc_bsearch=8 \ diff --git a/examples/librispeech/run_test.sh b/examples/librispeech/run_test.sh index 9eebbbf24..e8e65c39e 100644 --- a/examples/librispeech/run_test.sh +++ b/examples/librispeech/run_test.sh @@ -13,7 +13,7 @@ cd - > /dev/null # evaluate model CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ -python -u test.py \ +python3 -u test.py \ --batch_size=128 \ --beam_size=500 \ --num_proc_bsearch=8 \ diff --git a/examples/librispeech/run_test_golden.sh b/examples/librispeech/run_test_golden.sh index abd895925..1db36758a 100644 --- a/examples/librispeech/run_test_golden.sh +++ b/examples/librispeech/run_test_golden.sh @@ -22,7 +22,7 @@ cd - > /dev/null # evaluate model CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ -python -u test.py \ +python3 -u test.py \ --batch_size=128 \ --beam_size=500 \ --num_proc_bsearch=8 \ diff --git a/examples/librispeech/run_train.sh b/examples/librispeech/run_train.sh index a568bf221..6b8982d78 100644 --- a/examples/librispeech/run_train.sh +++ b/examples/librispeech/run_train.sh @@ -7,7 +7,7 @@ cd ../.. > /dev/null export FLAGS_sync_nccl_allreduce=0 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ -python -u train.py \ +python3 -u train.py \ --batch_size=20 \ --num_epoch=50 \ --num_conv_layers=2 \ diff --git a/examples/librispeech/run_tune.sh b/examples/librispeech/run_tune.sh index af6e9dafd..834f060fa 100644 --- a/examples/librispeech/run_tune.sh +++ b/examples/librispeech/run_tune.sh @@ -4,7 +4,7 @@ cd ../.. 
> /dev/null # grid-search for hyper-parameters in language model CUDA_VISIBLE_DEVICES=0,1,2,3 \ -python -u tools/tune.py \ +python3 -u tools/tune.py \ --num_batches=-1 \ --batch_size=128 \ --beam_size=500 \ diff --git a/examples/tiny/run_data.sh b/examples/tiny/run_data.sh index 1428194d3..dd75ddadc 100644 --- a/examples/tiny/run_data.sh +++ b/examples/tiny/run_data.sh @@ -8,7 +8,7 @@ if [ ! -e data/tiny ]; then fi # download data, generate manifests -PYTHONPATH=.:$PYTHONPATH python data/librispeech/librispeech.py \ +PYTHONPATH=.:$PYTHONPATH python3 data/librispeech/librispeech.py \ --manifest_prefix='data/tiny/manifest' \ --target_dir='./dataset/librispeech' \ --full_download='False' @@ -21,7 +21,7 @@ fi head -n 64 data/tiny/manifest.dev-clean > data/tiny/manifest.tiny # build vocabulary -python tools/build_vocab.py \ +python3 tools/build_vocab.py \ --count_threshold=0 \ --vocab_path='data/tiny/vocab.txt' \ --manifest_paths='data/tiny/manifest.tiny' @@ -33,7 +33,7 @@ fi # compute mean and stddev for normalizer -python tools/compute_mean_std.py \ +python3 tools/compute_mean_std.py \ --manifest_path='data/tiny/manifest.tiny' \ --num_samples=64 \ --specgram_type='linear' \ diff --git a/examples/tiny/run_infer.sh b/examples/tiny/run_infer.sh index bded0e7b6..d88f4526a 100644 --- a/examples/tiny/run_infer.sh +++ b/examples/tiny/run_infer.sh @@ -13,7 +13,7 @@ cd - > /dev/null # infer CUDA_VISIBLE_DEVICES=0 \ -python -u infer.py \ +python3 -u infer.py \ --num_samples=10 \ --beam_size=500 \ --num_proc_bsearch=8 \ diff --git a/examples/tiny/run_infer_golden.sh b/examples/tiny/run_infer_golden.sh index 33662622d..d18c21f54 100644 --- a/examples/tiny/run_infer_golden.sh +++ b/examples/tiny/run_infer_golden.sh @@ -22,7 +22,7 @@ cd - > /dev/null # infer CUDA_VISIBLE_DEVICES=0 \ -python -u infer.py \ +python3 -u infer.py \ --num_samples=10 \ --beam_size=500 \ --num_proc_bsearch=8 \ diff --git a/examples/tiny/run_test.sh b/examples/tiny/run_test.sh index 1dfc65e19..81eafe236 
100644 --- a/examples/tiny/run_test.sh +++ b/examples/tiny/run_test.sh @@ -13,7 +13,7 @@ cd - > /dev/null # evaluate model CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ -python -u test.py \ +python3 -u test.py \ --batch_size=128 \ --beam_size=500 \ --num_proc_bsearch=8 \ diff --git a/examples/tiny/run_test_golden.sh b/examples/tiny/run_test_golden.sh index 542552657..d82865f42 100644 --- a/examples/tiny/run_test_golden.sh +++ b/examples/tiny/run_test_golden.sh @@ -22,7 +22,7 @@ cd - > /dev/null # evaluate model CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ -python -u test.py \ +python3 -u test.py \ --batch_size=128 \ --beam_size=500 \ --num_proc_bsearch=8 \ diff --git a/examples/tiny/run_train.sh b/examples/tiny/run_train.sh index 95ccd2bc6..fe5b62030 100644 --- a/examples/tiny/run_train.sh +++ b/examples/tiny/run_train.sh @@ -6,7 +6,7 @@ cd ../.. > /dev/null # if you wish to resume from an exists model, uncomment --init_from_pretrained_model export FLAGS_sync_nccl_allreduce=0 CUDA_VISIBLE_DEVICES=0,1,2,3 \ -python -u train.py \ +python3 -u train.py \ --batch_size=4 \ --num_epoch=20 \ --num_conv_layers=2 \ diff --git a/examples/tiny/run_tune.sh b/examples/tiny/run_tune.sh index 87bcb67b1..bec71111a 100644 --- a/examples/tiny/run_tune.sh +++ b/examples/tiny/run_tune.sh @@ -4,7 +4,7 @@ cd ../.. 
> /dev/null # grid-search for hyper-parameters in language model CUDA_VISIBLE_DEVICES=0,1,2,3 \ -python -u tools/tune.py \ +python3 -u tools/tune.py \ --num_batches=-1 \ --batch_size=128 \ --beam_size=500 \ diff --git a/infer.py b/infer.py index 9e6a8c3b2..910140282 100644 --- a/infer.py +++ b/infer.py @@ -1,12 +1,6 @@ """Inferer for DeepSpeech2 model.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import sys -reload(sys) -sys.setdefaultencoding('utf-8') - import argparse import functools import paddle.fluid as fluid @@ -104,7 +98,7 @@ def infer(): init_from_pretrained_model=args.model_path) # decoders only accept string encoded in utf-8 - vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list] + vocab_list = [chars for chars in data_generator.vocab_list] if args.decoding_method == "ctc_greedy": ds2_model.logger.info("start inference ...") diff --git a/model_utils/model.py b/model_utils/model.py index 7cb26f39f..68b963cdf 100644 --- a/model_utils/model.py +++ b/model_utils/model.py @@ -1,7 +1,4 @@ """Contains DeepSpeech2 model.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import sys import os @@ -10,7 +7,6 @@ import logging import gzip import copy import inspect -import cPickle as pickle import collections import multiprocessing import numpy as np @@ -445,7 +441,7 @@ class DeepSpeech2Model(object): :param vocab_list: List of tokens in the vocabulary, for decoding. :type vocab_list: list :return: List of transcription texts. - :rtype: List of basestring + :rtype: List of str """ results = [] for i, probs in enumerate(probs_split): @@ -466,7 +462,7 @@ class DeepSpeech2Model(object): empty, the external scorer will be set to None, and the decoding method will be pure beam search without scorer. 
- :type language_model_path: basestring|None + :type language_model_path: str|None :param vocab_list: List of tokens in the vocabulary, for decoding. :type vocab_list: list """ @@ -513,7 +509,7 @@ class DeepSpeech2Model(object): :param num_processes: Number of processes (CPU) for decoder. :type num_processes: int :return: List of transcription texts. - :rtype: List of basestring + :rtype: List of str """ if self._ext_scorer != None: self._ext_scorer.reset_params(beam_alpha, beam_beta) diff --git a/model_utils/network.py b/model_utils/network.py index e27ff02cf..6e29eb6e4 100644 --- a/model_utils/network.py +++ b/model_utils/network.py @@ -1,6 +1,3 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import collections import paddle.fluid as fluid diff --git a/setup.sh b/setup.sh index ec5e47ec8..8d471cbf6 100644 --- a/setup.sh +++ b/setup.sh @@ -2,7 +2,7 @@ # install python dependencies if [ -f "requirements.txt" ]; then - pip install -r requirements.txt + pip3 install -r requirements.txt fi if [ $? != 0 ]; then echo "Install python dependencies failed !!!" @@ -10,7 +10,7 @@ if [ $? != 0 ]; then fi # install package libsndfile -python -c "import soundfile" +python3 -c "import soundfile" if [ $? != 0 ]; then echo "Install package libsndfile into default system path." wget "http://www.mega-nerd.com/libsndfile/files/libsndfile-1.0.28.tar.gz" @@ -27,7 +27,7 @@ if [ $? != 0 ]; then fi # install decoders -python -c "import pkg_resources; pkg_resources.require(\"swig_decoders==1.1\")" +python3 -c "import pkg_resources; pkg_resources.require(\"swig_decoders==1.1\")" if [ $? 
!= 0 ]; then cd decoders/swig > /dev/null sh setup.sh diff --git a/test.py b/test.py index ec9fdfda6..053a43acd 100644 --- a/test.py +++ b/test.py @@ -1,7 +1,4 @@ """Evaluation for DeepSpeech2 model.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import argparse import functools @@ -99,7 +96,7 @@ def evaluate(): init_from_pretrained_model=args.model_path) # decoders only accept string encoded in utf-8 - vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list] + vocab_list = [chars for chars in data_generator.vocab_list] if args.decoding_method == "ctc_beam_search": ds2_model.init_ext_scorer(args.alpha, args.beta, args.lang_model_path, diff --git a/tools/_init_paths.py b/tools/_init_paths.py index ddabb535b..3bb2fd197 100644 --- a/tools/_init_paths.py +++ b/tools/_init_paths.py @@ -1,7 +1,4 @@ """Set up paths for DS2""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import os.path import sys diff --git a/tools/build_vocab.py b/tools/build_vocab.py index e167e92ad..13ab843c4 100644 --- a/tools/build_vocab.py +++ b/tools/build_vocab.py @@ -2,9 +2,6 @@ Each item in vocabulary file is a character. 
""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import argparse import functools diff --git a/tools/compute_mean_std.py b/tools/compute_mean_std.py index 11aa856d7..203f83a2c 100644 --- a/tools/compute_mean_std.py +++ b/tools/compute_mean_std.py @@ -1,7 +1,4 @@ """Compute mean and std for feature normalizer, and save to file.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import argparse import functools diff --git a/tools/tune.py b/tools/tune.py index 7996e4d53..74007f55a 100644 --- a/tools/tune.py +++ b/tools/tune.py @@ -1,7 +1,4 @@ """Beam search parameters tuning for DeepSpeech2 model.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import sys import os @@ -107,7 +104,7 @@ def tune(): share_rnn_weights=args.share_rnn_weights) # decoders only accept string encoded in utf-8 - vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list] + vocab_list = [chars for chars in data_generator.vocab_list] errors_func = char_errors if args.error_rate_type == 'cer' else word_errors # create grid for search cand_alphas = np.linspace(args.alpha_from, args.alpha_to, args.num_alphas) diff --git a/train.py b/train.py index 5dae4ccdd..caa7c266e 100644 --- a/train.py +++ b/train.py @@ -1,7 +1,4 @@ """Trainer for DeepSpeech2 model.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import argparse import functools diff --git a/utils/error_rate.py b/utils/error_rate.py index d84d9f875..628a84cb4 100644 --- a/utils/error_rate.py +++ b/utils/error_rate.py @@ -1,10 +1,6 @@ -# -*- coding: utf-8 -*- """This module provides functions to calculate error rate in different level. e.g. wer for word-level, cer for char-level. 
""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import numpy as np @@ -61,9 +57,9 @@ def word_errors(reference, hypothesis, ignore_case=False, delimiter=' '): hypothesis sequence in word-level. :param reference: The reference sentence. - :type reference: basestring + :type reference: str :param hypothesis: The hypothesis sentence. - :type hypothesis: basestring + :type hypothesis: str :param ignore_case: Whether case-sensitive or not. :type ignore_case: bool :param delimiter: Delimiter of input sentences. @@ -75,8 +71,8 @@ def word_errors(reference, hypothesis, ignore_case=False, delimiter=' '): reference = reference.lower() hypothesis = hypothesis.lower() - ref_words = filter(None, reference.split(delimiter)) - hyp_words = filter(None, hypothesis.split(delimiter)) + ref_words = list(filter(None, reference.split(delimiter))) + hyp_words = list(filter(None, hypothesis.split(delimiter))) edit_distance = _levenshtein_distance(ref_words, hyp_words) return float(edit_distance), len(ref_words) @@ -87,9 +83,9 @@ def char_errors(reference, hypothesis, ignore_case=False, remove_space=False): hypothesis sequence in char-level. :param reference: The reference sentence. - :type reference: basestring + :type reference: str :param hypothesis: The hypothesis sentence. - :type hypothesis: basestring + :type hypothesis: str :param ignore_case: Whether case-sensitive or not. 
:type ignore_case: bool :param remove_space: Whether remove internal space characters @@ -105,8 +101,8 @@ def char_errors(reference, hypothesis, ignore_case=False, remove_space=False): if remove_space == True: join_char = '' - reference = join_char.join(filter(None, reference.split(' '))) - hypothesis = join_char.join(filter(None, hypothesis.split(' '))) + reference = join_char.join(list(filter(None, reference.split(' ')))) + hypothesis = join_char.join(list(filter(None, hypothesis.split(' ')))) edit_distance = _levenshtein_distance(reference, hypothesis) return float(edit_distance), len(reference) @@ -132,9 +128,9 @@ def wer(reference, hypothesis, ignore_case=False, delimiter=' '): that empty items will be removed when splitting sentences by delimiter. :param reference: The reference sentence. - :type reference: basestring + :type reference: str :param hypothesis: The hypothesis sentence. - :type hypothesis: basestring + :type hypothesis: str :param ignore_case: Whether case-sensitive or not. :type ignore_case: bool :param delimiter: Delimiter of input sentences. @@ -175,9 +171,9 @@ def cer(reference, hypothesis, ignore_case=False, remove_space=False): characters in a sentence will be replaced by one space character. :param reference: The reference sentence. - :type reference: basestring + :type reference: str :param hypothesis: The hypothesis sentence. - :type hypothesis: basestring + :type hypothesis: str :param ignore_case: Whether case-sensitive or not. 
:type ignore_case: bool :param remove_space: Whether remove internal space characters diff --git a/utils/tests/test_error_rate.py b/utils/tests/test_error_rate.py index d6bc7442e..efa04b827 100644 --- a/utils/tests/test_error_rate.py +++ b/utils/tests/test_error_rate.py @@ -1,8 +1,4 @@ -# -*- coding: utf-8 -*- """Test error rate.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import unittest from utils import error_rate diff --git a/utils/utility.py b/utils/utility.py index 2e489ade6..543f3ebce 100644 --- a/utils/utility.py +++ b/utils/utility.py @@ -1,7 +1,4 @@ """Contains common utility functions.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import distutils.util @@ -22,7 +19,7 @@ def print_arguments(args): :type args: argparse.Namespace """ print("----------- Configuration Arguments -----------") - for arg, value in sorted(vars(args).iteritems()): + for arg, value in sorted(vars(args).items()): print("%s: %s" % (arg, value)) print("------------------------------------------------")