Merge pull request #1616 from zh794390558/spx

[speechx] more comment of code
pull/1619/head
YangZhou 3 years ago committed by GitHub
commit b75268c588
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -20,12 +20,12 @@ of each audio file in the data set.
"""
import argparse
import codecs
import distutils.util
import io
import json
import os
from multiprocessing.pool import Pool
import distutils.util
import soundfile
from utils.utility import download

@ -16,8 +16,8 @@ import os
import librosa
import numpy as np
from config import DEFAULT_TABLE
from logs import LOGGER
from paddlespeech.cli import VectorExecutor
vector_executor = VectorExecutor()

@ -26,9 +26,8 @@ def get_audios(path):
"""
supported_formats = [".wav", ".mp3", ".ogg", ".flac", ".m4a"]
return [
item
for sublist in [[os.path.join(dir, file) for file in files]
for dir, _, files in list(os.walk(path))]
item for sublist in [[os.path.join(dir, file) for file in files]
for dir, _, files in list(os.walk(path))]
for item in sublist if os.path.splitext(item)[1] in supported_formats
]

@ -24,11 +24,11 @@ import json
import logging
import os
import xml.etree.ElementTree as et
from distutils.util import strtobool
from ami_splits import get_AMI_split
from dataio import load_pkl
from dataio import save_pkl
from distutils.util import strtobool
logger = logging.getLogger(__name__)
SAMPLERATE = 16000

@ -17,10 +17,10 @@ import logging
import os
import random
import sys
from distutils.util import strtobool
import configargparse
import numpy as np
from distutils.util import strtobool
def get_parser():

@ -14,9 +14,9 @@
# Modified from espnet(https://github.com/espnet/espnet)
import sys
from collections.abc import Sequence
from distutils.util import strtobool as dist_strtobool
import numpy
from distutils.util import strtobool as dist_strtobool
def strtobool(x):

@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains common utility functions."""
import distutils.util
import math
import os
import random
@ -21,6 +20,7 @@ from contextlib import contextmanager
from pprint import pformat
from typing import List
import distutils.util
import numpy as np
import paddle
import soundfile

@ -18,11 +18,11 @@ A few sklearn functions are modified in this script as per requirement.
"""
import argparse
import warnings
from distutils.util import strtobool
import numpy as np
import scipy
import sklearn
from distutils.util import strtobool
from scipy import sparse
from scipy.sparse.csgraph import connected_components
from scipy.sparse.csgraph import laplacian as csgraph_laplacian

@ -5,7 +5,7 @@
We develop under:
* docker - registry.baidubce.com/paddlepaddle/paddle:2.1.1-gpu-cuda10.2-cudnn7
* os - Ubuntu 16.04.7 LTS
* gcc/g++ - 8.2.0
* **gcc/g++/gfortran - 8.2.0**
* cmake - 3.16.0
> We make sure everything works fine under docker, and recommend using it for development and deployment.
@ -29,6 +29,8 @@ nvidia-docker run --privileged --net=host --ipc=host -it --rm -v $PWD:/workspac
2. Build `speechx` and `examples`.
> Do not source venv.
```
pushd /path/to/speechx
./build.sh

@ -2,8 +2,7 @@
# the build script had verified in the paddlepaddle docker image.
# please follow the instruction below to install PaddlePaddle image.
# https://www.paddlepaddle.org.cn/documentation/docs/zh/install/docker/linux-docker.html
# https://www.paddlepaddle.org.cn/documentation/docs/zh/install/docker/linux-docker.html
boost_SOURCE_DIR=$PWD/fc_patch/boost-src
if [ ! -d ${boost_SOURCE_DIR} ]; then wget -c https://boostorg.jfrog.io/artifactory/main/release/1.75.0/source/boost_1_75_0.tar.gz
tar xzfv boost_1_75_0.tar.gz
@ -23,6 +22,6 @@ cd build
cmake .. -DBOOST_ROOT:STRING=${boost_SOURCE_DIR}
#cmake ..
make -j1
make -j10
cd -

@ -0,0 +1,145 @@
#.rst:
# FindGFortranLibs
# --------
# https://github.com/Argonne-National-Laboratory/PIPS/blob/master/cmake/Modules/FindGFortranLibs.cmake
# https://enccs.github.io/cmake-workshop/cxx-fortran/
#
# Find gcc Fortran compiler & library paths
#
# The module defines the following variables:
#
# ::
#
#
# GFORTRANLIBS_FOUND - true if system has gfortran
# LIBGFORTRAN_LIBRARIES - path to libgfortran
# LIBQUADMATH_LIBRARIES - path to libquadmath
# GFORTRAN_LIBRARIES_DIR - directory containing libgfortran, libquadmath
# GFORTRAN_INCLUDE_DIR - directory containing gfortran/gcc headers
# LIBGOMP_LIBRARIES - path to libgomp
# LIBGOMP_INCLUDE_DIR - directory containing omp.h header
# GFORTRAN_VERSION_STRING - version of gfortran found
#
# Decide whether this module should print its progress messages.
# find_package(GFortranLibs ... QUIET) sets GFortranLibs_FIND_QUIETLY, so
# that is the variable to honor. LIBIOMP_FIND_QUIETLY is still consulted
# as a fallback so existing callers that set it keep working.
if(GFortranLibs_FIND_QUIETLY OR LIBIOMP_FIND_QUIETLY)
  set(CMAKE_REQUIRED_QUIET TRUE)
else()
  set(CMAKE_REQUIRED_QUIET ${LIBIOMP_FIND_QUIETLY})
endif()
if(NOT CMAKE_REQUIRED_QUIET)
  message(STATUS "Looking for gfortran related libraries...")
endif()
# Enable Fortran so that CMAKE_Fortran_COMPILER and
# CMAKE_Fortran_COMPILER_ID are available below.
enable_language(Fortran)

if(CMAKE_Fortran_COMPILER_ID MATCHES "GNU")
  # Basically, call "gfortran -v" to dump compiler info to the string
  # GFORTRAN_VERBOSE_STR, which will be used to get necessary paths.
  # The report is captured from the process' stderr (ERROR_VARIABLE);
  # the exit status is stored in FLAG but is not inspected here.
  message(STATUS "Extracting library and header information by calling 'gfortran -v'...")
  execute_process(COMMAND "${CMAKE_Fortran_COMPILER}" "-v" ERROR_VARIABLE
    GFORTRAN_VERBOSE_STR RESULT_VARIABLE FLAG)

  # For debugging
  message(STATUS "'gfortran -v' returned:")
  message(STATUS "${GFORTRAN_VERBOSE_STR}")

  # Detect gfortran version: first isolate "gcc version X.Y.Z", then keep
  # only the version token.
  string(REGEX MATCH "gcc version [^\t\n ]+" GFORTRAN_VER_STR "${GFORTRAN_VERBOSE_STR}")
  string(REGEX REPLACE "gcc version ([^\t\n ]+)" "\\1" GFORTRAN_VERSION_STRING "${GFORTRAN_VER_STR}")
  message(STATUS "Detected gfortran version ${GFORTRAN_VERSION_STRING}")
  unset(GFORTRAN_VER_STR)

  # NOTE(review): these two helper variables are not referenced anywhere
  # else in this module; they are kept in case an including script reads
  # them.
  set(MATCH_REGEX "[^\t\n ]+[\t\n ]+")
  set(REPLACE_REGEX "([^\t\n ]+)")

  # Find architecture for compiler (the "Target: <triplet>" line)
  string(REGEX MATCH "Target: [^\t\n ]+"
    GFORTRAN_ARCH_STR "${GFORTRAN_VERBOSE_STR}")
  message(STATUS "Architecture string: ${GFORTRAN_ARCH_STR}")
  string(REGEX REPLACE "Target: ([^\t\n ]+)" "\\1"
    GFORTRAN_ARCH "${GFORTRAN_ARCH_STR}")
  message(STATUS "Detected gfortran architecture: ${GFORTRAN_ARCH}")
  unset(GFORTRAN_ARCH_STR)

  # Find install prefix, if it exists; if not, use default.
  # Only the "--prefix=<path>" token itself is matched (no trailing
  # whitespace), so the REGEX REPLACE below yields a clean path with no
  # trailing blank, and an option at the very end of the string still
  # matches.
  string(REGEX MATCH "--prefix=[^\t\n ]+"
    GFORTRAN_PREFIX_STR "${GFORTRAN_VERBOSE_STR}")
  if(NOT GFORTRAN_PREFIX_STR)
    message(STATUS "Detected default gfortran prefix")
    set(GFORTRAN_PREFIX_DIR "/usr/local") # default prefix for gcc install
  else()
    string(REGEX REPLACE "--prefix=([^\t\n ]+)" "\\1"
      GFORTRAN_PREFIX_DIR "${GFORTRAN_PREFIX_STR}")
  endif()
  message(STATUS "Detected gfortran prefix: ${GFORTRAN_PREFIX_DIR}")
  unset(GFORTRAN_PREFIX_STR)

  # Find install exec-prefix, if it exists; if not, fall back to the
  # prefix detected above.
  # string(REGEX MATCH <regex> <out-var> <input>...): the output variable
  # must directly follow the regex, otherwise GFORTRAN_EXEC_PREFIX_STR is
  # never populated and the default branch is always taken.
  string(REGEX MATCH "--exec-prefix=[^\t\n ]+"
    GFORTRAN_EXEC_PREFIX_STR "${GFORTRAN_VERBOSE_STR}")
  if(NOT GFORTRAN_EXEC_PREFIX_STR)
    message(STATUS "Detected default gfortran exec-prefix")
    set(GFORTRAN_EXEC_PREFIX_DIR "${GFORTRAN_PREFIX_DIR}")
  else()
    string(REGEX REPLACE "--exec-prefix=([^\t\n ]+)" "\\1"
      GFORTRAN_EXEC_PREFIX_DIR "${GFORTRAN_EXEC_PREFIX_STR}")
  endif()
  message(STATUS "Detected gfortran exec-prefix: ${GFORTRAN_EXEC_PREFIX_DIR}")
  unset(GFORTRAN_EXEC_PREFIX_STR)

  # Find library directory and include directory, if library directory specified
  string(REGEX MATCH "--libdir=[^\t\n ]+"
    GFORTRAN_LIB_DIR_STR "${GFORTRAN_VERBOSE_STR}")
  if(NOT GFORTRAN_LIB_DIR_STR)
    # No --libdir: derive <exec-prefix>/lib/gcc/<arch>/<version>[/include].
    message(STATUS "Found --libdir flag -- not found")
    message(STATUS "Using default gfortran library & include directory paths")
    set(GFORTRAN_LIBRARIES_DIR
      "${GFORTRAN_EXEC_PREFIX_DIR}/lib/gcc/${GFORTRAN_ARCH}/${GFORTRAN_VERSION_STRING}")
    string(CONCAT GFORTRAN_INCLUDE_DIR "${GFORTRAN_LIBRARIES_DIR}" "/include")
  else()
    # --libdir given: libraries live there, headers under
    # <libdir>/gcc/<arch>/<version>/include.
    message(STATUS "Found --libdir flag -- yes")
    string(REGEX REPLACE "--libdir=([^\t\n ]+)" "\\1"
      GFORTRAN_LIBRARIES_DIR "${GFORTRAN_LIB_DIR_STR}")
    string(CONCAT GFORTRAN_INCLUDE_DIR "${GFORTRAN_LIBRARIES_DIR}" "/gcc/" "${GFORTRAN_ARCH}" "/" "${GFORTRAN_VERSION_STRING}" "/include")
  endif()
  message(STATUS "gfortran libraries path: ${GFORTRAN_LIBRARIES_DIR}")
  message(STATUS "gfortran include path dir: ${GFORTRAN_INCLUDE_DIR}")
  unset(GFORTRAN_LIB_DIR_STR)

  # There are lots of other build options for gcc & gfortran. For now, the
  # options implemented above should cover a lot of common use cases.

  # Clean up by deleting the output string from "gfortran -v"
  unset(GFORTRAN_VERBOSE_STR)

  # Find paths for libgfortran, libquadmath, libgomp
  # libgomp needed for OpenMP support without Clang
  find_library(LIBGFORTRAN_LIBRARIES NAMES gfortran libgfortran
    HINTS ${GFORTRAN_LIBRARIES_DIR})
  find_library(LIBQUADMATH_LIBRARIES NAMES quadmath libquadmath
    HINTS ${GFORTRAN_LIBRARIES_DIR})
  find_library(LIBGOMP_LIBRARIES NAMES gomp libgomp
    HINTS ${GFORTRAN_LIBRARIES_DIR})

  # Find OpenMP headers
  find_path(LIBGOMP_INCLUDE_DIR NAMES omp.h HINTS ${GFORTRAN_INCLUDE_DIR})
else()
  message(STATUS "CMAKE_Fortran_COMPILER_ID does not match 'GNU'!")
endif()
# Hand the detected variables to CMake's standard find-module helper,
# which validates the required ones and sets the *_FOUND result.
include(FindPackageHandleStandardArgs)

# Required: libgfortran, libquadmath, path for gfortran libraries
# Optional: libgomp, path for OpenMP headers, path for gcc/gfortran headers
find_package_handle_standard_args(
  GFortranLibs
  REQUIRED_VARS
    LIBGFORTRAN_LIBRARIES
    LIBQUADMATH_LIBRARIES
    GFORTRAN_LIBRARIES_DIR
  VERSION_VAR
    GFORTRAN_VERSION_STRING)

# Summarize the outcome for the configure log.
if(NOT GFORTRANLIBS_FOUND)
  message(STATUS "Looking for gfortran libraries -- not found")
else()
  message(STATUS "Looking for gfortran libraries -- found")
  message(STATUS "gfortran version: ${GFORTRAN_VERSION_STRING}")
endif()

# Keep the cache entries out of the default (non-advanced) GUI view.
mark_as_advanced(
  LIBGFORTRAN_LIBRARIES
  LIBQUADMATH_LIBRARIES
  LIBGOMP_LIBRARIES
  LIBGOMP_INCLUDE_DIR
  GFORTRAN_LIBRARIES_DIR
  GFORTRAN_INCLUDE_DIR)
# FindGFortranLIBS.cmake ends here

@ -7,6 +7,27 @@ set(OpenBLAS_PREFIX ${fc_patch}/OpenBLAS-prefix)
# OPENBLAS https://github.com/lattice/quda/blob/develop/CMakeLists.txt#L575
# ######################################################################################################################
enable_language(Fortran)
include(FortranCInterface)
# # Clang doesn't have a Fortran compiler in its suite (yet),
# # so detect libraries for gfortran; we need equivalents to
# # libgfortran and libquadmath, which are implicitly
# # linked by flags in CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES
# include(FindGFortranLibs REQUIRED)
# # Add directory containing libgfortran and libquadmath to
# # linker. Should also contain libgomp, if not using
# # Intel OpenMP runtime
# link_directories(${GFORTRAN_LIBRARIES_DIR})
# # gfortran dir in the docker.
# link_directories(/usr/local/gcc-8.2/lib64)
# # if you are working with C and Fortran
# FortranCInterface_VERIFY()
# # if you are working with C++ and Fortran
# FortranCInterface_VERIFY(CXX)
#TODO: switch to CPM
include(GNUInstallDirs)
ExternalProject_Add(

@ -1,13 +1,14 @@
include(FetchContent)
set(openfst_PREFIX_DIR ${fc_patch}/openfst)
set(openfst_SOURCE_DIR ${fc_patch}/openfst-src)
set(openfst_BINARY_DIR ${fc_patch}/openfst-build)
ExternalProject_Add(openfst
URL https://github.com/mjansche/openfst/archive/refs/tags/1.7.2.zip
URL_HASH SHA256=ffc56931025579a8af3515741c0f3b0fc3a854c023421472c07ca0c6389c75e6
# #PREFIX ${openfst_PREFIX_DIR}
# SOURCE_DIR ${openfst_SOURCE_DIR}
# BINARY_DIR ${openfst_BINARY_DIR}
PREFIX ${openfst_PREFIX_DIR}
SOURCE_DIR ${openfst_SOURCE_DIR}
BINARY_DIR ${openfst_BINARY_DIR}
CONFIGURE_COMMAND ${openfst_SOURCE_DIR}/configure --prefix=${openfst_PREFIX_DIR}
"CPPFLAGS=-I${gflags_BINARY_DIR}/include -I${glog_SOURCE_DIR}/src -I${glog_BINARY_DIR}"
"LDFLAGS=-L${gflags_BINARY_DIR} -L${glog_BINARY_DIR}"
@ -16,4 +17,4 @@ ExternalProject_Add(openfst
BUILD_COMMAND make -j 4
)
link_directories(${openfst_PREFIX_DIR}/lib)
include_directories(${openfst_PREFIX_DIR}/include)
include_directories(${openfst_PREFIX_DIR}/include)

@ -41,6 +41,7 @@ void FeatureCache::Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs) {
// pop feature chunk
bool FeatureCache::Read(kaldi::Vector<kaldi::BaseFloat>* feats) {
kaldi::Timer timer;
std::unique_lock<std::mutex> lock(mutex_);
while (cache_.empty() && base_extractor_->IsFinished() == false) {
ready_read_condition_.wait(lock);
@ -64,10 +65,13 @@ bool FeatureCache::Compute() {
// compute and feed
Vector<BaseFloat> feature_chunk;
bool result = base_extractor_->Read(&feature_chunk);
std::unique_lock<std::mutex> lock(mutex_);
while (cache_.size() >= max_size_) {
ready_feed_condition_.wait(lock);
}
// feed cache
if (feature_chunk.Dim() != 0) {
cache_.push(feature_chunk);
}

@ -24,17 +24,24 @@ class FeatureCache : public FeatureExtractorInterface {
explicit FeatureCache(
int32 max_size = kint16max,
std::unique_ptr<FeatureExtractorInterface> base_extractor = NULL);
// Feed feats or waves
virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
// feats dim = num_frames * feature_dim
virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats);
// the feature cache only caches features produced by the base extractor
virtual size_t Dim() const { return base_extractor_->Dim(); }
virtual void SetFinished() {
base_extractor_->SetFinished();
// read the last chunk data
Compute();
}
virtual bool IsFinished() const { return base_extractor_->IsFinished(); }
virtual void Reset() {
base_extractor_->Reset();
while (!cache_.empty()) {
@ -45,12 +52,14 @@ class FeatureCache : public FeatureExtractorInterface {
private:
bool Compute();
std::mutex mutex_;
size_t max_size_;
std::queue<kaldi::Vector<BaseFloat>> cache_;
std::unique_ptr<FeatureExtractorInterface> base_extractor_;
std::mutex mutex_;
std::queue<kaldi::Vector<BaseFloat>> cache_;
std::condition_variable ready_feed_condition_;
std::condition_variable ready_read_condition_;
// DISALLOW_COPY_AND_ASSIGN(FeatureCache);
};

@ -21,17 +21,26 @@ namespace ppspeech {
class FeatureExtractorInterface {
public:
// accept input data, accept feature or raw waves which decided
// by the base_extractor
// Feed inputs: features(2D saved in 1D) or waveforms(1D).
virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs) = 0;
// get the processed result
// the length of output = feature_row * feature_dim,
// the Matrix is squashed into Vector
// Fetch processed data: features or waveforms.
// For features(2D saved in 1D), the Matrix is squashed into Vector,
// the length of output = feature_row * feature_dim.
// For waveforms(1D), samples saved in vector.
virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* outputs) = 0;
// the Dim is the feature dim
// Dim is the feature dim. For waveforms(1D), Dim is zero; else is specific,
// e.g 80 for fbank.
virtual size_t Dim() const = 0;
// End Flag for Streaming Data.
virtual void SetFinished() = 0;
// whether is end of Streaming Data.
virtual bool IsFinished() const = 0;
// Reset to start state.
virtual void Reset() = 0;
};

@ -23,12 +23,14 @@ namespace ppspeech {
struct LinearSpectrogramOptions {
kaldi::FrameExtractionOptions frame_opts;
kaldi::BaseFloat streaming_chunk;
kaldi::BaseFloat streaming_chunk; // second
LinearSpectrogramOptions() : streaming_chunk(0.36), frame_opts() {}
void Register(kaldi::OptionsItf* opts) {
opts->Register(
"streaming-chunk", &streaming_chunk, "streaming chunk size");
opts->Register("streaming-chunk",
&streaming_chunk,
"streaming chunk size, default: 0.36 sec");
frame_opts.Register(opts);
}
};

@ -26,9 +26,9 @@ import argparse
import os
import re
import subprocess
from distutils.util import strtobool
import numpy as np
from distutils.util import strtobool
FILE_IDS = re.compile(r"(?<=Speaker Diarization for).+(?=\*\*\*)")
SCORED_SPEAKER_TIME = re.compile(r"(?<=SCORED SPEAKER TIME =)[\d.]+")

@ -10,8 +10,8 @@ import codecs
import json
import logging
import sys
from distutils.util import strtobool
from distutils.util import strtobool
from espnet.utils.cli_utils import get_commandline_args
is_python2 = sys.version_info[0] == 2

@ -1,10 +1,10 @@
#!/usr/bin/env python3
import argparse
import logging
from distutils.util import strtobool
import kaldiio
import numpy
from distutils.util import strtobool
from paddlespeech.s2t.transform.cmvn import CMVN
from paddlespeech.s2t.utils.cli_readers import file_reader_helper

@ -1,6 +1,7 @@
#!/usr/bin/env python3
import argparse
import logging
from distutils.util import strtobool
from paddlespeech.s2t.transform.transformation import Transformation

@ -5,9 +5,10 @@ import codecs
import json
import logging
import sys
from distutils.util import strtobool
from io import open
from distutils.util import strtobool
from paddlespeech.s2t.utils.cli_utils import get_commandline_args
PY2 = sys.version_info[0] == 2

Loading…
Cancel
Save