[engine] rename speechx (#2892)

* rename speechx

* fix wfst decode error

* replace reset with make_unique
Author: YangZhou (committed via GitHub, 3 years ago)
commit 8e1b4cd513
parent 21183d48b6

@@ -8,7 +8,7 @@ repos:
 entry: yapf
 args: [-i, -vv]
 types: [python]
-exclude: (?=speechx/speechx/kaldi|audio/paddleaudio/src|third_party).*(\.cpp|\.cc|\.h\.hpp|\.py)$
+exclude: (?=runtime/engine/kaldi|audio/paddleaudio/src|third_party).*(\.cpp|\.cc|\.h\.hpp|\.py)$
 - repo: https://github.com/pre-commit/pre-commit-hooks
 rev: a11d9314b22d8f8c7556443875b731ef05965464
@@ -35,7 +35,7 @@ repos:
 - --ignore=E501,E228,E226,E261,E266,E128,E402,W503
 - --builtins=G,request
 - --jobs=1
-exclude: (?=speechx/speechx/kaldi|audio/paddleaudio/src|third_party).*(\.cpp|\.cc|\.h\.hpp|\.py)$
+exclude: (?=runtime/engine/kaldi|audio/paddleaudio/src|third_party).*(\.cpp|\.cc|\.h\.hpp|\.py)$
 - repo : https://github.com/Lucas-C/pre-commit-hooks
 rev: v1.0.1
@@ -57,16 +57,16 @@ repos:
 entry: bash .pre-commit-hooks/clang-format.hook -i
 language: system
 files: \.(h\+\+|h|hh|hxx|hpp|cuh|c|cc|cpp|cu|c\+\+|cxx|tpp|txx)$
-exclude: (?=speechx/speechx/kaldi|audio/paddleaudio/src|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin|third_party/ctc_decoders|speechx/speechx/common/utils).*(\.cpp|\.cc|\.h|\.hpp|\.py)$
+exclude: (?=runtime/engine/kaldi|audio/paddleaudio/src|runtime/patch|runtime/tools/fstbin|runtime/tools/lmbin|third_party/ctc_decoders|runtime/engine/common/utils).*(\.cpp|\.cc|\.h|\.hpp|\.py)$
 - id: cpplint
 name: cpplint
 description: Static code analysis of C/C++ files
 language: python
 files: \.(h\+\+|h|hh|hxx|hpp|cuh|c|cc|cpp|cu|c\+\+|cxx|tpp|txx)$
-exclude: (?=speechx/speechx/kaldi|audio/paddleaudio/src|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin|third_party/ctc_decoders|speechx/speechx/common/utils).*(\.cpp|\.cc|\.h|\.hpp|\.py)$
+exclude: (?=runtime/engine/kaldi|runtime/engine/common/matrix|audio/paddleaudio/src|runtime/patch|runtime/tools/fstbin|runtime/tools/lmbin|third_party/ctc_decoders|runtime/engine/common/utils).*(\.cpp|\.cc|\.h|\.hpp|\.py)$
 entry: cpplint --filter=-build,-whitespace,+whitespace/comma,-whitespace/indent
 - repo: https://github.com/asottile/reorder_python_imports
 rev: v2.4.0
 hooks:
 - id: reorder-python-imports
-exclude: (?=speechx/speechx/kaldi|audio/paddleaudio/src|speechx/patch|speechx/tools/fstbin|speechx/tools/lmbin|third_party/ctc_decoders).*(\.cpp|\.cc|\.h\.hpp|\.py)$
+exclude: (?=runtime/engine/kaldi|audio/paddleaudio/src|runtime/patch|runtime/tools/fstbin|runtime/tools/lmbin|third_party/ctc_decoders).*(\.cpp|\.cc|\.h\.hpp|\.py)$

@@ -164,7 +164,7 @@ Via the easy-to-use, efficient, flexible and scalable implementation, our vision
 - 👑 2022.11.18: Add [Whisper CLI and Demos](https://github.com/PaddlePaddle/PaddleSpeech/pull/2640), support multi language recognition and translation.
 - 🔥 2022.11.18: Add [Wav2vec2 CLI and Demos](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/speech_ssl), Support ASR and Feature Extraction.
 - 🎉 2022.11.17: Add [male voice for TTS](https://github.com/PaddlePaddle/PaddleSpeech/pull/2660).
-- 🔥 2022.11.07: Add [U2/U2++ C++ High Performance Streaming ASR Deployment](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/speechx/examples/u2pp_ol/wenetspeech).
+- 🔥 2022.11.07: Add [U2/U2++ C++ High Performance Streaming ASR Deployment](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/runtime/examples/u2pp_ol/wenetspeech).
 - 👑 2022.11.01: Add [Adversarial Loss](https://arxiv.org/pdf/1907.04448.pdf) for [Chinese English mixed TTS](./examples/zh_en_tts/tts3).
 - 🔥 2022.10.26: Add [Prosody Prediction](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/rhy) for TTS.
 - 🎉 2022.10.21: Add [SSML](https://github.com/PaddlePaddle/PaddleSpeech/discussions/2538) for TTS Chinese Text Frontend.

@@ -1,2 +1,3 @@
 tools/valgrind*
 *log
+fc_patch/*

@@ -93,7 +93,7 @@ endif()
 # paddle libpaddle.so
 # paddle include and link option
-# -L/workspace/DeepSpeech-2.x/speechx/venv/lib/python3.7/site-packages/paddle/libs -L/workspace/DeepSpeech-2.x/speechx/venv/lib/python3.7/site-packages/paddle/fluid -l:libpaddle.so -l:libdnnl.so.2 -l:libiomp5.so
+# -L/workspace/DeepSpeech-2.x/engine/venv/lib/python3.7/site-packages/paddle/libs -L/workspace/DeepSpeech-2.x/speechx/venv/lib/python3.7/site-packages/paddle/fluid -l:libpaddle.so -l:libdnnl.so.2 -l:libiomp5.so
 execute_process(
 COMMAND python -c "\
 import os;\
@@ -112,7 +112,7 @@ message(STATUS PADDLE_LINK_FLAGS= ${PADDLE_LINK_FLAGS})
 string(STRIP ${PADDLE_LINK_FLAGS} PADDLE_LINK_FLAGS)
 # paddle compile option
-# -I/workspace/DeepSpeech-2.x/speechx/venv/lib/python3.7/site-packages/paddle/include
+# -I/workspace/DeepSpeech-2.x/engine/venv/lib/python3.7/site-packages/paddle/include
 execute_process(
 COMMAND python -c "\
 import paddle; \
@@ -143,6 +143,6 @@ message(STATUS PADDLE_LIB_DIRS= ${PADDLE_LIB_DIRS})
 ###############################################################################
 # Add local library
 ###############################################################################
-set(SPEECHX_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/speechx)
-add_subdirectory(speechx)
+set(ENGINE_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/engine)
+add_subdirectory(engine)

@@ -1,4 +1,3 @@
-# SpeechX -- All in One Speech Task Inference
 ## Environment
@@ -9,7 +8,7 @@ We develop under:
 * gcc/g++/gfortran - 8.2.0
 * cmake - 3.16.0
-> Please use `tools/env.sh` to create python `venv`, then `source venv/bin/activate` to build speechx.
+> Please use `tools/env.sh` to create python `venv`, then `source venv/bin/activate` to build engine.
 > We make sure all things work fun under docker, and recommend using it to develop and deploy.
@@ -33,7 +32,7 @@ docker run --privileged --net=host --ipc=host -it --rm -v /path/to/paddlespeech
 bash tools/venv.sh
 ```
-2. Build `speechx` and `examples`.
+2. Build `engine` and `examples`.
 For now we are using feature under `develop` branch of paddle, so we need to install `paddlepaddle` nightly build version.
 For example:

@@ -2,10 +2,10 @@ include(FetchContent)
 FetchContent_Declare(
 gflags
-URL https://github.com/gflags/gflags/archive/v2.2.2.zip
+URL https://paddleaudio.bj.bcebos.com/build/gflag-2.2.2.zip
 URL_HASH SHA256=19713a36c9f32b33df59d1c79b4958434cb005b5b47dc5400a7a4b078111d9b5
 )
 FetchContent_MakeAvailable(gflags)
 # openfst need
 include_directories(${gflags_BINARY_DIR}/include)

@@ -1,7 +1,7 @@
 include(FetchContent)
 FetchContent_Declare(
 glog
-URL https://github.com/google/glog/archive/v0.4.0.zip
+URL https://paddleaudio.bj.bcebos.com/build/glog-0.4.0.zip
 URL_HASH SHA256=9e1b54eb2782f53cd8af107ecf08d2ab64b8d0dc2b7f5594472f3bd63ca85cdc
 )
 FetchContent_MakeAvailable(glog)

@@ -2,7 +2,7 @@
 include(FetchContent)
 FetchContent_Declare(
 gtest
-URL https://github.com/google/googletest/archive/release-1.11.0.zip
+URL https://paddleaudio.bj.bcebos.com/build/gtest-release-1.11.0.zip
 URL_HASH SHA256=353571c2440176ded91c2de6d6cd88ddd41401d14692ec1f99e35d013feda55a
 )
 FetchContent_MakeAvailable(gtest)
@@ -12,4 +12,4 @@ include_directories(${gtest_BINARY_DIR} ${gtest_SOURCE_DIR}/src)
 if(WITH_TESTING)
 enable_testing()
 endif()

@@ -1,8 +1,8 @@
+include(FetchContent)
 set(openfst_PREFIX_DIR ${fc_patch}/openfst)
 set(openfst_SOURCE_DIR ${fc_patch}/openfst-src)
 set(openfst_BINARY_DIR ${fc_patch}/openfst-build)
-include(FetchContent)
 # openfst Acknowledgments:
 #Cyril Allauzen, Michael Riley, Johan Schalkwyk, Wojciech Skut and Mehryar Mohri,
 #"OpenFst: A General and Efficient Weighted Finite-State Transducer Library",
@@ -25,5 +25,7 @@ ExternalProject_Add(openfst
 )
 link_directories(${openfst_PREFIX_DIR}/lib)
 include_directories(${openfst_PREFIX_DIR}/include)
 message(STATUS "OpenFST inc dir: ${openfst_PREFIX_DIR}/include")
 message(STATUS "OpenFST lib dir: ${openfst_PREFIX_DIR}/lib")

@@ -63,9 +63,7 @@ void CTCPrefixBeamSearch::Reset() {
 times_.emplace_back(empty);
 }
-void CTCPrefixBeamSearch::InitDecoder() {
-Reset();
-}
+void CTCPrefixBeamSearch::InitDecoder() { Reset(); }
 void CTCPrefixBeamSearch::AdvanceDecode(
 const std::shared_ptr<kaldi::DecodableInterface>& decodable) {

@@ -29,6 +29,11 @@ TLGDecoder::TLGDecoder(TLGDecoderOptions opts) : opts_(opts) {
 void TLGDecoder::Reset() {
 decoder_->InitDecoding();
+hypotheses_.clear();
+likelihood_.clear();
+olabels_.clear();
+times_.clear();
 num_frame_decoded_ = 0;
 return;
 }
@@ -103,7 +108,7 @@ void TLGDecoder::FinalizeSearch() {
 time.push_back(idx); // fake time, todo later
 hypotheses_.push_back(hypothese);
 times_.push_back(time);
-olabels.push_back(words_id);
+olabels_.push_back(words_id);
 likelihood_.push_back(-(weight.Value2() + weight.Value1()));
 }
 }
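The two hunks above carry the "fix wfst decode error" bullet: `Reset()` now clears every per-utterance result container, and `FinalizeSearch()` writes to the properly named `olabels_` member, so hypotheses and output labels from one utterance can no longer leak into the next decode. A minimal, self-contained sketch of that failure mode and the fix (the `ToyDecoder` class below is invented for illustration; it is not the real `TLGDecoder`):

```cpp
// Minimal sketch (hypothetical ToyDecoder, not the PaddleSpeech TLGDecoder)
// showing why Reset() must clear the per-utterance result buffers.
#include <iostream>
#include <vector>

class ToyDecoder {
  public:
    // Pretend each call decodes one utterance and yields one n-best entry.
    void Decode(const std::vector<int>& labels) {
        hypotheses_.push_back(labels);
        olabels_.push_back(labels);
        likelihood_.push_back(-static_cast<float>(labels.size()));
    }

    // Buggy variant: only the frame counter is reset, so results from the
    // previous utterance stay in the buffers and get mixed into the next
    // utterance's output -- the symptom this commit fixes.
    void ResetBuggy() { num_frame_decoded_ = 0; }

    // Fixed variant: every per-utterance container is cleared, mirroring the
    // added hypotheses_/likelihood_/olabels_/times_ clear() calls above.
    void Reset() {
        hypotheses_.clear();
        likelihood_.clear();
        olabels_.clear();
        times_.clear();
        num_frame_decoded_ = 0;
    }

    size_t NumHyps() const { return hypotheses_.size(); }

  private:
    std::vector<std::vector<int>> hypotheses_;
    std::vector<std::vector<int>> olabels_;
    std::vector<float> likelihood_;
    std::vector<std::vector<int>> times_;
    int num_frame_decoded_ = 0;
};

int main() {
    ToyDecoder buggy;
    buggy.Decode({1, 2, 3});
    buggy.ResetBuggy();  // stale hypotheses survive the "reset"
    buggy.Decode({4, 5});
    std::cout << buggy.NumHyps() << std::endl;  // 2: previous utterance leaked

    ToyDecoder fixed;
    fixed.Decode({1, 2, 3});
    fixed.Reset();  // buffers cleared between utterances
    fixed.Decode({4, 5});
    std::cout << fixed.NumHyps() << std::endl;  // 1
    return 0;
}
```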

@@ -24,6 +24,7 @@ DECLARE_string(graph_path);
 DECLARE_int32(max_active);
 DECLARE_double(beam);
 DECLARE_double(lattice_beam);
+DECLARE_int32(nbest);
 namespace ppspeech {
@@ -46,7 +47,7 @@ struct TLGDecoderOptions {
 decoder_opts.opts.max_active = FLAGS_max_active;
 decoder_opts.opts.beam = FLAGS_beam;
 decoder_opts.opts.lattice_beam = FLAGS_lattice_beam;
-// decoder_opts.nbest = FLAGS_lattice_nbest;
+decoder_opts.nbest = FLAGS_nbest;
 LOG(INFO) << "LatticeFasterDecoder max active: "
 << decoder_opts.opts.max_active;
 LOG(INFO) << "LatticeFasterDecoder beam: " << decoder_opts.opts.beam;
@@ -85,7 +86,7 @@ class TLGDecoder : public DecoderBase {
 return hypotheses_;
 }
 const std::vector<std::vector<int>>& Outputs() const override {
-return olabels;
+return olabels_;
 } // outputs_; }
 const std::vector<float>& Likelihood() const override {
 return likelihood_;
@@ -111,8 +112,9 @@ class TLGDecoder : public DecoderBase {
 private:
 void AdvanceDecoding(kaldi::DecodableInterface* decodable);
+int num_frame_decoded_;
 std::vector<std::vector<int>> hypotheses_;
-std::vector<std::vector<int>> olabels;
+std::vector<std::vector<int>> olabels_;
 std::vector<float> likelihood_;
 std::vector<std::vector<int>> times_;
@@ -123,4 +125,4 @@ class TLGDecoder : public DecoderBase {
 };
 } // namespace ppspeech
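Alongside the renamed `olabels_` member, the header now declares the `nbest` flag, and `TLGDecoderOptions::InitFromFlags()` assigns `FLAGS_nbest` instead of leaving the line commented out, so the n-best size actually reaches the WFST decoder. A stand-alone sketch of that gflags DECLARE/DEFINE-into-options pattern (the `ToyDecoderOptions` struct and the flag default below are illustrative, not the PaddleSpeech definitions):

```cpp
// Sketch of wiring a gflags command-line flag into an options struct.
// ToyDecoderOptions is a stand-in, not the real ppspeech::TLGDecoderOptions.
#include <iostream>

#include "gflags/gflags.h"

// In real code DEFINE_int32 lives in one .cc file and headers carry
// DECLARE_int32(nbest); the default value here is made up.
DEFINE_int32(nbest, 10, "n-best list size for the WFST decoder");

struct ToyDecoderOptions {
    int nbest = 1;

    static ToyDecoderOptions InitFromFlags() {
        ToyDecoderOptions opts;
        opts.nbest = FLAGS_nbest;  // flag value flows into the options struct
        return opts;
    }
};

int main(int argc, char* argv[]) {
    gflags::ParseCommandLineFlags(&argc, &argv, /*remove_flags=*/true);
    ToyDecoderOptions opts = ToyDecoderOptions::InitFromFlags();
    std::cout << "nbest = " << opts.nbest << std::endl;  // try --nbest=5
    return 0;
}
```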

@@ -15,7 +15,6 @@
 #pragma once
 #include "base/common.h"
-//#include "decoder/ctc_tlg_decoder.h"
 // feature
 DEFINE_bool(use_fbank, false, "False for fbank; or linear feature");

@@ -13,13 +13,13 @@
 // limitations under the License.
-#include "nnet/u2_nnet.h"
 #include "base/common.h"
 #include "decoder/param.h"
 #include "frontend/assembler.h"
 #include "frontend/data_cache.h"
 #include "kaldi/util/table-types.h"
 #include "nnet/decodable.h"
+#include "nnet/u2_nnet.h"
 DEFINE_string(feature_rspecifier, "", "test feature rspecifier");
@@ -93,9 +93,9 @@ int main(int argc, char* argv[]) {
 ori_feature_len - chunk_idx * chunk_stride, chunk_size);
 }
 if (this_chunk_size < receptive_field_length) {
-LOG(WARNING)
-<< "utt: " << utt << " skip last " << this_chunk_size
-<< " frames, expect is " << receptive_field_length;
+LOG(WARNING) << "utt: " << utt << " skip last "
+<< this_chunk_size << " frames, expect is "
+<< receptive_field_length;
 break;
 }

@@ -13,13 +13,13 @@
 // limitations under the License.
-#include "nnet/u2_nnet.h"
 #include "base/common.h"
 #include "decoder/param.h"
-#include "frontend/wave-reader.h"
 #include "frontend/feature_pipeline.h"
+#include "frontend/wave-reader.h"
 #include "kaldi/util/table-types.h"
 #include "nnet/decodable.h"
+#include "nnet/u2_nnet.h"
 #include "nnet/nnet_producer.h"
 DEFINE_string(wav_rspecifier, "", "test wav rspecifier");
@@ -104,7 +104,7 @@ int main(int argc, char* argv[]) {
 CHECK(sample_offset == tot_samples);
 std::vector<std::vector<kaldi::BaseFloat>> prob_vec;
-while(1) {
+while (1) {
 std::vector<kaldi::BaseFloat> logprobs;
 bool isok = nnet_producer->Read(&logprobs);
 if (nnet_producer->IsFinished()) break;

@@ -33,12 +33,12 @@ U2Recognizer::U2Recognizer(const U2RecognizerResource& resource)
 decodable_.reset(new Decodable(nnet_producer_, am_scale));
 CHECK_NE(resource.vocab_path, "");
-if (resource.decoder_opts.tlg_decoder_opts.fst_path == "") {
+if (resource.decoder_opts.tlg_decoder_opts.fst_path.empty()) {
 LOG(INFO) << resource.decoder_opts.tlg_decoder_opts.fst_path;
-decoder_.reset(new CTCPrefixBeamSearch(
-resource.vocab_path, resource.decoder_opts.ctc_prefix_search_opts));
+decoder_ = std::make_unique<CTCPrefixBeamSearch>(
+resource.vocab_path, resource.decoder_opts.ctc_prefix_search_opts);
 } else {
-decoder_.reset(new TLGDecoder(resource.decoder_opts.tlg_decoder_opts));
+decoder_ = std::make_unique<TLGDecoder>(resource.decoder_opts.tlg_decoder_opts);
 }
 symbol_table_ = decoder_->WordSymbolTable();
@@ -268,4 +268,4 @@ void U2Recognizer::SetInputFinished() {
 }
 } // namespace ppspeech
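This hunk is the "replace reset with make_unique" bullet: the recognizer now constructs its decoder with `std::make_unique` (C++14) rather than handing a raw `new` to `unique_ptr::reset`, and the comparison against `""` becomes `fst_path.empty()`. A minimal sketch of the pointer pattern, using stand-in types rather than the real `CTCPrefixBeamSearch`/`TLGDecoder` classes:

```cpp
// Sketch of the reset(new T(...)) -> std::make_unique<T>(...) change.
// DecoderBase and PrefixSearch are hypothetical stand-ins.
#include <iostream>
#include <memory>
#include <string>
#include <utility>

struct DecoderBase {
    virtual ~DecoderBase() = default;
    virtual std::string Name() const = 0;
};

struct PrefixSearch : DecoderBase {
    explicit PrefixSearch(std::string vocab) : vocab_(std::move(vocab)) {}
    std::string Name() const override { return "prefix_search(" + vocab_ + ")"; }
    std::string vocab_;
};

int main() {
    std::unique_ptr<DecoderBase> decoder;

    // Old style: a bare new passed to reset().
    decoder.reset(new PrefixSearch("vocab.txt"));

    // New style: make_unique keeps ownership inside the smart pointer from
    // the start and avoids spelling out the raw new at all.
    decoder = std::make_unique<PrefixSearch>("vocab.txt");

    std::cout << decoder->Name() << std::endl;
    return 0;
}
```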

@@ -31,11 +31,9 @@ DECLARE_double(rescoring_weight);
 DECLARE_double(reverse_weight);
 DECLARE_int32(nbest);
 DECLARE_int32(blank);
 DECLARE_double(acoustic_scale);
 DECLARE_string(vocab_path);
 DECLARE_string(word_symbol_table);
-// DECLARE_string(fst_path);
 namespace ppspeech {
@@ -74,10 +72,6 @@ struct DecodeOptions {
 decoder_opts.ctc_prefix_search_opts.blank = FLAGS_blank;
 decoder_opts.ctc_prefix_search_opts.first_beam_size = FLAGS_nbest;
 decoder_opts.ctc_prefix_search_opts.second_beam_size = FLAGS_nbest;
-// decoder_opts.tlg_decoder_opts.fst_path = "";//FLAGS_fst_path;
-// decoder_opts.tlg_decoder_opts.word_symbol_table =
-// FLAGS_word_symbol_table;
-// decoder_opts.tlg_decoder_opts.nbest = FLAGS_nbest;
 decoder_opts.tlg_decoder_opts =
 ppspeech::TLGDecoderOptions::InitFromFlags();
@@ -183,4 +177,4 @@ class U2Recognizer {
 std::thread thread_;
 };
 } // namespace ppspeech

@@ -12,10 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "recognizer/u2_recognizer.h"
 #include "decoder/param.h"
 #include "frontend/wave-reader.h"
 #include "kaldi/util/table-types.h"
+#include "recognizer/u2_recognizer.h"
 DEFINE_string(wav_rspecifier, "", "test feature rspecifier");
 DEFINE_string(result_wspecifier, "", "test result wspecifier");

@@ -100,7 +100,7 @@ int main(int argc, char* argv[]) {
 continue;
 }
 tot_decode_time += local_timer.Elapsed();
 LOG(INFO) << utt << " " << result;
 LOG(INFO) << " RTF: " << local_timer.Elapsed() / dur << " dur: " << dur
 << " cost: " << local_timer.Elapsed();

@@ -12,10 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "websocket/websocket_client.h"
 #include "kaldi/feat/wave-reader.h"
 #include "kaldi/util/kaldi-io.h"
 #include "kaldi/util/table-types.h"
+#include "websocket/websocket_client.h"
 DEFINE_string(host, "127.0.0.1", "host of websocket server");
 DEFINE_int32(port, 8082, "port of websocket server");

@@ -12,8 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include "decoder/param.h"
 #include "websocket/websocket_server.h"
-#include "decoder/param.h"
 DEFINE_int32(port, 8082, "websocket listening port");

@@ -28,7 +28,7 @@ typedef int int32; // NOLINT
 #if defined(__LP64__) && !defined(OS_MACOSX) && !defined(OS_OPENBSD)
 typedef long int64; // NOLINT
 #else
 typedef long long int64; // NOLINT
 #endif
 typedef unsigned char uint8; // NOLINT

@@ -21,6 +21,8 @@
 #include <cstring>
 #include <deque>
 #include <fstream>
+#include <functional>
+#include <future>
 #include <iomanip>
 #include <iostream>
 #include <istream>
@@ -42,8 +44,6 @@
 #include <unordered_set>
 #include <utility>
 #include <vector>
-#include <future>
-#include <functional>
 #include "base/basic_types.h"
 #include "base/flags.h"

@@ -97,8 +97,8 @@ bool Assembler::Compute(vector<BaseFloat>* feats) {
 CHECK(val.size() == dim_) << val.size();
 int32 start = counter * dim_;
-std::memcpy(feats->data() + start,
-val.data(), val.size() * sizeof(BaseFloat));
+std::memcpy(
+feats->data() + start, val.data(), val.size() * sizeof(BaseFloat));
 if (this_chunk_size - counter <= cache_size_) {
 feature_cache_.push(val);

@@ -84,11 +84,12 @@ void CMVN::Compute(vector<BaseFloat>* feats) const {
 KALDI_ASSERT(feats != NULL);
 if (feats->size() % dim_ != 0) {
-LOG(ERROR)<< "Dim mismatch: cmvn " << mean_stats_.size() << ','
-<< var_stats_.size() - 1 << ", feats " << feats->size() << 'x';
+LOG(ERROR) << "Dim mismatch: cmvn " << mean_stats_.size() << ','
+<< var_stats_.size() - 1 << ", feats " << feats->size()
+<< 'x';
 }
 if (var_stats_.size() == 0 && var_norm_) {
 LOG(ERROR)
 << "You requested variance normalization but no variance stats_ "
 << "are supplied.";
 }
@@ -98,8 +99,8 @@ void CMVN::Compute(vector<BaseFloat>* feats) const {
 // computing an offset and representing it as stats_, we use a count of one.
 if (count < 1.0)
 LOG(ERROR) << "Insufficient stats_ for cepstral mean and variance "
 "normalization: "
 << "count = " << count;
 if (!var_norm_) {
 vector<BaseFloat> offset(feats->size());
@@ -112,11 +113,12 @@ void CMVN::Compute(vector<BaseFloat>* feats) const {
 // with the dim_ of feature.
 // the dim_ of feats = dim_ * num_frames;
 for (int32 idx = 0; idx < feats->size() / dim_; ++idx) {
 std::memcpy(mean_stats_apply.data() + dim_ * idx,
-mean_stats.data(), dim_* sizeof(double));
+mean_stats.data(),
+dim_ * sizeof(double));
 }
 for (size_t idx = 0; idx < feats->size(); ++idx) {
 feats->at(idx) += offset[idx];
 }
 return;
 }
@@ -130,7 +132,7 @@ void CMVN::Compute(vector<BaseFloat>* feats) const {
 double var = (var_stats_[d] / count) - mean * mean, floor = 1.0e-20;
 if (var < floor) {
 LOG(WARNING) << "Flooring cepstral variance from " << var << " to "
 << floor;
 var = floor;
 }
 scale = 1.0 / sqrt(var);
@@ -146,7 +148,7 @@ void CMVN::Compute(vector<BaseFloat>* feats) const {
 }
 // Apply the normalization.
 for (size_t idx = 0; idx < feats->size(); ++idx) {
 feats->at(idx) *= norm1[idx];
 }
 for (size_t idx = 0; idx < feats->size(); ++idx) {
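The CMVN hunks above are clang-format reflows, but they spell out the computation: per feature dimension the accumulated stats give a mean (and optionally a variance), the variance is floored at 1.0e-20 before taking `1.0 / sqrt(var)`, and every value is shifted and scaled accordingly. A toy sketch of that normalization under those assumptions (`ApplyCmvnToy` is illustrative, not the `ppspeech::CMVN` API; the stats layout is assumed, not taken from the source):

```cpp
// Toy cepstral mean/variance normalization: x' = (x - mean) / sqrt(var),
// with the variance floored, applied to a flat dim-major feature buffer.
#include <cmath>
#include <cstdio>
#include <vector>

void ApplyCmvnToy(const std::vector<double>& mean_stats,
                  const std::vector<double>& var_stats,
                  double count,
                  bool var_norm,
                  std::vector<double>* feats) {
    const size_t dim = mean_stats.size();
    const double floor = 1.0e-20;
    for (size_t i = 0; i < feats->size(); ++i) {
        const size_t d = i % dim;  // dimension index within the frame
        const double mean = mean_stats[d] / count;
        double scale = 1.0;
        if (var_norm) {
            double var = var_stats[d] / count - mean * mean;
            if (var < floor) var = floor;  // flooring, as in the code above
            scale = 1.0 / std::sqrt(var);
        }
        (*feats)[i] = ((*feats)[i] - mean) * scale;
    }
}

int main() {
    // Two frames of a 2-dim feature and stats accumulated over two frames.
    std::vector<double> feats = {1.0, 2.0, 3.0, 4.0};
    ApplyCmvnToy(/*mean_stats=*/{4.0, 6.0},
                 /*var_stats=*/{10.0, 20.0},
                 /*count=*/2.0,
                 /*var_norm=*/true,
                 &feats);
    for (double v : feats) std::printf("%g\n", v);  // -1 -1 1 1
    return 0;
}
```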

@@ -15,8 +15,8 @@
 #pragma once
 #include "base/common.h"
-#include "frontend/feature_common.h"
 #include "frontend/feature-fbank.h"
+#include "frontend/feature_common.h"
 namespace ppspeech {

@@ -67,7 +67,7 @@ bool FeatureCache::Compute() {
 for (int chunk_idx = 0; chunk_idx < num_chunk; ++chunk_idx) {
 int32 start = chunk_idx * dim_;
 vector<BaseFloat> feature_chunk(feature.data() + start,
 feature.data() + start + dim_);
 // feed cache
 cache_.push(feature_chunk);

@@ -57,7 +57,7 @@ class FeatureCache : public FrontendInterface {
 bool Compute();
 int32 dim_;
 size_t max_size_; // cache capacity
 std::unique_ptr<FrontendInterface> base_extractor_;
 std::queue<std::vector<BaseFloat>> cache_; // feature cache

@@ -14,8 +14,8 @@
 #pragma once
-#include "frontend_itf.h"
 #include "frontend/feature-window.h"
+#include "frontend_itf.h"
 namespace ppspeech {

Some files were not shown because too many files have changed in this diff.