pull/1707/head
Hui Zhang 3 years ago
parent cad09b4910
commit c7b987c55d

@ -20,6 +20,7 @@ from diskcache import Cache
from fastapi import FastAPI
from fastapi import File
from fastapi import UploadFile
from logs import LOGGER
from milvus_helpers import MilvusHelper
from mysql_helpers import MySQLHelper
from operations.count import do_count
@ -31,8 +32,6 @@ from starlette.middleware.cors import CORSMiddleware
from starlette.requests import Request
from starlette.responses import FileResponse
from logs import LOGGER
app = FastAPI()
app.add_middleware(
CORSMiddleware,

@ -12,8 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from logs import LOGGER
from paddlespeech.cli import VectorExecutor
vector_executor = VectorExecutor()

@ -20,7 +20,6 @@ from config import MYSQL_HOST
from config import MYSQL_PORT
from config import MYSQL_PWD
from config import MYSQL_USER
from logs import LOGGER

@ -14,7 +14,6 @@
import sys
from config import DEFAULT_TABLE
from logs import LOGGER

@ -14,7 +14,6 @@
import sys
from config import DEFAULT_TABLE
from logs import LOGGER

@ -17,7 +17,6 @@ import sys
from config import DEFAULT_TABLE
from diskcache import Cache
from encode import get_audio_embedding
from logs import LOGGER
@ -27,8 +26,7 @@ def get_audios(path):
"""
supported_formats = [".wav", ".mp3", ".ogg", ".flac", ".m4a"]
return [
item
for sublist in [[os.path.join(dir, file) for file in files]
item for sublist in [[os.path.join(dir, file) for file in files]
for dir, _, files in list(os.walk(path))]
for item in sublist if os.path.splitext(item)[1] in supported_formats
]

@ -17,7 +17,6 @@ import numpy
from config import DEFAULT_TABLE
from config import TOP_K
from encode import get_audio_embedding
from logs import LOGGER

@ -18,6 +18,7 @@ from config import UPLOAD_PATH
from fastapi import FastAPI
from fastapi import File
from fastapi import UploadFile
from logs import LOGGER
from mysql_helpers import MySQLHelper
from operations.count import do_count_vpr
from operations.count import do_get
@ -30,8 +31,6 @@ from starlette.middleware.cors import CORSMiddleware
from starlette.requests import Request
from starlette.responses import FileResponse
from logs import LOGGER
app = FastAPI()
app.add_middleware(
CORSMiddleware,

@ -12,15 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import asyncio
import base64
import io
import json
import logging
import os
import random
import time
from typing import List
import logging
import asyncio
import numpy as np
import requests
@ -30,9 +30,9 @@ from ..executor import BaseExecutor
from ..util import cli_client_register
from ..util import stats_wrapper
from paddlespeech.cli.log import logger
from paddlespeech.server.tests.asr.online.websocket_client import ASRAudioHandler
from paddlespeech.server.utils.audio_process import wav2pcm
from paddlespeech.server.utils.util import wav2base64
from paddlespeech.server.tests.asr.online.websocket_client import ASRAudioHandler
__all__ = ['TTSClientExecutor', 'ASRClientExecutor', 'CLSClientExecutor']
@ -234,7 +234,8 @@ class ASRClientExecutor(BaseExecutor):
@cli_client_register(
name='paddlespeech_client.asr_online', description='visit asr online service')
name='paddlespeech_client.asr_online',
description='visit asr online service')
class ASRClientExecutor(BaseExecutor):
def __init__(self):
super(ASRClientExecutor, self).__init__()

@ -1,12 +1,11 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2021 Mobvoi Inc. All Rights Reserved.
# Author: zhendong.peng@mobvoi.com (Zhendong Peng)
import argparse
from flask import Flask, render_template
from flask import Flask
from flask import render_template
parser = argparse.ArgumentParser(description='training your network')
parser.add_argument('--port', default=19999, type=int, help='port id')
@ -14,9 +13,11 @@ args = parser.parse_args()
app = Flask(__name__)
@app.route('/')
def index():
return render_template('index.html')
if __name__ == '__main__':
app.run(host='0.0.0.0', port=args.port, debug=True)

@ -13,6 +13,7 @@
# limitations under the License.
from dataclasses import dataclass
from dataclasses import fields
from paddle.io import Dataset
from paddleaudio import load as load_audio

@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from dataclasses import dataclass
from dataclasses import fields
from paddle.io import Dataset
from paddleaudio import load as load_audio

@ -34,7 +34,8 @@ DEFINE_int32(receptive_field_length,
DEFINE_int32(downsampling_rate,
4,
"two CNN(kernel=5) module downsampling rate.");
DEFINE_string(model_input_names,
DEFINE_string(
model_input_names,
"audio_chunk,audio_chunk_lens,chunk_state_h_box,chunk_state_c_box",
"model input names");
DEFINE_string(model_output_names,

@ -5,4 +5,3 @@ ASR audio feature test bins. We use these bins to test linear/fbank/mfcc asr
* linear_spectrogram_without_db_norm_main.cc
compute linear spectrogram w/o db norm in streaming manner.

@ -31,7 +31,7 @@ int main(int argc, char* argv[]) {
gflags::ParseCommandLineFlags(&argc, &argv, false);
google::InitGoogleLogging(argv[0]);
LOG(INFO) << "cmvn josn path: " << FLAGS_json_file ;
LOG(INFO) << "cmvn josn path: " << FLAGS_json_file;
padded_string json = padded_string::load(FLAGS_json_file);
ondemand::parser parser;
@ -43,7 +43,9 @@ int main(int argc, char* argv[]) {
for (double x : mean_stat) {
mean_stat_vec.push_back(x);
}
// LOG(INFO) << mean_stat; this line will cause simdjson::simdjson_error("Objects and arrays can only be iterated when they are first encountered")
// LOG(INFO) << mean_stat; this line will cause
// simdjson::simdjson_error("Objects and arrays can only be iterated when
// they are first encountered")
ondemand::array var_stat = val["var_stat"];
std::vector<kaldi::BaseFloat> var_stat_vec;

@ -14,8 +14,6 @@
// deepspeech2 online model info
#include "base/flags.h"
#include "base/log.h"
#include <algorithm>
#include <fstream>
#include <functional>
@ -23,6 +21,8 @@
#include <iterator>
#include <numeric>
#include <thread>
#include "base/flags.h"
#include "base/log.h"
#include "paddle_inference_api.h"
using std::cout;

@ -92,8 +92,7 @@ void CTCBeamSearch::AdvanceDecode(
while (1) {
vector<vector<BaseFloat>> likelihood;
vector<BaseFloat> frame_prob;
bool flag =
decodable->FrameLikelihood(num_frame_decoded_, &frame_prob);
bool flag = decodable->FrameLikelihood(num_frame_decoded_, &frame_prob);
if (flag == false) break;
likelihood.push_back(frame_prob);
AdvanceDecoding(likelihood);

@ -49,10 +49,9 @@ bool Decodable::IsLastFrame(int32 frame) {
int32 Decodable::NumIndices() const { return 0; }
// the ilabel (TokenId) of the wfst (TLG) inserts <eps> (id = 0) in front of the Nnet prob id.
int32 Decodable::TokenId2NnetId(int32 token_id) {
return token_id - 1;
}
// the ilabel (TokenId) of the wfst (TLG) inserts <eps> (id = 0) in front of the Nnet prob
// id.
int32 Decodable::TokenId2NnetId(int32 token_id) { return token_id - 1; }
BaseFloat Decodable::LogLikelihood(int32 frame, int32 index) {
CHECK_LE(index, nnet_cache_.NumCols());
@ -60,7 +59,8 @@ BaseFloat Decodable::LogLikelihood(int32 frame, int32 index) {
int32 frame_idx = frame - frame_offset_;
// the nnet output is prob rather than log prob
// the index - 1, because the ilabel
return acoustic_scale_ * std::log(nnet_cache_(frame_idx, TokenId2NnetId(index)) +
return acoustic_scale_ *
std::log(nnet_cache_(frame_idx, TokenId2NnetId(index)) +
std::numeric_limits<float>::min());
}

@ -45,7 +45,8 @@ struct ModelOptions {
thread_num(2),
use_gpu(false),
input_names(
"audio_chunk,audio_chunk_lens,chunk_state_h_box,chunk_state_c_box"),
"audio_chunk,audio_chunk_lens,chunk_state_h_box,chunk_state_c_"
"box"),
output_names(
"save_infer_model/scale_0.tmp_1,save_infer_model/"
"scale_1.tmp_1,save_infer_model/scale_2.tmp_1,save_infer_model/"

@ -40,5 +40,4 @@ std::string ReadFile2String(const std::string& path) {
return std::string((std::istreambuf_iterator<char>(input_file)),
std::istreambuf_iterator<char>());
}
}

@ -20,5 +20,4 @@ bool ReadFileToVector(const std::string& filename,
std::vector<std::string>* data);
std::string ReadFile2String(const std::string& path);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,3 +1,17 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// fstbin/fstaddselfloops.cc
// Copyright 2009-2011 Microsoft Corporation
@ -43,12 +57,14 @@ int main(int argc, char *argv[]) {
const char *usage =
"Adds self-loops to states of an FST to propagate disambiguation "
"symbols through it\n"
"They are added on each final state and each state with non-epsilon "
"They are added on each final state and each state with "
"non-epsilon "
"output symbols\n"
"on at least one arc out of the state. Useful in conjunction with "
"predeterminize\n"
"\n"
"Usage: fstaddselfloops in-disambig-list out-disambig-list [in.fst "
"Usage: fstaddselfloops in-disambig-list out-disambig-list "
"[in.fst "
"[out.fst] ]\n"
"E.g: fstaddselfloops in.list out.list < in.fst > withloops.fst\n"
"in.list and out.list are lists of integers, one per line, of the\n"
@ -71,19 +87,19 @@ int main(int argc, char *argv[]) {
std::vector<int32> disambig_in;
if (!ReadIntegerVectorSimple(disambig_in_rxfilename, &disambig_in))
KALDI_ERR
<< "fstaddselfloops: Could not read disambiguation symbols from "
KALDI_ERR << "fstaddselfloops: Could not read disambiguation "
"symbols from "
<< kaldi::PrintableRxfilename(disambig_in_rxfilename);
std::vector<int32> disambig_out;
if (!ReadIntegerVectorSimple(disambig_out_rxfilename, &disambig_out))
KALDI_ERR
<< "fstaddselfloops: Could not read disambiguation symbols from "
KALDI_ERR << "fstaddselfloops: Could not read disambiguation "
"symbols from "
<< kaldi::PrintableRxfilename(disambig_out_rxfilename);
if (disambig_in.size() != disambig_out.size())
KALDI_ERR
<< "fstaddselfloops: mismatch in size of disambiguation symbols";
KALDI_ERR << "fstaddselfloops: mismatch in size of disambiguation "
"symbols";
AddSelfLoops(fst, disambig_in, disambig_out);

@ -1,3 +1,17 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// fstbin/fstdeterminizestar.cc
// Copyright 2009-2011 Microsoft Corporation
@ -73,11 +87,13 @@ int main(int argc, char *argv[]) {
bool use_log = false;
ParseOptions po(usage);
po.Register("use-log", &use_log, "Determinize in log semiring.");
po.Register("delta", &delta,
po.Register("delta",
&delta,
"Delta value used to determine equivalence of weights.");
po.Register(
"max-states", &max_states,
"Maximum number of states in determinized FST before it will abort.");
po.Register("max-states",
&max_states,
"Maximum number of states in determinized FST before it "
"will abort.");
po.Read(argc, argv);
if (po.NumArgs() > 2) {
@ -87,8 +103,8 @@ int main(int argc, char *argv[]) {
std::string fst_in_str = po.GetOptArg(1), fst_out_str = po.GetOptArg(2);
// This enables us to get traceback info from determinization that is
// not seeming to terminate.
// This enables us to get traceback info from determinization that is
// not seeming to terminate.
#if !defined(_MSC_VER) && !defined(__APPLE__)
signal(SIGUSR1, signal_handler);
#endif

@ -1,3 +1,17 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// fstbin/fstisstochastic.cc
// Copyright 2009-2011 Microsoft Corporation
@ -48,7 +62,8 @@ int main(int argc, char *argv[]) {
using kaldi::int32;
const char *usage =
"Checks whether an FST is stochastic and exits with success if so.\n"
"Checks whether an FST is stochastic and exits with success if "
"so.\n"
"Prints out maximum error (in log units).\n"
"\n"
"Usage: fstisstochastic [ in.fst ]\n";
@ -58,8 +73,8 @@ int main(int argc, char *argv[]) {
ParseOptions po(usage);
po.Register("delta", &delta, "Maximum error to accept.");
po.Register("test-in-log", &test_in_log,
"Test stochasticity in log semiring.");
po.Register(
"test-in-log", &test_in_log, "Test stochasticity in log semiring.");
po.Read(argc, argv);
if (po.NumArgs() > 1) {

@ -1,3 +1,17 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// fstbin/fstminimizeencoded.cc
// Copyright 2009-2011 Microsoft Corporation
@ -46,7 +60,8 @@ int main(int argc, char *argv[]) {
float delta = kDelta;
ParseOptions po(usage);
po.Register("delta", &delta,
po.Register("delta",
&delta,
"Delta likelihood used for quantization of weights");
po.Read(argc, argv);

@ -1,3 +1,17 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// fstbin/fsttablecompose.cc
// Copyright 2009-2011 Microsoft Corporation
@ -54,12 +68,15 @@ int main(int argc, char *argv[]) {
const char *usage =
"Composition algorithm [between two FSTs of standard type, in "
"tropical\n"
"semiring] that is more efficient for certain cases-- in particular,\n"
"where one of the FSTs (the left one, if --match-side=left) has large\n"
"semiring] that is more efficient for certain cases-- in "
"particular,\n"
"where one of the FSTs (the left one, if --match-side=left) has "
"large\n"
"out-degree\n"
"\n"
"Usage: fsttablecompose (fst1-rxfilename|fst1-rspecifier) "
"(fst2-rxfilename|fst2-rspecifier) [(out-rxfilename|out-rspecifier)]\n";
"(fst2-rxfilename|fst2-rspecifier) "
"[(out-rxfilename|out-rspecifier)]\n";
ParseOptions po(usage);
@ -67,11 +84,15 @@ int main(int argc, char *argv[]) {
std::string match_side = "left";
std::string compose_filter = "sequence";
po.Register("connect", &opts.connect, "If true, trim FST before output.");
po.Register("match-side", &match_side,
po.Register(
"connect", &opts.connect, "If true, trim FST before output.");
po.Register("match-side",
&match_side,
"Side of composition to do table "
"match, one of: \"left\" or \"right\".");
po.Register("compose-filter", &compose_filter,
po.Register(
"compose-filter",
&compose_filter,
"Composition filter to use, "
"one of: \"alt_sequence\", \"auto\", \"match\", \"sequence\"");

@ -1,3 +1,17 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// bin/arpa2fst.cc
//
// Copyright 2009-2011 Gilles Boulianne.
@ -31,8 +45,10 @@ int main(int argc, char *argv[]) {
"Usage: arpa2fst [opts] <input-arpa> <output-fst>\n"
" e.g.: arpa2fst --disambig-symbol=#0 --read-symbol-table="
"data/lang/words.txt lm/input.arpa G.fst\n\n"
"Note: When called without switches, the output G.fst will contain\n"
"an embedded symbol table. This is compatible with the way a previous\n"
"Note: When called without switches, the output G.fst will "
"contain\n"
"an embedded symbol table. This is compatible with the way a "
"previous\n"
"version of arpa2fst worked.\n";
ParseOptions po(usage);
@ -51,14 +67,20 @@ int main(int argc, char *argv[]) {
po.Register("bos-symbol", &bos_symbol, "Beginning of sentence symbol");
po.Register("eos-symbol", &eos_symbol, "End of sentence symbol");
po.Register("disambig-symbol", &disambig_symbol,
po.Register(
"disambig-symbol",
&disambig_symbol,
"Disambiguator. If provided (e. g. #0), used on input side of "
"backoff links, and <s> and </s> are replaced with epsilons");
po.Register("read-symbol-table", &read_syms_filename,
po.Register("read-symbol-table",
&read_syms_filename,
"Use existing symbol table");
po.Register("write-symbol-table", &write_syms_filename,
po.Register("write-symbol-table",
&write_syms_filename,
"Write generated symbol table to a file");
po.Register("keep-symbols", &keep_symbols,
po.Register(
"keep-symbols",
&keep_symbols,
"Store symbol table with FST. Symbols always saved to FST if "
"symbol tables are neither read or written (otherwise symbols "
"would be lost entirely)");

Loading…
Cancel
Save