pull/1707/head
Hui Zhang 3 years ago
parent cad09b4910
commit c7b987c55d

@ -20,6 +20,7 @@ from diskcache import Cache
from fastapi import FastAPI from fastapi import FastAPI
from fastapi import File from fastapi import File
from fastapi import UploadFile from fastapi import UploadFile
from logs import LOGGER
from milvus_helpers import MilvusHelper from milvus_helpers import MilvusHelper
from mysql_helpers import MySQLHelper from mysql_helpers import MySQLHelper
from operations.count import do_count from operations.count import do_count
@ -31,8 +32,6 @@ from starlette.middleware.cors import CORSMiddleware
from starlette.requests import Request from starlette.requests import Request
from starlette.responses import FileResponse from starlette.responses import FileResponse
from logs import LOGGER
app = FastAPI() app = FastAPI()
app.add_middleware( app.add_middleware(
CORSMiddleware, CORSMiddleware,

@ -12,8 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import numpy as np import numpy as np
from logs import LOGGER from logs import LOGGER
from paddlespeech.cli import VectorExecutor from paddlespeech.cli import VectorExecutor
vector_executor = VectorExecutor() vector_executor = VectorExecutor()

@ -20,7 +20,6 @@ from config import MYSQL_HOST
from config import MYSQL_PORT from config import MYSQL_PORT
from config import MYSQL_PWD from config import MYSQL_PWD
from config import MYSQL_USER from config import MYSQL_USER
from logs import LOGGER from logs import LOGGER

@ -14,7 +14,6 @@
import sys import sys
from config import DEFAULT_TABLE from config import DEFAULT_TABLE
from logs import LOGGER from logs import LOGGER

@ -14,7 +14,6 @@
import sys import sys
from config import DEFAULT_TABLE from config import DEFAULT_TABLE
from logs import LOGGER from logs import LOGGER

@ -17,7 +17,6 @@ import sys
from config import DEFAULT_TABLE from config import DEFAULT_TABLE
from diskcache import Cache from diskcache import Cache
from encode import get_audio_embedding from encode import get_audio_embedding
from logs import LOGGER from logs import LOGGER
@ -27,8 +26,7 @@ def get_audios(path):
""" """
supported_formats = [".wav", ".mp3", ".ogg", ".flac", ".m4a"] supported_formats = [".wav", ".mp3", ".ogg", ".flac", ".m4a"]
return [ return [
item item for sublist in [[os.path.join(dir, file) for file in files]
for sublist in [[os.path.join(dir, file) for file in files]
for dir, _, files in list(os.walk(path))] for dir, _, files in list(os.walk(path))]
for item in sublist if os.path.splitext(item)[1] in supported_formats for item in sublist if os.path.splitext(item)[1] in supported_formats
] ]

@ -17,7 +17,6 @@ import numpy
from config import DEFAULT_TABLE from config import DEFAULT_TABLE
from config import TOP_K from config import TOP_K
from encode import get_audio_embedding from encode import get_audio_embedding
from logs import LOGGER from logs import LOGGER

@ -18,6 +18,7 @@ from config import UPLOAD_PATH
from fastapi import FastAPI from fastapi import FastAPI
from fastapi import File from fastapi import File
from fastapi import UploadFile from fastapi import UploadFile
from logs import LOGGER
from mysql_helpers import MySQLHelper from mysql_helpers import MySQLHelper
from operations.count import do_count_vpr from operations.count import do_count_vpr
from operations.count import do_get from operations.count import do_get
@ -30,8 +31,6 @@ from starlette.middleware.cors import CORSMiddleware
from starlette.requests import Request from starlette.requests import Request
from starlette.responses import FileResponse from starlette.responses import FileResponse
from logs import LOGGER
app = FastAPI() app = FastAPI()
app.add_middleware( app.add_middleware(
CORSMiddleware, CORSMiddleware,

@ -12,15 +12,15 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import argparse import argparse
import asyncio
import base64 import base64
import io import io
import json import json
import logging
import os import os
import random import random
import time import time
from typing import List from typing import List
import logging
import asyncio
import numpy as np import numpy as np
import requests import requests
@ -30,9 +30,9 @@ from ..executor import BaseExecutor
from ..util import cli_client_register from ..util import cli_client_register
from ..util import stats_wrapper from ..util import stats_wrapper
from paddlespeech.cli.log import logger from paddlespeech.cli.log import logger
from paddlespeech.server.tests.asr.online.websocket_client import ASRAudioHandler
from paddlespeech.server.utils.audio_process import wav2pcm from paddlespeech.server.utils.audio_process import wav2pcm
from paddlespeech.server.utils.util import wav2base64 from paddlespeech.server.utils.util import wav2base64
from paddlespeech.server.tests.asr.online.websocket_client import ASRAudioHandler
__all__ = ['TTSClientExecutor', 'ASRClientExecutor', 'CLSClientExecutor'] __all__ = ['TTSClientExecutor', 'ASRClientExecutor', 'CLSClientExecutor']
@ -234,7 +234,8 @@ class ASRClientExecutor(BaseExecutor):
@cli_client_register( @cli_client_register(
name='paddlespeech_client.asr_online', description='visit asr online service') name='paddlespeech_client.asr_online',
description='visit asr online service')
class ASRClientExecutor(BaseExecutor): class ASRClientExecutor(BaseExecutor):
def __init__(self): def __init__(self):
super(ASRClientExecutor, self).__init__() super(ASRClientExecutor, self).__init__()

@ -1,12 +1,11 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2021 Mobvoi Inc. All Rights Reserved. # Copyright 2021 Mobvoi Inc. All Rights Reserved.
# Author: zhendong.peng@mobvoi.com (Zhendong Peng) # Author: zhendong.peng@mobvoi.com (Zhendong Peng)
import argparse import argparse
from flask import Flask, render_template from flask import Flask
from flask import render_template
parser = argparse.ArgumentParser(description='training your network') parser = argparse.ArgumentParser(description='training your network')
parser.add_argument('--port', default=19999, type=int, help='port id') parser.add_argument('--port', default=19999, type=int, help='port id')
@ -14,9 +13,11 @@ args = parser.parse_args()
app = Flask(__name__) app = Flask(__name__)
@app.route('/') @app.route('/')
def index(): def index():
return render_template('index.html') return render_template('index.html')
if __name__ == '__main__': if __name__ == '__main__':
app.run(host='0.0.0.0', port=args.port, debug=True) app.run(host='0.0.0.0', port=args.port, debug=True)

@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
from dataclasses import dataclass from dataclasses import dataclass
from dataclasses import fields from dataclasses import fields
from paddle.io import Dataset from paddle.io import Dataset
from paddleaudio import load as load_audio from paddleaudio import load as load_audio

@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import json import json
from dataclasses import dataclass from dataclasses import dataclass
from dataclasses import fields from dataclasses import fields
from paddle.io import Dataset from paddle.io import Dataset
from paddleaudio import load as load_audio from paddleaudio import load as load_audio

@ -34,7 +34,8 @@ DEFINE_int32(receptive_field_length,
DEFINE_int32(downsampling_rate, DEFINE_int32(downsampling_rate,
4, 4,
"two CNN(kernel=5) module downsampling rate."); "two CNN(kernel=5) module downsampling rate.");
DEFINE_string(model_input_names, DEFINE_string(
model_input_names,
"audio_chunk,audio_chunk_lens,chunk_state_h_box,chunk_state_c_box", "audio_chunk,audio_chunk_lens,chunk_state_h_box,chunk_state_c_box",
"model input names"); "model input names");
DEFINE_string(model_output_names, DEFINE_string(model_output_names,

@ -5,4 +5,3 @@ ASR audio feature test bins. We are using these bins to test linear/fbank/mfcc asr
* linear_spectrogram_without_db_norm_main.cc * linear_spectrogram_without_db_norm_main.cc
compute linear spectrogram w/o db norm in streaming manner. compute linear spectrogram w/o db norm in streaming manner.

@ -43,7 +43,9 @@ int main(int argc, char* argv[]) {
for (double x : mean_stat) { for (double x : mean_stat) {
mean_stat_vec.push_back(x); mean_stat_vec.push_back(x);
} }
// LOG(INFO) << mean_stat; this line will cause simdjson::simdjson_error("Objects and arrays can only be iterated when they are first encountered") // LOG(INFO) << mean_stat; this line will cause
// simdjson::simdjson_error("Objects and arrays can only be iterated when
// they are first encountered")
ondemand::array var_stat = val["var_stat"]; ondemand::array var_stat = val["var_stat"];
std::vector<kaldi::BaseFloat> var_stat_vec; std::vector<kaldi::BaseFloat> var_stat_vec;

@ -14,8 +14,6 @@
// deepspeech2 online model info // deepspeech2 online model info
#include "base/flags.h"
#include "base/log.h"
#include <algorithm> #include <algorithm>
#include <fstream> #include <fstream>
#include <functional> #include <functional>
@ -23,6 +21,8 @@
#include <iterator> #include <iterator>
#include <numeric> #include <numeric>
#include <thread> #include <thread>
#include "base/flags.h"
#include "base/log.h"
#include "paddle_inference_api.h" #include "paddle_inference_api.h"
using std::cout; using std::cout;

@ -1,3 +1 @@
# NGram Train # NGram Train

@ -92,8 +92,7 @@ void CTCBeamSearch::AdvanceDecode(
while (1) { while (1) {
vector<vector<BaseFloat>> likelihood; vector<vector<BaseFloat>> likelihood;
vector<BaseFloat> frame_prob; vector<BaseFloat> frame_prob;
bool flag = bool flag = decodable->FrameLikelihood(num_frame_decoded_, &frame_prob);
decodable->FrameLikelihood(num_frame_decoded_, &frame_prob);
if (flag == false) break; if (flag == false) break;
likelihood.push_back(frame_prob); likelihood.push_back(frame_prob);
AdvanceDecoding(likelihood); AdvanceDecoding(likelihood);

@ -49,10 +49,9 @@ bool Decodable::IsLastFrame(int32 frame) {
int32 Decodable::NumIndices() const { return 0; } int32 Decodable::NumIndices() const { return 0; }
// the ilabel(TokenId) of wfst(TLG) insert <eps>(id = 0) in front of Nnet prob id. // the ilabel(TokenId) of wfst(TLG) insert <eps>(id = 0) in front of Nnet prob
int32 Decodable::TokenId2NnetId(int32 token_id) { // id.
return token_id - 1; int32 Decodable::TokenId2NnetId(int32 token_id) { return token_id - 1; }
}
BaseFloat Decodable::LogLikelihood(int32 frame, int32 index) { BaseFloat Decodable::LogLikelihood(int32 frame, int32 index) {
CHECK_LE(index, nnet_cache_.NumCols()); CHECK_LE(index, nnet_cache_.NumCols());
@ -60,7 +59,8 @@ BaseFloat Decodable::LogLikelihood(int32 frame, int32 index) {
int32 frame_idx = frame - frame_offset_; int32 frame_idx = frame - frame_offset_;
// the nnet output is prob rather than log prob // the nnet output is prob rather than log prob
// the index - 1, because the ilabel // the index - 1, because the ilabel
return acoustic_scale_ * std::log(nnet_cache_(frame_idx, TokenId2NnetId(index)) + return acoustic_scale_ *
std::log(nnet_cache_(frame_idx, TokenId2NnetId(index)) +
std::numeric_limits<float>::min()); std::numeric_limits<float>::min());
} }

@ -45,7 +45,8 @@ struct ModelOptions {
thread_num(2), thread_num(2),
use_gpu(false), use_gpu(false),
input_names( input_names(
"audio_chunk,audio_chunk_lens,chunk_state_h_box,chunk_state_c_box"), "audio_chunk,audio_chunk_lens,chunk_state_h_box,chunk_state_c_"
"box"),
output_names( output_names(
"save_infer_model/scale_0.tmp_1,save_infer_model/" "save_infer_model/scale_0.tmp_1,save_infer_model/"
"scale_1.tmp_1,save_infer_model/scale_2.tmp_1,save_infer_model/" "scale_1.tmp_1,save_infer_model/scale_2.tmp_1,save_infer_model/"

@ -40,5 +40,4 @@ std::string ReadFile2String(const std::string& path) {
return std::string((std::istreambuf_iterator<char>(input_file)), return std::string((std::istreambuf_iterator<char>(input_file)),
std::istreambuf_iterator<char>()); std::istreambuf_iterator<char>());
} }
} }

@ -20,5 +20,4 @@ bool ReadFileToVector(const std::string& filename,
std::vector<std::string>* data); std::vector<std::string>* data);
std::string ReadFile2String(const std::string& path); std::string ReadFile2String(const std::string& path);
} }

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,3 +1,17 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// fstbin/fstaddselfloops.cc // fstbin/fstaddselfloops.cc
// Copyright 2009-2011 Microsoft Corporation // Copyright 2009-2011 Microsoft Corporation
@ -43,12 +57,14 @@ int main(int argc, char *argv[]) {
const char *usage = const char *usage =
"Adds self-loops to states of an FST to propagate disambiguation " "Adds self-loops to states of an FST to propagate disambiguation "
"symbols through it\n" "symbols through it\n"
"They are added on each final state and each state with non-epsilon " "They are added on each final state and each state with "
"non-epsilon "
"output symbols\n" "output symbols\n"
"on at least one arc out of the state. Useful in conjunction with " "on at least one arc out of the state. Useful in conjunction with "
"predeterminize\n" "predeterminize\n"
"\n" "\n"
"Usage: fstaddselfloops in-disambig-list out-disambig-list [in.fst " "Usage: fstaddselfloops in-disambig-list out-disambig-list "
"[in.fst "
"[out.fst] ]\n" "[out.fst] ]\n"
"E.g: fstaddselfloops in.list out.list < in.fst > withloops.fst\n" "E.g: fstaddselfloops in.list out.list < in.fst > withloops.fst\n"
"in.list and out.list are lists of integers, one per line, of the\n" "in.list and out.list are lists of integers, one per line, of the\n"
@ -71,19 +87,19 @@ int main(int argc, char *argv[]) {
std::vector<int32> disambig_in; std::vector<int32> disambig_in;
if (!ReadIntegerVectorSimple(disambig_in_rxfilename, &disambig_in)) if (!ReadIntegerVectorSimple(disambig_in_rxfilename, &disambig_in))
KALDI_ERR KALDI_ERR << "fstaddselfloops: Could not read disambiguation "
<< "fstaddselfloops: Could not read disambiguation symbols from " "symbols from "
<< kaldi::PrintableRxfilename(disambig_in_rxfilename); << kaldi::PrintableRxfilename(disambig_in_rxfilename);
std::vector<int32> disambig_out; std::vector<int32> disambig_out;
if (!ReadIntegerVectorSimple(disambig_out_rxfilename, &disambig_out)) if (!ReadIntegerVectorSimple(disambig_out_rxfilename, &disambig_out))
KALDI_ERR KALDI_ERR << "fstaddselfloops: Could not read disambiguation "
<< "fstaddselfloops: Could not read disambiguation symbols from " "symbols from "
<< kaldi::PrintableRxfilename(disambig_out_rxfilename); << kaldi::PrintableRxfilename(disambig_out_rxfilename);
if (disambig_in.size() != disambig_out.size()) if (disambig_in.size() != disambig_out.size())
KALDI_ERR KALDI_ERR << "fstaddselfloops: mismatch in size of disambiguation "
<< "fstaddselfloops: mismatch in size of disambiguation symbols"; "symbols";
AddSelfLoops(fst, disambig_in, disambig_out); AddSelfLoops(fst, disambig_in, disambig_out);

@ -1,3 +1,17 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// fstbin/fstdeterminizestar.cc // fstbin/fstdeterminizestar.cc
// Copyright 2009-2011 Microsoft Corporation // Copyright 2009-2011 Microsoft Corporation
@ -73,11 +87,13 @@ int main(int argc, char *argv[]) {
bool use_log = false; bool use_log = false;
ParseOptions po(usage); ParseOptions po(usage);
po.Register("use-log", &use_log, "Determinize in log semiring."); po.Register("use-log", &use_log, "Determinize in log semiring.");
po.Register("delta", &delta, po.Register("delta",
&delta,
"Delta value used to determine equivalence of weights."); "Delta value used to determine equivalence of weights.");
po.Register( po.Register("max-states",
"max-states", &max_states, &max_states,
"Maximum number of states in determinized FST before it will abort."); "Maximum number of states in determinized FST before it "
"will abort.");
po.Read(argc, argv); po.Read(argc, argv);
if (po.NumArgs() > 2) { if (po.NumArgs() > 2) {

@ -1,3 +1,17 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// fstbin/fstisstochastic.cc // fstbin/fstisstochastic.cc
// Copyright 2009-2011 Microsoft Corporation // Copyright 2009-2011 Microsoft Corporation
@ -48,7 +62,8 @@ int main(int argc, char *argv[]) {
using kaldi::int32; using kaldi::int32;
const char *usage = const char *usage =
"Checks whether an FST is stochastic and exits with success if so.\n" "Checks whether an FST is stochastic and exits with success if "
"so.\n"
"Prints out maximum error (in log units).\n" "Prints out maximum error (in log units).\n"
"\n" "\n"
"Usage: fstisstochastic [ in.fst ]\n"; "Usage: fstisstochastic [ in.fst ]\n";
@ -58,8 +73,8 @@ int main(int argc, char *argv[]) {
ParseOptions po(usage); ParseOptions po(usage);
po.Register("delta", &delta, "Maximum error to accept."); po.Register("delta", &delta, "Maximum error to accept.");
po.Register("test-in-log", &test_in_log, po.Register(
"Test stochasticity in log semiring."); "test-in-log", &test_in_log, "Test stochasticity in log semiring.");
po.Read(argc, argv); po.Read(argc, argv);
if (po.NumArgs() > 1) { if (po.NumArgs() > 1) {

@ -1,3 +1,17 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// fstbin/fstminimizeencoded.cc // fstbin/fstminimizeencoded.cc
// Copyright 2009-2011 Microsoft Corporation // Copyright 2009-2011 Microsoft Corporation
@ -46,7 +60,8 @@ int main(int argc, char *argv[]) {
float delta = kDelta; float delta = kDelta;
ParseOptions po(usage); ParseOptions po(usage);
po.Register("delta", &delta, po.Register("delta",
&delta,
"Delta likelihood used for quantization of weights"); "Delta likelihood used for quantization of weights");
po.Read(argc, argv); po.Read(argc, argv);

@ -1,3 +1,17 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// fstbin/fsttablecompose.cc // fstbin/fsttablecompose.cc
// Copyright 2009-2011 Microsoft Corporation // Copyright 2009-2011 Microsoft Corporation
@ -54,12 +68,15 @@ int main(int argc, char *argv[]) {
const char *usage = const char *usage =
"Composition algorithm [between two FSTs of standard type, in " "Composition algorithm [between two FSTs of standard type, in "
"tropical\n" "tropical\n"
"semiring] that is more efficient for certain cases-- in particular,\n" "semiring] that is more efficient for certain cases-- in "
"where one of the FSTs (the left one, if --match-side=left) has large\n" "particular,\n"
"where one of the FSTs (the left one, if --match-side=left) has "
"large\n"
"out-degree\n" "out-degree\n"
"\n" "\n"
"Usage: fsttablecompose (fst1-rxfilename|fst1-rspecifier) " "Usage: fsttablecompose (fst1-rxfilename|fst1-rspecifier) "
"(fst2-rxfilename|fst2-rspecifier) [(out-rxfilename|out-rspecifier)]\n"; "(fst2-rxfilename|fst2-rspecifier) "
"[(out-rxfilename|out-rspecifier)]\n";
ParseOptions po(usage); ParseOptions po(usage);
@ -67,11 +84,15 @@ int main(int argc, char *argv[]) {
std::string match_side = "left"; std::string match_side = "left";
std::string compose_filter = "sequence"; std::string compose_filter = "sequence";
po.Register("connect", &opts.connect, "If true, trim FST before output."); po.Register(
po.Register("match-side", &match_side, "connect", &opts.connect, "If true, trim FST before output.");
po.Register("match-side",
&match_side,
"Side of composition to do table " "Side of composition to do table "
"match, one of: \"left\" or \"right\"."); "match, one of: \"left\" or \"right\".");
po.Register("compose-filter", &compose_filter, po.Register(
"compose-filter",
&compose_filter,
"Composition filter to use, " "Composition filter to use, "
"one of: \"alt_sequence\", \"auto\", \"match\", \"sequence\""); "one of: \"alt_sequence\", \"auto\", \"match\", \"sequence\"");

@ -1,3 +1,17 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// bin/arpa2fst.cc // bin/arpa2fst.cc
// //
// Copyright 2009-2011 Gilles Boulianne. // Copyright 2009-2011 Gilles Boulianne.
@ -31,8 +45,10 @@ int main(int argc, char *argv[]) {
"Usage: arpa2fst [opts] <input-arpa> <output-fst>\n" "Usage: arpa2fst [opts] <input-arpa> <output-fst>\n"
" e.g.: arpa2fst --disambig-symbol=#0 --read-symbol-table=" " e.g.: arpa2fst --disambig-symbol=#0 --read-symbol-table="
"data/lang/words.txt lm/input.arpa G.fst\n\n" "data/lang/words.txt lm/input.arpa G.fst\n\n"
"Note: When called without switches, the output G.fst will contain\n" "Note: When called without switches, the output G.fst will "
"an embedded symbol table. This is compatible with the way a previous\n" "contain\n"
"an embedded symbol table. This is compatible with the way a "
"previous\n"
"version of arpa2fst worked.\n"; "version of arpa2fst worked.\n";
ParseOptions po(usage); ParseOptions po(usage);
@ -51,14 +67,20 @@ int main(int argc, char *argv[]) {
po.Register("bos-symbol", &bos_symbol, "Beginning of sentence symbol"); po.Register("bos-symbol", &bos_symbol, "Beginning of sentence symbol");
po.Register("eos-symbol", &eos_symbol, "End of sentence symbol"); po.Register("eos-symbol", &eos_symbol, "End of sentence symbol");
po.Register("disambig-symbol", &disambig_symbol, po.Register(
"disambig-symbol",
&disambig_symbol,
"Disambiguator. If provided (e. g. #0), used on input side of " "Disambiguator. If provided (e. g. #0), used on input side of "
"backoff links, and <s> and </s> are replaced with epsilons"); "backoff links, and <s> and </s> are replaced with epsilons");
po.Register("read-symbol-table", &read_syms_filename, po.Register("read-symbol-table",
&read_syms_filename,
"Use existing symbol table"); "Use existing symbol table");
po.Register("write-symbol-table", &write_syms_filename, po.Register("write-symbol-table",
&write_syms_filename,
"Write generated symbol table to a file"); "Write generated symbol table to a file");
po.Register("keep-symbols", &keep_symbols, po.Register(
"keep-symbols",
&keep_symbols,
"Store symbol table with FST. Symbols always saved to FST if " "Store symbol table with FST. Symbols always saved to FST if "
"symbol tables are neither read or written (otherwise symbols " "symbol tables are neither read or written (otherwise symbols "
"would be lost entirely)"); "would be lost entirely)");

Loading…
Cancel
Save