Fix
pull/4029/head
co63oc committed 6 months ago via GitHub
parent c2dc4dae2d
commit 50ef94b68c

@@ -233,7 +233,7 @@ def spectrogram(waveform: Tensor,
     round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input
         to FFT. Defaults to True.
     sr (int, optional): Sample rate of input waveform. Defaults to 16000.
-    snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a signal frame when it
+    snip_edges (bool, optional): Drop samples in the end of waveform that can't fit a signal frame when it
         is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True.
     subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False.
     window_type (str, optional): Choose type of window for FFT computation. Defaults to "povey".
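The snip_edges flag documented above decides how many frames a waveform yields. A minimal sketch of the usual Kaldi frame-count convention that this description matches (illustrative only; num_frames is a hypothetical helper, not part of the library):

```python
def num_frames(num_samples: int, frame_length: int, frame_shift: int,
               snip_edges: bool = True) -> int:
    """Expected frame count under the Kaldi-style snip_edges convention."""
    if snip_edges:
        # keep only frames that fit entirely inside the signal
        if num_samples < frame_length:
            return 0
        return 1 + (num_samples - frame_length) // frame_shift
    # otherwise the waveform is padded so every shift produces a frame
    return (num_samples + frame_shift // 2) // frame_shift

# 1 s of 16 kHz audio, 25 ms window, 10 ms shift
print(num_frames(16000, 400, 160, snip_edges=True))   # 98
print(num_frames(16000, 400, 160, snip_edges=False))  # 100
```
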
@@ -443,7 +443,7 @@ def fbank(waveform: Tensor,
     round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input
         to FFT. Defaults to True.
     sr (int, optional): Sample rate of input waveform. Defaults to 16000.
-    snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a signal frame when it
+    snip_edges (bool, optional): Drop samples in the end of waveform that can't fit a signal frame when it
        is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True.
     subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False.
     use_energy (bool, optional): Add an dimension with energy of spectrogram to the output. Defaults to False.
@@ -566,7 +566,7 @@ def mfcc(waveform: Tensor,
     round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input
         to FFT. Defaults to True.
     sr (int, optional): Sample rate of input waveform. Defaults to 16000.
-    snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a signal frame when it
+    snip_edges (bool, optional): Drop samples in the end of waveform that can't fit a signal frame when it
        is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True.
     subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False.
     use_energy (bool, optional): Add an dimension with energy of spectrogram to the output. Defaults to False.

@@ -527,7 +527,7 @@ def melspectrogram(x: np.ndarray,
     if fmax is None:
         fmax = sr // 2
     if fmin < 0 or fmin >= fmax:
-        raise ParameterError('fmin and fmax must statisfy 0<fmin<fmax')
+        raise ParameterError('fmin and fmax must satisfy 0<fmin<fmax')
     s = stft(
         x,

@@ -54,8 +54,8 @@ elif [ "${cmd_backend}" = sge ]; then
 # "sbatch" (Slurm)
 elif [ "${cmd_backend}" = slurm ]; then
     # The default setting is written in conf/slurm.conf.
-    # You must change "-p cpu" and "-p gpu" for the "partion" for your environment.
-    # To know the "partion" names, type "sinfo".
+    # You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
+    # To know the "partition" names, type "sinfo".
     # You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
     # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".


@@ -97,7 +97,7 @@ def test_full_scores_words():
         if w not in model:
             print('"{0}" is an OOV'.format(w))
             oov.append(w)
-    # zh_giga.no_cna_cmn.prune01244.klm is chinese charactor LM
+    # zh_giga.no_cna_cmn.prune01244.klm is chinese character LM
     assert oov == ["盘点", "不怕", "网站", "", "", "海淘", "向来", "便宜", "保真",
                    ""], 'error oov'

@@ -62,7 +62,7 @@ def create_manifest(data_dir, manifest_path_prefix):
     if line == '':
         continue
     audio_id, text = line.split(' ', 1)
-    # remove withespace, charactor text
+    # remove withespace, character text
     text = ''.join(text.split())
     transcript_dict[audio_id] = text
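The `''.join(text.split())` idiom in these manifest scripts removes every whitespace character so the transcript becomes a plain character sequence. A tiny illustration (the transcript line is invented, not taken from the dataset):

```python
line = "BAC009S0002W0122 厨房 用具 也 很 齐全"
audio_id, text = line.split(' ', 1)   # split off the utterance id only
text = ''.join(text.split())          # drop all remaining whitespace
print(audio_id, text)                 # BAC009S0002W0122 厨房用具也很齐全
```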

@@ -65,7 +65,7 @@ def create_manifest(data_dir, manifest_path_prefix):
     if line == '':
         continue
     audio_id, text = line.split(' ', 1)
-    # remove withespace, charactor text
+    # remove withespace, character text
     text = ''.join(text.split())
     transcript_dict[audio_id] = text
@@ -159,7 +159,7 @@ def check_dataset(data_dir):
     if line == '':
         continue
     audio_id, text = line.split(' ', 1)
-    # remove withespace, charactor text
+    # remove withespace, character text
     text = ''.join(text.split())
     transcript_dict[audio_id] = text

@@ -171,7 +171,7 @@ def wer(reference, hypothesis, ignore_case=False, delimiter=' '):
 def cer(reference, hypothesis, ignore_case=False, remove_space=False):
-    """Calculate charactor error rate (CER). CER compares reference text and
+    """Calculate character error rate (CER). CER compares reference text and
     hypothesis text in char-level. CER is defined as:
     .. math::

@@ -523,7 +523,7 @@ class Frontend():
         initials = []
         finals = []
-        # to charactor list
+        # to character list
         words = self._split_word_to_char(words[0])
         for pinyin, char in zip(pinyin_spec, words):

@@ -159,7 +159,7 @@ def wer(reference, hypothesis, ignore_case=False, delimiter=' '):
 def cer(reference, hypothesis, ignore_case=False, remove_space=False):
-    """Calculate charactor error rate (CER). CER compares reference text and
+    """Calculate character error rate (CER). CER compares reference text and
     hypothesis text in char-level. CER is defined as:
     .. math::
         CER = (Sc + Dc + Ic) / Nc
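The formula in this docstring counts character substitutions (Sc), deletions (Dc) and insertions (Ic) against the number of reference characters (Nc), i.e. character-level edit distance over reference length. A minimal self-contained sketch of that computation (not the project's implementation, which also handles ignore_case and remove_space):

```python
def edit_distance(ref: str, hyp: str) -> int:
    # classic Levenshtein distance over characters
    prev = list(range(len(hyp) + 1))
    for i, r in enumerate(ref, 1):
        cur = [i] + [0] * len(hyp)
        for j, h in enumerate(hyp, 1):
            cur[j] = min(prev[j] + 1,              # deletion
                         cur[j - 1] + 1,           # insertion
                         prev[j - 1] + (r != h))   # substitution
        prev = cur
    return prev[-1]

def cer(reference: str, hypothesis: str) -> float:
    # CER = (Sc + Dc + Ic) / Nc
    return edit_distance(reference, hypothesis) / len(reference)

print(cer("今天天气很好", "今天天不好"))  # 2 edits / 6 reference chars ≈ 0.333
```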

@@ -157,7 +157,7 @@ void CTCPrefixBeamSearch::AdvanceDecoding(
       next_score.v_b = prefix_score.ViterbiScore() + prob;
       next_score.times_b = prefix_score.Times();
-      // Prefix not changed, copy the context from pefix
+      // Prefix not changed, copy the context from prefix
       if (context_graph_ && !next_score.has_context) {
         next_score.CopyContext(prefix_score);
         next_score.has_context = true;
@@ -183,7 +183,7 @@ void CTCPrefixBeamSearch::AdvanceDecoding(
         }
       }
-      // Prefix not changed, copy the context from pefix
+      // Prefix not changed, copy the context from prefix
       if (context_graph_ && !next_score1.has_context) {
         next_score1.CopyContext(prefix_score);
         next_score1.has_context = true;

@@ -72,7 +72,7 @@ bool CMVN::Read(std::vector<BaseFloat>* feats) {
         return false;
     }
-    // appply cmvn
+    // apply cmvn
     kaldi::Timer timer;
     Compute(feats);
     VLOG(1) << "CMVN::Read cost: " << timer.Elapsed() << " sec.";
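For orientation, "apply cmvn" here is cepstral mean and variance normalization: each feature dimension is shifted and scaled using statistics accumulated over frames. A simplified per-utterance numpy sketch (illustrative only; the C++ class above works on streamed, flattened feature vectors and precomputed statistics):

```python
import numpy as np

def apply_cmvn(feats: np.ndarray, norm_var: bool = True, eps: float = 1e-20) -> np.ndarray:
    # feats: (num_frames, feature_dim); statistics are taken over the frame axis
    mean = feats.mean(axis=0)
    out = feats - mean
    if norm_var:
        out = out / np.sqrt(feats.var(axis=0) + eps)
    return out

feats = 3.0 * np.random.randn(200, 80).astype(np.float32) + 1.5
normed = apply_cmvn(feats)
print(normed.mean(axis=0)[:3], normed.std(axis=0)[:3])  # per-dim mean ~0, std ~1
```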

@@ -29,7 +29,7 @@ class CMVN : public FrontendInterface {
     // the length of feats = feature_row * feature_dim,
     // the Matrix is squashed into Vector
     virtual bool Read(std::vector<kaldi::BaseFloat>* feats);
-    // the dim_ is the feautre dim.
+    // the dim_ is the feature dim.
     virtual size_t Dim() const { return dim_; }
     virtual void SetFinished() { base_extractor_->SetFinished(); }
     virtual bool IsFinished() const { return base_extractor_->IsFinished(); }

@@ -47,7 +47,7 @@ class DecibelNormalizer : public FrontendInterface {
                        std::unique_ptr<FrontendInterface> base_extractor);
     virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& waves);
     virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* waves);
-    // noramlize audio, the dim is 1.
+    // normalize audio, the dim is 1.
     virtual size_t Dim() const { return dim_; }
     virtual void SetFinished() { base_extractor_->SetFinished(); }
     virtual bool IsFinished() const { return base_extractor_->IsFinished(); }
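DecibelNormalizer rescales the raw waveform before feature extraction. A hedged sketch of the usual target-dB gain technique such normalizers apply (parameter names and the target value are assumptions, not read from this class):

```python
import numpy as np

def normalize_decibels(samples: np.ndarray, target_db: float = -20.0,
                       max_gain_db: float = 300.0) -> np.ndarray:
    # current RMS level in dB, then the gain needed to reach the target level
    rms_db = 10.0 * np.log10(np.mean(samples ** 2) + 1e-20)
    gain_db = min(target_db - rms_db, max_gain_db)
    return samples * (10.0 ** (gain_db / 20.0))

wave = 0.05 * np.random.randn(16000).astype(np.float32)  # a quiet 1 s clip
loud = normalize_decibels(wave)
print(10.0 * np.log10(np.mean(loud ** 2)))               # close to -20 dB
```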

@@ -244,8 +244,8 @@ void MatrixBase<Real>::SymAddMat2(const Real alpha,
   /// function will produce NaN in the output. This is a bug in the
   /// ATLAS library. To overcome this, the AddMatMat function, which calls
   /// cblas_Xgemm(...) rather than cblas_Xsyrk(...), is used in this special
-  /// sitation.
-  /// Wei Shi: Note this bug is observerd for single precision matrix
+  /// situation.
+  /// Wei Shi: Note this bug is observed for single precision matrix
   /// on a 64-bit machine
 #ifdef HAVE_ATLAS
   if (transA == kTrans && num_rows_ >= 56) {
@@ -683,7 +683,7 @@
   if (V_in == NULL) tmpV.Resize(1, this->num_cols_); // work-space if V_in empty.
-  /// Impementation notes:
+  /// Implementation notes:
   /// Lapack works in column-order, therefore the dimensions of *this are
   /// swapped as well as the U and V matrices.
@@ -2378,7 +2378,7 @@ bool ReadHtk(std::istream &is, Matrix<Real> *M_ptr, HtkHeader *header_ptr)
   Matrix<Real> &M = *M_ptr;
   HtkHeader htk_hdr;
-  // TODO(arnab): this fails if the HTK file has CRC cheksum or is compressed.
+  // TODO(arnab): this fails if the HTK file has CRC checksum or is compressed.
   is.read((char*)&htk_hdr, sizeof(htk_hdr)); // we're being really POSIX here!
   if (is.fail()) {
     KALDI_WARN << "Could not read header from HTK feature file ";

@@ -235,7 +235,7 @@ void VectorBase<Real>::CopyRowsFromMat(const MatrixBase<Real> &mat) {
     memcpy(inc_data, mat.Data(), cols * rows * sizeof(Real));
   } else {
     for (MatrixIndexT i = 0; i < rows; i++) {
-      // copy the data to the propper position
+      // copy the data to the proper position
       memcpy(inc_data, mat.RowData(i), cols * sizeof(Real));
       // set new copy position
       inc_data += cols;
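The loop above lays the matrix out row after row in the destination vector. A numpy analogue, just for intuition (not the Kaldi code):

```python
import numpy as np

mat = np.arange(12, dtype=np.float32).reshape(3, 4)  # 3 rows, 4 cols
vec = mat.reshape(-1)                                # rows placed back to back
assert vec[4:8].tolist() == mat[1].tolist()          # row 1 starts right after row 0
```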

@@ -44,7 +44,7 @@ std::string ReadFile2String(const std::string& path) {
 }
 bool FileExists(const std::string& strFilename) {
-  // this funciton if from:
+  // this function if from:
   // https://github.com/kaldi-asr/kaldi/blob/master/src/fstext/deterministic-fst-test.cc
   struct stat stFileInfo;
   bool blnReturn;

@@ -407,7 +407,7 @@ bool WriteLattice(std::ostream &os, bool binary, const Lattice &t) {
   if (os.fail())
     KALDI_WARN << "Stream failure detected.";
   // Write another newline as a terminating character. The read routine will
-  // detect this [this is a Kaldi mechanism, not somethig in the original
+  // detect this [this is a Kaldi mechanism, not something in the original
   // OpenFst code].
   os << '\n';
   return os.good();

@@ -34,7 +34,7 @@ bash run.sh --stop_stage 4
 ## Display Model with [Netron](https://github.com/lutzroeder/netron)
-If you have a model, we can using this commnd to show model graph.
+If you have a model, we can using this commend to show model graph.
 For example:
 ```

@@ -74,7 +74,7 @@ includes/
 #### set path
 push resource into android phone
-1. change resource path in conf to gloabal path, such as:
+1. change resource path in conf to global path, such as:
     [CONF]
     wav_normal=true
@@ -92,9 +92,9 @@ push resource into android phone
     high_freq=14000
     dither=0.0
 2. adb push conf label_list scp test.wav /data/local/tmp/
-3. set reource path in android demo(android_demo/app/src/main/cpp/native-lib.cpp) to actual path, such as:
+3. set resource path in android demo(android_demo/app/src/main/cpp/native-lib.cpp) to actual path, such as:
     std::string conf_path = "/data/local/tmp/conf";
     std::string wav_path = "/data/local/tmp/test.wav";
-4. excecute android_demo in android studio
+4. execute android_demo in android studio

@@ -253,7 +253,6 @@ class Analysis:
             # print(word3.length, word3.text)
             if word3.length == -1:
                 chunk = Chunk(word1, word2)
-                # print("Ture")
             else:
                 chunk = Chunk(word1, word2, word3)
             chunks.append(chunk)

@@ -181,8 +181,8 @@ template <typename T>
 class FlagRegisterer {
  public:
   FlagRegisterer(const string &name, const FlagDescription<T> &desc) {
-    auto registr = FlagRegister<T>::GetRegister();
-    registr->SetDescription(name, desc);
+    auto r = FlagRegister<T>::GetRegister();
+    r->SetDescription(name, desc);
   }
  private:

@@ -62,7 +62,7 @@ def create_manifest(data_dir, manifest_path_prefix):
     if line == '':
         continue
     audio_id, text = line.split(' ', 1)
-    # remove withespace, charactor text
+    # remove withespace, character text
     text = ''.join(text.split())
     transcript_dict[audio_id] = text

@@ -63,7 +63,7 @@ def create_manifest(data_dir, manifest_path_prefix):
     if line == '':
         continue
     audio_id, text = line.split(' ', 1)
-    # remove withespace, charactor text
+    # remove withespace, character text
     text = ''.join(text.split())
     transcript_dict[audio_id] = text

@@ -30,7 +30,7 @@ def _test_snapshot():
     # use a simplest iterable object as dataloader
     dataloader = count()
-    # hack the training proecss: training does nothing except increse iteration
+    # hack the training proecss: training does nothing except increase iteration
     updater = StandardUpdater(model, optimizer, dataloader=dataloader)
     updater.update_core = lambda x: None
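The test above swaps the updater's real training step for a no-op so that only the bookkeeping (iteration counting and snapshotting) gets exercised. A self-contained sketch of the same trick with a hypothetical TinyUpdater instead of the real StandardUpdater(model, optimizer, ...):

```python
from itertools import count

class TinyUpdater:
    """Hypothetical stand-in: pull a batch, run update_core, count iterations."""
    def __init__(self, dataloader):
        self.dataloader = iter(dataloader)
        self.iteration = 0

    def update_core(self, batch):
        raise NotImplementedError("the real training step would go here")

    def update(self):
        batch = next(self.dataloader)
        self.update_core(batch)
        self.iteration += 1

updater = TinyUpdater(dataloader=count())  # count() is the same trivial "dataloader" as in the test
updater.update_core = lambda batch: None   # hack: training does nothing, only the iteration advances
for _ in range(5):
    updater.update()
print(updater.iteration)                   # 5
```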

@@ -17,7 +17,7 @@ cd liblbfgs-$VER
 ./configure --prefix=`pwd`
 make
 # due to the liblbfgs project directory structure, we have to use -i
-# but the erros are completely harmless
+# but the errors are completely harmless
 make -i install
 cd ..

@@ -44,7 +44,7 @@ add_arg('manifest_paths', str,
 # bpe
 add_arg('spm_model_prefix', str, None,
         "spm model prefix, spm_model_%(bpe_mode)_%(count_threshold), only need when `unit_type` is spm")
-add_arg('output_path', str, None, "filepath of formated manifest.", required=True)
+add_arg('output_path', str, None, "filepath of formatted manifest.", required=True)
 # yapf: disable
 args = parser.parse_args()

@@ -79,7 +79,7 @@ if ($HELP)
     print " -b ... disable Perl buffering.\n";
     print " -time ... enable processing time calculation.\n";
     print " -penn ... use Penn treebank-like tokenization.\n";
-    print " -protected FILE ... specify file with patters to be protected in tokenisation.\n";
+    print " -protected FILE ... specify file with patterns to be protected in tokenisation.\n";
     print " -no-escape ... don't perform HTML escaping on apostrophy, quotes, etc.\n";
     exit;
 }
