replace the <space> with ' ' in ctcdecoder

pull/922/head
huangyuxin 3 years ago
parent b1a90d4d7a
commit b53171694e

@ -53,7 +53,8 @@ std::string ctc_greedy_decoder(
std::string best_path_result;
for (size_t i = 0; i < idx_vec.size(); ++i) {
if (idx_vec[i] != blank_id) {
best_path_result += vocabulary[idx_vec[i]];
std::string ch = vocabulary[idx_vec[i]];
best_path_result += (ch == kSPACE) ? tSPACE : ch;
}
}
return best_path_result;

@ -74,7 +74,8 @@ std::vector<std::pair<double, std::string>> get_beam_search_result(
// convert index to string
std::string output_str;
for (size_t j = 0; j < output.size(); j++) {
output_str += vocabulary[output[j]];
std::string ch = vocabulary[output[j]];
output_str += (ch == kSPACE) ? tSPACE : ch;
}
std::pair<double, std::string> output_pair(
-space_prefixes[i]->approx_ctc, output_str);

@ -21,6 +21,7 @@
#include "path_trie.h"
// Vocabulary token that stands for a space; decoders compare vocabulary
// entries against this value.
const std::string kSPACE = "<space>";
// Literal space character substituted for kSPACE when building the decoded
// output string.
const std::string tSPACE = " ";
// Largest finite float value (std::numeric_limits<float>::max()).
const float NUM_FLT_INF = std::numeric_limits<float>::max();
// NOTE(review): std::numeric_limits<float>::min() is the smallest positive
// normalized float, not the most negative value -- confirm this is what
// callers expect.
const float NUM_FLT_MIN = std::numeric_limits<float>::min();

@ -56,10 +56,6 @@ class DeepSpeech2Tester_hub():
cutoff_prob=cfg.cutoff_prob,
cutoff_top_n=cfg.cutoff_top_n,
num_processes=cfg.num_proc_bsearch)
#replace the '<space>' with ' '
result_transcripts = [
sentence.replace("<space>", " ") for sentence in result_transcripts
]
return result_transcripts

@ -341,11 +341,6 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
cutoff_prob=cfg.cutoff_prob,
cutoff_top_n=cfg.cutoff_top_n,
num_processes=cfg.num_proc_bsearch)
#replace the <space> with ' '
result_transcripts = [
self._text_featurizer.detokenize(sentence)
for sentence in result_transcripts
]
self.autolog.times.stamp()
self.autolog.times.stamp()

Loading…
Cancel
Save