replace the <space> with ' ' in ctcdecoder

5 years ago · b53171694e
parent b1a90d4d7a
commit b53171694e
5 changed files with 5 additions and 11 deletions
--- a/deepspeech/decoders/ctcdecoder/swig/ctc_greedy_decoder.cpp
+++ b/deepspeech/decoders/ctcdecoder/swig/ctc_greedy_decoder.cpp
@@ -53,7 +53,8 @@ std::string ctc_greedy_decoder(
    std::string best_path_result;
    for (size_t i = 0; i < idx_vec.size(); ++i) {
        if (idx_vec[i] != blank_id) {
-            best_path_result += vocabulary[idx_vec[i]];
+            std::string ch = vocabulary[idx_vec[i]];
+            best_path_result += (ch == kSPACE) ? tSPACE : ch;
        }
    }
    return best_path_result;
--- a/deepspeech/decoders/ctcdecoder/swig/decoder_utils.cpp
+++ b/deepspeech/decoders/ctcdecoder/swig/decoder_utils.cpp
@@ -74,7 +74,8 @@ std::vector<std::pair<double, std::string>> get_beam_search_result(
        // convert index to string
        std::string output_str;
        for (size_t j = 0; j < output.size(); j++) {
-            output_str += vocabulary[output[j]];
+            std::string ch = vocabulary[output[j]];
+            output_str += (ch == kSPACE) ? tSPACE : ch;
        }
        std::pair<double, std::string> output_pair(
            -space_prefixes[i]->approx_ctc, output_str);
--- a/deepspeech/decoders/ctcdecoder/swig/decoder_utils.h
+++ b/deepspeech/decoders/ctcdecoder/swig/decoder_utils.h
@@ -21,6 +21,7 @@
 #include "path_trie.h"

 const std::string kSPACE = "<space>";
+const std::string tSPACE = " ";
 const float NUM_FLT_INF = std::numeric_limits<float>::max();
 const float NUM_FLT_MIN = std::numeric_limits<float>::min();

--- a/deepspeech/exps/deepspeech2/bin/test_hub.py
+++ b/deepspeech/exps/deepspeech2/bin/test_hub.py
@@ -56,10 +56,6 @@ class DeepSpeech2Tester_hub():
            cutoff_prob=cfg.cutoff_prob,
            cutoff_top_n=cfg.cutoff_top_n,
            num_processes=cfg.num_proc_bsearch)
-        #replace the '<space>' with ' '
-        result_transcripts = [
-            sentence.replace("<space>", " ") for sentence in result_transcripts
-        ]

        return result_transcripts

--- a/deepspeech/exps/deepspeech2/model.py
+++ b/deepspeech/exps/deepspeech2/model.py
@@ -341,11 +341,6 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
            cutoff_prob=cfg.cutoff_prob,
            cutoff_top_n=cfg.cutoff_top_n,
            num_processes=cfg.num_proc_bsearch)
-        #replace the <space> with ' '
-        result_transcripts = [
-            self._text_featurizer.detokenize(sentence)
-            for sentence in result_transcripts
-        ]

        self.autolog.times.stamp()
        self.autolog.times.stamp()