append some comments

7 years ago · adab01bbf6
parent 8ec4a96523
commit adab01bbf6
8 changed files with 80 additions and 62 deletions
--- a/models/swig_decoders/ctc_decoders.cpp
+++ b/models/swig_decoders/ctc_decoders.cpp
@ -104,7 +104,7 @@ std::vector<std::pair<double, std::string>> ctc_beam_search_decoder(
    }
    if (!extscorer->is_character_based()) {
      if (extscorer->dictionary == nullptr) {
-        // fill dictionary for fst
+        // fill dictionary for fst with space
        extscorer->fill_dictionary(true);
      }
      auto fst_dict = static_cast<fst::StdVectorFst *>(extscorer->dictionary);
@ -304,8 +304,7 @@ ctc_beam_search_decoder_batch(
    if (extscorer->is_char_map_empty()) {
      extscorer->set_char_map(vocabulary);
    }
-    if (!extscorer->is_character_based() &&
+    if (!extscorer->is_character_based() && extscorer->dictionary == nullptr) {
-        extscorer->dictionary == nullptr) {
      // init dictionary
      extscorer->fill_dictionary(true);
    }
--- a/models/swig_decoders/ctc_decoders.h
+++ b/models/swig_decoders/ctc_decoders.h
@ -14,8 +14,7 @@
 *               over vocabulary of one time step.
 *     vocabulary: A vector of vocabulary.
 * Return:
- *     A vector that each element is a pair of score  and decoding result,
+ *     The decoding result in string
 *     in descending order.
 */
 std::string ctc_greedy_decoder(
    const std::vector<std::vector<double>> &probs_seq,
@ -59,8 +58,8 @@ std::vector<std::pair<double, std::string>> ctc_beam_search_decoder(
 *     cutoff_top_n: Cutoff number for pruning.
 *     ext_scorer: External scorer to evaluate a prefix.
 * Return:
- *     A 2-D vector that each element is a vector of decoding result for one
+ *     A 2-D vector that each element is a vector of beam search decoding
- *     sample.
+ *     result for one audio sample.
 */
 std::vector<std::vector<std::pair<double, std::string>>>
 ctc_beam_search_decoder_batch(
--- a/models/swig_decoders/decoder_utils.cpp
+++ b/models/swig_decoders/decoder_utils.cpp
@ -108,5 +108,5 @@ bool add_word_to_dictionary(
  }
  add_word_to_fst(int_word, dictionary);
-  return true;
+  return true;  // return with successful adding
 }
--- a/models/swig_decoders/decoder_utils.h
+++ b/models/swig_decoders/decoder_utils.h
@ -14,12 +14,14 @@ bool pair_comp_first_rev(const std::pair<T1, T2> &a,
  return a.first > b.first;
 }
 // Function template for comparing two pairs
 template <typename T1, typename T2>
 bool pair_comp_second_rev(const std::pair<T1, T2> &a,
                          const std::pair<T1, T2> &b) {
  return a.second > b.second;
 }
 // Return the sum of two probabilities in log scale
 template <typename T>
 T log_sum_exp(const T &x, const T &y) {
  static T num_min = -std::numeric_limits<T>::max();
@ -32,18 +34,21 @@ T log_sum_exp(const T &x, const T &y) {
 // Functor for prefix comparison
 bool prefix_compare(const PathTrie *x, const PathTrie *y);
-// Get length of utf8 encoding string
+/* Get length of utf8 encoding string
-// See: http://stackoverflow.com/a/4063229
+ * See: http://stackoverflow.com/a/4063229
 */
 size_t get_utf8_str_len(const std::string &str);
-// Split a string into a list of strings on a given string
+/* Split a string into a list of strings on a given string
-// delimiter. NB: delimiters on beginning / end of string are
+ * delimiter. NB: delimiters on beginning / end of string are
-// trimmed. Eg, "FooBarFoo" split on "Foo" returns ["Bar"].
+ * trimmed. Eg, "FooBarFoo" split on "Foo" returns ["Bar"].
 */
 std::vector<std::string> split_str(const std::string &s,
                                   const std::string &delim);
-// Splits string into vector of strings representing
+/* Splits string into vector of strings representing
-// UTF-8 characters (not same as chars)
+ * UTF-8 characters (not same as chars)
 */
 std::vector<std::string> split_utf8_str(const std::string &str);
 // Add a word in index to the dictionary of fst
--- a/models/swig_decoders/path_trie.cpp
+++ b/models/swig_decoders/path_trie.cpp
@ -22,7 +22,7 @@ PathTrie::PathTrie() {
  _dictionary = nullptr;
  _dictionary_state = 0;
  _has_dictionary = false;
-  _matcher = nullptr;  // finds arcs in FST
+  _matcher = nullptr;
 }
 PathTrie::~PathTrie() {
--- a/models/swig_decoders/path_trie.h
+++ b/models/swig_decoders/path_trie.h
@ -10,27 +10,36 @@
 using FSTMATCH = fst::SortedMatcher<fst::StdVectorFst>;
 /* Trie tree for prefix storing and manipulating, with a dictionary in
 * finite-state transducer for spelling correction.
 */
 class PathTrie {
 public:
  PathTrie();
  ~PathTrie();
  // get new prefix after appending new char
  PathTrie* get_path_trie(int new_char, bool reset = true);
  // get the prefix in index from root to current node
  PathTrie* get_path_vec(std::vector<int>& output);
  // get the prefix in index from some stop node to current node
  PathTrie* get_path_vec(std::vector<int>& output,
                         int stop,
                         size_t max_steps = std::numeric_limits<size_t>::max());
  // update log probs
  void iterate_to_vec(std::vector<PathTrie*>& output);
  // set dictionary for FST
  void set_dictionary(fst::StdVectorFst* dictionary);
  void set_matcher(std::shared_ptr<FSTMATCH> matcher);
  bool is_empty() { return _ROOT == character; }
  // remove current path from root
  void remove();
  float log_prob_b_prev;
@ -49,8 +58,10 @@ private:
  std::vector<std::pair<int, PathTrie*>> _children;
  // pointer to dictionary of FST
  fst::StdVectorFst* _dictionary;
  fst::StdVectorFst::StateId _dictionary_state;
  // true if finding arcs in FST
  std::shared_ptr<FSTMATCH> _matcher;
 };
--- a/models/swig_decoders/scorer.cpp
+++ b/models/swig_decoders/scorer.cpp
@ -68,7 +68,7 @@ double Scorer::get_log_cond_prob(const std::vector<std::string>& words) {
    state = out_state;
    out_state = tmp_state;
  }
-  // log10 prob
+  // return log10 prob
  return cond_prob;
 }
@ -189,23 +189,26 @@ void Scorer::fill_dictionary(bool add_space) {
  std::cerr << "Vocab Size " << vocab_size << std::endl;
-  // Simplify FST
+  /* Simplify FST
-  // This gets rid of "epsilon" transitions in the FST.
+   * This gets rid of "epsilon" transitions in the FST.
-  // These are transitions that don't require a string input to be taken.
+   * These are transitions that don't require a string input to be taken.
-  // Getting rid of them is necessary to make the FST determinisitc, but
+   * Getting rid of them is necessary to make the FST deterministic, but
-  // can greatly increase the size of the FST
+   * can greatly increase the size of the FST
   */
  fst::RmEpsilon(&dictionary);
  fst::StdVectorFst* new_dict = new fst::StdVectorFst;
-  // This makes the FST deterministic, meaning for any string input there's
+  /* This makes the FST deterministic, meaning for any string input there's
-  // only one possible state the FST could be in.  It is assumed our
+   * only one possible state the FST could be in.  It is assumed our
-  // dictionary is deterministic when using it.
+   * dictionary is deterministic when using it.
-  // (lest we'd have to check for multiple transitions at each state)
+   * (lest we'd have to check for multiple transitions at each state)
   */
  fst::Determinize(dictionary, new_dict);
-  // Finds the simplest equivalent fst.  This is unnecessary but decreases
+  /* Finds the simplest equivalent fst. This is unnecessary but decreases
-  // memory usage of the dictionary
+   * memory usage of the dictionary
   */
  fst::Minimize(new_dict);
  this->dictionary = new_dict;
 }
--- a/models/swig_decoders/scorer.h
+++ b/models/swig_decoders/scorer.h
@ -30,7 +30,8 @@ public:
  std::vector<std::string> vocabulary;
 };
-/* External scorer to query languange score for n-gram or sentence.
+/* External scorer to query score for n-gram or sentence, including language
 * model scoring and word insertion.
 *
 * Example:
 *     Scorer scorer(alpha, beta, "path_of_language_model");