gen lexicon with tone in mfa, test=tts

4 years ago · 9a253bc091
parent 88cd0fcaf5
commit 9a253bc091
2 changed files with 2 additions and 7 deletions
--- a/examples/other/mfa/local/reorganize_baker.py
+++ b/examples/other/mfa/local/reorganize_baker.py
@@ -42,9 +42,6 @@ def get_transcripts(path: Union[str, Path]):
    for i in range(0, len(lines), 2):
        sentence_id = lines[i].split()[0]
        transcription = lines[i + 1].strip()
-        # tones are dropped here
-        # since the lexicon does not consider tones, too
-        transcription = " ".join([item[:-1] for item in transcription.split()])
        transcripts[sentence_id] = transcription
    return transcripts
--- a/examples/other/mfa/run.sh
+++ b/examples/other/mfa/run.sh
@@ -4,7 +4,7 @@ mkdir -p $EXP_DIR
 LEXICON_NAME='simple'
 if [ ! -f "$EXP_DIR/$LEXICON_NAME.lexicon" ]; then
    echo "generating lexicon..."
-    python local/generate_lexicon.py "$EXP_DIR/$LEXICON_NAME" --with-r
+    python local/generate_lexicon.py "$EXP_DIR/$LEXICON_NAME" --with-r --with-tone
    echo "lexicon done"
 fi
@@ -16,6 +16,7 @@ if [ ! -d $EXP_DIR/baker_corpus ]; then
    echo "transcription for each audio file is saved with the same namd in $EXP_DIR/baker_corpus "
 fi
 echo "detecting oov..."
 python local/detect_oov.py $EXP_DIR/baker_corpus $EXP_DIR/"$LEXICON_NAME.lexicon"
 echo "detecting oov done. you may consider regenerate lexicon if there is unexpected OOVs."
@@ -44,6 +45,3 @@ if [ ! -d "$EXP_DIR/baker_alignment" ]; then
    echo "model: $EXP_DIR/baker_model"
 fi