diff --git a/examples/other/mfa/local/reorganize_baker.py b/examples/other/mfa/local/reorganize_baker.py index 8adad834..153e01d1 100644 --- a/examples/other/mfa/local/reorganize_baker.py +++ b/examples/other/mfa/local/reorganize_baker.py @@ -42,9 +42,6 @@ def get_transcripts(path: Union[str, Path]): for i in range(0, len(lines), 2): sentence_id = lines[i].split()[0] transcription = lines[i + 1].strip() - # tones are dropped here - # since the lexicon does not consider tones, too - transcription = " ".join([item[:-1] for item in transcription.split()]) transcripts[sentence_id] = transcription return transcripts diff --git a/examples/other/mfa/run.sh b/examples/other/mfa/run.sh old mode 100644 new mode 100755 index 1fef58b4..29dacc9b --- a/examples/other/mfa/run.sh +++ b/examples/other/mfa/run.sh @@ -4,7 +4,7 @@ mkdir -p $EXP_DIR LEXICON_NAME='simple' if [ ! -f "$EXP_DIR/$LEXICON_NAME.lexicon" ]; then echo "generating lexicon..." - python local/generate_lexicon.py "$EXP_DIR/$LEXICON_NAME" --with-r + python local/generate_lexicon.py "$EXP_DIR/$LEXICON_NAME" --with-r --with-tone echo "lexicon done" fi @@ -16,6 +16,7 @@ if [ ! -d $EXP_DIR/baker_corpus ]; then echo "transcription for each audio file is saved with the same namd in $EXP_DIR/baker_corpus " fi + echo "detecting oov..." python local/detect_oov.py $EXP_DIR/baker_corpus $EXP_DIR/"$LEXICON_NAME.lexicon" echo "detecting oov done. you may consider regenerate lexicon if there is unexpected OOVs." @@ -44,6 +45,3 @@ if [ ! -d "$EXP_DIR/baker_alignment" ]; then echo "model: $EXP_DIR/baker_model" fi - - -