gen lexicon with tone in mfa, test=tts

pull/1991/head
TianYuan 2 years ago
parent 88cd0fcaf5
commit 9a253bc091

@ -42,9 +42,6 @@ def get_transcripts(path: Union[str, Path]):
for i in range(0, len(lines), 2):
sentence_id = lines[i].split()[0]
transcription = lines[i + 1].strip()
# tones are dropped here
# since the lexicon does not consider tones, too
transcription = " ".join([item[:-1] for item in transcription.split()])
transcripts[sentence_id] = transcription
return transcripts

@ -4,7 +4,7 @@ mkdir -p $EXP_DIR
LEXICON_NAME='simple'
if [ ! -f "$EXP_DIR/$LEXICON_NAME.lexicon" ]; then
echo "generating lexicon..."
python local/generate_lexicon.py "$EXP_DIR/$LEXICON_NAME" --with-r
python local/generate_lexicon.py "$EXP_DIR/$LEXICON_NAME" --with-r --with-tone
echo "lexicon done"
fi
@ -16,6 +16,7 @@ if [ ! -d $EXP_DIR/baker_corpus ]; then
echo "transcription for each audio file is saved with the same namd in $EXP_DIR/baker_corpus "
fi
echo "detecting oov..."
python local/detect_oov.py $EXP_DIR/baker_corpus $EXP_DIR/"$LEXICON_NAME.lexicon"
echo "detecting oov done. you may consider regenerate lexicon if there is unexpected OOVs."
@ -44,6 +45,3 @@ if [ ! -d "$EXP_DIR/baker_alignment" ]; then
echo "model: $EXP_DIR/baker_model"
fi

Loading…
Cancel
Save