gen lexicon with tone in mfa, test=tts

pull/1991/head
TianYuan 2 years ago
parent 88cd0fcaf5
commit 9a253bc091

@ -42,9 +42,6 @@ def get_transcripts(path: Union[str, Path]):
for i in range(0, len(lines), 2): for i in range(0, len(lines), 2):
sentence_id = lines[i].split()[0] sentence_id = lines[i].split()[0]
transcription = lines[i + 1].strip() transcription = lines[i + 1].strip()
# tones are dropped here
# since the lexicon does not consider tones, too
transcription = " ".join([item[:-1] for item in transcription.split()])
transcripts[sentence_id] = transcription transcripts[sentence_id] = transcription
return transcripts return transcripts

@ -4,7 +4,7 @@ mkdir -p $EXP_DIR
LEXICON_NAME='simple' LEXICON_NAME='simple'
if [ ! -f "$EXP_DIR/$LEXICON_NAME.lexicon" ]; then if [ ! -f "$EXP_DIR/$LEXICON_NAME.lexicon" ]; then
echo "generating lexicon..." echo "generating lexicon..."
python local/generate_lexicon.py "$EXP_DIR/$LEXICON_NAME" --with-r python local/generate_lexicon.py "$EXP_DIR/$LEXICON_NAME" --with-r --with-tone
echo "lexicon done" echo "lexicon done"
fi fi
@ -16,6 +16,7 @@ if [ ! -d $EXP_DIR/baker_corpus ]; then
echo "transcription for each audio file is saved with the same namd in $EXP_DIR/baker_corpus " echo "transcription for each audio file is saved with the same namd in $EXP_DIR/baker_corpus "
fi fi
echo "detecting oov..." echo "detecting oov..."
python local/detect_oov.py $EXP_DIR/baker_corpus $EXP_DIR/"$LEXICON_NAME.lexicon" python local/detect_oov.py $EXP_DIR/baker_corpus $EXP_DIR/"$LEXICON_NAME.lexicon"
echo "detecting oov done. you may consider regenerate lexicon if there is unexpected OOVs." echo "detecting oov done. you may consider regenerate lexicon if there is unexpected OOVs."
@ -44,6 +45,3 @@ if [ ! -d "$EXP_DIR/baker_alignment" ]; then
echo "model: $EXP_DIR/baker_model" echo "model: $EXP_DIR/baker_model"
fi fi

Loading…
Cancel
Save