#! /usr/bin/env bash

# Default stage window. The download/manifest step below runs only when
# stage <= -1 <= stop_stage.
stage=-1
stop_stage=100

# Every helper path below is derived from MAIN_ROOT; abort early rather than
# silently resolving them against "/" when it is unset or empty.
: "${MAIN_ROOT:?MAIN_ROOT must be set}"

# NOTE(review): parse_options.sh is presumably a Kaldi-style option parser
# that lets --stage/--stop_stage override the defaults above and strips the
# parsed options from the positional arguments - confirm.
if [ ! -f "${MAIN_ROOT}/utils/parse_options.sh" ]; then
    echo "Cannot find ${MAIN_ROOT}/utils/parse_options.sh" >&2
    exit 1
fi
source "${MAIN_ROOT}/utils/parse_options.sh"

# Working directory for generated manifests and dictionaries (relative to cwd).
mkdir -p data || exit 1

# Shared location of the dataset preparation scripts and downloaded corpora.
TARGET_DIR="${MAIN_ROOT}/examples/dataset"
mkdir -p "${TARGET_DIR}" || exit 1

# First positional argument; forwarded later as --script-type to
# local/reorganize_thchs30.py.
LEXICON_NAME=$1

# Stage -1: download data and generate manifests.
# Manifests are written with the prefix data/manifest; the dataset itself is
# placed under ${TARGET_DIR}/thchs30.
if [ "${stage}" -le -1 ] && [ "${stop_stage}" -ge -1 ]; then
    # Test the command directly instead of inspecting $? afterwards, and quote
    # the paths so a MAIN_ROOT containing whitespace does not split arguments.
    if ! python3 "${TARGET_DIR}/thchs30/thchs30.py" \
        --manifest_prefix="data/manifest" \
        --target_dir="${TARGET_DIR}/thchs30"; then
        echo "Prepare THCHS-30 failed. Terminated."
        exit 1
    fi
fi

# dump manifest to data/
if ! python3 "${MAIN_ROOT}/utils/dump_manifest.py" \
    --manifest-path=data/manifest.train \
    --output-dir=data; then
    echo "Dump manifest failed. Terminated." >&2
    exit 1
fi

# The copies below target data/dict; make sure it exists instead of relying
# on an earlier step to have created it.
mkdir -p data/dict || exit 1

# copy files to data/dict to gen word.lexicon
cp "${TARGET_DIR}/thchs30/data_thchs30/lm_word/lexicon.txt" data/dict/lm_word_lexicon_1 || {
    echo "Copy lm_word lexicon failed. Terminated." >&2
    exit 1
}
cp "${TARGET_DIR}/thchs30/resource/dict/lexicon.txt" data/dict/lm_word_lexicon_2 || {
    echo "Copy resource lexicon failed. Terminated." >&2
    exit 1
}

# copy phone.lexicon to data/dict
cp "${TARGET_DIR}/thchs30/data_thchs30/lm_phone/lexicon.txt" data/dict/phone.lexicon || {
    echo "Copy phone lexicon failed. Terminated." >&2
    exit 1
}

# gen word.lexicon (python3 for consistency with the other invocations in
# this script)
if ! python3 local/gen_word2phone.py --root-dir=data/dict --output-dir=data/dict; then
    echo "Generate word lexicon failed. Terminated." >&2
    exit 1
fi

# reorganize dataset for MFA
# Guard on the directory this step actually writes to, so a rerun skips the
# work once the corpus exists. If a previous run failed midway, remove
# data/thchs30_corpus manually before rerunning.
corpus_dir=data/thchs30_corpus
if [ ! -d "${corpus_dir}" ]; then
    echo "reorganizing thchs30 corpus..."
    # LEXICON_NAME (the script's first positional argument) selects the
    # script type passed to the reorganizer.
    if ! python3 local/reorganize_thchs30.py \
        --root-dir=data \
        --output-dir="${corpus_dir}" \
        --script-type="${LEXICON_NAME}"; then
        echo "reorganization failed. Terminated." >&2
        exit 1
    fi
    echo "reorganization done."
fi

echo "THCHS-30  data preparation done."
exit 0