You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
PaddleSpeech/examples/other/cc-cedict/run.sh

40 lines
1.1 KiB

#!/bin/bash
# CC-CEDICT download: https://www.mdbg.net/chinese/dictionary?page=cc-cedict
# The word dictionary of this website is based on CC-CEDICT.
# CC-CEDICT is a continuation of the CEDICT project started by Paul Denisowski in 1997 with the
# aim to provide a complete downloadable Chinese to English dictionary with pronunciation in pinyin for the Chinese characters.
# This website allows you to easily add new entries or correct existing entries in CC-CEDICT.
# Submitted entries will be checked and processed frequently and released for download in CEDICT format on this page.
set -e
source path.sh
stage=-1
stop_stage=100
source ${MAIN_ROOT}/utils/parse_options.sh || exit -1
cedict_url=https://www.mdbg.net/chinese/export/cedict/cedict_1_0_ts_utf-8_mdbg.zip
cedict=cedict_1_0_ts_utf-8_mdbg.zip
mkdir -p data
if [ $stage -le -1 ] && [ $stop_stage -ge -1 ];then
test -f data/${cedict} || wget -O data/${cedict} ${cedict_url}
pushd data
unzip ${cedict}
popd
fi
mkdir -p exp
if [ $stage -le 0 ] && [ $stop_stage -ge 0 ];then
cp data/cedict_ts.u8 exp/cedict
python3 local/parser.py exp/cedict exp/cedict.json
fi