You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
46 lines
1.1 KiB
46 lines
1.1 KiB
7 years ago
|
#!/usr/bin/env bash
#
# Tiny data preparation: download data and generate manifests, build the
# vocabulary, and compute mean and stddev for the normalizer.
#
# All paths used by the steps below are relative to the directory two levels
# above the directory this script is started from.

# Abort rather than run the remaining steps from the wrong directory when the
# directory change fails.
if ! pushd ../..; then
  echo "Change directory to ../.. failed. Terminated."
  exit 1
fi
|
||
|
|
||
|
# download data, generate manifests
# "${HOME}" is double-quoted so that a home directory containing whitespace or
# glob characters is still passed as a single --target_dir argument.
if ! python data/tiny/tiny.py \
  --manifest_prefix='data/tiny/manifest' \
  --target_dir="${HOME}/.cache/paddle/dataset/speech/tiny"; then
  echo "Prepare LibriSpeech failed. Terminated."
  exit 1
fi
|
||
|
|
||
|
# split the dev-clean manifest into train / dev / test manifests
manifest_src='data/tiny/manifest.dev-clean'

# Without this check a missing source manifest would silently produce three
# empty split files and the later steps would run on them.
if [[ ! -r "${manifest_src}" ]]; then
  echo "Split manifests failed: ${manifest_src} is missing or unreadable. Terminated."
  exit 1
fi

# train: first 32 lines; dev: last 16 of the first 48 lines; test: last 16 of
# the first 64 lines. With at least 64 source lines these are lines 1-32,
# 33-48 and 49-64.
# NOTE(review): with fewer than 64 source lines the head/tail windows overlap
# (same as before this change) - confirm the source manifest is long enough.
if ! {
  head -n 32 "${manifest_src}" > data/tiny/manifest.train &&
    head -n 48 "${manifest_src}" | tail -n 16 > data/tiny/manifest.dev &&
    head -n 64 "${manifest_src}" | tail -n 16 > data/tiny/manifest.test
}; then
  echo "Split manifests failed. Terminated."
  exit 1
fi
|
||
|
|
||
|
|
||
|
# build vocabulary
# Arguments are collected in an array and expanded one word per element.
vocab_args=(
  "--count_threshold=0"
  "--vocab_path=data/tiny/vocab.txt"
  "--manifest_paths=data/tiny/manifest.train"
)

# Stop the whole script when the vocabulary tool reports a failure.
python tools/build_vocab.py "${vocab_args[@]}" || {
  echo "Build vocabulary failed. Terminated."
  exit 1
}
|
||
|
|
||
|
|
||
|
# compute mean and stddev for normalizer
# Arguments are collected in an array and expanded one word per element.
mean_std_args=(
  "--manifest_path=data/tiny/manifest.train"
  "--num_samples=32"
  "--specgram_type=linear"
  "--output_path=data/tiny/mean_std.npz"
)

# Stop the whole script when the statistics tool reports a failure.
python tools/compute_mean_std.py "${mean_std_args[@]}" || {
  echo "Compute mean and stddev failed. Terminated."
  exit 1
}

# Reached only when the step above did not exit.
printf '%s\n' "Tiny data preparation done."
|