diff --git a/examples/dataset/aidatatang_200zh/.gitignore b/examples/dataset/aidatatang_200zh/.gitignore index fcb88779..fc56525e 100644 --- a/examples/dataset/aidatatang_200zh/.gitignore +++ b/examples/dataset/aidatatang_200zh/.gitignore @@ -1,4 +1,4 @@ *.tgz manifest.* *.meta -aidatatang_200zh/ +aidatatang_200zh/ \ No newline at end of file diff --git a/examples/dataset/aishell/.gitignore b/examples/dataset/aishell/.gitignore index 9c6e517e..eea6573e 100644 --- a/examples/dataset/aishell/.gitignore +++ b/examples/dataset/aishell/.gitignore @@ -1 +1,4 @@ data_aishell* +*.meta +manifest.* +*.tgz \ No newline at end of file diff --git a/examples/dataset/gigaspeech/.gitignore b/examples/dataset/gigaspeech/.gitignore new file mode 100644 index 00000000..7f78176b --- /dev/null +++ b/examples/dataset/gigaspeech/.gitignore @@ -0,0 +1 @@ +GigaSpeech/ diff --git a/examples/dataset/gigaspeech/README.md b/examples/dataset/gigaspeech/README.md new file mode 100644 index 00000000..4a1715cb --- /dev/null +++ b/examples/dataset/gigaspeech/README.md @@ -0,0 +1,10 @@ +# [GigaSpeech](https://github.com/SpeechColab/GigaSpeech) + +``` +git clone https://github.com/SpeechColab/GigaSpeech.git + +cd GigaSpeech +utils/gigaspeech_download.sh /disk1/audio_data/gigaspeech +toolkits/kaldi/gigaspeech_data_prep.sh --train-subset XL /disk1/audio_data/gigaspeech ../data +cd .. +``` diff --git a/examples/dataset/gigaspeech/gigaspeech.py b/examples/dataset/gigaspeech/gigaspeech.py new file mode 100644 index 00000000..185a92b8 --- /dev/null +++ b/examples/dataset/gigaspeech/gigaspeech.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/examples/dataset/gigaspeech/run.sh b/examples/dataset/gigaspeech/run.sh new file mode 100644 index 00000000..0f7b46ab --- /dev/null +++ b/examples/dataset/gigaspeech/run.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +set -e + +curdir=$PWD + +test -d GigaSpeech || git clone https://github.com/SpeechColab/GigaSpeech.git +cd GigaSpeech +source env_vars.sh +utils/gigaspeech_download.sh ${curdir}/ diff --git a/examples/dataset/librispeech/.gitignore b/examples/dataset/librispeech/.gitignore index dfd5c67b..465806de 100644 --- a/examples/dataset/librispeech/.gitignore +++ b/examples/dataset/librispeech/.gitignore @@ -5,3 +5,5 @@ test-other train-clean-100 train-clean-360 train-other-500 +*.meta +manifest.* diff --git a/examples/dataset/mini_librispeech/.gitignore b/examples/dataset/mini_librispeech/.gitignore index 61f54c96..7fbcfd65 100644 --- a/examples/dataset/mini_librispeech/.gitignore +++ b/examples/dataset/mini_librispeech/.gitignore @@ -2,3 +2,4 @@ dev-clean/ manifest.dev-clean manifest.train-clean train-clean/ +*.meta diff --git a/examples/dataset/thchs30/.gitignore b/examples/dataset/thchs30/.gitignore index 47dd6268..b94cd7e4 100644 --- a/examples/dataset/thchs30/.gitignore +++ b/examples/dataset/thchs30/.gitignore @@ -3,3 +3,4 @@ manifest.* data_thchs30 resource test-noise +*.meta