diff --git a/paddlespeech/t2s/datasets/__init__.py b/paddlespeech/t2s/datasets/__init__.py index acaf808a..fc64a82f 100644 --- a/paddlespeech/t2s/datasets/__init__.py +++ b/paddlespeech/t2s/datasets/__init__.py @@ -12,5 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. from .common import * -from .csmsc import * from .ljspeech import * diff --git a/paddlespeech/t2s/datasets/csmsc.py b/paddlespeech/t2s/datasets/csmsc.py deleted file mode 100644 index 9928a73a..00000000 --- a/paddlespeech/t2s/datasets/csmsc.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -from pathlib import Path - -from paddle.io import Dataset - -__all__ = ["CSMSCMetaData"] - - -class CSMSCMetaData(Dataset): - def __init__(self, root): - """ - :param root: the path of baker dataset - """ - self.root = os.path.abspath(root) - records = [] - index = 1 - self.meta_info = ["file_path", "text", "pinyin"] - - metadata_path = os.path.join(root, "ProsodyLabeling/000001-010000.txt") - wav_dirs = os.path.join(self.root, "Wave") - with open(metadata_path, 'r', encoding='utf-8') as f: - while True: - line1 = f.readline().strip() - if not line1: - break - line2 = f.readline().strip() - strs = line1.split() - wav_fname = line1.split()[0].strip() + '.wav' - wav_filepath = os.path.join(wav_dirs, wav_fname) - text = strs[1].strip() - pinyin = line2 - records.append([wav_filepath, text, pinyin]) - - self.records = records - - def __getitem__(self, i): - return self.records[i] - - def __len__(self): - return len(self.records) - - def get_meta_info(self): - return self.meta_info