diff --git a/examples/voxceleb/sv0/local/data.sh b/examples/voxceleb/sv0/local/data.sh
deleted file mode 100755
index 6df9c3b8..00000000
--- a/examples/voxceleb/sv0/local/data.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-stage=-1
-stop_stage=100
-TARGET_DIR=${MAIN_ROOT}/dataset
-
-. utils/parse_options.sh || exit -1;
-
-src=$1
-mkdir -p data/{dev,test}
-if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
-    # download data, generate manifests
-    # create data/{dev,test} directory to store the manifest files
-    python3 ${TARGET_DIR}/voxceleb/voxceleb1.py \
-        --manifest_prefix="data/manifest" \
-        --target_dir="${src}"
-
-    if [ $? -ne 0 ]; then
-        echo "Prepare Voxceleb failed. Terminated."
-        exit 1
-    fi
-    mv data/manifest.dev data/dev
-    mv data/voxceleb1.dev.meta data/dev
-
-    mv data/manifest.test data/test
-    mv data/voxceleb1.test.meta data/test
-fi
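Note: data.sh above only wrapped the VoxCeleb1 manifest preparation. A minimal Python sketch of the same flow is kept here for reference; the voxceleb1.py entry point, flags, and file names come from the script itself, everything else is illustrative and not part of this patch.

    import shutil
    import subprocess
    from pathlib import Path

    def prepare_voxceleb1(target_dir: str, src: str) -> None:
        """Mirror the removed shell steps: generate manifests, then sort them into data/{dev,test}."""
        for split in ("dev", "test"):
            Path("data", split).mkdir(parents=True, exist_ok=True)
        # same invocation as the deleted script
        subprocess.run(
            ["python3", f"{target_dir}/voxceleb/voxceleb1.py",
             "--manifest_prefix=data/manifest", f"--target_dir={src}"],
            check=True)
        for split in ("dev", "test"):
            shutil.move(f"data/manifest.{split}", f"data/{split}")
            shutil.move(f"data/voxceleb1.{split}.meta", f"data/{split}")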
diff --git a/paddlespeech/vector/__init__.py b/paddlespeech/vector/__init__.py
index 5c846193..61d5aa21 100644
--- a/paddlespeech/vector/__init__.py
+++ b/paddlespeech/vector/__init__.py
@@ -10,32 +10,4 @@
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-__init__ file for sidt package.
-"""
-
-import logging as sidt_logging
-import colorlog
-
-LOG_COLOR_CONFIG = {
-    'DEBUG': 'white',
-    'INFO': 'white',
-    'WARNING': 'yellow',
-    'ERROR': 'red',
-    'CRITICAL': 'purple',
-}
-
-# set up the global logger
-colored_formatter = colorlog.ColoredFormatter(
-    '%(log_color)s [%(levelname)s] [%(asctime)s] [%(filename)s:%(lineno)d] - %(message)s',
-    datefmt="%Y-%m-%d %H:%M:%S",
-    log_colors=LOG_COLOR_CONFIG)  # log output format
-_logger = sidt_logging.getLogger("sidt")
-handler = colorlog.StreamHandler()
-handler.setLevel(sidt_logging.INFO)
-handler.setFormatter(colored_formatter)
-_logger.addHandler(handler)
-_logger.setLevel(sidt_logging.INFO)
-
+# limitations under the License.
\ No newline at end of file
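Note: the colored package-level logger is gone, but several of the modules removed below still did `from paddlespeech.vector import _logger as log`. Any remaining caller can fall back to the standard library; a minimal sketch, assuming plain uncolored output is acceptable (not part of this patch):

    import logging

    logging.basicConfig(
        format="[%(levelname)s] [%(asctime)s] [%(filename)s:%(lineno)d] - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        level=logging.INFO)
    log = logging.getLogger("paddlespeech.vector")
    log.info("colorlog dependency is no longer required")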
- """ - return self._collate_fn - - def _random_chunk(self, length): - chunk_size = random.randint(self.min_chunk_size, self.max_chunk_size) - if chunk_size >= length: - return 0, length - start = random.randint(0, length - chunk_size) - end = start + chunk_size - - return start, end - - def _select_by_speaker(self, index): - if self.scp_reader is None or not self.utt_info: - return [] - index = index % (len(self.utt_info)) - inputs = [] - labels = [] - item_size = random.randint(self.min_item_size, self.max_item_size) - for loop_idx in range(item_size): - try: - utt_index = random.randint(0, len(self.utt_info[index][1])) \ - % len(self.utt_info[index][1]) - key = self.utt_info[index][1][utt_index] - except: - print(index, utt_index, len(self.utt_info[index][1])) - sys.exit(-1) - x = self.scp_reader[key] - x = np.transpose(x) - bg, end = self._random_chunk(x.shape[-1]) - inputs.append(x[:, bg: end]) - labels.append(self.utt_info[index][0]) - return inputs, labels - - def _select_by_utt(self, index): - if self.scp_reader is None or len(self.utt_info) == 0: - return {} - index = index % (len(self.utt_info)) - key = self.utt_info[index][0] - x = self.scp_reader[key] - x = np.transpose(x) - bg, end = self._random_chunk(x.shape[-1]) - - y = self.utt_info[index][1] - - return [x[:, bg: end]], [y] - - def __getitem__(self, index): - if self._is_select_by_speaker: - return self._select_by_speaker(index) - else: - return self._select_by_utt(index) - - def __len__(self): - return len(self.utt_info) * self.repeat - - def __iter__(self): - self._start = 0 - return self - - def __next__(self): - if self._start < len(self): - ret = self[self._start] - self._start += 1 - return ret - else: - raise StopIteration diff --git a/paddlespeech/vector/datasets/dataset.py b/paddlespeech/vector/datasets/dataset.py deleted file mode 100644 index e7030053..00000000 --- a/paddlespeech/vector/datasets/dataset.py +++ /dev/null @@ -1,143 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import random -import numpy as np -import kaldi_python_io as k_io -from paddle.io import Dataset -from paddlespeech.vector.utils.data_utils import batch_pad_right -import paddlespeech.vector.utils as utils -from paddlespeech.vector.utils.utils import read_map_file - -def ark_collate_fn(batch): - """ - Custom collate function for kaldi feats dataset - - Args: - min_chunk_size: min chunk size of a utterance - max_chunk_size: max chunk size of a utterance - - Returns: - ark_collate_fn: collate funtion for dataloader - """ - - data = [] - target = [] - for items in batch: - for x, y in zip(items[0], items[1]): - data.append(np.array(x)) - target.append(y) - - data, lengths = batch_pad_right(data) - return np.array(data, dtype=np.float32), \ - np.array(lengths, dtype=np.float32), \ - np.array(target, dtype=np.long).reshape((len(target), 1)) - - -class KaldiArkDataset(Dataset): - """ - Dataset used to load kaldi ark/scp files. 
- """ - def __init__(self, scp_file, label2utt, min_item_size=1, - max_item_size=1, repeat=50, min_chunk_size=200, - max_chunk_size=400, select_by_speaker=True): - self.scp_file = scp_file - self.scp_reader = None - self.repeat = repeat - self.min_item_size = min_item_size - self.max_item_size = max_item_size - self.min_chunk_size = min_chunk_size - self.max_chunk_size = max_chunk_size - self._collate_fn = ark_collate_fn - self._is_select_by_speaker = select_by_speaker - if utils.is_exist(self.scp_file): - self.scp_reader = k_io.ScriptReader(self.scp_file) - - label2utts, utt2label = read_map_file(label2utt, key_func=int) - self.utt_info = list(label2utts.items()) if self._is_select_by_speaker else list(utt2label.items()) - - @property - def collate_fn(self): - """ - Return a collate funtion. - """ - return self._collate_fn - - def _random_chunk(self, length): - chunk_size = random.randint(self.min_chunk_size, self.max_chunk_size) - if chunk_size >= length: - return 0, length - start = random.randint(0, length - chunk_size) - end = start + chunk_size - - return start, end - - def _select_by_speaker(self, index): - if self.scp_reader is None or not self.utt_info: - return [] - index = index % (len(self.utt_info)) - inputs = [] - labels = [] - item_size = random.randint(self.min_item_size, self.max_item_size) - for loop_idx in range(item_size): - try: - utt_index = random.randint(0, len(self.utt_info[index][1])) \ - % len(self.utt_info[index][1]) - key = self.utt_info[index][1][utt_index] - except: - print(index, utt_index, len(self.utt_info[index][1])) - sys.exit(-1) - x = self.scp_reader[key] - x = np.transpose(x) - bg, end = self._random_chunk(x.shape[-1]) - inputs.append(x[:, bg: end]) - labels.append(self.utt_info[index][0]) - return inputs, labels - - def _select_by_utt(self, index): - if self.scp_reader is None or len(self.utt_info) == 0: - return {} - index = index % (len(self.utt_info)) - key = self.utt_info[index][0] - x = self.scp_reader[key] - x = np.transpose(x) - bg, end = self._random_chunk(x.shape[-1]) - - y = self.utt_info[index][1] - - return [x[:, bg: end]], [y] - - def __getitem__(self, index): - if self._is_select_by_speaker: - return self._select_by_speaker(index) - else: - return self._select_by_utt(index) - - def __len__(self): - return len(self.utt_info) * self.repeat - - def __iter__(self): - self._start = 0 - return self - - def __next__(self): - if self._start < len(self): - ret = self[self._start] - self._start += 1 - return ret - else: - raise StopIteration - -return KaldiArkDataset diff --git a/paddlespeech/vector/datasets/egs_dataset.py b/paddlespeech/vector/datasets/egs_dataset.py deleted file mode 100644 index 53130d5f..00000000 --- a/paddlespeech/vector/datasets/egs_dataset.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -""" -Load nnet3 training egs which generated by kaldi -""" - -import random -import numpy as np -import kaldi_python_io as k_io -from paddle.io import Dataset -import paddlespeech.vector.utils.utils as utils -from paddlespeech.vector import _logger as log -class KaldiEgsDataset(Dataset): - """ - Dataset used to load kaldi nnet3 egs files. - """ - def __init__(self, egs_list_file, egs_idx, transforms=None): - self.scp_reader = None - self.subset_idx = egs_idx - 1 - self.transforms = transforms - if not utils.is_exist(egs_list_file): - return - - self.egs_files = [] - with open(egs_list_file, 'r') as in_fh: - for line in in_fh: - if line.strip(): - self.egs_files.append(line.strip()) - - self.next_subset() - - def next_subset(self, target_index=None, delta_index=None): - """ - Use next specific subset - - Args: - target_index: target egs index - delta_index: incremental value of egs index - """ - if self.egs_files: - if target_index: - self.subset_idx = target_index - else: - delta_index = delta_index if delta_index else 1 - self.subset_idx += delta_index - log.info("egs dataset subset index: %d" % (self.subset_idx)) - egs_file = self.egs_files[self.subset_idx % len(self.egs_files)] - if utils.is_exist(egs_file): - self.scp_reader = k_io.Nnet3EgsScriptReader(egs_file) - else: - log.warning("No such file or directory: %s" % (egs_file)) - - def __getitem__(self, index): - if self.scp_reader is None: - return {} - index %= len(self) - in_dict, out_dict = self.scp_reader[index] - x = np.array(in_dict['matrix']) - x = np.transpose(x) - y = np.array(out_dict['matrix'][0][0][0], dtype=np.int).reshape((1,)) - if self.transforms is not None: - idx = random.randint(0, len(self.transforms) - 1) - x = self.transforms[idx](x) - return x, y - - def __len__(self): - return len(self.scp_reader) - - def __iter__(self): - self._start = 0 - return self - - def __next__(self): - if self._start < len(self): - ret = self[self._start] - self._start += 1 - return ret - else: - raise StopIteration \ No newline at end of file diff --git a/paddlespeech/vector/utils/data_utils.py b/paddlespeech/vector/utils/data_utils.py deleted file mode 100755 index 4a33a795..00000000 --- a/paddlespeech/vector/utils/data_utils.py +++ /dev/null @@ -1,125 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -data utilities -""" -import os -import sys -import numpy -import paddle - - -def pad_right_to(array, target_shape, mode="constant", value=0): - """ - This function takes a numpy array of arbitrary shape and pads it to target - shape by appending values on the right. - - Args: - array: input numpy array. Input array whose dimension we need to pad. - target_shape : (list, tuple). Target shape we want for the target array its len must be equal to array.ndim - mode : str. Pad mode, please refer to numpy.pad documentation. - value : float. Pad value, please refer to numpy.pad documentation. - - Returns: - array: numpy.array. Padded array. - valid_vals : list. 
diff --git a/paddlespeech/vector/utils/data_utils.py b/paddlespeech/vector/utils/data_utils.py
deleted file mode 100755
index 4a33a795..00000000
--- a/paddlespeech/vector/utils/data_utils.py
+++ /dev/null
@@ -1,125 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-data utilities
-"""
-import os
-import sys
-import numpy
-import paddle
-
-
-def pad_right_to(array, target_shape, mode="constant", value=0):
-    """
-    This function takes a numpy array of arbitrary shape and pads it to target
-    shape by appending values on the right.
-
-    Args:
-        array: input numpy array. Input array whose dimension we need to pad.
-        target_shape : (list, tuple). Target shape we want for the target array its len must be equal to array.ndim
-        mode : str. Pad mode, please refer to numpy.pad documentation.
-        value : float. Pad value, please refer to numpy.pad documentation.
-
-    Returns:
-        array: numpy.array. Padded array.
-        valid_vals : list. List containing proportion for each dimension of original, non-padded values.
-    """
-    assert len(target_shape) == array.ndim
-    pads = []  # this contains the abs length of the padding for each dimension.
-    valid_vals = []  # thic contains the relative lengths for each dimension.
-    i = 0  # iterating over target_shape ndims
-    while i < len(target_shape):
-        assert (
-            target_shape[i] >= array.shape[i]
-        ), "Target shape must be >= original shape for every dim"
-        pads.append([0, target_shape[i] - array.shape[i]])
-        valid_vals.append(array.shape[i] / target_shape[i])
-        i += 1
-
-    array = numpy.pad(array, pads, mode=mode, constant_values=value)
-
-    return array, valid_vals
-
-
-def batch_pad_right(arrays, mode="constant", value=0):
-    """Given a list of numpy arrays it batches them together by padding to the right
-    on each dimension in order to get same length for all.
-
-    Args:
-        arrays : list. List of array we wish to pad together.
-        mode : str. Padding mode see numpy.pad documentation.
-        value : float. Padding value see numpy.pad documentation.
-
-    Returns:
-        array : numpy.array. Padded array.
-        valid_vals : list. List containing proportion for each dimension of original, non-padded values.
-    """
-
-    if not len(arrays):
-        raise IndexError("arrays list must not be empty")
-
-    if len(arrays) == 1:
-        # if there is only one array in the batch we simply unsqueeze it.
-        return numpy.expand_dims(arrays[0], axis=0), numpy.array([1.0])
-
-    if not (
-        any(
-            [arrays[i].ndim == arrays[0].ndim for i in range(1, len(arrays))]
-        )
-    ):
-        raise IndexError("All arrays must have same number of dimensions")
-
-    # FIXME we limit the support here: we allow padding of only the last dimension
-    # need to remove this when feat extraction is updated to handle multichannel.
-    max_shape = []
-    for dim in range(arrays[0].ndim):
-        if dim != (arrays[0].ndim - 1):
-            if not all(
-                [x.shape[dim] == arrays[0].shape[dim] for x in arrays[1:]]
-            ):
-                raise EnvironmentError(
-                    "arrays should have same dimensions except for last one"
-                )
-        max_shape.append(max([x.shape[dim] for x in arrays]))
-
-    batched = []
-    valid = []
-    for t in arrays:
-        # for each array we apply pad_right_to
-        padded, valid_percent = pad_right_to(
-            t, max_shape, mode=mode, value=value
-        )
-        batched.append(padded)
-        valid.append(valid_percent[-1])
-
-    batched = numpy.stack(batched)
-
-    return batched, numpy.array(valid)
-
-
-def length_to_mask(length, max_len=None, dtype=None):
-    """Creates a binary mask for each sequence.
-    """
-    assert len(length.shape) == 1
-
-    if max_len is None:
-        max_len = paddle.cast(paddle.max(length), dtype="int64")  # using arange to generate mask
-    mask = paddle.arange(max_len, dtype=length.dtype).expand([paddle.shape(length)[0], max_len]) < length.unsqueeze(1)
-
-    if dtype is None:
-        dtype = length.dtype
-
-    mask = paddle.cast(mask, dtype=dtype)
-    return mask
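Note: length_to_mask above builds the usual per-sequence binary mask from a vector of valid lengths. A numpy illustration of the same semantics on a toy input (sketch only, not part of the patch):

    import numpy as np

    lengths = np.array([2, 1, 3])   # valid frames per sequence
    max_len = lengths.max()         # 3
    # position j is 1 while j < length of that sequence, 0 afterwards
    mask = (np.arange(max_len)[None, :] < lengths[:, None]).astype(np.float32)
    # mask ->
    # [[1. 1. 0.]
    #  [1. 0. 0.]
    #  [1. 1. 1.]]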
diff --git a/paddlespeech/vector/utils/utils.py b/paddlespeech/vector/utils/utils.py
deleted file mode 100755
index a28cb526..00000000
--- a/paddlespeech/vector/utils/utils.py
+++ /dev/null
@@ -1,132 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-utilities
-"""
-import os
-import sys
-import paddle
-import numpy as np
-
-from paddlespeech.vector import _logger as log
-
-
-def exit_if_not_exist(in_path):
-    """
-    Check the existence of a file or directory, if not exit, exit the program.
-
-    Args:
-        in_path: input dicrector
-    """
-    if not is_exist(in_path):
-        sys.exit(-1)
-
-
-def is_exist(in_path):
-    """
-    Check the existence of a file or directory
-
-    Args:
-        in_path: input dicrector
-
-    Returns:
-        True or False
-    """
-    if not os.path.exists(in_path):
-        log.error("No such file or directory: %s" % (in_path))
-        return False
-
-    return True
-
-
-def get_latest_file(target_dir):
-    """
-    Get the latest file in target directory
-
-    Args:
-        target_dir: target directory
-
-    Returns:
-        latest_file: a string or None
-    """
-    items = os.listdir(target_dir)
-    items.sort(key=lambda fn: os.path.getmtime(os.path.join(target_dir, fn)) \
-               if not os.path.isdir(os.path.join(target_dir, fn)) else 0)
-    latest_file = None if not items else os.path.join(target_dir, items[-1])
-    return latest_file
-
-
-def avg_models(models):
-    """
-    merge multiple models
-    """
-    checkpoint_dict = paddle.load(models[0])
-    final_state_dict = checkpoint_dict
-
-    if len(models) > 1:
-        for model in models[1:]:
-            checkpoint_dict = paddle.load(model)
-            for k, v in checkpoint_dict.items():
-                final_state_dict[k] += v
-        for k in final_state_dict.keys():
-            final_state_dict[k] /= float(len(models))
-            if np.any(np.isnan(final_state_dict[k])):
-                print("Nan in %s" % (k))
-
-    return final_state_dict
-
-def Q_from_tokens(token_num):
-    """
-    get prior model, data from uniform, would support others(guassian) in future
-    """
-    freq = [1] * token_num
-    Q = paddle.to_tensor(freq, dtype = 'float64')
-    return Q / Q.sum()
-
-
-def read_map_file(map_file, key_func=None, value_func=None, values_func=None):
-    """ Read map file. First colume is key, the rest columes are values.
-
-    Args:
-        map_file: map file
-        key_func: convert function for key
-        value_func: convert function for each value
-        values_func: convert function for values
-
-    Returns:
-        dict: key 2 value
-        dict: value 2 key
-    """
-    if not is_exist(map_file):
-        sys.exit(0)
-
-    key2val = {}
-    val2key = {}
-    with open(map_file, 'r') as f:
-        for line in f:
-            line = line.strip()
-            if not line:
-                continue
-            items = line.split()
-            assert len(items) >= 2
-            key = items[0] if not key_func else key_func(items[0])
-            values = items[1:] if not value_func else [value_func(item) for item in items[1:]]
-            if values_func:
-                values = values_func(values)
-            key2val[key] = values
-            for value in values:
-                val2key[value] = key
-
-    return key2val, val2key
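Note: read_map_file above parses kaldi-style map files (first column is the key, the remaining columns are values) into a forward and a reverse dictionary. A toy illustration of the expected result, using the helper removed above; the path and file contents are hypothetical:

    # Suppose data/label2utt contains:
    #   0 spk0_utt1 spk0_utt2
    #   1 spk1_utt1
    key2val, val2key = read_map_file("data/label2utt", key_func=int)
    # key2val -> {0: ['spk0_utt1', 'spk0_utt2'], 1: ['spk1_utt1']}
    # val2key -> {'spk0_utt1': 0, 'spk0_utt2': 0, 'spk1_utt1': 1}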