From 559627c8de1399a021fb6728db2abf01db80eb4b Mon Sep 17 00:00:00 2001 From: "th.zhang" <15600919271@163.com> Date: Fri, 21 Apr 2023 14:00:27 +0800 Subject: [PATCH] doc related --- demos/speech_ssl/README.md | 4 +- demos/speech_ssl/README_cn.md | 4 +- docs/source/released_model.md | 2 + examples/librispeech/asr3/local/train.sh | 2 +- examples/librispeech/asr4/RESULTS.md | 9 ++ examples/librispeech/asr4/conf/hubertASR.yaml | 142 ++++++++++++++++++ examples/librispeech/asr4/local/data.sh | 110 ++++++++++++++ examples/librispeech/asr4/local/test.sh | 83 ++++++++++ examples/librispeech/asr4/local/test_wav.sh | 58 +++++++ examples/librispeech/asr4/local/train.sh | 58 +++++++ examples/librispeech/asr4/run.sh | 6 +- examples/librispeech/asr4/utils | 1 + paddlespeech/s2t/models/hubert/__init__.py | 4 +- 13 files changed, 473 insertions(+), 10 deletions(-) create mode 100644 examples/librispeech/asr4/RESULTS.md create mode 100644 examples/librispeech/asr4/conf/hubertASR.yaml create mode 100755 examples/librispeech/asr4/local/data.sh create mode 100755 examples/librispeech/asr4/local/test.sh create mode 100755 examples/librispeech/asr4/local/test_wav.sh create mode 100755 examples/librispeech/asr4/local/train.sh mode change 100644 => 100755 examples/librispeech/asr4/run.sh create mode 120000 examples/librispeech/asr4/utils diff --git a/demos/speech_ssl/README.md b/demos/speech_ssl/README.md index b98a7cc61..937cd95a3 100644 --- a/demos/speech_ssl/README.md +++ b/demos/speech_ssl/README.md @@ -36,7 +36,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav ``` Arguments: - `input`(required): Audio file to recognize. - - `model`: Model type of asr task. Default: `wav2vec2ASR_librispeech`. + - `model`: Model type of asr task. Default: `wav2vec2`, choices: [wav2vec2, hubert]. - `task`: Output type. Default: `asr`. - `lang`: Model language. Default: `en`. - `sample_rate`: Sample rate of the model. Default: `16000`. 
@@ -56,7 +56,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav # to recognize text text = ssl_executor( - model='wav2vec2ASR_librispeech', + model='wav2vec2', task='asr', lang='en', sample_rate=16000, diff --git a/demos/speech_ssl/README_cn.md b/demos/speech_ssl/README_cn.md index 65961ce90..8455d2c77 100644 --- a/demos/speech_ssl/README_cn.md +++ b/demos/speech_ssl/README_cn.md @@ -36,7 +36,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav ``` 参数: - `input`(必须输入):用于识别的音频文件。 - - `model`:ASR 任务的模型,默认值:`wav2vec2ASR_librispeech`。 + - `model`:ASR 任务的模型,默认值:`wav2vec2`, 可选项:[wav2vec2, hubert]。 - `task`:输出类别,默认值:`asr`。 - `lang`:模型语言,默认值:`en`。 - `sample_rate`:音频采样率,默认值:`16000`。 @@ -56,7 +56,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav # 识别文本 text = ssl_executor( - model='wav2vec2ASR_librispeech', + model='wav2vec2', task='asr', lang='en', sample_rate=16000, diff --git a/docs/source/released_model.md b/docs/source/released_model.md index 9e9221779..5b0cc70dc 100644 --- a/docs/source/released_model.md +++ b/docs/source/released_model.md @@ -26,6 +26,8 @@ Model | Pre-Train Method | Pre-Train Data | Finetune Data | Size | Descriptions [Wav2vec2ASR-large-960h-librispeech Model](https://paddlespeech.bj.bcebos.com/s2t/librispeech/asr3/wav2vec2ASR-large-960h-librispeech_ckpt_1.3.1.model.tar.gz) | wav2vec2 | Librispeech and LV-60k Dataset (5.3w h) | Librispeech (960 h) | 718 MB |Encoder: Wav2vec2.0, Decoder: CTC, Decoding method: Greedy search | - | 0.0189 | [Wav2vecASR Librispeech ASR3](../../examples/librispeech/asr3) | [Wav2vec2-large-wenetspeech-self Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr3/wav2vec2-large-wenetspeech-self_ckpt_1.3.0.model.tar.gz) | wav2vec2 | Wenetspeech Dataset (1w h) | - | 714 MB |Pre-trained Wav2vec2.0 Model | - | - | - | [Wav2vec2ASR-large-aishell1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr3/wav2vec2ASR-large-aishell1_ckpt_1.4.0.model.tar.gz) | wav2vec2 | Wenetspeech 
Dataset (1w h) | aishell1 (train set) | 1.18 GB |Encoder: Wav2vec2.0, Decoder: CTC, Decoding method: Greedy search | 0.0510 | - | - | +[Hubert-large-lv60 Model](https://paddlespeech.bj.bcebos.com/hubert/hubert-large-lv60.pdparams) | hubert | LV-60k Dataset | - | 1.18 GB |Pre-trained hubert Model | - | - | - | +[Hubert-large-100h-librispeech Model](https://paddlespeech.bj.bcebos.com/s2t/librispeech/asr4/hubertASR-large-100h-librispeech_ckpt_1.4.0.model.tar.gz) | hubert | LV-60k Dataset | librispeech train-clean-100 | 1.27 GB |Encoder: Hubert, Decoder: Linear + CTC, Decoding method: Greedy search | - | 0.0587 | [HubertASR Librispeech ASR4](../../examples/librispeech/asr4) | ### Whisper Model Demo Link | Training Data | Size | Descriptions | CER | Model diff --git a/examples/librispeech/asr3/local/train.sh b/examples/librispeech/asr3/local/train.sh index 10d254f0b..24776fd17 100755 --- a/examples/librispeech/asr3/local/train.sh +++ b/examples/librispeech/asr3/local/train.sh @@ -38,7 +38,7 @@ python3 -u ${BIN_DIR}/train.py \ --seed ${seed} \ --resume ${resume} else -python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} --log_dir=exp/log/${ckpt_name} ${ips_config} ${BIN_DIR}/train.py \ +python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \ --ngpu ${ngpu} \ --config ${config_path} \ --output exp/${ckpt_name} \ diff --git a/examples/librispeech/asr4/RESULTS.md b/examples/librispeech/asr4/RESULTS.md new file mode 100644 index 000000000..81ce6ee9e --- /dev/null +++ b/examples/librispeech/asr4/RESULTS.md @@ -0,0 +1,9 @@ +# LibriSpeech + +## hubertASR +Finetuning on train-clean-100 +train: Epoch 3, 1*V100-32G, batchsize: 4, accum_grad: 8 + +| Model | Params | Config | Augmentation| Test set | Decode method | WER | +| --- | --- | --- | --- | --- | --- | --- | +| hubertASR | 326.16M | conf/hubertASR.yaml | spec_aug | test-clean | greedy search | 0.05868 | diff --git a/examples/librispeech/asr4/conf/hubertASR.yaml 
b/examples/librispeech/asr4/conf/hubertASR.yaml new file mode 100644 index 000000000..e147815a8 --- /dev/null +++ b/examples/librispeech/asr4/conf/hubertASR.yaml @@ -0,0 +1,142 @@ +############################################ +# Network Architecture # +############################################ +freeze_hubert: False +normalize_wav: True +output_norm: True +init_type: kaiming_uniform # !Warning: need to convergence +enc: + input_shape: 1024 + dnn_blocks: 2 + dnn_neurons: 1024 + activation: True +ctc: + enc_n_units: 1024 + blank_id: 0 + dropout_rate: 0.0 +hubert_params_path: "exp/hubert/hubert-large-lv60.pdparams" # file fetched by local/data.sh stage 3 + + +task_cfg: + label_rate: 50.0 + sample_rate: 16000 + normalize: True + enable_padding: False + max_keep_size: None + max_sample_size: 250000 + min_sample_size: 32000 + single_target: False + random_crop: True + pad_audio: False + +model_cfg: + dropout_input: 0.0 + final_dropout: 0.0 + dropout: 0.0 + attention_dropout: 0.0 + activation_dropout: 0.1 + apply_mask: True + mask_length: 10 + mask_prob: 0.5 + mask_selection: static + mask_other: 0.0 + no_mask_overlap: False + mask_channel_length: 64 + mask_channel_prob: 0.25 + mask_channel_selection: static + mask_channel_other: 0.0 + no_mask_channel_overlap: False + feature_grad_mult: 0.0 + layerdrop: 0.1 + normalize: True + fp16: True + label_rate: 50 + extractor_mode: layer_norm + encoder_layers: 24 + encoder_embed_dim: 1024 + encoder_ffn_embed_dim: 4096 + encoder_attention_heads: 16 + activation_fn: gelu + encoder_layerdrop: 0.1 + dropout_features: 0.0 + final_dim: 768 + untie_final_proj: True + layer_norm_first: True + conv_feature_layers: "[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2" + conv_bias: False + logit_temp: 0.1 + target_glu: False + mask_min_space: 1 + mask_channel_min_space: 1 + conv_pos: 128 + conv_pos_groups: 16 + latent_temp: [2.0, 0.5, 0.999995] + skip_masked: False + skip_nomask: True + +########################################### +# Data # 
+########################################### +train_manifest: data/manifest.train-clean-100 +dev_manifest: data/manifest.dev +test_manifest: data/manifest.test-clean + +########################################### +# Dataloader # +########################################### +vocab_filepath: data/lang_char/vocab.txt +unit_type: char +mean_std_filepath: "" +preprocess_config: conf/preprocess.yaml +sortagrad: -1 # Feed samples from shortest to longest ; -1: enabled for all epochs 0: disabled other: enabled for other epochs +batch_size: 2 # Different batch_size may cause large differences in results +maxlen_in: 51200000000 # if input length > maxlen-in batchsize is automatically reduced +maxlen_out: 1500000 # if output length > maxlen-out batchsize is automatically reduced +minibatches: 0 # for debug +batch_count: auto +batch_bins: 0 +batch_frames_in: 0 +batch_frames_out: 0 +batch_frames_inout: 0 +num_workers: 0 +subsampling_factor: 1 +num_encs: 1 +dist_sampler: True +shortest_first: True +return_lens_rate: True + +############################################ +# Data Augmentation # +############################################ +audio_augment: # for raw audio + sample_rate: 16000 + speeds: [95, 100, 105] + +########################################### +# Training # +########################################### +n_epoch: 3 +accum_grad: 8 +global_grad_clip: 5.0 +model_optim: adadelta +model_optim_conf: + lr: 1.0 + epsilon: 1.0e-6 + rho: 0.95 +model_scheduler: constantlr +model_scheduler_conf: + warmup_steps: 25000 + lr_decay: 1.0 +hubert_optim: adadelta +hubert_optim_conf: + lr: 1.0 + epsilon: 1.0e-6 + rho: 0.95 +hubert_scheduler: constantlr +hubert_scheduler_conf: + warmup_steps: 25000 + lr_decay: 1.0 +log_interval: 1 +checkpoint: + kbest_n: 50 + latest_n: 5 diff --git a/examples/librispeech/asr4/local/data.sh b/examples/librispeech/asr4/local/data.sh new file mode 100755 index 000000000..7d0613d5a --- /dev/null +++ b/examples/librispeech/asr4/local/data.sh @@ -0,0 +1,110 
@@ +#!/bin/bash + +stage=-1 +stop_stage=100 + +unit_type=char +dict_dir=data/lang_char + +source ${MAIN_ROOT}/utils/parse_options.sh + +mkdir -p data +mkdir -p ${dict_dir} +TARGET_DIR=${MAIN_ROOT}/dataset +mkdir -p ${TARGET_DIR} + +if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then + # download data, generate manifests + python3 ${TARGET_DIR}/librispeech/librispeech.py \ + --manifest_prefix="data/manifest" \ + --target_dir="${TARGET_DIR}/librispeech" \ + --full_download="True" + + if [ $? -ne 0 ]; then + echo "Prepare LibriSpeech failed. Terminated." + exit 1 + fi + + for set in train-clean-100 train-clean-360 train-other-500 dev-clean dev-other test-clean test-other; do + mv data/manifest.${set} data/manifest.${set}.raw + done + + rm -rf data/manifest.train.raw data/manifest.dev.raw data/manifest.test.raw + for set in train-clean-100 train-clean-360 train-other-500; do + cat data/manifest.${set}.raw >> data/manifest.train.raw + done + + for set in dev-clean dev-other; do + cat data/manifest.${set}.raw >> data/manifest.dev.raw + done + + for set in test-clean test-other; do + cat data/manifest.${set}.raw >> data/manifest.test.raw + done +fi + +if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then + # compute mean and stddev for normalizer + num_workers=$(nproc) + python3 ${MAIN_ROOT}/utils/compute_mean_std.py \ + --manifest_path="data/manifest.train.raw" \ + --num_samples=2000 \ + --spectrum_type="fbank" \ + --feat_dim=161 \ + --delta_delta=false \ + --sample_rate=16000 \ + --stride_ms=10 \ + --window_ms=25 \ + --use_dB_normalization=False \ + --num_workers=${num_workers} \ + --output_path="data/mean_std.json" + + if [ $? -ne 0 ]; then + echo "Compute mean and stddev failed. Terminated." 
+ exit 1 + fi +fi + +if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then + # build vocabulary + python3 ${MAIN_ROOT}/utils/build_vocab.py \ + --unit_type ${unit_type} \ + --count_threshold=0 \ + --vocab_path="${dict_dir}/vocab.txt" \ + --manifest_paths="data/manifest.train.raw" + + if [ $? -ne 0 ]; then + echo "Build vocabulary failed. Terminated." + exit 1 + fi +fi + +if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then + # format manifest with tokenids, vocab size + for set in train dev test dev-clean dev-other test-clean test-other; do + { + python3 ${MAIN_ROOT}/utils/format_data.py \ + --cmvn_path "data/mean_std.json" \ + --unit_type ${unit_type} \ + --vocab_path="${dict_dir}/vocab.txt" \ + --manifest_path="data/manifest.${set}.raw" \ + --output_path="data/manifest.${set}" + + if [ $? -ne 0 ]; then + echo "Formt mnaifest.${set} failed. Terminated." + exit 1 + fi + }& + done + wait +fi + +echo "LibriSpeech Data preparation done." + +if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then + mkdir -p exp/hubert + echo "Pretrained hubert model download" + wget -P exp/hubert https://paddlespeech.bj.bcebos.com/hubert/hubert-large-lv60.pdparams +fi + +exit 0 \ No newline at end of file diff --git a/examples/librispeech/asr4/local/test.sh b/examples/librispeech/asr4/local/test.sh new file mode 100755 index 000000000..dfbd56ac2 --- /dev/null +++ b/examples/librispeech/asr4/local/test.sh @@ -0,0 +1,83 @@ +#!/bin/bash + +set -e + +ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') +echo "using $ngpu gpus..." + +expdir=exp +datadir=data + +recog_set="test-clean test-other dev-clean dev-other" +recog_set="test-clean" + +config_path=$1 +decode_config_path=$2 +ckpt_prefix=$3 + +source ${MAIN_ROOT}/utils/parse_options.sh || exit 1; + +# download language model +#bash local/download_lm_en.sh +#if [ $? 
-ne 0 ]; then +# exit 1 +#fi + +python3 utils/format_rsl.py \ + --origin_ref data/manifest.test-clean.raw \ + --trans_ref data/manifest.test-clean.text + + +for type in ctc_greedy_search; do + echo "decoding ${type}" + batch_size=16 + python3 -u ${BIN_DIR}/test.py \ + --ngpu ${ngpu} \ + --config ${config_path} \ + --decode_cfg ${decode_config_path} \ + --result_file ${ckpt_prefix}.${type}.rsl \ + --checkpoint_path ${ckpt_prefix} \ + --opts decode.decoding_method ${type} \ + --opts decode.decode_batch_size ${batch_size} + + if [ $? -ne 0 ]; then + echo "Failed in evaluation!" + exit 1 + fi + python3 utils/format_rsl.py \ + --origin_hyp ${ckpt_prefix}.${type}.rsl \ + --trans_hyp ${ckpt_prefix}.${type}.rsl.text + + python3 utils/compute-wer.py --char=1 --v=1 \ + data/manifest.test-clean.text ${ckpt_prefix}.${type}.rsl.text > ${ckpt_prefix}.${type}.error + echo "decoding ${type} done." +done + +for type in ctc_prefix_beam_search; do + echo "decoding ${type}" + batch_size=1 + python3 -u ${BIN_DIR}/test.py \ + --ngpu ${ngpu} \ + --config ${config_path} \ + --decode_cfg ${decode_config_path} \ + --result_file ${ckpt_prefix}.${type}.rsl \ + --checkpoint_path ${ckpt_prefix} \ + --opts decode.decoding_method ${type} \ + --opts decode.decode_batch_size ${batch_size} + + if [ $? -ne 0 ]; then + echo "Failed in evaluation!" + exit 1 + fi + python3 utils/format_rsl.py \ + --origin_hyp ${ckpt_prefix}.${type}.rsl \ + --trans_hyp ${ckpt_prefix}.${type}.rsl.text + + python3 utils/compute-wer.py --char=1 --v=1 \ + data/manifest.test-clean.text ${ckpt_prefix}.${type}.rsl.text > ${ckpt_prefix}.${type}.error + echo "decoding ${type} done." 
+done + +echo "Finished" + +exit 0 diff --git a/examples/librispeech/asr4/local/test_wav.sh b/examples/librispeech/asr4/local/test_wav.sh new file mode 100755 index 000000000..fdf3589f4 --- /dev/null +++ b/examples/librispeech/asr4/local/test_wav.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +if [ $# != 4 ];then + echo "usage: ${0} config_path decode_config_path ckpt_path_prefix audio_file" + exit -1 +fi + +ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') +echo "using $ngpu gpus..." + +config_path=$1 +decode_config_path=$2 +ckpt_prefix=$3 +audio_file=$4 + +mkdir -p data +wget -nc https://paddlespeech.bj.bcebos.com/datasets/single_wav/en/demo_002_en.wav -P data/ +if [ $? -ne 0 ]; then + exit 1 +fi + +if [ ! -f ${audio_file} ]; then + echo "Plase input the right audio_file path" + exit 1 +fi + +chunk_mode=false +if [[ ${config_path} =~ ^.*chunk_.*yaml$ ]];then + chunk_mode=true +fi + +# download language model +#bash local/download_lm_ch.sh +#if [ $? -ne 0 ]; then +# exit 1 +#fi + +for type in ctc_greedy_search; do + echo "decoding ${type}" + batch_size=1 + output_dir=${ckpt_prefix} + mkdir -p ${output_dir} + python3 -u ${BIN_DIR}/test_wav.py \ + --ngpu ${ngpu} \ + --config ${config_path} \ + --decode_cfg ${decode_config_path} \ + --result_file ${output_dir}/${type}.rsl \ + --checkpoint_path ${ckpt_prefix} \ + --opts decode.decoding_method ${type} \ + --opts decode.decode_batch_size ${batch_size} \ + --audio_file ${audio_file} + + if [ $? -ne 0 ]; then + echo "Failed in evaluation!" + exit 1 + fi +done +exit 0 diff --git a/examples/librispeech/asr4/local/train.sh b/examples/librispeech/asr4/local/train.sh new file mode 100755 index 000000000..24776fd17 --- /dev/null +++ b/examples/librispeech/asr4/local/train.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# Accepts 2 to 4 positional arguments: config_path ckpt_name [resume] [ips]. +if [ $# -lt 2 ] || [ $# -gt 4 ];then + echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name resume(optional) ips(optional)" + exit -1 +fi + +ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') +echo "using $ngpu gpus..." 
+ +config_path=$1 +ckpt_name=$2 +resume=$3 +ips=$4 + +if [ ! $ips ];then + ips_config= +else + ips_config="--ips="${ips} +fi + +mkdir -p exp + +# seed may break model convergence +seed=1988 +if [ ${seed} != 0 ]; then + export FLAGS_cudnn_deterministic=True +fi + +# export FLAGS_cudnn_exhaustive_search=true +# export FLAGS_conv_workspace_size_limit=4000 +export FLAGS_allocator_strategy=naive_best_fit +if [ ${ngpu} == 0 ]; then +python3 -u ${BIN_DIR}/train.py \ +--ngpu ${ngpu} \ +--config ${config_path} \ +--output exp/${ckpt_name} \ +--seed ${seed} \ +--resume ${resume} +else +python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \ +--ngpu ${ngpu} \ +--config ${config_path} \ +--output exp/${ckpt_name} \ +--seed ${seed} \ +--resume ${resume} +fi +# Check the training exit status immediately; any later command (e.g. unset) would overwrite $?. +if [ $? -ne 0 ]; then + echo "Failed in training!" + exit 1 +fi + +if [ ${seed} != 0 ]; then + unset FLAGS_cudnn_deterministic +fi + +exit 0 diff --git a/examples/librispeech/asr4/run.sh b/examples/librispeech/asr4/run.sh old mode 100644 new mode 100755 index c880c9cbf..47e71d60f --- a/examples/librispeech/asr4/run.sh +++ b/examples/librispeech/asr4/run.sh @@ -7,7 +7,7 @@ set -e gpus=0 stage=0 stop_stage=4 -conf_path=conf/wav2vec2ASR.yaml +conf_path=conf/hubertASR.yaml ips= #xx.xx.xx.xx,xx.xx.xx.xx decode_conf_path=conf/tuning/decode.yaml avg_num=1 @@ -28,7 +28,7 @@ fi if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then # train model, all `ckpt` under `exp` dir - CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${resume} ${ips} + CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${resume} ${ips} fi if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then @@ -44,4 +44,4 @@ fi if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then # test a single .wav file CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${audio_file} || exit -1 -fi \ No newline at end of file +fi diff --git 
a/examples/librispeech/asr4/utils b/examples/librispeech/asr4/utils new file mode 120000 index 000000000..973afe674 --- /dev/null +++ b/examples/librispeech/asr4/utils @@ -0,0 +1 @@ +../../../utils \ No newline at end of file diff --git a/paddlespeech/s2t/models/hubert/__init__.py b/paddlespeech/s2t/models/hubert/__init__.py index 4df88bd34..87887a4ce 100644 --- a/paddlespeech/s2t/models/hubert/__init__.py +++ b/paddlespeech/s2t/models/hubert/__init__.py @@ -12,6 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. from .hubert_ASR import HubertASR -from .wav2vec2_ASR import Wav2vec2Base +from .hubert_ASR import HubertBase -__all__ = ["Wav2vec2ASR", "Wav2vec2Base"] +__all__ = ["HubertASR", "HubertBase"]