|
|
|
@ -3,7 +3,7 @@
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 1,
|
|
|
|
|
"id": "downtown-invalid",
|
|
|
|
|
"id": "medieval-monday",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
@ -213,27 +213,6 @@
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"# batch_reader = create_dataloader(\n",
|
|
|
|
|
"# manifest_path=args.infer_manifest,\n",
|
|
|
|
|
"# vocab_filepath=args.vocab_path,\n",
|
|
|
|
|
"# mean_std_filepath=args.mean_std_path,\n",
|
|
|
|
|
"# augmentation_config='{}',\n",
|
|
|
|
|
"# #max_duration=float('inf'),\n",
|
|
|
|
|
"# max_duration=27.0,\n",
|
|
|
|
|
"# min_duration=0.0,\n",
|
|
|
|
|
"# stride_ms=10.0,\n",
|
|
|
|
|
"# window_ms=20.0,\n",
|
|
|
|
|
"# max_freq=None,\n",
|
|
|
|
|
"# specgram_type=args.specgram_type,\n",
|
|
|
|
|
"# use_dB_normalization=True,\n",
|
|
|
|
|
"# random_seed=0,\n",
|
|
|
|
|
"# keep_transcription_text=True,\n",
|
|
|
|
|
"# is_training=False,\n",
|
|
|
|
|
"# batch_size=args.num_samples,\n",
|
|
|
|
|
"# sortagrad=True,\n",
|
|
|
|
|
"# shuffle_method=None,\n",
|
|
|
|
|
"# dist=False)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"from deepspeech.frontend.utility import read_manifest\n",
|
|
|
|
|
"from deepspeech.frontend.augmentor.augmentation import AugmentationPipeline\n",
|
|
|
|
|
"from deepspeech.frontend.featurizer.speech_featurizer import SpeechFeaturizer\n",
|
|
|
|
@ -375,7 +354,7 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 9,
|
|
|
|
|
"execution_count": 6,
|
|
|
|
|
"id": "minus-modern",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
@ -391,8 +370,6 @@
|
|
|
|
|
" [97, 37, 26, 79, 26, 1, 38, 82, 1, 58, 102, 1, 17, 79, 64, 87, 37, 26, 79, 1, 61, 64, 97]])\n",
|
|
|
|
|
"test raw: W%\u001a\u0001Wa\u001a=W&\u001aR\n",
|
|
|
|
|
"test raw: a%\u001aO\u001a\u0001&R\u0001:f\u0001\u0011O@W%\u001aO\u0001=@a\n",
|
|
|
|
|
"audio len: Tensor(shape=[5], dtype=int64, place=CUDAPinnedPlace, stop_gradient=True,\n",
|
|
|
|
|
" [163, 173, 184, 190, 203])\n",
|
|
|
|
|
"test len: Tensor(shape=[5], dtype=int64, place=CUDAPlace(0), stop_gradient=True,\n",
|
|
|
|
|
" [12, 13, 11, 22, 23])\n",
|
|
|
|
|
"audio: Tensor(shape=[5, 203, 80], dtype=float32, place=CUDAPinnedPlace, stop_gradient=True,\n",
|
|
|
|
@ -434,7 +411,9 @@
|
|
|
|
|
" ...,\n",
|
|
|
|
|
" [-4.81728077 , -10.65084648, 0.00000000 , ..., 3.19982862 , 8.42359638 , 7.95100546 ],\n",
|
|
|
|
|
" [-7.54755068 , -12.56441689, 0.00000000 , ..., 4.12789631 , 6.98472023 , 7.79936218 ],\n",
|
|
|
|
|
" [-8.79256725 , -11.23776722, 0.00000000 , ..., 1.31829071 , 1.30352044 , 6.80789280 ]]])\n"
|
|
|
|
|
" [-8.79256725 , -11.23776722, 0.00000000 , ..., 1.31829071 , 1.30352044 , 6.80789280 ]]])\n",
|
|
|
|
|
"audio len: Tensor(shape=[5], dtype=int64, place=CUDAPinnedPlace, stop_gradient=True,\n",
|
|
|
|
|
" [163, 173, 184, 190, 203])\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
@ -472,16 +451,16 @@
|
|
|
|
|
" print('test:', text)\n",
|
|
|
|
|
" print(\"test raw:\", ''.join( chr(i) for i in text[0][:int(text_len[0])] ))\n",
|
|
|
|
|
" print(\"test raw:\", ''.join( chr(i) for i in text[-1][:int(text_len[-1])] ))\n",
|
|
|
|
|
" print('audio len:', audio_len)\n",
|
|
|
|
|
" print('test len:', text_len)\n",
|
|
|
|
|
" print('audio:', audio)\n",
|
|
|
|
|
" print('audio len:', audio_len)\n",
|
|
|
|
|
" break"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"id": "chronic-diagram",
|
|
|
|
|
"id": "competitive-mounting",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": []
|
|
|
|
|