From 48f4bda3c545ac3c4c85a8c0b104f1e624b2f95b Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Fri, 16 Apr 2021 06:37:23 +0000 Subject: [PATCH] kaldi fbank and mfcc --- .flake8 | 3 +- .../dataloader_with_tokens_tokenids.ipynb | 946 +++++++++++++++--- .notebook/python_test.ipynb | 173 +++- .pre-commit-config.yaml | 6 +- deepspeech/frontend/audio.py | 12 + deepspeech/frontend/augmentor/spec_augment.py | 1 + .../frontend/featurizer/audio_featurizer.py | 49 +- deepspeech/frontend/normalizer.py | 9 +- examples/aishell/s0/local/data.sh | 4 +- examples/tiny/s1/conf/augmentation.json | 24 + setup.sh | 10 + third_party/README.md | 4 + third_party/python_kaldi_features/LICENSE | 20 + third_party/python_kaldi_features/MANIFEST | 5 + third_party/python_kaldi_features/README.rst | 58 ++ .../lib/python_speech_features/__init__.py | 1 + .../build/lib/python_speech_features/base.py | 166 +++ .../lib/python_speech_features/base_orig.py | 190 ++++ .../lib/python_speech_features/sigproc.py | 158 +++ .../python_speech_features/sigproc_orig.py | 140 +++ .../dist/python_speech_features-0.6-py3.7.egg | Bin 0 -> 23731 bytes .../python_kaldi_features/docs/Makefile | 89 ++ .../python_kaldi_features/docs/make.bat | 113 +++ .../python_kaldi_features/docs/source/conf.py | 202 ++++ .../docs/source/index.rst | 54 + third_party/python_kaldi_features/english.wav | Bin 0 -> 35824 bytes third_party/python_kaldi_features/example.py | 29 + .../python_speech_features.egg-info/PKG-INFO | 10 + .../SOURCES.txt | 12 + .../dependency_links.txt | 1 + .../top_level.txt | 1 + .../python_speech_features/__init__.py | 1 + .../python_speech_features/base.py | 166 +++ .../python_speech_features/base_orig.py | 190 ++++ .../python_speech_features/sigproc.py | 158 +++ .../python_speech_features/sigproc_orig.py | 140 +++ .../python_kaldi_features/requirements.txt | 3 + third_party/python_kaldi_features/setup.py | 14 + .../test/test_sigproc.py | 31 + utils/compute_mean_std.py | 2 +- 40 files changed, 3012 insertions(+), 
183 deletions(-) create mode 100644 third_party/README.md create mode 100644 third_party/python_kaldi_features/LICENSE create mode 100644 third_party/python_kaldi_features/MANIFEST create mode 100644 third_party/python_kaldi_features/README.rst create mode 100644 third_party/python_kaldi_features/build/lib/python_speech_features/__init__.py create mode 100644 third_party/python_kaldi_features/build/lib/python_speech_features/base.py create mode 100644 third_party/python_kaldi_features/build/lib/python_speech_features/base_orig.py create mode 100644 third_party/python_kaldi_features/build/lib/python_speech_features/sigproc.py create mode 100644 third_party/python_kaldi_features/build/lib/python_speech_features/sigproc_orig.py create mode 100644 third_party/python_kaldi_features/dist/python_speech_features-0.6-py3.7.egg create mode 100644 third_party/python_kaldi_features/docs/Makefile create mode 100644 third_party/python_kaldi_features/docs/make.bat create mode 100644 third_party/python_kaldi_features/docs/source/conf.py create mode 100644 third_party/python_kaldi_features/docs/source/index.rst create mode 100644 third_party/python_kaldi_features/english.wav create mode 100644 third_party/python_kaldi_features/example.py create mode 100644 third_party/python_kaldi_features/python_speech_features.egg-info/PKG-INFO create mode 100644 third_party/python_kaldi_features/python_speech_features.egg-info/SOURCES.txt create mode 100644 third_party/python_kaldi_features/python_speech_features.egg-info/dependency_links.txt create mode 100644 third_party/python_kaldi_features/python_speech_features.egg-info/top_level.txt create mode 100644 third_party/python_kaldi_features/python_speech_features/__init__.py create mode 100644 third_party/python_kaldi_features/python_speech_features/base.py create mode 100644 third_party/python_kaldi_features/python_speech_features/base_orig.py create mode 100644 third_party/python_kaldi_features/python_speech_features/sigproc.py create mode 
100644 third_party/python_kaldi_features/python_speech_features/sigproc_orig.py create mode 100644 third_party/python_kaldi_features/requirements.txt create mode 100644 third_party/python_kaldi_features/setup.py create mode 100644 third_party/python_kaldi_features/test/test_sigproc.py diff --git a/.flake8 b/.flake8 index b49cbf1a9..722899439 100644 --- a/.flake8 +++ b/.flake8 @@ -12,6 +12,7 @@ exclude = .git, # python cache __pycache__, + third_party/, # Provide a comma-separate list of glob patterns to include for checks. filename = *.py @@ -46,4 +47,4 @@ select = E, W, F, - C \ No newline at end of file + C diff --git a/.notebook/dataloader_with_tokens_tokenids.ipynb b/.notebook/dataloader_with_tokens_tokenids.ipynb index 30d492eba..7d93dd009 100644 --- a/.notebook/dataloader_with_tokens_tokenids.ipynb +++ b/.notebook/dataloader_with_tokens_tokenids.ipynb @@ -83,37 +83,39 @@ "text": [ "/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. 
Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n", " and should_run_async(code)\n", - "WARNING:root:register user softmax to paddle, remove this when fixed!\n", - "WARNING:root:register user log_softmax to paddle, remove this when fixed!\n", - "WARNING:root:register user sigmoid to paddle, remove this when fixed!\n", - "WARNING:root:register user log_sigmoid to paddle, remove this when fixed!\n", - "WARNING:root:register user relu to paddle, remove this when fixed!\n", - "WARNING:root:override cat of paddle if exists or register, remove this when fixed!\n", - "WARNING:root:override item of paddle.Tensor if exists or register, remove this when fixed!\n", - "WARNING:root:override long of paddle.Tensor if exists or register, remove this when fixed!\n", - "WARNING:root:override new_full of paddle.Tensor if exists or register, remove this when fixed!\n", - "WARNING:root:override eq of paddle.Tensor if exists or register, remove this when fixed!\n", - "WARNING:root:override contiguous of paddle.Tensor if exists or register, remove this when fixed!\n", - "WARNING:root:override size of paddle.Tensor (`to_static` do not process `size` property, maybe some `paddle` api dependent on it), remove this when fixed!\n", - "WARNING:root:register user view to paddle.Tensor, remove this when fixed!\n", - "WARNING:root:register user view_as to paddle.Tensor, remove this when fixed!\n", - "WARNING:root:register user masked_fill to paddle.Tensor, remove this when fixed!\n", - "WARNING:root:register user masked_fill_ to paddle.Tensor, remove this when fixed!\n", - "WARNING:root:register user fill_ to paddle.Tensor, remove this when fixed!\n", - "WARNING:root:register user repeat to paddle.Tensor, remove this when fixed!\n", - "WARNING:root:register user softmax to paddle.Tensor, remove this when fixed!\n", - "WARNING:root:register user sigmoid to paddle.Tensor, remove this 
when fixed!\n", - "WARNING:root:register user relu to paddle.Tensor, remove this when fixed!\n", - "WARNING:root:register user type_as to paddle.Tensor, remove this when fixed!\n", - "WARNING:root:register user to to paddle.Tensor, remove this when fixed!\n", - "WARNING:root:register user float to paddle.Tensor, remove this when fixed!\n", - "WARNING:root:register user glu to paddle.nn.functional, remove this when fixed!\n", - "WARNING:root:override ctc_loss of paddle.nn.functional if exists, remove this when fixed!\n", - "WARNING:root:register user Module to paddle.nn, remove this when fixed!\n", - "WARNING:root:register user ModuleList to paddle.nn, remove this when fixed!\n", - "WARNING:root:register user GLU to paddle.nn, remove this when fixed!\n", - "WARNING:root:register user ConstantPad2d to paddle.nn, remove this when fixed!\n", - "WARNING:root:register user export to paddle.jit, remove this when fixed!\n" + "[WARNING 2021/04/16 06:32:09 __init__.py:93] register user softmax to paddle, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:97] register user log_softmax to paddle, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:101] register user sigmoid to paddle, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:105] register user log_sigmoid to paddle, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:109] register user relu to paddle, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:119] override cat of paddle if exists or register, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:133] override item of paddle.Tensor if exists or register, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:144] override long of paddle.Tensor if exists or register, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:164] override new_full of paddle.Tensor if exists or register, remove this when 
fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:179] override eq of paddle.Tensor if exists or register, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:185] override eq of paddle if exists or register, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:195] override contiguous of paddle.Tensor if exists or register, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:212] override size of paddle.Tensor (`to_static` do not process `size` property, maybe some `paddle` api dependent on it), remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:223] register user view to paddle.Tensor, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:233] register user view_as to paddle.Tensor, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:259] register user masked_fill to paddle.Tensor, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:277] register user masked_fill_ to paddle.Tensor, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:288] register user fill_ to paddle.Tensor, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:298] register user repeat to paddle.Tensor, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:303] register user softmax to paddle.Tensor, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:308] register user sigmoid to paddle.Tensor, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:312] register user relu to paddle.Tensor, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:322] register user type_as to paddle.Tensor, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:337] register user to to paddle.Tensor, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:346] register user float to paddle.Tensor, remove this when fixed!\n", + 
"[WARNING 2021/04/16 06:32:09 __init__.py:356] register user tolist to paddle.Tensor, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:371] register user glu to paddle.nn.functional, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:422] override ctc_loss of paddle.nn.functional if exists, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:428] register user Module to paddle.nn, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:434] register user ModuleList to paddle.nn, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:450] register user GLU to paddle.nn, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:483] register user ConstantPad2d to paddle.nn, remove this when fixed!\n", + "[WARNING 2021/04/16 06:32:09 __init__.py:489] register user export to paddle.jit, remove this when fixed!\n" ] }, { @@ -191,6 +193,84 @@ { "cell_type": "code", "execution_count": 4, + "id": "wired-principal", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'num_samples': 5, 'beam_size': 500, 'num_proc_bsearch': 8, 'num_conv_layers': 2, 'num_rnn_layers': 3, 'rnn_layer_size': 2048, 'alpha': 2.5, 'beta': 0.3, 'cutoff_prob': 1.0, 'cutoff_top_n': 40, 'use_gru': False, 'use_gpu': True, 'share_rnn_weights': True, 'unit_type': 'char', 'spm_model_prefix': 'examples/aishell/s1/data/spm_bpe', 'infer_manifest': 'examples/aishell/s1/data/manifest.test', 'mean_std_path': '', 'vocab_path': 'examples/aishell/s1/data/vocab.txt', 'lang_model_path': 'models/lm/common_crawl_00.prune01111.trie.klm', 'model_path': 'examples/aishell/s1/checkpoints/step_final', 'decoding_method': 'ctc_beam_search', 'error_rate_type': 'wer', 'specgram_type': 'fbank', 'feat_dim': 80, 'delta_delta': False}\n" + ] + } + ], + "source": [ + "import sys\n", + "import argparse\n", + "import functools\n", + "from deepspeech.utils.utility import 
add_arguments, print_arguments\n", + "parser = argparse.ArgumentParser(description=__doc__)\n", + "add_arg = functools.partial(add_arguments, argparser=parser)\n", + "# yapf: disable\n", + "add_arg('num_samples', int, 5, \"# of samples to infer.\")\n", + "add_arg('beam_size', int, 500, \"Beam search width.\")\n", + "add_arg('num_proc_bsearch', int, 8, \"# of CPUs for beam search.\")\n", + "add_arg('num_conv_layers', int, 2, \"# of convolution layers.\")\n", + "add_arg('num_rnn_layers', int, 3, \"# of recurrent layers.\")\n", + "add_arg('rnn_layer_size', int, 2048, \"# of recurrent cells per layer.\")\n", + "add_arg('alpha', float, 2.5, \"Coef of LM for beam search.\")\n", + "add_arg('beta', float, 0.3, \"Coef of WC for beam search.\")\n", + "add_arg('cutoff_prob', float, 1.0, \"Cutoff probability for pruning.\")\n", + "add_arg('cutoff_top_n', int, 40, \"Cutoff number for pruning.\")\n", + "add_arg('use_gru', bool, False, \"Use GRUs instead of simple RNNs.\")\n", + "add_arg('use_gpu', bool, True, \"Use GPU or not.\")\n", + "add_arg('share_rnn_weights',bool, True, \"Share input-hidden weights across \"\n", + " \"bi-directional RNNs. 
Not for GRU.\")\n", + "add_arg('unit_type', str,\n", + " 'char',\n", + " \"Options: char, word, spm.\",\n", + " choices=['char', 'word', 'spm'])\n", + "add_arg('spm_model_prefix', str,\n", + " 'examples/aishell/s1/data/spm_bpe',\n", + " \"spm model prefix.\",)\n", + "add_arg('infer_manifest', str,\n", + " 'examples/aishell/s1/data/manifest.test',\n", + " \"Filepath of manifest to infer.\")\n", + "add_arg('mean_std_path', str,\n", + " '',\n", + " \"examples/aishell/s1/data/mean_std.npz, Filepath of normalizer's mean & std.\")\n", + "add_arg('vocab_path', str,\n", + " 'examples/aishell/s1/data/vocab.txt',\n", + " \"Filepath of vocabulary.\")\n", + "add_arg('lang_model_path', str,\n", + " 'models/lm/common_crawl_00.prune01111.trie.klm',\n", + " \"Filepath for language model.\")\n", + "add_arg('model_path', str,\n", + " 'examples/aishell/s1/checkpoints/step_final',\n", + " \"If None, the training starts from scratch, \"\n", + " \"otherwise, it resumes from the pre-trained model.\")\n", + "add_arg('decoding_method', str,\n", + " 'ctc_beam_search',\n", + " \"Decoding method. Options: ctc_beam_search, ctc_greedy\",\n", + " choices = ['ctc_beam_search', 'ctc_greedy'])\n", + "add_arg('error_rate_type', str,\n", + " 'wer',\n", + " \"Error rate type for evaluation.\",\n", + " choices=['wer', 'cer'])\n", + "add_arg('specgram_type', str,\n", + " 'fbank',\n", + " \"Audio feature type. 
Options: linear, mfcc.\",\n", + " choices=['linear', 'mfcc', 'fbank'])\n", + "add_arg('feat_dim', int, 80, \"mfcc or fbank feat dim.\")\n", + "add_arg('delta_delta', bool, False, \"delta delta\")\n", + "# yapf: disable\n", + "args = parser.parse_args([])\n", + "print(vars(args))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, "id": "bearing-physics", "metadata": {}, "outputs": [ @@ -259,7 +339,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "classified-melissa", "metadata": {}, "outputs": [ @@ -268,7 +348,31 @@ "output_type": "stream", "text": [ "/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n", - " and should_run_async(code)\n", + " and should_run_async(code)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fbank\n", + "[232 387 331 ... 249 249 262] int16\n", + "fbank\n", + "[-138 -219 -192 ... 338 324 351] int16\n", + "fbank\n", + "[ 694 1175 1022 ... 553 514 627] int16\n", + "fbank\n", + "[-39 -79 -53 ... 139 172 99] int16\n", + "fbank\n", + "[-277 -480 -425 ... 758 767 739] int16\n", + "fbank\n", + "[ 399 693 609 ... 1291 1270 1291] int16\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ "/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/fluid/dataloader/dataloader_iter.py:354: DeprecationWarning: `np.object` is a deprecated alias for the builtin `object`. To silence this warning, use `object` by itself. Doing this will not modify any behavior and is safe. 
\n", "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", " if arr.dtype == np.object:\n" @@ -278,58 +382,106 @@ "name": "stdout", "output_type": "stream", "text": [ - "test: Tensor(shape=[5, 23], dtype=int32, place=CUDAPinnedPlace, stop_gradient=True,\n", - " [[116, 104, 101, 32, 116, 119, 101, 110, 116, 105, 101, 115, -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 ],\n", - " [119, 104, 101, 114, 101, 32, 105, 115, 32, 116, 104, 97, 116, -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 ],\n", - " [116, 101, 110, 32, 115, 101, 99, 111, 110, 100, 115, -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 ],\n", - " [104, 101, 32, 100, 111, 101, 115, 110, 39, 116, 32, 119, 111, 114, 107, 32, 97, 116, 32, 97, 108, 108, -1 ],\n", - " [119, 104, 101, 114, 101, 32, 105, 115, 32, 109, 121, 32, 98, 114, 111, 116, 104, 101, 114, 32, 110, 111, 119]])\n", - "test raw: the twenties\n", - "test raw: where is my brother now\n", + "fbank\n", + "[ -750 -1254 -1107 ... 2276 1889 2067] int16\n", + "fbank\n", + "[ -127 -199 -149 ... -5243 -5065 -5398] int16\n", + "fbank\n", + "[ 465 783 677 ... 980 903 1008] int16\n", + "fbank\n", + "[ 90 160 157 ... -2 -16 -21] int16\n", + "fbank\n", + "[ 213 345 295 ... 2483 2246 2501] int16\n", + "fbank\n", + "[ -86 -159 -131 ... 270 258 290] int16\n", + "fbank\n", + "[-1023 -1714 -1505 ... 1532 1596 1575] int16\n", + "fbank\n", + "[-366 -602 -527 ... 374 370 379] int16\n", + "fbank\n", + "[ 761 1275 1127 ... 369 413 295] int16\n", + "fbank\n", + "[382 621 550 ... 161 161 174] int16\n", + "fbank\n", + "[ -28 -91 -120 ... 28 34 11] int16\n", + "fbank\n", + "[ -5 -5 -5 ... 268 294 341] int16\n", + "fbank\n", + "[240 417 684 ... 267 262 219] int16\n", + "fbank\n", + "[131 206 194 ... 
383 320 343] int16\n", + "test: Tensor(shape=[5, 7], dtype=int32, place=CUDAPinnedPlace, stop_gradient=True,\n", + " [[31069, 21487, 29233, 30340, 20320, -1 , -1 ],\n", + " [20540, 24471, 19968, 25552, 30340, 26159, -1 ],\n", + " [36825, 20010, 31243, 24230, 26159, 32654, 30340],\n", + " [20108, 21040, 20108, -1 , -1 , -1 , -1 ],\n", + " [21435, 34892, 25919, 21270, -1 , -1 , -1 ]])\n", + "fbank\n", + "[1155 1890 1577 ... 1092 989 1130] int16\n", + "fbank\n", + "[296 358 296 ... 140 140 168] int16\n", + "fbank\n", + "[-50 -91 -63 ... 104 104 86] int16\n", + "fbank\n", + "[-37 -66 -50 ... -31 -45 -52] int16\n", + "fbank\n", + "[-401 -652 -547 ... -339 -307 -344] int16\n", + "fbank\n", + "[-21 -47 -51 ... 94 81 107] int16\n", + "fbank\n", + "[ 533 887 755 ... 3074 2853 3254] int16\n", + "fbank\n", + "[ 44 71 66 ... -628 -733 -601] int16\n", + "fbank\n", + "[ 50 86 79 ... 129 116 138] int16\n", + "fbank\n", + "[ 92 146 126 ... -208 -193 -179] int16\n", + "test raw: 祝可爱的你\n", + "test raw: 去行政化\n", "audio len: Tensor(shape=[5], dtype=int64, place=CUDAPinnedPlace, stop_gradient=True,\n", - " [163, 173, 184, 190, 203])\n", + " [184, 194, 196, 204, 207])\n", "test len: Tensor(shape=[5], dtype=int64, place=CUDAPlace(0), stop_gradient=True,\n", - " [12, 13, 11, 22, 23])\n", - "audio: Tensor(shape=[5, 203, 80], dtype=float32, place=CUDAPinnedPlace, stop_gradient=True,\n", - " [[[-51.32406616, -17.91388321, 0.00000000 , ..., -26.66350746, -27.46039391, -27.22303963],\n", - " [-15.19027233, -20.52460480, 0.00000000 , ..., -28.47811317, -26.87953568, -25.13592339],\n", - " [-22.80181694, -19.48889351, 0.00000000 , ..., -29.96320724, -25.96619034, -24.57164192],\n", + " [5, 6, 7, 3, 4])\n", + "audio: Tensor(shape=[5, 207, 80], dtype=float32, place=CUDAPinnedPlace, stop_gradient=True,\n", + " [[[12.25633812, 12.61639309, 10.36936474, ..., 13.02949619, 11.51365757, 10.59789085],\n", + " [13.32148266, 13.41071606, 11.43800735, ..., 13.69783783, 12.83939362, 11.51259613],\n", + " 
[12.62640572, 12.53621101, 10.97212505, ..., 13.33757591, 12.32293034, 10.75493717],\n", " ...,\n", - " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", - " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", - " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ]],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ]],\n", "\n", - " [[-15.38297653, -18.95307732, 0.00000000 , ..., -15.22777271, -16.46900940, -12.32327461],\n", - " [-14.06289291, -12.69954872, 0.00000000 , ..., -15.68012810, -16.92030334, -13.49134445],\n", - " [-19.78544235, -11.63046265, 0.00000000 , ..., -14.35409069, -14.82787228, -15.72653484],\n", + " [[10.99619484, 11.35202599, 9.56922054 , ..., 9.94971657 , 9.88354111 , 9.55315971 ],\n", + " [10.44461155, 9.81688595 , 5.62538481 , ..., 10.60468388, 10.94417381, 9.42646980 ],\n", + " [10.23835754, 10.23407459, 7.99464273 , ..., 10.68097591, 9.91640091 , 10.04131031],\n", " ...,\n", - " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", - " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", - " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ]],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ]],\n", "\n", - " [[-22.65289879, -21.11938667, 0.00000000 , ..., -31.80981827, -30.58669853, -28.68988228],\n", - " [-31.04699135, -21.68680763, 0.00000000 , ..., -29.90789604, -30.31726456, -30.99709320],\n", - " [-18.16406441, -17.50658417, 0.00000000 , ..., -29.47821617, -29.77137375, -30.45121002],\n", + " [[14.10299397, 14.50298119, 12.87738323, ..., 12.62796497, 12.69949627, 11.43171215],\n", + " [13.85035992, 13.15289116, 10.66541386, ..., 13.34364223, 13.46972179, 11.02160740],\n", + " [13.19866467, 13.23537827, 11.65760899, ..., 12.72559357, 12.42716217, 11.74562359],\n", " ...,\n", - " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", - " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", - " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ]],\n", + " [0. , 0. , 0. 
, ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ]],\n", "\n", - " [[-16.17608452, -15.22302818, 0.00000000 , ..., -8.82944202 , -7.88900328 , -6.10806322 ],\n", - " [-19.40717316, -12.32932186, 0.00000000 , ..., -8.05214977 , -8.03145599 , -7.35137606 ],\n", - " [-11.01850796, -13.20147514, 0.00000000 , ..., -9.65334892 , -8.96987629 , -9.13897228 ],\n", + " [[12.85668373, 12.82431412, 11.68144703, ..., 14.10119247, 15.12791920, 13.68221378],\n", + " [13.19507027, 13.40244961, 11.43618393, ..., 13.32919979, 13.68267441, 12.73429012],\n", + " [13.02173328, 12.92082500, 11.44303989, ..., 12.77793121, 13.10915661, 11.77327728],\n", " ...,\n", - " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", - " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", - " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ]],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ]],\n", "\n", - " [[-16.55369759, -16.95514297, 0.00000000 , ..., -7.00301647 , -6.53273058 , -10.14600754],\n", - " [-19.51947975, -14.86818218, 0.00000000 , ..., -6.82891273 , -6.22576237 , -9.42883873 ],\n", - " [-15.26447582, -22.26662445, 0.00000000 , ..., -13.31693172, -11.05612659, -12.70977211],\n", + " [[12.90771198, 13.40234852, 13.01435471, ..., 13.80359459, 14.08088684, 13.17883396],\n", + " [14.06678009, 14.06943512, 12.52837276, ..., 13.66423225, 13.66300583, 13.60142994],\n", + " [12.58743191, 12.94520760, 11.75190544, ..., 14.28828907, 14.08229160, 13.02433395],\n", " ...,\n", - " [-4.81728077 , -10.65084648, 0.00000000 , ..., 3.19982862 , 8.42359638 , 7.95100546 ],\n", - " [-7.54755068 , -12.56441689, 0.00000000 , ..., 4.12789631 , 6.98472023 , 7.79936218 ],\n", - " [-8.79256725 , -11.23776722, 0.00000000 , ..., 1.31829071 , 1.30352044 , 6.80789280 ]]])\n" + " [16.20896912, 16.42283821, 14.94358730, ..., 12.91146755, 12.66766262, 11.76361752],\n", + " [13.49324894, 14.14653301, 13.16490936, 
..., 13.23435783, 13.45378494, 12.60386276],\n", + " [15.56288910, 15.92445087, 14.90794277, ..., 13.43840790, 13.41075516, 12.55605984]]])\n" ] } ], @@ -354,7 +506,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "minus-modern", "metadata": {}, "outputs": [ @@ -362,58 +514,70 @@ "name": "stdout", "output_type": "stream", "text": [ - "test: Tensor(shape=[5, 23], dtype=int32, place=CUDAPinnedPlace, stop_gradient=True,\n", - " [[87, 37, 26, 1, 87, 97, 26, 61, 87, 38, 26, 82, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],\n", - " [97, 37, 26, 79, 26, 1, 38, 82, 1, 87, 37, 3, 87, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],\n", - " [87, 26, 61, 1, 82, 26, 18, 64, 61, 25, 82, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],\n", - " [37, 26, 1, 25, 64, 26, 82, 61, 2, 87, 1, 97, 64, 79, 52, 1, 3, 87, 1, 3, 53, 53, -1],\n", - " [97, 37, 26, 79, 26, 1, 38, 82, 1, 58, 102, 1, 17, 79, 64, 87, 37, 26, 79, 1, 61, 64, 97]])\n", - "test raw: W%\u001a\u0001Wa\u001a=W&\u001aR\n", - "test raw: a%\u001aO\u001a\u0001&R\u0001:f\u0001\u0011O@W%\u001aO\u0001=@a\n", + "fbank\n", + "[232 387 331 ... 249 249 262] int16\n", + "fbank\n", + "[-138 -219 -192 ... 338 324 351] int16\n", + "fbank\n", + "[ 694 1175 1022 ... 553 514 627] int16\n", + "fbank\n", + "[-39 -79 -53 ... 139 172 99] int16\n", + "fbank\n", + "[-277 -480 -425 ... 758 767 739] int16\n", + "fbank\n", + "test: Tensor(shape=[5, 7], dtype=int32, place=CUDAPinnedPlace, stop_gradient=True,\n", + " [[2695, 505, 2332, 2553, 169, -1 , -1 ],\n", + " [ 230, 1237, 2 , 1556, 2553, 1694, -1 ],\n", + " [3703, 28 , 2739, 1172, 1694, 2966, 2553],\n", + " [ 70 , 355, 70 , -1 , -1 , -1 , -1 ],\n", + " [ 477, 3363, 1621, 412, -1 , -1 , -1 ]])\n", + "[ 399 693 609 ... 
1291 1270 1291] int16\n", + "test raw: ઇǹज৹©\n", + "test raw: ǝണٕƜ\n", "test len: Tensor(shape=[5], dtype=int64, place=CUDAPlace(0), stop_gradient=True,\n", - " [12, 13, 11, 22, 23])\n", - "audio: Tensor(shape=[5, 203, 80], dtype=float32, place=CUDAPinnedPlace, stop_gradient=True,\n", - " [[[-51.32406616, -17.91388321, 0.00000000 , ..., -26.66350746, -27.46039391, -27.22303963],\n", - " [-15.19027233, -20.52460480, 0.00000000 , ..., -28.47811317, -26.87953568, -25.13592339],\n", - " [-22.80181694, -19.48889351, 0.00000000 , ..., -29.96320724, -25.96619034, -24.57164192],\n", + " [5, 6, 7, 3, 4])\n", + "audio: Tensor(shape=[5, 207, 80], dtype=float32, place=CUDAPinnedPlace, stop_gradient=True,\n", + " [[[12.25794601, 12.61855793, 10.37306023, ..., 13.12571049, 11.53678799, 10.32210350],\n", + " [13.32333183, 13.41336918, 11.44248962, ..., 13.65861225, 12.79308128, 11.31168747],\n", + " [12.62584686, 12.53506088, 10.96861362, ..., 13.32526493, 12.41560936, 10.71458912],\n", " ...,\n", - " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", - " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", - " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ]],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ]],\n", "\n", - " [[-15.38297653, -18.95307732, 0.00000000 , ..., -15.22777271, -16.46900940, -12.32327461],\n", - " [-14.06289291, -12.69954872, 0.00000000 , ..., -15.68012810, -16.92030334, -13.49134445],\n", - " [-19.78544235, -11.63046265, 0.00000000 , ..., -14.35409069, -14.82787228, -15.72653484],\n", + " [[11.00003052, 11.35529137, 9.56384087 , ..., 10.06063652, 10.16322994, 9.43149185 ],\n", + " [10.44556236, 9.81155300 , 5.49400425 , ..., 10.84116268, 11.02734756, 9.42253590 ],\n", + " [10.23620510, 10.23321152, 7.99466419 , ..., 10.93381882, 10.28395081, 10.00841141],\n", " ...,\n", - " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", - " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", - " [ 0. , 0. , 0. , ..., 0. , 0. , 0. 
]],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ]],\n", "\n", - " [[-22.65289879, -21.11938667, 0.00000000 , ..., -31.80981827, -30.58669853, -28.68988228],\n", - " [-31.04699135, -21.68680763, 0.00000000 , ..., -29.90789604, -30.31726456, -30.99709320],\n", - " [-18.16406441, -17.50658417, 0.00000000 , ..., -29.47821617, -29.77137375, -30.45121002],\n", + " [[14.10379314, 14.50375748, 12.87825108, ..., 12.68065739, 12.62359715, 11.53773308],\n", + " [13.84964657, 13.15079498, 10.67198086, ..., 13.24875164, 13.45796680, 10.97363472],\n", + " [13.19808197, 13.23482990, 11.65900230, ..., 12.70375061, 12.41395664, 11.88668156],\n", " ...,\n", - " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", - " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", - " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ]],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ]],\n", "\n", - " [[-16.17608452, -15.22302818, 0.00000000 , ..., -8.82944202 , -7.88900328 , -6.10806322 ],\n", - " [-19.40717316, -12.32932186, 0.00000000 , ..., -8.05214977 , -8.03145599 , -7.35137606 ],\n", - " [-11.01850796, -13.20147514, 0.00000000 , ..., -9.65334892 , -8.96987629 , -9.13897228 ],\n", + " [[12.85676289, 12.82410812, 11.67961884, ..., 14.12018299, 15.14850044, 13.80065727],\n", + " [13.19532776, 13.40243340, 11.43492508, ..., 13.29144669, 13.70278549, 12.67841339],\n", + " [13.02196407, 12.92111111, 11.43998623, ..., 12.71165752, 13.16518497, 11.92028046],\n", " ...,\n", - " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", - " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", - " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ]],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [0. , 0. , 0. , ..., 0. , 0. , 0. 
]],\n", "\n", - " [[-16.55369759, -16.95514297, 0.00000000 , ..., -7.00301647 , -6.53273058 , -10.14600754],\n", - " [-19.51947975, -14.86818218, 0.00000000 , ..., -6.82891273 , -6.22576237 , -9.42883873 ],\n", - " [-15.26447582, -22.26662445, 0.00000000 , ..., -13.31693172, -11.05612659, -12.70977211],\n", + " [[12.90661621, 13.40162563, 13.01394463, ..., 13.84056377, 14.11240959, 13.21227264],\n", + " [14.06642914, 14.06922340, 12.52955723, ..., 13.55829811, 13.60157204, 13.50268650],\n", + " [12.58881378, 12.94780254, 11.75758171, ..., 14.29055786, 14.12165928, 13.02695847],\n", " ...,\n", - " [-4.81728077 , -10.65084648, 0.00000000 , ..., 3.19982862 , 8.42359638 , 7.95100546 ],\n", - " [-7.54755068 , -12.56441689, 0.00000000 , ..., 4.12789631 , 6.98472023 , 7.79936218 ],\n", - " [-8.79256725 , -11.23776722, 0.00000000 , ..., 1.31829071 , 1.30352044 , 6.80789280 ]]])\n", + " [16.20891571, 16.42290306, 14.94398117, ..., 12.86083794, 12.63515949, 11.67581463],\n", + " [13.49345875, 14.14656067, 13.16498375, ..., 13.28024578, 13.40956783, 12.70357513],\n", + " [15.56265163, 15.92387581, 14.90643024, ..., 13.45694065, 13.44703197, 12.81099033]]])\n", "audio len: Tensor(shape=[5], dtype=int64, place=CUDAPinnedPlace, stop_gradient=True,\n", - " [163, 173, 184, 190, 203])\n" + " [184, 194, 196, 204, 207])\n" ] } ], @@ -464,6 +628,556 @@ "metadata": {}, "outputs": [], "source": [] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "knowing-military", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'num_samples': 1, 'specgram_type': 'fbank', 'feat_dim': 80, 'delta_delta': False, 'stride_ms': 10.0, 'window_ms': 25.0, 'sample_rate': 16000, 'manifest_path': 'examples/aishell/s1/data/manifest.train', 'output_path': 'examples/aishell/s1/data/mean_std.npz'}\n" + ] + } + ], + "source": [ + "import sys\n", + "import argparse\n", + "import functools\n", + "from deepspeech.utils.utility import add_arguments, 
print_arguments\n", + "parser = argparse.ArgumentParser(description=__doc__)\n", + "add_arg = functools.partial(add_arguments, argparser=parser)\n", + "\n", + "add_arg('num_samples', int, 1, \"# of samples to for statistics.\")\n", + "add_arg('specgram_type', str, 'fbank',\n", + " \"Audio feature type. Options: linear, mfcc, fbank.\",\n", + " choices=['linear', 'mfcc', 'fbank'])\n", + "add_arg('feat_dim', int, 80, \"Audio feature dim.\")\n", + "add_arg('delta_delta', bool, False,\"Audio feature with delta delta.\")\n", + "add_arg('stride_ms', float, 10.0, \"stride length in ms.\")\n", + "add_arg('window_ms', float, 25.0, \"stride length in ms.\")\n", + "add_arg('sample_rate', int, 16000, \"target sample rate.\")\n", + "add_arg('manifest_path', str,\n", + " 'examples/aishell/s1/data/manifest.train',\n", + " \"Filepath of manifest to compute normalizer's mean and stddev.\")\n", + "add_arg('output_path', str,\n", + " 'examples/aishell/s1/data/mean_std.npz',\n", + " \"Filepath of write mean and stddev to (.npz).\")\n", + "args = parser.parse_args([])\n", + "print(vars(args))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "unnecessary-province", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "from deepspeech.frontend.augmentor.augmentation import AugmentationPipeline\n", + "from deepspeech.frontend.featurizer.audio_featurizer import AudioFeaturizer\n", + "from deepspeech.frontend.normalizer import FeatureNormalizer\n", + "from deepspeech.frontend.audio import AudioSegment\n", + "from deepspeech.frontend.utility import load_cmvn\n", + "from deepspeech.frontend.utility import read_manifest\n", + "\n", + "\n", + "\n", + "def mean(args):\n", + " augmentation_pipeline = AugmentationPipeline('{}')\n", + " audio_featurizer = AudioFeaturizer(\n", + " specgram_type=args.specgram_type,\n", + " feat_dim=args.feat_dim,\n", + " delta_delta=args.delta_delta,\n", + " stride_ms=args.stride_ms,\n", + " window_ms=args.window_ms,\n", + " n_fft=None,\n", 
+ " max_freq=None,\n", + " target_sample_rate=args.sample_rate,\n", + " use_dB_normalization=True,\n", + " target_dB=-20,\n", + " dither=0.0)\n", + "\n", + " def augment_and_featurize(audio_segment):\n", + " augmentation_pipeline.transform_audio(audio_segment)\n", + " return audio_featurizer.featurize(audio_segment)\n", + "\n", + " normalizer = FeatureNormalizer(\n", + " mean_std_filepath=None,\n", + " manifest_path=args.manifest_path,\n", + " featurize_func=augment_and_featurize,\n", + " num_samples=args.num_samples)\n", + " normalizer.write_to_file(args.output_path)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "interested-camping", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.00164795 0.00274658 0.00234985 ... 0.00177002 0.00177002 0.00186157]\n", + "[54. 90. 77. ... 58. 58. 61.]\n", + "29746\n", + "fbank\n", + "[54 90 77 ... 58 58 61] int16\n", + "(184, 80) float64\n", + "[[10.61737914 10.07708936 5.32487528 ... 10.2481839 8.89699394\n", + " 7.80671114]\n", + " [11.0440077 10.3180721 6.30866128 ... 11.23730926 10.35838868\n", + " 8.83860079]\n", + " [10.26930555 9.99636567 7.3296638 ... 10.45131595 9.69295303\n", + " 7.96168491]\n", + " ...\n", + " [10.14497345 9.88674207 6.73801138 ... 10.21580627 9.00343472\n", + " 8.75616521]\n", + " [ 9.97745961 9.67949736 7.90660425 ... 10.22436653 9.59456493\n", + " 7.69287184]\n", + " [ 6.47357374 7.76335491 7.75765843 ... 9.96522077 9.6226365\n", + " 8.16007108]]\n", + "(184, 80) float64\n", + "[[10.61737914 10.07708936 5.32487528 ... 10.2481839 8.89699394\n", + " 7.80671114]\n", + " [11.0440077 10.3180721 6.30866128 ... 11.23730926 10.35838868\n", + " 8.83860079]\n", + " [10.26930555 9.99636567 7.3296638 ... 10.45131595 9.69295303\n", + " 7.96168491]\n", + " ...\n", + " [10.14497345 9.88674207 6.73801138 ... 10.21580627 9.00343472\n", + " 8.75616521]\n", + " [ 9.97745961 9.67949736 7.90660425 ... 
10.22436653 9.59456493\n", + " 7.69287184]\n", + " [ 6.47357374 7.76335491 7.75765843 ... 9.96522077 9.6226365\n", + " 8.16007108]]\n" + ] + } + ], + "source": [ + "wav='/workspace/DeepSpeech-2.x/examples/aishell/s1/../../..//examples/dataset/aishell/data_aishell/wav/test/S0916/BAC009S0916W0426.wav'\n", + "test='祝可爱的你'\n", + "audio_featurizer = AudioFeaturizer(\n", + " specgram_type=args.specgram_type,\n", + " feat_dim=args.feat_dim,\n", + " delta_delta=args.delta_delta,\n", + " stride_ms=args.stride_ms,\n", + " window_ms=args.window_ms,\n", + " n_fft=None,\n", + " max_freq=None,\n", + " target_sample_rate=args.sample_rate,\n", + " use_dB_normalization=False,\n", + " target_dB=-20,\n", + " dither=0.0)\n", + "samples = AudioSegment.from_file(wav)\n", + "print(samples._samples)\n", + "print(samples._samples * 2**15)\n", + "print(len(samples._samples))\n", + "feat = audio_featurizer.featurize(samples, False, False)\n", + "feat = feat.T\n", + "print(feat.shape, feat.dtype)\n", + "print(feat)\n", + "\n", + "from python_speech_features import logfbank\n", + "max_freq = args.sample_rate / 2\n", + "fbank_feat = logfbank(\n", + " signal=samples.to('int16'),\n", + " samplerate=args.sample_rate,\n", + " winlen=0.001 * args.window_ms,\n", + " winstep=0.001 * args.stride_ms,\n", + " nfilt=args.feat_dim,\n", + " nfft=512,\n", + " lowfreq=20,\n", + " highfreq=max_freq,\n", + " preemph=0.97,\n", + " dither=0.0,\n", + " wintype='povey')\n", + "print(fbank_feat.shape, fbank_feat.dtype)\n", + "print(fbank_feat)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "numeric-analyst", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(184, 160)\n", + "[ 8.59522397 8.43148278 8.36414052 8.45487173 8.31761643 8.04843683\n", + " 8.01683696 7.6574614 7.95521932 8.22945157 10.20138275 9.0447775\n", + " 9.14763398 9.18184349 9.03801065 9.04852307 8.67706728 8.71894271\n", + " 9.54553655 9.19535135 8.76413076 8.47828946 
8.52586143 8.49469288\n", + " 8.72461247 8.28562879 8.11581393 7.99922156 7.91023364 8.04142296\n", + " 7.89762773 7.76257636 8.32043745 8.01592886 8.34109665 8.90115454\n", + " 8.48246945 7.98658664 8.05745122 8.11384088 8.18864479 8.8091827\n", + " 11.8067711 13.25258218 14.44311795 13.90515283 14.00120623 13.99801252\n", + " 13.81595394 13.6379904 13.3574897 13.14933334 12.96518543 13.02601156\n", + " 12.70246737 12.54410834 12.15615068 11.86574681 11.67497882 10.79645481\n", + " 10.48150035 10.03758575 10.05637027 9.92891308 10.06923218 12.43382431\n", + " 12.71428321 14.33135052 13.94470959 14.29188291 14.11483993 14.03496606\n", + " 13.78167331 13.66701466 14.40308625 14.73934137 15.09569382 14.89565815\n", + " 15.10519995 14.94383582 15.03275563 15.42194679 15.29219967 15.41602274\n", + " 15.39242545 15.76836177 16.259222 16.47777231 17.03366795 17.46165793\n", + " 17.52596217 17.78844031 17.99878075 18.11446843 17.95761578 17.99900337\n", + " 17.86282737 17.7290163 17.47686504 17.43425516 17.07750485 16.64395242\n", + " 15.68217043 14.90058399 14.45645737 14.0405463 14.89549542 16.00405781\n", + " 16.27301689 16.37572895 16.31219037 16.31765447 16.44819716 16.36281089\n", + " 16.24932823 15.79302555 14.76361963 13.95761882 13.48917053 13.45543501\n", + " 13.00091327 13.13854248 13.74596395 13.86340629 14.00656109 13.77432101\n", + " 13.64267001 13.35742634 13.23042234 12.97916104 12.80694468 12.70005006\n", + " 13.2802483 13.22644525 13.14579624 13.02536594 13.36511022 11.37167205\n", + " 12.11598045 12.47619798 12.83885973 11.63880287 11.42083924 11.08747705\n", + " 11.04093403 11.11263149 10.74353319 10.58734669 10.46180738 10.34157335\n", + " 9.63131146 9.70582692 9.29059204 8.94583657 8.66065094 8.46799095\n", + " 8.25064103 8.30239167 8.19463371 8.12104567 8.02731234 8.06412715\n", + " 7.84889951 7.73090283 7.74119562 7.85444657 7.80717312 7.7129933\n", + " 7.84087442 7.77907788 7.60660865 7.55051479 7.458385 7.496416\n", + " 7.69519793 7.49086759 
7.32199493 8.01617458 7.58525375 7.06661122\n", + " 6.94653756 7.19874283 7.28515661 7.17574078]\n", + "(184,)\n", + "(184,)\n", + "[1.48370471 1.52174523 1.46984238 1.67010478 1.88757689 1.68825992\n", + " 1.74270259 1.55497318 1.29200818 1.68446481 1.88133219 1.97138928\n", + " 2.15910096 2.3149476 1.9820247 2.07694378 1.93498835 2.01493974\n", + " 2.39156824 2.02396518 1.69586449 1.63808752 1.64020228 1.43573473\n", + " 1.93092656 1.37466294 1.34704929 1.59600739 1.03960441 1.45276496\n", + " 1.59360131 1.57466343 1.89491479 1.79333746 1.32701974 1.49441767\n", + " 1.51466756 1.63497989 1.42858074 1.51135396 1.61077201 1.81066387\n", + " 1.83367783 2.3507094 2.87885378 3.26231227 2.1313117 1.98557548\n", + " 1.99105426 2.26150533 2.34298751 2.44621608 2.39201042 2.41226503\n", + " 2.5142992 3.03777565 2.81592295 2.75117863 2.78324175 2.68819666\n", + " 2.8945782 2.84464168 2.680973 2.78397395 2.47996808 1.71829563\n", + " 1.60636949 1.65992483 1.38122631 1.74831825 2.16006884 1.68076185\n", + " 1.69329487 1.44929837 1.63763312 1.80101076 2.01166253 2.03254244\n", + " 1.9583913 2.04542255 2.00859694 2.16600883 2.16095629 1.97541122\n", + " 2.13807632 2.06386436 2.2154187 2.84205688 2.54862449 2.64321545\n", + " 2.6805773 2.52300146 2.53209001 2.54682059 2.4521937 2.43155532\n", + " 2.42571275 2.23421289 2.23164529 2.23597192 2.14215121 2.10406703\n", + " 2.07962874 1.88506161 1.80092372 1.61156092 1.77426835 1.98765563\n", + " 2.0356793 1.87964187 1.779513 1.87187681 1.76463632 1.70978684\n", + " 1.76471778 1.75604749 1.62792552 1.73929352 1.6887024 1.8677704\n", + " 2.17342368 2.08166072 2.14567453 2.15936953 2.18351006 2.41010388\n", + " 2.26101752 2.25468001 2.23739715 2.15395133 2.04547813 1.92038843\n", + " 1.85491264 1.91905927 2.16709365 1.99924152 2.1850471 2.55461622\n", + " 2.72476673 1.69682926 1.73249614 2.06992695 2.1210591 1.66854454\n", + " 1.63907505 1.32203822 1.38992558 1.2436937 1.17932877 1.02963653\n", + " 1.26085036 1.16997132 1.09339504 
1.14188689 1.18675772 1.31859788\n", + " 1.21746591 1.3872131 1.26095274 1.34885761 1.46633543 1.64506975\n", + " 1.36013821 1.45574721 1.43766588 1.65119054 1.57163772 1.55082968\n", + " 1.29413316 1.38351736 1.64234673 1.57186432 1.45381083 1.71204761\n", + " 1.51828607 1.30639985 1.32928395 1.49004237 1.6057589 1.81815735\n", + " 1.67784678 1.72180861 1.60703743 1.64850255]\n" + ] + } + ], + "source": [ + "a = np.hstack([feat, feat])\n", + "print(a.shape)\n", + "m = np.mean(a, axis=1)\n", + "print(m)\n", + "print(m.shape)\n", + "std = np.std(a, axis=1)\n", + "print(std.shape)\n", + "print(std)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "nonprofit-potato", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "hispanic-ethics", + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torchaudio\n", + "import torchaudio.compliance.kaldi as kaldi\n", + "import torchaudio.sox_effects as sox_effects\n", + "from torch.nn.utils.rnn import pad_sequence\n", + "torchaudio.set_audio_backend(\"sox\")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "changing-calvin", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([1, 29746])\n", + "tensor([[54., 90., 77., ..., 58., 58., 61.]])\n", + "(184, 80)\n", + "[[10.617376 10.077089 5.3248763 ... 10.248186 8.896992 7.8067265]\n", + " [11.044004 10.318072 6.3086634 ... 11.237308 10.358393 8.838616 ]\n", + " [10.269302 9.9963665 7.3296647 ... 10.451319 9.692951 7.9617033]\n", + " ...\n", + " [10.14497 9.886743 6.738012 ... 10.215809 9.0034275 8.756177 ]\n", + " [ 9.977456 9.679498 7.9066052 ... 10.224365 9.594568 7.6928873]\n", + " [ 6.4735703 7.7633557 7.7576594 ... 9.965221 9.622637 8.160085 ]]\n", + "-----------\n", + "[0.00164795 0.00274658 0.00234985 ... 
0.00177002 0.00177002 0.00186157]\n", + "(184, 80)\n", + "[[-10.177039 -10.717326 -15.46954 ... -10.546229 -11.897424 -12.987689]\n", + " [ -9.750411 -10.476343 -14.485752 ... -9.557108 -10.436023 -11.955799]\n", + " [-10.525113 -10.798049 -13.46475 ... -10.343097 -11.101464 -12.832712]\n", + " ...\n", + " [-10.649446 -10.907673 -14.056403 ... -10.578607 -11.790988 -12.038239]\n", + " [-10.816959 -11.114918 -12.88781 ... -10.570049 -11.199847 -13.101528]\n", + " [-14.320845 -13.03106 -13.036756 ... -10.829194 -11.171779 -12.634331]]\n", + "**************\n", + "[0.00164795 0.00274658 0.00234985 ... 0.00177002 0.00177002 0.00186157]\n", + "[54. 90. 77. ... 58. 58. 61.] float32\n", + "(184, 80)\n", + "[[10.617376 10.077089 5.3248763 ... 10.248186 8.896992 7.8067265]\n", + " [11.044004 10.318072 6.3086634 ... 11.237308 10.358393 8.838616 ]\n", + " [10.269302 9.9963665 7.3296647 ... 10.451319 9.692951 7.9617033]\n", + " ...\n", + " [10.14497 9.886743 6.738012 ... 10.215809 9.0034275 8.756177 ]\n", + " [ 9.977456 9.679498 7.9066052 ... 10.224365 9.594568 7.6928873]\n", + " [ 6.4735703 7.7633557 7.7576594 ... 9.965221 9.622637 8.160085 ]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/ipykernel_launcher.py:1: UserWarning: torchaudio.backend.sox_backend.load_wav has been deprecated and will be removed from 0.9.0 release. 
Please use \"torchaudio.load\".\n", + " \"\"\"Entry point for launching an IPython kernel.\n" + ] + } + ], + "source": [ + "waveform, sample_rate = torchaudio.load_wav(wav)\n", + "print(waveform.shape)\n", + "print(waveform)\n", + "mat = kaldi.fbank(\n", + " waveform,\n", + " num_mel_bins=80,\n", + " frame_length=25,\n", + " frame_shift=10,\n", + " dither=0,\n", + " energy_floor=0.0,\n", + " sample_frequency=sample_rate\n", + " )\n", + "mat = mat.detach().numpy()\n", + "print(mat.shape)\n", + "print(mat)\n", + "\n", + "print('-----------')\n", + "print(samples._samples)\n", + "aud = torch.tensor(samples._samples).view(1, -1)\n", + "mat = kaldi.fbank(\n", + " aud,\n", + " num_mel_bins=80,\n", + " frame_length=25,\n", + " frame_shift=10,\n", + " dither=0,\n", + " energy_floor=0.0,\n", + " sample_frequency=sample_rate\n", + " )\n", + "mat = mat.detach().numpy()\n", + "print(mat.shape)\n", + "print(mat)\n", + "\n", + "print('**************')\n", + "print(samples._samples)\n", + "tmp = samples.to('int16').astype('float32')\n", + "print(tmp, tmp.dtype)\n", + "aud = torch.tensor(tmp).view(1, -1)\n", + "mat = kaldi.fbank(\n", + " aud,\n", + " num_mel_bins=80,\n", + " frame_length=25,\n", + " frame_shift=10,\n", + " dither=0,\n", + " energy_floor=0.0,\n", + " sample_frequency=sample_rate\n", + " )\n", + "mat = mat.detach().numpy()\n", + "print(mat.shape)\n", + "print(mat)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "buried-dependence", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "silver-printing", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "outer-space", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(29746,)\n", + "[54 90 77 ... 58 58 61]\n", + "(184, 80)\n", + "[[10.61737914 10.07708936 5.32487528 ... 
10.2481839 8.89699394\n", + " 7.80671114]\n", + " [11.0440077 10.3180721 6.30866128 ... 11.23730926 10.35838868\n", + " 8.83860079]\n", + " [10.26930555 9.99636567 7.3296638 ... 10.45131595 9.69295303\n", + " 7.96168491]\n", + " ...\n", + " [10.14497345 9.88674207 6.73801138 ... 10.21580627 9.00343472\n", + " 8.75616521]\n", + " [ 9.97745961 9.67949736 7.90660425 ... 10.22436653 9.59456493\n", + " 7.69287184]\n", + " [ 6.47357374 7.76335491 7.75765843 ... 9.96522077 9.6226365\n", + " 8.16007108]]\n", + "(184, 13)\n", + "[[ 14.73775998 -13.30393391 5.85974818 ... -3.42359739 2.82785335\n", + " 8.86862748]\n", + " [ 15.31274834 -13.33671651 4.06537223 ... 8.15970347 2.15934846\n", + " 6.78353115]\n", + " [ 13.82218765 -13.39296404 6.8304843 ... 2.55332563 8.86724453\n", + " -0.05919222]\n", + " ...\n", + " [ 13.5837844 -13.42104892 11.21222354 ... 4.81477718 1.66627505\n", + " 5.59045842]\n", + " [ 13.75757034 -13.92626662 13.06074011 ... -0.46694046 5.56214833\n", + " 12.0785146 ]\n", + " [ 11.92813809 -15.9169855 8.78372271 ... 
-1.42014277 -3.25768086\n", + " 0.88337965]]\n" + ] + } + ], + "source": [ + "from python_speech_features import mfcc\n", + "from python_speech_features import delta\n", + "from python_speech_features import logfbank\n", + "import scipy.io.wavfile as iowav\n", + "\n", + "(rate,sig) = iowav.read(wav)\n", + "print(sig.shape)\n", + "print(sig)\n", + "\n", + "# note that generally nfilt=40 is used for speech recognition\n", + "fbank_feat = logfbank(sig,nfilt=80,lowfreq=20,dither=0,wintype='povey')\n", + "print(fbank_feat.shape)\n", + "print(fbank_feat)\n", + "\n", + "# the computed fbank coefficents of english.wav with dimension [110,23]\n", + "# [ 12.2865\t12.6906\t13.1765\t15.714\t16.064\t15.7553\t16.5746\t16.9205\t16.6472\t16.1302\t16.4576\t16.7326\t16.8864\t17.7215\t18.88\t19.1377\t19.1495\t18.6683\t18.3886\t20.3506\t20.2772\t18.8248\t18.1899\n", + "# 11.9198\t13.146\t14.7215\t15.8642\t17.4288\t16.394\t16.8238\t16.1095\t16.4297\t16.6331\t16.3163\t16.5093\t17.4981\t18.3429\t19.6555\t19.6263\t19.8435\t19.0534\t19.001\t20.0287\t19.7707\t19.5852\t19.1112\n", + "# ...\n", + "# ...\n", + "# the same with that using kaldi commands: compute-fbank-feats --dither=0.0\n", + "\n", + "mfcc_feat = mfcc(sig,dither=0,useEnergy=True,wintype='povey')\n", + "print(mfcc_feat.shape)\n", + "print(mfcc_feat)\n", + "\n", + "# the computed mfcc coefficents of english.wav with dimension [110,13]\n", + "# [ 17.1337\t-23.3651\t-7.41751\t-7.73686\t-21.3682\t-8.93884\t-3.70843\t4.68346\t-16.0676\t12.782\t-7.24054\t8.25089\t10.7292\n", + "# 17.1692\t-23.3028\t-5.61872\t-4.0075\t-23.287\t-20.6101\t-5.51584\t-6.15273\t-14.4333\t8.13052\t-0.0345329\t2.06274\t-0.564298\n", + "# ...\n", + "# ...\n", + "# the same with that using kaldi commands: compute-mfcc-feats --dither=0.0" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "sporting-school", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(184, 80)\n", + "[[-10.17703627 
-10.71732606 -15.46954014 ... -10.54623152 -11.89742148\n", + " -12.98770428]\n", + " [ -9.75040771 -10.47634331 -14.48575413 ... -9.55710616 -10.43602673\n", + " -11.95581463]\n", + " [-10.52510987 -10.79804975 -13.46475161 ... -10.34309947 -11.10146239\n", + " -12.83273051]\n", + " ...\n", + " [-10.64944197 -10.90767335 -14.05640404 ... -10.57860915 -11.7909807\n", + " -12.03825021]\n", + " [-10.8169558 -11.11491806 -12.88781116 ... -10.57004889 -11.19985048\n", + " -13.10154358]\n", + " [-14.32084168 -13.03106051 -13.03675699 ... -10.82919465 -11.17177892\n", + " -12.63434434]]\n", + "(184, 13)\n", + "[[ -6.05665544 -13.30393391 5.85974818 ... -3.42359739 2.82785335\n", + " 8.86862748]\n", + " [ -5.48166707 -13.33671651 4.06537223 ... 8.15970347 2.15934846\n", + " 6.78353115]\n", + " [ -6.97222776 -13.39296404 6.8304843 ... 2.55332563 8.86724453\n", + " -0.05919222]\n", + " ...\n", + " [ -7.21063102 -13.42104892 11.21222354 ... 4.81477718 1.66627505\n", + " 5.59045842]\n", + " [ -7.03684508 -13.92626662 13.06074011 ... -0.46694046 5.56214833\n", + " 12.0785146 ]\n", + " [ -8.86627732 -15.9169855 8.78372271 ... 
-1.42014277 -3.25768086\n", + " 0.88337965]]\n" + ] + } + ], + "source": [ + "fbank_feat = logfbank(samples._samples,nfilt=80,lowfreq=20,dither=0,wintype='povey')\n", + "print(fbank_feat.shape)\n", + "print(fbank_feat)\n", + "\n", + "mfcc_feat = mfcc(samples._samples,dither=0,useEnergy=True,wintype='povey')\n", + "print(mfcc_feat.shape)\n", + "print(mfcc_feat)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "restricted-license", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "specialized-threat", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/.notebook/python_test.ipynb b/.notebook/python_test.ipynb index 0e6bde47f..819d4c48f 100644 --- a/.notebook/python_test.ipynb +++ b/.notebook/python_test.ipynb @@ -637,7 +637,7 @@ { "cell_type": "code", "execution_count": 59, - "id": "engaged-offense", + "id": "first-release", "metadata": {}, "outputs": [ { @@ -660,7 +660,7 @@ { "cell_type": "code", "execution_count": 35, - "id": "level-fairy", + "id": "convertible-roulette", "metadata": {}, "outputs": [ { @@ -705,7 +705,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "beautiful-geometry", + "id": "cutting-fleece", "metadata": {}, "outputs": [ { @@ -728,7 +728,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "african-trustee", + "id": "historical-diving", "metadata": {}, "outputs": [ { @@ -748,7 +748,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "ready-wages", + "id": "similar-spice", "metadata": {}, "outputs": [], "source": [ @@ -758,7 +758,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "distinguished-printer", + "id": "grand-influence", "metadata": {}, "outputs": [ { @@ -776,7 +776,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "precious-limit", + "id": "wireless-hypothetical", "metadata": {}, "outputs": [ { @@ -809,7 +809,7 @@ { "cell_type": "code", "execution_count": 17, - "id": 
"chemical-convenience", + "id": "designed-fluid", "metadata": {}, "outputs": [ { @@ -839,7 +839,7 @@ { "cell_type": "code", "execution_count": 18, - "id": "round-remark", + "id": "cultural-friendship", "metadata": {}, "outputs": [ { @@ -871,7 +871,7 @@ { "cell_type": "code", "execution_count": 19, - "id": "smaller-shower", + "id": "fossil-lotus", "metadata": {}, "outputs": [ { @@ -903,7 +903,7 @@ { "cell_type": "code", "execution_count": 31, - "id": "integrated-block", + "id": "constitutional-poker", "metadata": {}, "outputs": [ { @@ -935,7 +935,7 @@ { "cell_type": "code", "execution_count": 32, - "id": "favorite-failure", + "id": "threaded-strap", "metadata": {}, "outputs": [ { @@ -966,7 +966,7 @@ { "cell_type": "code", "execution_count": 20, - "id": "boolean-saint", + "id": "infectious-welcome", "metadata": {}, "outputs": [], "source": [ @@ -977,7 +977,7 @@ { "cell_type": "code", "execution_count": 46, - "id": "senior-hospital", + "id": "musical-anatomy", "metadata": {}, "outputs": [ { @@ -997,7 +997,7 @@ { "cell_type": "code", "execution_count": 30, - "id": "consolidated-incident", + "id": "lucky-paraguay", "metadata": {}, "outputs": [], "source": [ @@ -1007,7 +1007,7 @@ { "cell_type": "code", "execution_count": 31, - "id": "pursuant-paragraph", + "id": "annual-christmas", "metadata": {}, "outputs": [], "source": [ @@ -1017,7 +1017,7 @@ { "cell_type": "code", "execution_count": 47, - "id": "mexican-apollo", + "id": "infectious-seeker", "metadata": {}, "outputs": [ { @@ -1038,7 +1038,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "encouraging-integration", + "id": "pregnant-conditioning", "metadata": {}, "outputs": [], "source": [ @@ -1049,7 +1049,7 @@ { "cell_type": "code", "execution_count": 56, - "id": "trying-auckland", + "id": "logical-happiness", "metadata": {}, "outputs": [], "source": [ @@ -1059,7 +1059,7 @@ { "cell_type": "code", "execution_count": 58, - "id": "national-edward", + "id": "rocky-plastic", "metadata": {}, "outputs": [], "source": 
[ @@ -1069,7 +1069,7 @@ { "cell_type": "code", "execution_count": 60, - "id": "aerial-campaign", + "id": "focused-compensation", "metadata": {}, "outputs": [], "source": [ @@ -1079,7 +1079,7 @@ { "cell_type": "code", "execution_count": 66, - "id": "instant-violence", + "id": "centered-repository", "metadata": {}, "outputs": [], "source": [ @@ -1089,7 +1089,7 @@ { "cell_type": "code", "execution_count": 95, - "id": "medical-globe", + "id": "inner-invite", "metadata": {}, "outputs": [ { @@ -1110,7 +1110,7 @@ { "cell_type": "code", "execution_count": 81, - "id": "three-contrast", + "id": "russian-chosen", "metadata": {}, "outputs": [ { @@ -1131,7 +1131,7 @@ { "cell_type": "code", "execution_count": 11, - "id": "cross-atlas", + "id": "equal-particle", "metadata": {}, "outputs": [], "source": [ @@ -1161,7 +1161,7 @@ { "cell_type": "code", "execution_count": 12, - "id": "empirical-defense", + "id": "tracked-purse", "metadata": {}, "outputs": [], "source": [ @@ -1172,7 +1172,7 @@ { "cell_type": "code", "execution_count": 14, - "id": "rocky-listening", + "id": "steady-mileage", "metadata": {}, "outputs": [ { @@ -1201,7 +1201,7 @@ { "cell_type": "code", "execution_count": 13, - "id": "surrounded-absolute", + "id": "regulated-google", "metadata": {}, "outputs": [ { @@ -1230,7 +1230,7 @@ { "cell_type": "code", "execution_count": 15, - "id": "differential-surgery", + "id": "homeless-forge", "metadata": {}, "outputs": [ { @@ -1260,7 +1260,7 @@ { "cell_type": "code", "execution_count": 29, - "id": "durable-powell", + "id": "exciting-blocking", "metadata": {}, "outputs": [ { @@ -1290,7 +1290,7 @@ { "cell_type": "code", "execution_count": 30, - "id": "young-continuity", + "id": "through-botswana", "metadata": {}, "outputs": [ { @@ -1308,7 +1308,7 @@ { "cell_type": "code", "execution_count": 22, - "id": "geological-sarah", + "id": "cellular-violence", "metadata": {}, "outputs": [ { @@ -1343,7 +1343,7 @@ { "cell_type": "code", "execution_count": 23, - "id": "possible-angle", + "id": 
"undefined-parade", "metadata": {}, "outputs": [ { @@ -1376,7 +1376,7 @@ { "cell_type": "code", "execution_count": 33, - "id": "novel-sucking", + "id": "special-delicious", "metadata": {}, "outputs": [], "source": [ @@ -1386,7 +1386,7 @@ { "cell_type": "code", "execution_count": 34, - "id": "fixed-wallet", + "id": "seasonal-consensus", "metadata": {}, "outputs": [ { @@ -1428,7 +1428,7 @@ { "cell_type": "code", "execution_count": 35, - "id": "north-seattle", + "id": "dress-distinction", "metadata": {}, "outputs": [], "source": [ @@ -1438,7 +1438,7 @@ { "cell_type": "code", "execution_count": 38, - "id": "above-western", + "id": "rental-anthony", "metadata": {}, "outputs": [ { @@ -1471,7 +1471,7 @@ { "cell_type": "code", "execution_count": 41, - "id": "choice-diabetes", + "id": "separated-restriction", "metadata": {}, "outputs": [], "source": [ @@ -1481,7 +1481,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "white-vessel", + "id": "painted-variable", "metadata": {}, "outputs": [ { @@ -1504,7 +1504,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "treated-freedom", + "id": "satellite-insider", "metadata": {}, "outputs": [ { @@ -1523,7 +1523,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "convinced-safety", + "id": "developed-thirty", "metadata": {}, "outputs": [ { @@ -1543,7 +1543,7 @@ { "cell_type": "code", "execution_count": 8, - "id": "blond-bunny", + "id": "official-bench", "metadata": {}, "outputs": [ { @@ -1560,10 +1560,97 @@ "print(sorted_val_scores)" ] }, + { + "cell_type": "code", + "execution_count": 10, + "id": "ranking-camera", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "b'\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x14\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x02\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x1e\\x00\\x00\\x00\\x00\\x00\\x00\\x00'\n", + "[ 1 20 2 30]\n", + "[[ 1 20]\n", + " [ 2 30]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/ipykernel_launcher.py:1: DeprecationWarning: tostring() is deprecated. Use tobytes() instead.\n", + " \"\"\"Entry point for launching an IPython kernel.\n", + "/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/ipykernel_launcher.py:3: DeprecationWarning: The binary mode of fromstring is deprecated, as it behaves surprisingly on unicode inputs. Use frombuffer instead\n", + " This is separate from the ipykernel package so we can avoid doing imports until\n" + ] + } + ], + "source": [ + "a = scores.tostring()\n", + "print(a)\n", + "b = np.fromstring(a, scores.dtype)\n", + "print(b)\n", + "print(scores)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "breeding-proxy", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "numpy.int16" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.int16" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "coordinate-hungary", + "metadata": {}, + "outputs": [], + "source": [ + "dtype = np.dtype('int16')" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "specified-jackson", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "int16\n", + "16\n" + ] + } + ], + "source": [ + "print(dtype)\n", + "dtype is np.int16\n", + "print(np.iinfo(dtype).bits)" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "utility-monroe", + "id": "activated-insight", "metadata": {}, "outputs": [], "source": [] diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 02c084bb8..9621827a0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,6 +3,7 @@ hooks: - id: yapf files: \.py$ + exclude: (?=third_party).*(\.py)$ - repo: https://github.com/pre-commit/pre-commit-hooks sha: a11d9314b22d8f8c7556443875b731ef05965464 hooks: @@ -15,6 +16,7 @@ - id: 
trailing-whitespace files: \.md$ - id: requirements-txt-fixer + exclude: (?=third_party).*$ - id: check-yaml - id: check-json - id: pretty-format-json @@ -27,6 +29,7 @@ - --ignore=E501,E228,E226,E261,E266,E128,E402,W503 - --builtins=G,request - --jobs=1 + exclude: (?=third_party).*(\.py)$ - repo : https://github.com/Lucas-C/pre-commit-hooks sha: v1.0.1 hooks: @@ -51,8 +54,9 @@ entry: python .pre-commit-hooks/copyright-check.hook language: system files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$ - #exclude: (?=decoders/swig).*(\.cpp|\.h)$ + exclude: (?=third_party).*(\.cpp|\.h|\.py)$ - repo: https://github.com/asottile/reorder_python_imports rev: v2.4.0 hooks: - id: reorder-python-imports + exclude: (?=third_party).*(\.py)$ diff --git a/deepspeech/frontend/audio.py b/deepspeech/frontend/audio.py index a45f91d49..a1c7df63e 100644 --- a/deepspeech/frontend/audio.py +++ b/deepspeech/frontend/audio.py @@ -298,6 +298,18 @@ class AudioSegment(object): samples = self._convert_samples_from_float32(self._samples, dtype) return samples.tostring() + def to(self, dtype='int16'): + """Create a `dtype` audio content. + + :param dtype: Data type for export samples. Options: 'int16', 'int32', + 'float32', 'float64'. Default is 'int16'. + :type dtype: str + :return: np.ndarray containing `dtype` audio content. + :rtype: np.ndarray + """ + samples = self._convert_samples_from_float32(self._samples, dtype) + return samples + def gain_db(self, gain): """Apply gain in decibels to samples. 
diff --git a/deepspeech/frontend/augmentor/spec_augment.py b/deepspeech/frontend/augmentor/spec_augment.py index 518bbe096..1c2e09fc7 100644 --- a/deepspeech/frontend/augmentor/spec_augment.py +++ b/deepspeech/frontend/augmentor/spec_augment.py @@ -64,6 +64,7 @@ class SpecAugmentor(AugmentorBase): self.n_freq_masks = n_freq_masks self.n_time_masks = n_time_masks self.p = p + #logger.info(f"specaug: F-{F}, T-{T}, F-n-{n_freq_masks}, T-n-{n_time_masks}") # adaptive SpecAugment self.adaptive_number_ratio = adaptive_number_ratio diff --git a/deepspeech/frontend/featurizer/audio_featurizer.py b/deepspeech/frontend/featurizer/audio_featurizer.py index eddfdc8c5..44f67c5c1 100644 --- a/deepspeech/frontend/featurizer/audio_featurizer.py +++ b/deepspeech/frontend/featurizer/audio_featurizer.py @@ -56,7 +56,8 @@ class AudioFeaturizer(object): max_freq=None, target_sample_rate=16000, use_dB_normalization=True, - target_dB=-20): + target_dB=-20, + dither=1.0): self._specgram_type = specgram_type # mfcc and fbank using `feat_dim` self._feat_dim = feat_dim @@ -69,6 +70,7 @@ class AudioFeaturizer(object): self._use_dB_normalization = use_dB_normalization self._target_dB = target_dB self._fft_point = n_fft + self._dither = dither def featurize(self, audio_segment, @@ -101,8 +103,7 @@ class AudioFeaturizer(object): if self._use_dB_normalization: audio_segment.normalize(target_db=self._target_dB) # extract spectrogram - return self._compute_specgram(audio_segment.samples, - audio_segment.sample_rate) + return self._compute_specgram(audio_segment) @property def feature_size(self): @@ -125,9 +126,11 @@ class AudioFeaturizer(object): "Supported values: linear." 
% self._specgram_type) return feat_dim - def _compute_specgram(self, samples, sample_rate): + def _compute_specgram(self, audio_segment): """Extract various audio features.""" + sample_rate = audio_segment.sample_rate if self._specgram_type == 'linear': + samples = audio_segment.samples return self._compute_linear_specgram( samples, sample_rate, @@ -135,6 +138,7 @@ class AudioFeaturizer(object): window_ms=self._window_ms, max_freq=self._max_freq) elif self._specgram_type == 'mfcc': + samples = audio_segment.to('int16') return self._compute_mfcc( samples, sample_rate, @@ -142,8 +146,10 @@ class AudioFeaturizer(object): stride_ms=self._stride_ms, window_ms=self._window_ms, max_freq=self._max_freq, + dither=self._dither, delta_delta=self._delta_delta) elif self._specgram_type == 'fbank': + samples = audio_segment.to('int16') return self._compute_fbank( samples, sample_rate, @@ -151,6 +157,7 @@ class AudioFeaturizer(object): stride_ms=self._stride_ms, window_ms=self._window_ms, max_freq=self._max_freq, + dither=self._dither, delta_delta=self._delta_delta) else: raise ValueError("Unknown specgram_type %s. " @@ -233,17 +240,18 @@ class AudioFeaturizer(object): sample_rate, feat_dim=13, stride_ms=10.0, - window_ms=20.0, + window_ms=25.0, max_freq=None, + dither=1.0, delta_delta=True): """Compute mfcc from samples. Args: - samples (np.ndarray): the audio signal from which to compute features. Should be an N*1 array + samples (np.ndarray, np.int16): the audio signal from which to compute features. sample_rate (float): the sample rate of the signal we are working with, in Hz. feat_dim (int): the number of cepstrum to return, default 13. stride_ms (float, optional): stride length in ms. Defaults to 10.0. - window_ms (float, optional): window length in ms. Defaults to 20.0. + window_ms (float, optional): window length in ms. Defaults to 25.0. max_freq ([type], optional): highest band edge of mel filters. In Hz, default is samplerate/2. Defaults to None. 
delta_delta (bool, optional): Whether with delta delta. Defaults to False. @@ -270,14 +278,16 @@ class AudioFeaturizer(object): winlen=0.001 * window_ms, winstep=0.001 * stride_ms, numcep=feat_dim, - nfilt=2 * feat_dim, - nfft=None, - lowfreq=0, + nfilt=23, + nfft=512, + lowfreq=20, highfreq=max_freq, + dither=dither, + remove_dc_offset=True, preemph=0.97, ceplifter=22, - appendEnergy=True, - winfunc=lambda x: np.ones((x, ))) + useEnergy=True, + winfunc='povey') mfcc_feat = np.transpose(mfcc_feat) if delta_delta: mfcc_feat = self._concat_delta_delta(mfcc_feat) @@ -286,15 +296,16 @@ class AudioFeaturizer(object): def _compute_fbank(self, samples, sample_rate, - feat_dim=26, + feat_dim=40, stride_ms=10.0, - window_ms=20.0, + window_ms=25.0, max_freq=None, + dither=1.0, delta_delta=False): """Compute logfbank from samples. Args: - samples (np.ndarray): the audio signal from which to compute features. Should be an N*1 array + samples (np.ndarray, np.int16): the audio signal from which to compute features. Should be an N*1 array sample_rate (float): the sample rate of the signal we are working with, in Hz. feat_dim (int): the number of cepstrum to return, default 13. stride_ms (float, optional): stride length in ms. Defaults to 10.0. 
@@ -325,9 +336,13 @@ class AudioFeaturizer(object): winstep=0.001 * stride_ms, nfilt=feat_dim, nfft=512, - lowfreq=0, + lowfreq=20, highfreq=max_freq, - preemph=0.97, ) + dither=dither, + remove_dc_offset=True, + preemph=0.97, + wintype='povey') + fbank_feat = np.transpose(fbank_feat) if delta_delta: fbank_feat = self._concat_delta_delta(fbank_feat) diff --git a/deepspeech/frontend/normalizer.py b/deepspeech/frontend/normalizer.py index e9524bf38..9161c1e46 100644 --- a/deepspeech/frontend/normalizer.py +++ b/deepspeech/frontend/normalizer.py @@ -82,13 +82,16 @@ class FeatureNormalizer(object): def _read_mean_std_from_file(self, filepath, eps=1e-20): """Load mean and std from file.""" mean, std = load_cmvn(filepath, filetype='npz') - self._mean = mean - self._istd = 1.0 / std + self._mean = mean.T + self._istd = 1.0 / std.T def _compute_mean_std(self, manifest_path, featurize_func, num_samples): """Compute mean and std from randomly sampled instances.""" manifest = read_manifest(manifest_path) - sampled_manifest = self._rng.sample(manifest, num_samples) + if num_samples == -1: + sampled_manifest = manifest + else: + sampled_manifest = self._rng.sample(manifest, num_samples) features = [] for instance in sampled_manifest: features.append( diff --git a/examples/aishell/s0/local/data.sh b/examples/aishell/s0/local/data.sh index fb2700083..f98e5a854 100644 --- a/examples/aishell/s0/local/data.sh +++ b/examples/aishell/s0/local/data.sh @@ -36,10 +36,12 @@ fi # compute mean and stddev for normalizer python3 ${MAIN_ROOT}/utils/compute_mean_std.py \ --manifest_path="data/manifest.train.raw" \ ---num_samples=2000 \ --specgram_type="fbank" \ --feat_dim=80 \ --delta_delta=false \ +--stride_ms=10.0 \ +--window_ms=25.0 \ +--sample_rate=16000 \ --output_path="data/mean_std.npz" if [ $? 
-ne 0 ]; then diff --git a/examples/tiny/s1/conf/augmentation.json b/examples/tiny/s1/conf/augmentation.json index a1a759e67..1987ad424 100644 --- a/examples/tiny/s1/conf/augmentation.json +++ b/examples/tiny/s1/conf/augmentation.json @@ -1,4 +1,13 @@ [ + { + "type": "speed", + "params": { + "min_speed_rate": 0.9, + "max_speed_rate": 1.1, + "num_rates": 3 + }, + "prob": 0.0 + }, { "type": "shift", "params": { @@ -6,5 +15,20 @@ "max_shift_ms": 5 }, "prob": 1.0 + }, + { + "type": "specaug", + "params": { + "F": 10, + "T": 50, + "n_freq_masks": 2, + "n_time_masks": 2, + "p": 1.0, + "W": 80, + "adaptive_number_ratio": 0, + "adaptive_size_ratio": 0, + "max_n_time_masks": 20 + }, + "prob": 1.0 } ] diff --git a/setup.sh b/setup.sh index 881fe8078..8d82038d9 100644 --- a/setup.sh +++ b/setup.sh @@ -54,4 +54,14 @@ if [ $? != 0 ]; then exit -1 fi + +# install kaldi-compatible feature +pushd third_party/python_kaldi_features/ +python setup.py install +if [ $? != 0 ]; then + error_msg "Please check why kaldi feature install error!" + exit -1 +fi +popd + info_msg "Install all dependencies successfully." 
diff --git a/third_party/README.md b/third_party/README.md new file mode 100644 index 000000000..836e002a8 --- /dev/null +++ b/third_party/README.md @@ -0,0 +1,4 @@ + +* [python_kaldi_features](https://github.com/ZitengWang/python_kaldi_features) +commit: fc1bd6240c2008412ab64dc25045cd872f5e126c +ref: https://zhuanlan.zhihu.com/p/55371926 diff --git a/third_party/python_kaldi_features/LICENSE b/third_party/python_kaldi_features/LICENSE new file mode 100644 index 000000000..f1ae26488 --- /dev/null +++ b/third_party/python_kaldi_features/LICENSE @@ -0,0 +1,20 @@ +The MIT License (MIT) + +Copyright (c) 2013 James Lyons + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/third_party/python_kaldi_features/MANIFEST b/third_party/python_kaldi_features/MANIFEST new file mode 100644 index 000000000..90d8dbcc9 --- /dev/null +++ b/third_party/python_kaldi_features/MANIFEST @@ -0,0 +1,5 @@ +# file GENERATED by distutils, do NOT edit +setup.py +python_speech_features\__init__.py +python_speech_features\base.py +python_speech_features\sigproc.py diff --git a/third_party/python_kaldi_features/README.rst b/third_party/python_kaldi_features/README.rst new file mode 100644 index 000000000..83ac9efe8 --- /dev/null +++ b/third_party/python_kaldi_features/README.rst @@ -0,0 +1,58 @@ + + +forked from `<https://github.com/jameslyons/python_speech_features>`_ + +check the readme therein for the usages + +It has been modified to produce the same results as with the compute-mfcc-feats and compute-fbank-feats (check their default parameters first) commands in Kaldi. + +------------------------------- + +The compute-mfcc-feats pipeline: + +src/featbin/Compute-mfcc-feats.cc + + Mfcc mfcc(mfcc_opts) --> src/feat/Feature-mfcc.h + + struct MfccOptions + + typedef OfflineFeatureTpl<MfccComputer> Mfcc --> src/feat/Feature-common.h + + MfccComputer() --> src/feat/Feature-mfcc.cc + + ComputeDctMatrix() --> src/matrix/Matrix-functions.cc + + ComputeLifterCoeffs() --> src/feat/Mel-computations.cc + + + for each utterance: + mfcc.ComputeFeatures() + +src/feat/Feature-common-inl.h + +    OfflineFeatureTpl<F>::ComputeFeatures() + + Compute() + + ExtractWindow() --> src/feat/Feature-window.cc + + ProcessWindow() + + Dither, remove_dc_offset, log_energy_pre_window, Preemphasize, window + +            computer_.Compute() --> src/feat/Feature-mfcc.cc + + MfccComputer::Compute() + +                                         const MelBanks &mel_banks --> Mel-computations.cc + +                                          srfft_ +                                         +                                         ComputePowerSpectrum() + + mel_banks.Compute() + + mel_energies_.ApplyLog() + + dct, cepstral_lifter + diff --git 
a/third_party/python_kaldi_features/build/lib/python_speech_features/__init__.py b/third_party/python_kaldi_features/build/lib/python_speech_features/__init__.py new file mode 100644 index 000000000..9b5ed21c9 --- /dev/null +++ b/third_party/python_kaldi_features/build/lib/python_speech_features/__init__.py @@ -0,0 +1 @@ +from .base import * diff --git a/third_party/python_kaldi_features/build/lib/python_speech_features/base.py b/third_party/python_kaldi_features/build/lib/python_speech_features/base.py new file mode 100644 index 000000000..592cb4f1e --- /dev/null +++ b/third_party/python_kaldi_features/build/lib/python_speech_features/base.py @@ -0,0 +1,166 @@ +# calculate filterbank features. Provides e.g. fbank and mfcc features for use in ASR applications +# Author: James Lyons 2012 +from __future__ import division +import numpy +from python_speech_features import sigproc +from scipy.fftpack import dct + +def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13, + nfilt=23,nfft=512,lowfreq=20,highfreq=None,dither=1.0,remove_dc_offset=True,preemph=0.97, + ceplifter=22,useEnergy=True,wintype='povey'): + """Compute MFCC features from an audio signal. + + :param signal: the audio signal from which to compute features. Should be an N*1 array + :param samplerate: the samplerate of the signal we are working with. + :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds) + :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds) + :param numcep: the number of cepstrum to return, default 13 + :param nfilt: the number of filters in the filterbank, default 26. + :param nfft: the FFT size. Default is 512. + :param lowfreq: lowest band edge of mel filters. In Hz, default is 0. + :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2 + :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97. 
+ :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22. + :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy. + :param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming + :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector. + """ + feat,energy = fbank(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,dither,remove_dc_offset,preemph,wintype) + feat = numpy.log(feat) + feat = dct(feat, type=2, axis=1, norm='ortho')[:,:numcep] + feat = lifter(feat,ceplifter) + if useEnergy: feat[:,0] = numpy.log(energy) # replace first cepstral coefficient with log of frame energy + return feat + +def fbank(signal,samplerate=16000,winlen=0.025,winstep=0.01, + nfilt=40,nfft=512,lowfreq=0,highfreq=None,dither=1.0,remove_dc_offset=True, preemph=0.97, + wintype='hamming'): + """Compute Mel-filterbank energy features from an audio signal. + + :param signal: the audio signal from which to compute features. Should be an N*1 array + :param samplerate: the samplerate of the signal we are working with. + :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds) + :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds) + :param nfilt: the number of filters in the filterbank, default 26. + :param nfft: the FFT size. Default is 512. + :param lowfreq: lowest band edge of mel filters. In Hz, default is 0. + :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2 + :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97. + :param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. 
winfunc=numpy.hamming + winfunc=lambda x:numpy.ones((x,)) + :returns: 2 values. The first is a numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector. The + second return value is the energy in each frame (total energy, unwindowed) + """ + highfreq= highfreq or samplerate/2 + frames,raw_frames = sigproc.framesig(signal, winlen*samplerate, winstep*samplerate, dither, preemph, remove_dc_offset, wintype) + pspec = sigproc.powspec(frames,nfft) # nearly the same until this part + energy = numpy.sum(raw_frames**2,1) # this stores the raw energy in each frame + energy = numpy.where(energy == 0,numpy.finfo(float).eps,energy) # if energy is zero, we get problems with log + + fb = get_filterbanks(nfilt,nfft,samplerate,lowfreq,highfreq) + feat = numpy.dot(pspec,fb.T) # compute the filterbank energies + feat = numpy.where(feat == 0,numpy.finfo(float).eps,feat) # if feat is zero, we get problems with log + + return feat,energy + +def logfbank(signal,samplerate=16000,winlen=0.025,winstep=0.01, + nfilt=40,nfft=512,lowfreq=64,highfreq=None,dither=1.0,remove_dc_offset=True,preemph=0.97,wintype='hamming'): + """Compute log Mel-filterbank energy features from an audio signal. + + :param signal: the audio signal from which to compute features. Should be an N*1 array + :param samplerate: the samplerate of the signal we are working with. + :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds) + :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds) + :param nfilt: the number of filters in the filterbank, default 26. + :param nfft: the FFT size. Default is 512. + :param lowfreq: lowest band edge of mel filters. In Hz, default is 0. + :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2 + :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97. 
+ :returns: A numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector. + """ + feat,energy = fbank(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,dither, remove_dc_offset,preemph,wintype) + return numpy.log(feat) + +def hz2mel(hz): + """Convert a value in Hertz to Mels + + :param hz: a value in Hz. This can also be a numpy array, conversion proceeds element-wise. + :returns: a value in Mels. If an array was passed in, an identical sized array is returned. + """ + return 1127 * numpy.log(1+hz/700.0) + + +def mel2hz(mel): + """Convert a value in Mels to Hertz + + :param mel: a value in Mels. This can also be a numpy array, conversion proceeds element-wise. + :returns: a value in Hertz. If an array was passed in, an identical sized array is returned. + """ + return 700 * (numpy.exp(mel/1127.0)-1) + +def get_filterbanks(nfilt=26,nfft=512,samplerate=16000,lowfreq=0,highfreq=None): + """Compute a Mel-filterbank. The filters are stored in the rows, the columns correspond + to fft bins. The filters are returned as an array of size nfilt * (nfft/2 + 1) + + :param nfilt: the number of filters in the filterbank, default 20. + :param nfft: the FFT size. Default is 512. + :param samplerate: the samplerate of the signal we are working with. Affects mel spacing. + :param lowfreq: lowest band edge of mel filters, default 0 Hz + :param highfreq: highest band edge of mel filters, default samplerate/2 + :returns: A numpy array of size nfilt * (nfft/2 + 1) containing filterbank. Each row holds 1 filter. 
+ """ + highfreq= highfreq or samplerate/2 + assert highfreq <= samplerate/2, "highfreq is greater than samplerate/2" + + # compute points evenly spaced in mels + lowmel = hz2mel(lowfreq) + highmel = hz2mel(highfreq) + + # check kaldi/src/feat/Mel-computations.h + fbank = numpy.zeros([nfilt,nfft//2+1]) + mel_freq_delta = (highmel-lowmel)/(nfilt+1) + for j in range(0,nfilt): + leftmel = lowmel+j*mel_freq_delta + centermel = lowmel+(j+1)*mel_freq_delta + rightmel = lowmel+(j+2)*mel_freq_delta + for i in range(0,nfft//2): + mel=hz2mel(i*samplerate/nfft) + if mel>leftmel and mel 0: + nframes,ncoeff = numpy.shape(cepstra) + n = numpy.arange(ncoeff) + lift = 1 + (L/2.)*numpy.sin(numpy.pi*n/L) + return lift*cepstra + else: + # values of L <= 0, do nothing + return cepstra + +def delta(feat, N): + """Compute delta features from a feature vector sequence. + + :param feat: A numpy array of size (NUMFRAMES by number of features) containing features. Each row holds 1 feature vector. + :param N: For each frame, calculate delta features based on preceding and following N frames + :returns: A numpy array of size (NUMFRAMES by number of features) containing delta features. Each row holds 1 delta feature vector. 
+ """ + if N < 1: + raise ValueError('N must be an integer >= 1') + NUMFRAMES = len(feat) + denominator = 2 * sum([i**2 for i in range(1, N+1)]) + delta_feat = numpy.empty_like(feat) + padded = numpy.pad(feat, ((N, N), (0, 0)), mode='edge') # padded version of feat + for t in range(NUMFRAMES): + delta_feat[t] = numpy.dot(numpy.arange(-N, N+1), padded[t : t+2*N+1]) / denominator # [t : t+2*N+1] == [(N+t)-N : (N+t)+N+1] + return delta_feat diff --git a/third_party/python_kaldi_features/build/lib/python_speech_features/base_orig.py b/third_party/python_kaldi_features/build/lib/python_speech_features/base_orig.py new file mode 100644 index 000000000..3efaec190 --- /dev/null +++ b/third_party/python_kaldi_features/build/lib/python_speech_features/base_orig.py @@ -0,0 +1,190 @@ +# calculate filterbank features. Provides e.g. fbank and mfcc features for use in ASR applications +# Author: James Lyons 2012 +from __future__ import division +import numpy +from python_speech_features import sigproc +from scipy.fftpack import dct + +def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13, + nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True, + winfunc=lambda x:numpy.ones((x,))): + """Compute MFCC features from an audio signal. + + :param signal: the audio signal from which to compute features. Should be an N*1 array + :param samplerate: the samplerate of the signal we are working with. + :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds) + :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds) + :param numcep: the number of cepstrum to return, default 13 + :param nfilt: the number of filters in the filterbank, default 26. + :param nfft: the FFT size. Default is 512. + :param lowfreq: lowest band edge of mel filters. In Hz, default is 0. + :param highfreq: highest band edge of mel filters. 
In Hz, default is samplerate/2 + :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97. + :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22. + :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy. + :param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming + :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector. + """ + feat,energy = fbank(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,preemph,winfunc) + feat = numpy.log(feat) + feat = dct(feat, type=2, axis=1, norm='ortho')[:,:numcep] + feat = lifter(feat,ceplifter) + if appendEnergy: feat[:,0] = numpy.log(energy) # replace first cepstral coefficient with log of frame energy + return feat + +def fbank(signal,samplerate=16000,winlen=0.025,winstep=0.01, + nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97, + winfunc=lambda x:numpy.ones((x,))): + """Compute Mel-filterbank energy features from an audio signal. + + :param signal: the audio signal from which to compute features. Should be an N*1 array + :param samplerate: the samplerate of the signal we are working with. + :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds) + :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds) + :param nfilt: the number of filters in the filterbank, default 26. + :param nfft: the FFT size. Default is 512. + :param lowfreq: lowest band edge of mel filters. In Hz, default is 0. + :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2 + :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97. 
+ :param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming + :returns: 2 values. The first is a numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector. The + second return value is the energy in each frame (total energy, unwindowed) + """ + highfreq= highfreq or samplerate/2 + signal = sigproc.preemphasis(signal,preemph) + frames = sigproc.framesig(signal, winlen*samplerate, winstep*samplerate, winfunc) + pspec = sigproc.powspec(frames,nfft) + energy = numpy.sum(pspec,1) # this stores the total energy in each frame + energy = numpy.where(energy == 0,numpy.finfo(float).eps,energy) # if energy is zero, we get problems with log + + fb = get_filterbanks(nfilt,nfft,samplerate,lowfreq,highfreq) + feat = numpy.dot(pspec,fb.T) # compute the filterbank energies + feat = numpy.where(feat == 0,numpy.finfo(float).eps,feat) # if feat is zero, we get problems with log + + return feat,energy + +def logfbank(signal,samplerate=16000,winlen=0.025,winstep=0.01, + nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97): + """Compute log Mel-filterbank energy features from an audio signal. + + :param signal: the audio signal from which to compute features. Should be an N*1 array + :param samplerate: the samplerate of the signal we are working with. + :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds) + :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds) + :param nfilt: the number of filters in the filterbank, default 26. + :param nfft: the FFT size. Default is 512. + :param lowfreq: lowest band edge of mel filters. In Hz, default is 0. + :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2 + :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97. 
+ :returns: A numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector. + """ + feat,energy = fbank(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,preemph) + return numpy.log(feat) + +def ssc(signal,samplerate=16000,winlen=0.025,winstep=0.01, + nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97, + winfunc=lambda x:numpy.ones((x,))): + """Compute Spectral Subband Centroid features from an audio signal. + + :param signal: the audio signal from which to compute features. Should be an N*1 array + :param samplerate: the samplerate of the signal we are working with. + :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds) + :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds) + :param nfilt: the number of filters in the filterbank, default 26. + :param nfft: the FFT size. Default is 512. + :param lowfreq: lowest band edge of mel filters. In Hz, default is 0. + :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2 + :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97. + :param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming + :returns: A numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector. 
+ """ + highfreq= highfreq or samplerate/2 + signal = sigproc.preemphasis(signal,preemph) + frames = sigproc.framesig(signal, winlen*samplerate, winstep*samplerate, winfunc) + pspec = sigproc.powspec(frames,nfft) + pspec = numpy.where(pspec == 0,numpy.finfo(float).eps,pspec) # if things are all zeros we get problems + + fb = get_filterbanks(nfilt,nfft,samplerate,lowfreq,highfreq) + feat = numpy.dot(pspec,fb.T) # compute the filterbank energies + R = numpy.tile(numpy.linspace(1,samplerate/2,numpy.size(pspec,1)),(numpy.size(pspec,0),1)) + + return numpy.dot(pspec*R,fb.T) / feat + +def hz2mel(hz): + """Convert a value in Hertz to Mels + + :param hz: a value in Hz. This can also be a numpy array, conversion proceeds element-wise. + :returns: a value in Mels. If an array was passed in, an identical sized array is returned. + """ + return 2595 * numpy.log10(1+hz/700.) + +def mel2hz(mel): + """Convert a value in Mels to Hertz + + :param mel: a value in Mels. This can also be a numpy array, conversion proceeds element-wise. + :returns: a value in Hertz. If an array was passed in, an identical sized array is returned. + """ + return 700*(10**(mel/2595.0)-1) + +def get_filterbanks(nfilt=20,nfft=512,samplerate=16000,lowfreq=0,highfreq=None): + """Compute a Mel-filterbank. The filters are stored in the rows, the columns correspond + to fft bins. The filters are returned as an array of size nfilt * (nfft/2 + 1) + + :param nfilt: the number of filters in the filterbank, default 20. + :param nfft: the FFT size. Default is 512. + :param samplerate: the samplerate of the signal we are working with. Affects mel spacing. + :param lowfreq: lowest band edge of mel filters, default 0 Hz + :param highfreq: highest band edge of mel filters, default samplerate/2 + :returns: A numpy array of size nfilt * (nfft/2 + 1) containing filterbank. Each row holds 1 filter. 
+ """ + highfreq= highfreq or samplerate/2 + assert highfreq <= samplerate/2, "highfreq is greater than samplerate/2" + + # compute points evenly spaced in mels + lowmel = hz2mel(lowfreq) + highmel = hz2mel(highfreq) + melpoints = numpy.linspace(lowmel,highmel,nfilt+2) + # our points are in Hz, but we use fft bins, so we have to convert + # from Hz to fft bin number + bin = numpy.floor((nfft+1)*mel2hz(melpoints)/samplerate) + + fbank = numpy.zeros([nfilt,nfft//2+1]) + for j in range(0,nfilt): + for i in range(int(bin[j]), int(bin[j+1])): + fbank[j,i] = (i - bin[j]) / (bin[j+1]-bin[j]) + for i in range(int(bin[j+1]), int(bin[j+2])): + fbank[j,i] = (bin[j+2]-i) / (bin[j+2]-bin[j+1]) + return fbank + +def lifter(cepstra, L=22): + """Apply a cepstral lifter the the matrix of cepstra. This has the effect of increasing the + magnitude of the high frequency DCT coeffs. + + :param cepstra: the matrix of mel-cepstra, will be numframes * numcep in size. + :param L: the liftering coefficient to use. Default is 22. L <= 0 disables lifter. + """ + if L > 0: + nframes,ncoeff = numpy.shape(cepstra) + n = numpy.arange(ncoeff) + lift = 1 + (L/2.)*numpy.sin(numpy.pi*n/L) + return lift*cepstra + else: + # values of L <= 0, do nothing + return cepstra + +def delta(feat, N): + """Compute delta features from a feature vector sequence. + + :param feat: A numpy array of size (NUMFRAMES by number of features) containing features. Each row holds 1 feature vector. + :param N: For each frame, calculate delta features based on preceding and following N frames + :returns: A numpy array of size (NUMFRAMES by number of features) containing delta features. Each row holds 1 delta feature vector. 
+ """ + if N < 1: + raise ValueError('N must be an integer >= 1') + NUMFRAMES = len(feat) + denominator = 2 * sum([i**2 for i in range(1, N+1)]) + delta_feat = numpy.empty_like(feat) + padded = numpy.pad(feat, ((N, N), (0, 0)), mode='edge') # padded version of feat + for t in range(NUMFRAMES): + delta_feat[t] = numpy.dot(numpy.arange(-N, N+1), padded[t : t+2*N+1]) / denominator # [t : t+2*N+1] == [(N+t)-N : (N+t)+N+1] + return delta_feat diff --git a/third_party/python_kaldi_features/build/lib/python_speech_features/sigproc.py b/third_party/python_kaldi_features/build/lib/python_speech_features/sigproc.py new file mode 100644 index 000000000..b7c78a803 --- /dev/null +++ b/third_party/python_kaldi_features/build/lib/python_speech_features/sigproc.py @@ -0,0 +1,158 @@ +# This file includes routines for basic signal processing including framing and computing power spectra. +# Author: James Lyons 2012 +import decimal + +import numpy +import math +import logging + + +def round_half_up(number): + return int(decimal.Decimal(number).quantize(decimal.Decimal('1'), rounding=decimal.ROUND_HALF_UP)) + + +def rolling_window(a, window, step=1): + # http://ellisvalentiner.com/post/2017-03-21-np-strides-trick + shape = a.shape[:-1] + (a.shape[-1] - window + 1, window) + strides = a.strides + (a.strides[-1],) + return numpy.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)[::step] + + +def framesig(sig, frame_len, frame_step, dither=1.0, preemph=0.97, remove_dc_offset=True, wintype='hamming', stride_trick=True): + """Frame a signal into overlapping frames. + + :param sig: the audio signal to frame. + :param frame_len: length of each frame measured in samples. + :param frame_step: number of samples after the start of the previous frame that the next frame should begin. + :param winfunc: the analysis window to apply to each frame. By default no window is applied. 
+ :param stride_trick: use stride trick to compute the rolling window and window multiplication faster + :returns: an array of frames. Size is NUMFRAMES by frame_len. + """ + slen = len(sig) + frame_len = int(round_half_up(frame_len)) + frame_step = int(round_half_up(frame_step)) + if slen <= frame_len: + numframes = 1 + else: + numframes = 1 + (( slen - frame_len) // frame_step) + + # check kaldi/src/feat/feature-window.h + padsignal = sig[:(numframes-1)*frame_step+frame_len] + if wintype is 'povey': + win = numpy.empty(frame_len) + for i in range(frame_len): + win[i] = (0.5-0.5*numpy.cos(2*numpy.pi/(frame_len-1)*i))**0.85 + else: # the hamming window + win = numpy.hamming(frame_len) + + if stride_trick: + frames = rolling_window(padsignal, window=frame_len, step=frame_step) + else: + indices = numpy.tile(numpy.arange(0, frame_len), (numframes, 1)) + numpy.tile( + numpy.arange(0, numframes * frame_step, frame_step), (frame_len, 1)).T + indices = numpy.array(indices, dtype=numpy.int32) + frames = padsignal[indices] + win = numpy.tile(win, (numframes, 1)) + + frames = frames.astype(numpy.float32) + raw_frames = numpy.zeros(frames.shape) + for frm in range(frames.shape[0]): + frames[frm,:] = do_dither(frames[frm,:], dither) # dither + frames[frm,:] = do_remove_dc_offset(frames[frm,:]) # remove dc offset + raw_frames[frm,:] = frames[frm,:] + frames[frm,:] = do_preemphasis(frames[frm,:], preemph) # preemphasize + + return frames * win, raw_frames + +def deframesig(frames, siglen, frame_len, frame_step, winfunc=lambda x: numpy.ones((x,))): + """Does overlap-add procedure to undo the action of framesig. + + :param frames: the array of frames. + :param siglen: the length of the desired signal, use 0 if unknown. Output will be truncated to siglen samples. + :param frame_len: length of each frame measured in samples. + :param frame_step: number of samples after the start of the previous frame that the next frame should begin. 
+ :param winfunc: the analysis window to apply to each frame. By default no window is applied. + :returns: a 1-D signal. + """ + frame_len = round_half_up(frame_len) + frame_step = round_half_up(frame_step) + numframes = numpy.shape(frames)[0] + assert numpy.shape(frames)[1] == frame_len, '"frames" matrix is wrong size, 2nd dim is not equal to frame_len' + + indices = numpy.tile(numpy.arange(0, frame_len), (numframes, 1)) + numpy.tile( + numpy.arange(0, numframes * frame_step, frame_step), (frame_len, 1)).T + indices = numpy.array(indices, dtype=numpy.int32) + padlen = (numframes - 1) * frame_step + frame_len + + if siglen <= 0: siglen = padlen + + rec_signal = numpy.zeros((padlen,)) + window_correction = numpy.zeros((padlen,)) + win = winfunc(frame_len) + + for i in range(0, numframes): + window_correction[indices[i, :]] = window_correction[ + indices[i, :]] + win + 1e-15 # add a little bit so it is never zero + rec_signal[indices[i, :]] = rec_signal[indices[i, :]] + frames[i, :] + + rec_signal = rec_signal / window_correction + return rec_signal[0:siglen] + + +def magspec(frames, NFFT): + """Compute the magnitude spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1). + + :param frames: the array of frames. Each row is a frame. + :param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded. + :returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the magnitude spectrum of the corresponding frame. + """ + if numpy.shape(frames)[1] > NFFT: + logging.warn( + 'frame length (%d) is greater than FFT size (%d), frame will be truncated. Increase NFFT to avoid.', + numpy.shape(frames)[1], NFFT) + complex_spec = numpy.fft.rfft(frames, NFFT) + return numpy.absolute(complex_spec) + + +def powspec(frames, NFFT): + """Compute the power spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1). + + :param frames: the array of frames. Each row is a frame. 
+ :param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded. + :returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the power spectrum of the corresponding frame. + """ + return numpy.square(magspec(frames, NFFT)) + + +def logpowspec(frames, NFFT, norm=1): + """Compute the log power spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1). + + :param frames: the array of frames. Each row is a frame. + :param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded. + :param norm: If norm=1, the log power spectrum is normalised so that the max value (across all frames) is 0. + :returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the log power spectrum of the corresponding frame. + """ + ps = powspec(frames, NFFT); + ps[ps <= 1e-30] = 1e-30 + lps = 10 * numpy.log10(ps) + if norm: + return lps - numpy.max(lps) + else: + return lps + +def do_dither(signal, dither_value=1.0): + signal += numpy.random.normal(size=signal.shape) * dither_value + return signal + +def do_remove_dc_offset(signal): + signal -= numpy.mean(signal) + return signal + +def do_preemphasis(signal, coeff=0.97): + """perform preemphasis on the input signal. + + :param signal: The signal to filter. + :param coeff: The preemphasis coefficient. 0 is no filter, default is 0.95. + :returns: the filtered signal. + """ + return numpy.append((1-coeff)*signal[0], signal[1:] - coeff * signal[:-1]) diff --git a/third_party/python_kaldi_features/build/lib/python_speech_features/sigproc_orig.py b/third_party/python_kaldi_features/build/lib/python_speech_features/sigproc_orig.py new file mode 100644 index 000000000..a786c4fb6 --- /dev/null +++ b/third_party/python_kaldi_features/build/lib/python_speech_features/sigproc_orig.py @@ -0,0 +1,140 @@ +# This file includes routines for basic signal processing including framing and computing power spectra. 
+# Author: James Lyons 2012 +import decimal + +import numpy +import math +import logging + + +def round_half_up(number): + return int(decimal.Decimal(number).quantize(decimal.Decimal('1'), rounding=decimal.ROUND_HALF_UP)) + + +def rolling_window(a, window, step=1): + # http://ellisvalentiner.com/post/2017-03-21-np-strides-trick + shape = a.shape[:-1] + (a.shape[-1] - window + 1, window) + strides = a.strides + (a.strides[-1],) + return numpy.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)[::step] + + +def framesig(sig, frame_len, frame_step, winfunc=lambda x: numpy.ones((x,)), stride_trick=True): + """Frame a signal into overlapping frames. + + :param sig: the audio signal to frame. + :param frame_len: length of each frame measured in samples. + :param frame_step: number of samples after the start of the previous frame that the next frame should begin. + :param winfunc: the analysis window to apply to each frame. By default no window is applied. + :param stride_trick: use stride trick to compute the rolling window and window multiplication faster + :returns: an array of frames. Size is NUMFRAMES by frame_len. 
+ """ + slen = len(sig) + frame_len = int(round_half_up(frame_len)) + frame_step = int(round_half_up(frame_step)) + if slen <= frame_len: + numframes = 1 + else: + numframes = 1 + int(math.ceil((1.0 * slen - frame_len) / frame_step)) + + padlen = int((numframes - 1) * frame_step + frame_len) + + zeros = numpy.zeros((padlen - slen,)) + padsignal = numpy.concatenate((sig, zeros)) + if stride_trick: + win = winfunc(frame_len) + frames = rolling_window(padsignal, window=frame_len, step=frame_step) + else: + indices = numpy.tile(numpy.arange(0, frame_len), (numframes, 1)) + numpy.tile( + numpy.arange(0, numframes * frame_step, frame_step), (frame_len, 1)).T + indices = numpy.array(indices, dtype=numpy.int32) + frames = padsignal[indices] + win = numpy.tile(winfunc(frame_len), (numframes, 1)) + + return frames * win + + +def deframesig(frames, siglen, frame_len, frame_step, winfunc=lambda x: numpy.ones((x,))): + """Does overlap-add procedure to undo the action of framesig. + + :param frames: the array of frames. + :param siglen: the length of the desired signal, use 0 if unknown. Output will be truncated to siglen samples. + :param frame_len: length of each frame measured in samples. + :param frame_step: number of samples after the start of the previous frame that the next frame should begin. + :param winfunc: the analysis window to apply to each frame. By default no window is applied. + :returns: a 1-D signal. 
+ """ + frame_len = round_half_up(frame_len) + frame_step = round_half_up(frame_step) + numframes = numpy.shape(frames)[0] + assert numpy.shape(frames)[1] == frame_len, '"frames" matrix is wrong size, 2nd dim is not equal to frame_len' + + indices = numpy.tile(numpy.arange(0, frame_len), (numframes, 1)) + numpy.tile( + numpy.arange(0, numframes * frame_step, frame_step), (frame_len, 1)).T + indices = numpy.array(indices, dtype=numpy.int32) + padlen = (numframes - 1) * frame_step + frame_len + + if siglen <= 0: siglen = padlen + + rec_signal = numpy.zeros((padlen,)) + window_correction = numpy.zeros((padlen,)) + win = winfunc(frame_len) + + for i in range(0, numframes): + window_correction[indices[i, :]] = window_correction[ + indices[i, :]] + win + 1e-15 # add a little bit so it is never zero + rec_signal[indices[i, :]] = rec_signal[indices[i, :]] + frames[i, :] + + rec_signal = rec_signal / window_correction + return rec_signal[0:siglen] + + +def magspec(frames, NFFT): + """Compute the magnitude spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1). + + :param frames: the array of frames. Each row is a frame. + :param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded. + :returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the magnitude spectrum of the corresponding frame. + """ + if numpy.shape(frames)[1] > NFFT: + logging.warn( + 'frame length (%d) is greater than FFT size (%d), frame will be truncated. Increase NFFT to avoid.', + numpy.shape(frames)[1], NFFT) + complex_spec = numpy.fft.rfft(frames, NFFT) + return numpy.absolute(complex_spec) + + +def powspec(frames, NFFT): + """Compute the power spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1). + + :param frames: the array of frames. Each row is a frame. + :param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded. 
+ :returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the power spectrum of the corresponding frame. + """ + return 1.0 / NFFT * numpy.square(magspec(frames, NFFT)) + + +def logpowspec(frames, NFFT, norm=1): + """Compute the log power spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1). + + :param frames: the array of frames. Each row is a frame. + :param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded. + :param norm: If norm=1, the log power spectrum is normalised so that the max value (across all frames) is 0. + :returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the log power spectrum of the corresponding frame. + """ + ps = powspec(frames, NFFT); + ps[ps <= 1e-30] = 1e-30 + lps = 10 * numpy.log10(ps) + if norm: + return lps - numpy.max(lps) + else: + return lps + + +def preemphasis(signal, coeff=0.95): + """perform preemphasis on the input signal. + + :param signal: The signal to filter. + :param coeff: The preemphasis coefficient. 0 is no filter, default is 0.95. + :returns: the filtered signal. + """ + return numpy.append(signal[0], signal[1:] - coeff * signal[:-1]) diff --git a/third_party/python_kaldi_features/dist/python_speech_features-0.6-py3.7.egg b/third_party/python_kaldi_features/dist/python_speech_features-0.6-py3.7.egg new file mode 100644 index 0000000000000000000000000000000000000000..0936a26299677b69a52e7ebdbf354713ae03fb5f GIT binary patch literal 23731 zcma%iV~{6blWyC#ZQHhO+xWHZY1^E(ZQHiZX&cks)422Bjo5wPjo5qdhZ9xxA@k&u zr=rf2&&g7f1qDL`0s?{plA}#fVQoy!9sK)*2@C{;{P!v@DM>G_Afd>pB>SH?;_<>Z zLrh5Gzj;RVgj&1KMXE4Fq_&3H+vU@Y3l;3pr%)ELF19q&L>vA1zK#eZ?jf;H5=Gb? z)b7;O3MFJ=KBjwc&7Z_N(=|nMLByRns}8lBlUuMXz~$2lW?1YrvCrZ`Z4^4!xn&nk z)5Cl$2-UUS!~UGBSXoZ(sl^OkjcV0yJZAClHPxvO3J?4-8T40nluHt4r5eW-C#W#! 
z45eQ(ab$~SHb-D)We!B?vO9)WT<3|EyOs#2O3e-?0Bi4}|}(;hUKQ%pJ_k9ZbCp?W`SaUH-vw1OoK$){Q>_CD?zh zF#isWf3tCJylgWP#<=hrQPbPDR@)AZwvG|Kab{~E_XUbngR8rGrzt|%E05W)ZM)x_Av z{IADr9bLyQ9u)t%I;>{IMEsG~RURBV;fLQe_O>9R%tTH~-Js zyTa(}-L|gN0`Z{9Wq<`2>qjr~${11;pM%RPKuUZU;59YLE5&WAGqD2qMO@{R2mtqO z%@nTOYh(!`i9J$)Ta*#0;)+0F*!@DNu603fl29w*@q)v&PgZw^4zL8DG`I$!4T>Y> z7B|uu^(g2rHX4LbUC3}GMEDc{ZX!SRTg7`=2cHqtQ*G5<2rMsWd(qWQRHtY-3TZH& z9nwnWXAZj*+_rOa$oN*ie5ec?w>7KFT^N`ll<({sv|e?Ue|5)TG>N~a_?L&RJ<;AR zq+;cTP<}zT3zCB~z~sG><3#5|6y#}csXYi50uC%qlFv`o)tv>AP}Em+6CyO$-G zz3tB5xklh<<~qh~-(D?H_x&ydkxt)tAZB&K9`&WPm*s|wq# z{BC*RsM_QU(p7lZTms5SvEx+P5#KDo4G>MuLN>LD<{=BWC%+Zz2%;ag=TI|aF-zGW zDxJsy30v}zp+b?aq)#nAbW*1{+jrzMx8DWoUFlHY4^=Ec@?xl-Hk*So-|dW2Y;U%h z;0mDsFs2k@9XLF)kdw!zef6CpuK^ENBi#xXq34!pe*z-Lr8alg{&oFJoX& z=Jit4Q3KMve8N7l#Q$AK-w%3bs6eADISnVq{f4q=l4;Xbxx>i_1~#j#A_AN-8=Fqb zLtA^JC*|wiw>i+kgU1wZ+Hq2!raV`k2>cOPy* zxGHnN&|NtZ!*(;jSROT?`}sI?a*OF`&=LqJC{Y+TnBr>?aC)kI8 zGc||XYUO*8cMlAfL^L)$#-wAoA)=16$^vE#1z%jtoy!P95(eR%@~0b{?O;&mlaJ~rVF>{$+SYcyAIhbPl#H@xQMhjLRV z)`!tybC7ubodb!4_Lg{!vRWzK*dd(dRXO(-Y|$OU$S2E=t}<2GjXpXA>OWQ3xDqMw zprrWBTPFMxGkk)C&HAkw{?5;N=~V&F*Wc;3-^+5#J(Jo_*-WqXY%EY@laB(g$&jZ-UJ!<#n=cXZ4UA}X5y1O{7ffwaMoa@Sr)0g;Rj9h*ud5qZo1;U16wb; zpjiZGa}|v*Xc4M*l;}Kd8-c;@#h6xQY8Yf`u0u^HSbpaINNoJ+UV8fw z{V3u_X7@_oBkuC*zf+X?05LN~wEo9$I`NCF0!J}?UD466rKypI=VrbxTmr25Oo>WI8>aAFiRK`?PU@V&nxTL*(OA2rE#lNMb#apH4Cu zaoNXcx?<0yqmqpa&h%Q9PgzhO7}a1b$Ml1%kfZe8HyYPMP#yHyjKG#=#j}*P&0K6& z<{oVC4;urD8kxy3nvAu{$rCy>^>7#<1s6Am2r6&L6{%>S&L>0AaNm7YDpS@xH+&-` z{M(mBA$>xe>u2wi3`6ZeZUlLH{?yO=vWmIGHNWjQWg;@1wlyVx2|M*_}Px z;w2YZ{2bAH!*&dkpOR2`hC}w;er~!{QVN#pAviE=qzMm|bE2qwcA50)$C>@!3NJ5q{Ox|+Nn{2s_FN>* zujZu~&|g)1bf6x~?0azfxQa9F54AoQS#v{8`=eNrKDLc53ZA=6IY-(av$1o@9l#My z{khCcwYS#xNvXIu^l+p)Wb?8o)}|V!XgNK3arUlB%p!Q{q}sRqn{OH2Vq;l;_jg5y z#}!yUmFWU^D-cADPM9H&S9*DZl1U=z-?D8&pVDckj5)fdEuTLycX$xVJI(a_evil> zpSRgW|$9$QR^%%Gk z7%=;lpj|H!h4`7yZsCRgN77}XU?FBVnq_pR)asV=C8^jbTZPSj@qlE7Tq#%AV3hau zdWCn%w(`DXB1`AS%P7q@yCQIGD+|{9!T|@N_Xgb|B8fqE_|# 
zY{rRS8@lLUT)7^g-uXS zfkrawsq*5=ma7p^-lQnNSaSAXnX4xkLM>a}oiP5%-->C4rq!fG76OC&NpX2|;%%-F zUrW^^dCi%$Vhn1mWObX-y*7FTt+)n6NSXVi_tht*$jA1~XBKt!!v;G2OgA9*81XA_ zY`4(?yT!ZH<5&vaEM=ao#u<#`y`_=Ub?>F&S~VKi$Js2BG^hxYl5PvVJ21Zva-#ku zG(cItJFPKk_Z%iF%^T7WI_P+u^K40$ZbVhpwr2`?2M?~d*!V~nz0fS>3`%4>)AgqZ zed!W_a1oS__&6Gu6B*wyo}vD3dF5)gqx}dJlwB4jrZ;&YuN9BqQJg&zQEN6DS^!Ob zut@d7Pl2-Y>RNB@eT&Frc>Sr@4Hj=+{`SDlg|)LB_xkWLRXg2rMxQef$Q46Db9j@9 zFf^hY2YOz8r=^X@mg1BaZ&lIq$pq@Yhm!Ie_`iwnKb&w1$+6}3FDJ~P0RkfZzjH!E zM`vrx{~(6ij$2YF{=e&}$Pg*abtt%{f}w_hyEDO>U?sK4GJ!F|jGMiZae%c-o1x{GF>FcW-qF4OG3vZlPo&WN}2aAzO#02M6cldOdR}a;l$RD!% zkI#52&nczN=f;c!X#kFtm22)*BS`#CCh#c;sx){2P_pA&*V`!(gHU8q8b?|u$Q;KriQEQ+` zZ76ShWV5%$2sqOoyPl99R4wTXc_N?UpNQf)g?}f4$$js=wHM2LWO+FCfBoTLHf7rh z%cn*@db!Vg3wG6)gohcb*n<)(t{6N&RVBZ9XGpp?HO!-$K2>>aumdny#%}cfnidu4 zkXkrW#_~R6HlSdX-K}n0=qf!T{VEsQOm#|WGD&56ZCCorAQe65BII9l!;E@eD3WBX zEWEj+GVJrdH*w`TM$iDcL{Sdn@b)i!p|e@7AFQU&pLzWuySIym3%BXU+BTj?2E+JZZqXo@+e zC*w30i}i-h4IBmip%}ejTwJpM9K~q{%P`S;4&&@Lv@lcx8F_%~%ekRQ+}5XmTBj0C z0C&y@fX5$5`vS}f&+d?zYKn1EP^0E|LIeTaoR*<-9*S!xH9b&>79$@#!MFiEJ zM+P2>FLNlM5&^kP&8T29gPcSghLPw<3mk28eX#6Oe>4gNHe^+~EL^X@#JK1JAyYyb zZua4Ht!`JZ*Z&PeLpRx;!Mn^Zld7T<4*iO}VlN*;JCzazdal=| zHj*_P9~d$yAqE#;YBdZV9n(|25%K_QX=e|bS>4Pis*9EJ1C+5Gy$PC~EkP(jeY}W7 zP`HC*)EQaf#+O>FHazIbmNU9u?24w6|@f>s}r-M_G3o98v3x(UhW zL68`HLEvrw&+OFORTV-9bsWQ(xi29?VH`D_8ku+4ct zY{1C)Za=j8Rk?0ENRa>BFj)&&Hee(gR?$xp!*t=k2#M>s$7+HHk2l36z2onR<2Tgd zwl*MEnL`He0VY9KrT3`d;fTMu%4*9HU!ge-y&_ft73maldi`*x5?sV~LncMM=0uak zi72*-eLJ`LibHEeVyOY>NDrJ4!`e3+yvIyb$}d3BZq~7REQ4UxohtxAe>Gd|BkffL zdG)_S#su`g)(ySxaTNH0Dt!@yX7Sdl{Rzpe)KJ+y^)oRn?{%~27#n`TU>1hAb?i!o zslxQ^mM2dNk9tvBAfVaY^Ramot-AR>$1{c=ldsqQBZpZ{C5Ic+B+-#p$} zYy%qCh69}L3BIEaUiZvL^a}8;M~dxw#uPN2dDqAffaD_^1>OnPlz9ojGKYkq1o)xs zHqMoe`4sB~4-txX_zlAL-rtbbMDegeuLOohebByscv(&+ci$y`RXjLGEaqNuR~qi8 zx3nT$1C*S~YUYOj7Dq5he~{<9Xx@vslO+a7bEN0miy=%HWpuK3Lkd1r$NUDU0d_?v6uIGGh7 zwk&sCc2d28{bREh!Tm)* zJi|sidE1aa)NS=dPi%XVs5poO&+&I>tMD2o?-`lKfY@__b1s~_;##rDvcX%=Nc(sZ 
z81zGMbu*HYBceI+*5YgzXP*;F#j=br+OUA{`-#Fti%V<>?;pId`bGLH>>8|5WCger z>T4=?%JJr`Q=oJX?P(3XwWnz6T=2O3!ynDKCxy~j;zDg2%yvMPqsnF880}33RJQEf z(F4)7;=Uk8U4|_Xv(JIAn3xCJ0MmMW#V3C4g?i^1e8X&8n*?$sP+%EXrH;$roVAdj z@C~@v=Wgx(x6tq1r%+swP4ENld#(SJBT@!d$Iqi%Yaz`m-B`|QDg&54_kST<1vjfsEUje(!_1#8AaD!u=iskmli zQ2fAS;bjh?V~&5$_4O+XING@P`x2zRu=_VLa+2{a^>sWF1vvQDD(x%q$HB487B(&P zkk50>%69G`627Pc6F#B$mXMH0B1Qk^sh<817Y~cB(Z#3umW)(P%^?I-kY*=~<8zP4 zj^mK5_?d7V=V}hLOV?1)^ozmYnc4z0g($gg)4s}v1SVbH-QOB!jwFLY1|vXu1boPT z&vj^YD7eCVELqt1Q{deqJ7L<(xS#uubJgfYFS5@rcvW1`XRWTT|J`vEUbqrrtTo~|${@?kci?t=d+0pbLwuo)(c*OD06#$+%GN3{-p@9)?u+rJz zY7OwPS`y9vG{gEUB62h&m7`XW?HUHK`bwo-9|sqHy22Y}$9p+ha3TJ}@7QyZmN8%+ z%K**DI`Tyd&rUMwY3u8R5+HC*}bA;i<^vFeU;4)J}z`X_L9`OALb#^ zO{XND9yB#;E;LxSR z3Y?PEX@Gd%CeLpXr!CeA6zGjgzdgESbJ*0Xj0@p6d7Ting>hEYmC*P>;oUrY=qg$( zwf~DNmfl56@h4Hi0LK$tFFvjrtMC)mlX{tbg4)MGuw4rkOM$a31`J>Iub#!dMAOE` zz5|{qC1~6#8dnU6U9^5mmL~eM0eECK5^znQ)tW^!?4T)TjkZ54EN}0$Oi)!Lq+1wo z8Yv}aHg0>*a(OEFLSb*Yt7p+Jl;Ox)qS~Z-dV1R=v?4!ML2HSGTB@wJc)~4MGOb^$ zOPIvhV*=HF1TVpc!$jb4-3y0{nuGPa+bD`jW6k`w2w#qw>@@{vqDWe`5A9aCQ%1#R z_KyOiBP%Eg22T;AfoTJ71%TLi9~Y*lg>R~stl_>?UuA7a_tK1mhHSAKLv7Ojp=dh| zalt1W2D=moh$+ds^$&PP{O}D3#MNEw4bG;)W6CFHvK;vlP%w6`BtWap9R-~0J|G$;5a zL4Z({iE@hhE2$r58|hU7lfBItC=@=1Vkv|5DolWF^A;Vj-Fnx}CfBF$Ra@k7hsf8D z2O3f0ovNPHQy;&^&9r=&s3nB~t5^xJC{pgV%>SHgrg;@{e=9UoJ7>I`T`%X`uw75X zSUu7_d%vW^S|7Fc$#B@-xY`l>oYQT$2!EsEy8;$RHQ!IG3MrcghH63@n_673 z7~_jbj4NsRg-e7>>A6NT4G?S;fy0v@uGizjdVm=HMu-8}>kGCFN zot}NSPQD6+Z+)SKTNtY@xlI! 
zBT|%_IzHZ{O=QR)>|@ZvpPc12v-OZFrh;bw=mN%456j1GL^{&gWw)~MSyXt!#iE8P z{{}wzlnQRl`#=K7QkQ7BnMw{@Pbg)08N5M1V>AWlHx-d^%y>Y zcrnpkZ0$JFA`0JUh7W{!Zq)n3Zt9enxBn>Ju9=g!ncyE!xwOoNHd`y(&LPP-;@j<= z-QD<#Bgncs;PJ33f=z-)n#DCq<+Lug^n`%U>HH&A(VJYyEAqIAjM3MwQy?#f+PcNN zYv}Ng2wz@YpgGqDsipkEesgd+k0k4RK%VbuIPQ-Y5ml*Js{yR;Hm=y}zTd7z^P<5% z%608UvDCT?aJOo9IO7L2eMr)MtT0%5^8!5Nip{pA;`iHFb2e13y2ytW?)Xp2JeLD+ zO6pk7&$aMYW!yN+o5z6=()zTROY24Z;w++gcUeWJZhtk@W#UXVS zWx2gC^N`ducN_g>hX$QG_fHazzxTes9-ah8mlWOJDH!uIG)2;GwE!G;+_WK0d8};3 zs0%AYF2527fX~0T&cc`~rM7m?XuPF4JI^j`betm5d$2C|vILSPb)JzJtPt3N?m*So zv*SFc1lOi!aUbM!S1)ou^kQB|Z|Z=-CMn86cNv-eE`xTf*ql3mTuce@M(cON>2P-^ zS+97D2ff&cMbkrAF_hyCKB=4iXvII8*XgpDlxx#@e0{R(*~Wmg63o`h{Z5b<5?=@m;v`|#Q zzohW>9BwMOrl}NyAb&@jh_K1CiYA;enq}#EPSY-Lmbhje_RYAkWyIMP@vR1J`3?T> z5*3>XRWKwZ5YW}%!&r*{C*}Arl}hXH@g)b^UzLhT1uhXsyFnFAw53C+jcq-Q1Ap_0 z8VXLQ(7q6*B2mrc_u4IXDpXdd2hqjA+TDRGc=x#fh(#WLP z0}ve9it%8ZT+|uO;Iv{57E2wMhKI#<9U7ejL3Hp5bM>N!sO5(s>O$KzT=As^)Q3uD z+X|Kk%$okE>>9KzJ-Jd$QiBy0Y?b;P&1Sq%%LTP(4nDQzYSFHgDs)e*SP5`lllsDp zI_r)cPB$y$t#`QzBK^c%tPl5aLOtgp;#=o|&bqhV}F!zQMWubr{W zsW-yEy26a$7yg(%oH?}0BP0XN8`8p$b`6nc<7c~zBQKHF(dCUdZcrybMOg}_IhxSw zpKB`FF5_c!M-xr=Bd&B84IEKJ-Z2M^-)8h@>^0b#$XT1K_8hspiL5B0mcDpCr}KjF zLBk4L8!qp?*C{)om6T3`Ztn1lNsQg&+hK*Om8Z2u{S!KStbek>C|SP=_bjj}4ra?> z1_puTk$q8rvxW2hporScPmdk9B(K2$C(oXSBmquQZk9gedUXPxaZnrbO;S`)sGUbE z0B)}TN*ebmmC;!Pu9qt#rOjJGBT(ibwScz)vcXmpUVtg{Ti`g0>7Sddj?_bucx=HX zMlb4At_B(tTdHJOSj!cdU7m#nL_lb!A?u;d&t&DHXmf8dmb4Hl!eI(r=cYk=a525{S4wNkXfLo7-b}T z1LDOp?fQCsx%jMVs}oEZY1J)d+aI(CISaxE>Vpyvf6+jLNT3hPS&SR?v)1clj5w-K zKb1XwTGK}4aR(C&1mc8e;%MoqUcMUVveR^pu*>)rldjArPkCI?BzQADcANuuSpUmL z$JHf;58;Ww))iYNegK|nl_ldJU$RZ#!zsf$wTRHZ+P1$b}#+NWwy zR*_KfdKwtbXfUL$4W>pYJ01Vao)qed^YT8%{Mf(&p%~BAbGCB>r#vpv7G!%QN<5js z4H(m{Lyjnwxwx)uJ?puOyMB>=a0wXn1qAG|=Wf8)+l5V~iXwvW{^u(A-q_76YZ3$sa zT&?cmsBBIbcYl6nT;evat}$!qb`a>cNZ4CD=|bDwlEEUfIx+&(1@@@5LLM+KvP2K~ z(qY)>0zT4T4=zsjAvkH{d|U;~?(;zq^daJUPoO7mZgN8K6NP)chAqdAGh^{?5bXH; 
z;yf^lm1Sfc;I@%5w~^?fKS^3#dKd=;*jM7GofNc|9M36X(+NU5_jEjEa}hg_ z->+pu`Q;ks$RCYi;yvJe(kKQ(-jG@?Rfv$Pfzx?pZQKEJy04btC*Sj_QzY~TnDg7# zEm!M)>SMacx9y@Y)jL7{?oTaAZrYDBN3oOChlZ3w_iN+B0~4yD%w5%BXV;mA^_vw(gQ_65@Y!BN*~dW4TAtaN1-)kKNOd$ z;l*Q}2;(G2sa&(Q)gy0z`@@Jh&c?J3I5T=OpQ$^_V=-tI%~ZXo6c8CPctVqI*O4Q? zh&jg_u_iZwU3D>q%GGGh)80QFwlz5Ghw#qg9D@0mr4lg~2*Ayo@bh%^f^N;Y%>IzR z?+n+vzmWDRXpr)}_r-+6DAQ=};UGAMQ2cb85kn45p4C58cPPW|1#S)6_aR=3tcyHa z==<-j^q)?od-}kkq`zBe)!)JMzi*|6h5#>9V^b@0L&N{>F*5znCM0@R&c9W?smtNX zi3w1!p>rxUiO@SUD2fq?2_zB2FJKgEIO-T~IASLnZB`#5_zxy@af+ z9OS&TEG-qC6rE~9!o19MZ4|;fO(n1BAqq~x5;n>e1T$skarS>{;Q9wCjZ^nV0UQue z%-{AYj{iTT|IzpK?{crBmb3aA?bbI;PNE+ud2LfVZ;7<*rJE9HnEw zDXyA&2(aB+ArYfPA`P=N@xI0La#*p z;)jvG+e#-)UshGiQzyfg>zWn%1$cR$x$0HLpJD?u#)h&DMtO?@GZ_ws$9pT`f|*~RoZp)r0d4_sr8k5_BFY6BVVgs)KHt5a5A`2q zdl2bU-<_QSXgDaOcenTSwj^8MXnb6s5R*i~Bq}E46b(4G4ak^>CVEklht7OB@waOJ z|4|K)KJ5$J31Y5 z2aeQCp&OH%X;#a}phb%nxbkB*i{oV)5&~sJ ztcTQZyS&fW&E)zvpjJ={oW2b=p75OqVlz5|1@L1o9qA}RGNd-Yz~nM|(RqjwjK3h< z_;8%3eq%AcQnbyq;wqp0teaspfI6Q(1mASlTbmeF3&(&aB`~WNfp5jI2OywrrHu^D zcZ@Ze4=QR~0;g$beBVxaUZJ zorpIEqO-Fbr$plNwlQ+8k>7xm{oTB^c=F5a2vM;dSr2WVH^A#ez7Ba@dch zTidf;l#2Eq&UAn7HmXp4rerA;WhM=7VwS9^7IcjGR|f5OqqC)#|5eL1zHACDikFL3 zPOg{XgbO5D;EqTl5&aF*K+N9QX z$?*m1ClB-6gP4A=%g0BLuA;aptr;VIK=KS)o;0|%-x*7W(}#kB1FUs&O8%sU9dSBW zMSe)(v~L`yOo@lh%sBY0E*4-wWXrX|qdNq~%c=$10(3Zk{VDOIDO=MKbysCnN2aD=F zh%P_ZY!QoZ1u0)bSE9j;%vKoNCLvm$aSAO)Z;veqe(hc2IzpXAa| zhAt0~k4!_Zz*6rCL9(+fEa)VgSp1NtZYwD6-*@ehq5yXUW~pJZU1_^c7}}Rxxg^#1 ztBDUhQ~jAldq9DL|2qdwh5q#?$t=lgfN9NjyPZQUx3QWFeqE23|Knj*Rh_?I94C&; z0EX0nEn5fRRS9ejb$g8+2eT+AUKik-u7KLzzRo zBFbc&; zj8@&qh9HnyTqL9of&Riv&}l2AYBE%Kf|?gQnjgp0@hFWdelkWF6mh0bAkoQMIfIAT zbh#sk+;q8P!6x@07-i&xrQ6F>gWFm&7ZOArJR#eaFIk?*l6lt{jK)r2Fm{N@0V$;D z&RJ^XWZV@BNlsMe-vSgJ`uRAwbMF?AO#*&Qx5?Us!KfCFsr(>;!qf3B1V%QrTtE%*~ z6h~>jQ-*wv^?Lse$~B3c@Yg&YQ>V4IN?)h zSVc?psTE$Wn5vzJ0HCqG1S^K2tQYl{AVIO^HQNd_#P^3y?5raGU~xvd+Rs>2`aSNB 
zEVRy5JKP9PmCEsrbS@X4ivKKs(u3G02~;Y1xX7k8az0{$)ENz>H|M?FTjZRjydXtdfdu*g}J-E=MV95<(v4M#ZOQ2wT=4of$tR9}d0HP@Oiln^jBhm{)S%5C z@KpfT&%{K3=0yf}b8SJeeO^TD7(Xi9{RxM}cV!15RIeC>FlYqR*VEqT!eNO6zKaBu z?76Z5mU?zt&B)J`bbq;T2KGyvGZZMbD#8UpmYtrFmg3u)bUAUwh-P&ldZ-&*2)@rp5+tj@A+YKM0BBMaLRaA2L82;lHB zM4X0GB>E+z%3m?Ne(Q;mqZ<)UXl`vXW#_4|Y@t6S#I1^EpY5}X;rBQygOe8;(jn2h zyodxzbor>gMyHj0Ns^GZP5AVNr>ufqIz%z`BRuZa|#Iq3)^ zGYKlv;D;ePAt3_4L;VmiHbml4`z@y!85eZ>b#F&ONobc^Hfy~Ax}LE!<#6roBiIa4 z44W_dcT%sdIZ052*S5oD_6`ZjqODv^Mn**{JG_)>)>X0jB&gny+s2@2UCx;dQkuK) z&dQ9;6Ya6@T=ekd8oO0?MWfV`DTvEmp6_Hmzqx6PdcV^N)P^N{yNyARlPabzdA#ve4wqzigCkuF?MDshC* zEXQ#RtCg44A@>jzs4nYn@Y-~fPc+0+&xy73$;wcG&$OdVq6%;A2eLk`Wmu+6r?dp) zYy~*(i*y-(axjh9?>p2{r}|rRk~H*8XcH(}Bq*@WwthtdkQ%-HZzf`i0xgU)ObxJI zj7TT#5zjUr9zKs!-dH%rD*w?RBcha8htD@qMY5Q2XdUGWWG5@?U9mdNoixh~JcRro zOA6jQoESO^_39xRARZ`!i{v(l<&a_Xu%eHozDPeCeLek3| z`Aj8mOJHltW~Ki+XfgfR6!EF>4A#d)POH|NUk)2fu0RknxxZwu^M9+%jRI+ zjmU;p|H2IE&=2k?RT2dY*auQa3jp@0bPX9`XI;LGFoDF)Q`=U}F-Jf0a z@=RVo?cAX@+3^=~o~f6Hk+_BlM60>EhOxSasm~3Sd#HRvkB3xKLMu+|2IZyf6>}@$ zMf4Tc9~54ojzvFu#8P_1h9M=#dQ!XhxJA0Au?^HaUE{fiGOW-n8lPPLncN_~SpKOv zksX3k1J#SMHYYzf6wFMQEoi(w-QaMM{pQZWHliwK+U>6|aPhaZGQq?=O@a5DFGK%l zMqCer4T%;z!T~Ymf}s1dJdZitZ%^|-Kxq1HQk8rkEBij+=vqN!YBz~e4HFna&kQwlXqlu)>}N6)JYwLkfre1DawcCYZP=p%0R zw&A&Qw{1>wa1Q0}4^*JD?oagsbl?-T`A)NbqFWgB@4y2DObE#|*njR2yrwm6xSd>) zkdq|hSSvG3N`{tWr4~+>-p6g$z1R|Wlu4}6FV=sedf7^bWQPf~sN82BL-ee^_%IaF;+-)5 z%qS{F`t_b+)wJZ+!6cd+K`5ZQry!G5pdlQ#@4AA?iC-tK`Llip3%&Wmli@cCae{T| zwOkebnm2n(`gu*jU{f=V;QKo?-$!OSM6o7{RL_W81ZeX zYA7BZz1|5P*$Z7XWROdC*$`MB(<~kwE9alvaA4V%MrESDLGW7es9>cho^qQ3;h|kP z^NQ^b%(3^aI9vtyF&2j7=6Vt@J?5TlQR+uaoFkJfwaLRnxAky%f!2g2mq*hX{eTsW zk5v#0Pb8Iu87fQqj%55B3s$T7IY}7gg{q$sgWF}P>}&<9k3}k7!H)`U zJwh`MYx;8p^35?~S!|WnIa$D1Y;E>q&h3~SXLOSwCbLm-3Y7A?fJuI`F#TZQ>$enZzZFpl!m!<^VM1tP9t94o!E zj44W9gFVz!)Obu>QPH@^NS|of3P_t@$=aZm1^lBtxhw4n1Ox3EKl9rYvRsL)DB zmtBjcmQAaV77u+JH&Aer%@`FFZ`ae*#Xc&D3$Fu`uzn0`Y9b&x!42FD6lgK%FOyLs 
z`9K`OK_u^c(S@i?VF@qzQoQ;}6^g7yK#?xV^d-QdptmgxAe4(3#E$!eBVjBprtNa( z>qW;8u{NKQ^X$`c z10OO!4gX7J z^h!gHW$9lI|5L!ZQh%)P^3?^*(K?<{FRKw(%m4^-3NR zStAwQ3zA0e8-C5Rq;J`IDc847_uhAShiV|p;tZREieE3}O_@7({0fAiVDtb&!J3gX zTpTn(EoVV&eNk}a40r}4be4{o;+K6e@o7vOv&2tCV^}9<^>H|ZZhGq@i)YWm)CU=oYbS?>c`z|G-Wp$Qp; zFn@3CYGK*72L(ZCLEwa(L-e{P6!~z6`~iQszX(74iFjHsxP5}pbdp@P&qmP@AjXG) zcsiLTpszDZlM`Bt4VEMP375tT2gZ%@s=h2JyEUPiLxohWKIxImzG&|#4gUFdGlFJ( zgqQ!kZTEB7#>Z6U{Ox6w5AQ*@T*7y$?094<*?X0h|2Exw*7YM}@JaOV#_w=+%WH+L z87`ByNwCBc(;h+~ASpgY?W>XX#ZP6kg=OV^9}+E1$4#Cory8=dQ)WSx?dVt?U;f5d zytO_zS<)$yq&DvA2f;>oE@Y=BZRbiv%Hok6sDNJTkVKf2AhtaFhsKrR>1&DOxFcEFwOU6lS$ejf%eJlbDW)Uxi~uGu0zt zhjTS;P(&*xD$z-6gG?Gh_bAzpb!jX;tnn*>rFurDG{_lvh33e7r{?#b#%fk(x17wo zn1osHG9%(tp)*aonR9Y3YUB4?tMjFgqbRgaQAa6)bt*SRbUQDrjB&oB z^FFgaazoM>HD2uMjC>0M3WiKwC_gYO{E%j7Wjc7vk1LKP#YvF$4N<9kX*WjrJ_2|L zaTHn^JNRz2!EUkqE)A%D6tHsMnz5p6=?I>I|pF0sfg43E+>($2D?f^aI{Re=R7FWC>*mk>(Tcl`WDL**B9x@l{)Cv*hXB2>_X~ zBYKEV=*w$CmD6(Y+?Xi>hqQ3KQqZhd?ZF4zO4aX&u(TG3-*m!l91lMWL=%4)_tI`z zv!q$4rzmC1L=adaN-sAJ#OBXZlNA(l)EVJu6h46~P~UYtD=R_bX&@=yn;RzM?8Y^= z(4}xEA*4ddsCs_+wC%+uH4QzDjtL=ktA`4U&or#jX^NSC(UC$6X5Tb*)Sa1yaYm-m zrNF;OSG*KIaXAy5KHZi1`&iH9klhW>WYHf%_<@z&4>%t!s{i=Tp{9HyD`i7nVQ5Wx?sTr~BZ&O;KKU7~(PI8r?j&xrPKejXznz03C!UNMk$znY-FFc*Au z1lwA%Y>>yna^I^b?`S@wm zh~r4sSRSG-ELwR)7f6;epod2qA+ZFn5}ZS!uKALYSc?bg7-uBY_E;pq8!Y-&r&+%l zVZV7JlqUxM7I_F!yX!Cd0M^&$ln5w-o6h%S3{HV${4}mrBR)0}mp!g-F<$;*#J4n1 zJR-*d0Osrx_jwCswWhrGZA;8^YyancZ&;5!vir1RKbvX}i;=_rcSy;7 z)1#bAFydZ7$6Z&<*NHKWb8W0kEn*e|16x>4U~4iQMix&Yo{~q;5i0_bad&8n28dX7 z$2O{)oi}$#R4Fj9TYK*n59h(1yL{TPOWgq)to44&G97#hL3+^RJ5pU%enlzqDcLu2 zb!PZ%OjCwj?l~8w?x)NINec*xi1Ka*Ven&*5LjF$&eBpqTHi4lC*LDBb{&MgC-q-J~+f$cyx*ZD76;Et|oQ)$4 zq2LDY;+6~=S*>BP62<5CZE&_~c!kdT`=|q+Y`*t5zg>Fyxs_9|Lt8|^fP)($frESe z&$e=Z-MPKDk<%TFo9QCjh)1CtRc+9n#%j~sV2&aOi-@@(kVn`Hr-AHp% zq0ht|c{=){qJ1e-5b$!o;mmvgaL{&4^Wf?#eJ@TqOFLPd4bY5mW76$+%t8uvT4!@V zwGc>B8Pi_VsN#pZ>=AU1UuZP6M7`s#Zy7p3GJf)*r?lZT;;ZQo+z%^_9XQ|V7WvQB 
zn!>Hq#RuaKnYhhdqNp~Qz8EYrU9d!JodbUG`QazCTGym>P07*gRSmj4(&OAPpa+LA zYMWJ;GWJpK*o>~&$8|!$t|tL?Qfw~^B@{8I<8P%{;f^Avv8eI#iG$l=l@J56d3ng1T}=zVYQE zaWtB&bN>iB$$U8Hc((jpcpf!Ko6l+PRJL>lc&J)C+}X2(Ej?*WX%E(7QM7A66fRrx zhtjLB9R>%S;$>Bq4I1{Z_Sn6^FJ$_#*5t*z0}z=)3n}eB+|8k|$pKt~E3gq|;>NPa zN4!I%>+VT7G+D~_Ygkw}=~=ov6L zqeaY&{j!;{=-iNMU#3bWCRU}hocufj-ZBUG%o$sQ$5k0RW!3HMWguImi%cm>!(YkpX(qzRA6!d%) z4n3EF>}{iAI>O3NlnrxEbW-+BNVRG|gQAH%X~w-6GUV;vYA5#5VuI9Kjfmn{dnMIB zL4DZ8G!Y19WvkWI@tb4%Bht&I*9uXli%5Nf)s}S^JLnj;i@dinKeDfBobgRZba@f= zn6!Tz3!WzB5?7Vuhv*0W(8`FI`wl9%2A$#7Zco1J!e$zq$2-PxxPAbZd`HGaQPhfO8aOLQ3AE)7m4Rw@kgf!R*+8LBgvF^o zf28nCNO?>`foFSIQw$L8s61J(?wpGS9GkPDb`PY<4rKSAIdJb>vW;`QxJELtz+8lP zaY8HD79@iLfuYn{i=6Vi6f4?CF*efI%;ic2ntstCS*pxxUt66);X^ zP&Ih(Tno<2;o5#wCYIJHS_G=dgC|@E zO)V>X0W%&5%p1soIl8n}MbF^eu7W&-IZ&B4>x##*^ED|vmPvSUH4D9VFC6vVc#*49 zi3<*F;AJnCiR!}%%s};{hf*lPFsC6RQ~D(yj_>82qAwH+WSQe31nb? zTvK9P)ArU70c+8=;1Q<*rq!FPu5-$8?J46AiG6j5v!TLcv5=XRy z(wel!k!K8Tz8w|s7#Sj8)~^*vgTb|eTW<)A`s^JeCJVCMHK~Cpmv55HD*gGO@ur*> z0&#SbG?{5+@a|+N9w}32D=0#^ORvvp)cV0G2?->VdRCmZ>4_SlbkNY2X7VB~p|=Lf z^q};0_axHlXbNK7E|{TodR52beOu5g$5?B(Kl~_Zo6`BJ4Yd7h7*sqlc*G0n$9k# zcGo}a`=U6(WOnQ$ahK>2Ho5e4a7Ak;m|SqXQlmP2Nvq{Yuoz{g>|N@JhB-Z4$_h*gLz%0-rVtIP!tdkJXle> z>TH?of@P)3N)>i9Y`L97{1pZRqMla>Awvb3zNEnushb#yP?gpft)GsZ@gaZCZc*x(ANCCQCnPNW#+xGg={P$y(u?15^8X~!A>438aur}>RkwnX|Z)ITYuAF z5ps21`U%wJxidc9$;M^q_~YucW24elTw`o8Pcb5qb0IIRhte+Ml8QyeXD&88f>yWClk@P| zs2@=zzLk&VTsw1vo65n}uez2DStv|_8fs|cX|+JR`E7d#!=XX4 zF^Xt+CV=+3kAO8K$cX@WKCOD28S149HnAz-4z)!tfOA~x6P8JoMW!|O!X!>D7vm7q z?4o-1Y}wNC8(D-{br9Jz?Xx7TXuef4{X8{yH;S=_fdRgfR=J+gjF3?~oXAIW|q>A}9h2l_i zesm3tY7nrg3|kfDB4|^>sT(r;-jDmq<92E45Aa*IB_1HkHOBGl0!b4%ytjHdLK8uj z;u5lrMGcT>iLkvArDP%C6X6Fbyu@l@N98XcRYG0%=_F+CKMvPX8G~Wxg{IsKf1F zZC0ihDtaxkdVyMa<&#fQm!;e#SRX*|?J$J3Y+1t81321JNQ%~*zGmYZ%xpS-)(b3F zuxe{iOv`K=_yVZ6wU}{iIeZ4J#&G%WsU;q^{{0bi;j7umAIAo`auJqzgBd^aDg-D^ zT^`_&+KmQ_9l}%0*U}i@t+0F#wnYf1t6Idj^zH^=HM3`p%fh26tNz2+Jz-7WK?kCF 
zqcf^3-}(8Z1Qpg@uJu;pO*23mg2%rH*nSSyV?EzLB;ElJeD3&KV*k}({l{aHyXl%t zo@l@1#%%^5-22(;2tHTdohG!Go`Bjyj;3CoahO(@Ynq<=5sk)(R`ra{EAP^o^zu?P^b2lX3f|6`P71f4DMkf1X-pyyFI!#ru zRojD$?u+YpHz1{v8_1xK+j%2KGks8?-P>UODvv})HI?at?P5WY@~0a`JJKI~f}c*J z`IH=8H`2S<9QurmLx`g$$_tgw@l8TsWj1%|JdbTxIY>Hy8~}cRn$Sf##rstb`HQq3 zd*+M{HA?%Eim~vwgVCDni?UE;N(xuwt)}s|wscok!wU+moh}0@7ty`wm@+|M-7G|+ zJ#azWR!|hRj2Xl8C+|X(yz3!CR9Je+$SN|fTkSZM{M82gb2m5FlHCo3XZhN1LiWa;{O6w*P3ODi_(T|-oVYBk?>13MCfJCsC2SlksxckV zgZY&w7N-f%Aps6trtWq*h*cFB8usrq0s41oP4uVuvJ;01#>FrH~sXHOj=}q zFovEeXp6KjS;llFv7Z;aH8o3{$LCQ^WFx6>(p&l?`e}1wl*>s<{*WN4NjtIzoGa=W zV2VQRdpx-uq2Av42EGleo=f4kb6$vVTKE~pDKD*95H^I(@ra+`rMpYWT0K+GNI*P~ z4KLBq0;i`ZbW1xr@mA)2C%-zDf-|#%vQAhWleZkhQJ5CeSJm3KH}M-zC=W`pRRO-k zrn`CqUXhWn!B`r!c%^Re--ag6j`p%X>SYExX~qaeLt2K`WMyNSp_s0lilv3CU+B_I zj!#N*WH|V}KVtSXI5y?vlS1@`^J7Ycs^a;L?N+VE28?9lb$7A+w|sjq+srBbfo8cV z@I>B+1T<0tB>p&!0;f!5(4AImyOu?wZbwg>hwrgGapS;k()=Fg?nSmXA+*y+IhzGW zT#nv>7jt>tI~b`Fs?NB(3=nZtwedEej+s=QU`|fe*kfJc5adyM`E{@#XlM4OS1!Oo<-}b_laVZE;(;RTV)jk@veE2*qGzhUZ4lOOFg5zaccx9&sM{On58A`h*GghA}>4#ogG3e+J=I4_(1_5*6F#6cRlCA6{1 zcw&BjC}vJb?b{ECNy>4<7*)WBVa>Ol|Qt}B6_ea;qOLZtB^r%m|YBtM^rba=K->z!io)irm z0;d`|w?6Lgta-rx)LwhBu*DYI{$rLEW$O8tmWRCvRfN}1O9=iteA@7sZr7`6X>^{K`K?V;PTi$813$6 zp(O1At&vdzj(L&2w#T} zHvZ6yoT~cw&hf$vt^^(+Q;5J-Q#JSU`_o~n*TH2euG2le5VAy`#TwY*tdZCuC^tGS`kB&S}CDbZ00%Z4^TmK^~eJQL1F@O&f92DpMd3P^-1%t2;aA4X^8(g zpJ*Ptb*nPgO4jS}(_&DrNs@m0U`&+7oMp&#=wbOeE%8RhIX+| z_VT`Lqr;dbJ$E-7KrTT*X=B;0JO*JdKU(W|yOd&QvH-c=Q2RC~^{G;Nj;TPd){2LSbwyP=Rz6rA=c>^_1GlWS zqVOx`IjYJ;zZ3LOM#yh`PAMh3ybnB0}vdWtsA&kh1>&$(N!&uH2s9qjO zF9DdPmrd9<;<|=O*gTK#3A1Sl=wl~2QCaqXIl7llQrTXMxH{Y$W5k&Me%=wRBxSZ$ zo<)xh&jd0GQFL779VC5kkUQCKmn|OWLslKLCr(MVkl~B3?1O%mW=Sh(y`*Hl+$z_2O(nD?oNsetHCTlN%fff%u6B@Ej zk&!2^=xR;GzVNODbb%UX0b!J3I1)FvWRjh2YV23&GP)O^*Dt|SvJ}IWLv7Tpv#i-x zw7%6FqNYo@Y2g=64)C7OBPC&ZWu>VTQ0kghjue9~XzR9wLV{Gu7)qdC9=o)>? 
z_%Kl+X~l?Qne?Wn=w_*nIEzfzWK7vkju6g4?0Q9_M_@CUC2td_4l_ykX_@oJlVDkL zYfD#80L;M?hiCZh4GLbcQ#mhuJ&7Qj1V74a>O42Z5!+yy$G*OaLagBC>oU(~ zwx=bzNyk27miA9;aLzYEbV=%`&8OrQr~`;`Zq@H?;1ZJ=st}0mxeIHZ|(errv1JA zPh{F(6-f<_ph)0@+jc@#~y#$rQJ8Wf4K6?=%wU8GWu(c|NW@tzRmru<}aHb brT@m}&yR$J{8JC-?z`~-4z60~=fD2}a!c$a literal 0 HcmV?d00001 diff --git a/third_party/python_kaldi_features/docs/Makefile b/third_party/python_kaldi_features/docs/Makefile new file mode 100644 index 000000000..0672ce907 --- /dev/null +++ b/third_party/python_kaldi_features/docs/Makefile @@ -0,0 +1,89 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = build + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source + +.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest + +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: + -rm -rf $(BUILDDIR)/* + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. 
The HTML pages are in $(BUILDDIR)/dirhtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/python_speech_features.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/python_speech_features.qhc" + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ + "run these through (pdf)latex." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." 
diff --git a/third_party/python_kaldi_features/docs/make.bat b/third_party/python_kaldi_features/docs/make.bat new file mode 100644 index 000000000..a20d0b0da --- /dev/null +++ b/third_party/python_kaldi_features/docs/make.bat @@ -0,0 +1,113 @@ +@ECHO OFF + +REM Command file for Sphinx documentation + +set SPHINXBUILD=sphinx-build +set BUILDDIR=build +set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source +if NOT "%PAPER%" == "" ( + set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% +) + +if "%1" == "" goto help + +if "%1" == "help" ( + :help + echo.Please use `make ^` where ^ is one of + echo. html to make standalone HTML files + echo. dirhtml to make HTML files named index.html in directories + echo. pickle to make pickle files + echo. json to make JSON files + echo. htmlhelp to make HTML files and a HTML help project + echo. qthelp to make HTML files and a qthelp project + echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter + echo. changes to make an overview over all changed/added/deprecated items + echo. linkcheck to check all external links for integrity + echo. doctest to run all doctests embedded in the documentation if enabled + goto end +) + +if "%1" == "clean" ( + for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i + del /q /s %BUILDDIR%\* + goto end +) + +if "%1" == "html" ( + %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/html. + goto end +) + +if "%1" == "dirhtml" ( + %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. + goto end +) + +if "%1" == "pickle" ( + %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle + echo. + echo.Build finished; now you can process the pickle files. + goto end +) + +if "%1" == "json" ( + %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json + echo. + echo.Build finished; now you can process the JSON files. 
+ goto end +) + +if "%1" == "htmlhelp" ( + %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp + echo. + echo.Build finished; now you can run HTML Help Workshop with the ^ +.hhp project file in %BUILDDIR%/htmlhelp. + goto end +) + +if "%1" == "qthelp" ( + %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp + echo. + echo.Build finished; now you can run "qcollectiongenerator" with the ^ +.qhcp project file in %BUILDDIR%/qthelp, like this: + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\python_speech_features.qhcp + echo.To view the help file: + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\python_speech_features.ghc + goto end +) + +if "%1" == "latex" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + echo. + echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "changes" ( + %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes + echo. + echo.The overview file is in %BUILDDIR%/changes. + goto end +) + +if "%1" == "linkcheck" ( + %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + echo. + echo.Link check complete; look for any errors in the above output ^ +or in %BUILDDIR%/linkcheck/output.txt. + goto end +) + +if "%1" == "doctest" ( + %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest + echo. + echo.Testing of doctests in the sources finished, look at the ^ +results in %BUILDDIR%/doctest/output.txt. + goto end +) + +:end diff --git a/third_party/python_kaldi_features/docs/source/conf.py b/third_party/python_kaldi_features/docs/source/conf.py new file mode 100644 index 000000000..727fc3275 --- /dev/null +++ b/third_party/python_kaldi_features/docs/source/conf.py @@ -0,0 +1,202 @@ +# -*- coding: utf-8 -*- +# +# python_speech_features documentation build configuration file, created by +# sphinx-quickstart on Thu Oct 31 16:49:58 2013. +# +# This file is execfile()d with the current directory set to its containing dir. 
+# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys, os + +import mock + +MOCK_MODULES = ['numpy', 'scipy', 'scipy.fftpack'] +for mod_name in MOCK_MODULES: + sys.modules[mod_name] = mock.Mock() + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +sys.path.insert(0,os.path.abspath('../..')) + +# -- General configuration ----------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = ['sphinx.ext.autodoc'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'python_speech_features' +copyright = u'2013, James Lyons' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '0.1.0' +# The full version, including alpha/beta/rc tags. +release = '0.1.0' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. 
+#today_fmt = '%B %d, %Y' + +# List of documents that shouldn't be included in the build. +#unused_docs = [] + +# List of directories, relative to source directory, that shouldn't be searched +# for source files. +exclude_trees = [] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. Major themes that come with +# Sphinx are currently 'default' and 'sphinxdoc'. +html_theme = 'default' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# "{project} v{release} documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large.
+#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_use_modindex = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a link tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = '' + +# Output file base name for HTML help builder. +htmlhelp_basename = 'python_speech_featuresdoc' + + +# -- Options for LaTeX output -------------------------------------------------- + +# The paper size ('letter' or 'a4'). +#latex_paper_size = 'letter' + +# The font size ('10pt', '11pt' or '12pt'). +#latex_font_size = '10pt' + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]).
+latex_documents = [ + ('index', 'python_speech_features.tex', u'python\\_speech\\_features Documentation', + u'James Lyons', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# Additional stuff for the LaTeX preamble. +#latex_preamble = '' + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_use_modindex = True + +autodoc_member_order = 'bysource' diff --git a/third_party/python_kaldi_features/docs/source/index.rst b/third_party/python_kaldi_features/docs/source/index.rst new file mode 100644 index 000000000..93ec5a4ba --- /dev/null +++ b/third_party/python_kaldi_features/docs/source/index.rst @@ -0,0 +1,54 @@ +.. python_speech_features documentation master file, created by + sphinx-quickstart on Thu Oct 31 16:49:58 2013. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to python_speech_features's documentation! +================================================== + +This library provides common speech features for ASR including MFCCs and filterbank energies. +If you are not sure what MFCCs are, and would like to know more have a look at this MFCC tutorial: +http://www.practicalcryptography.com/miscellaneous/machine-learning/guide-mel-frequency-cepstral-coefficients-mfccs/. + +You will need numpy and scipy to run these files. The code for this project is available at https://github.com/jameslyons/python_speech_features . 
+ +Supported features: + +- :py:meth:`python_speech_features.mfcc` - Mel Frequency Cepstral Coefficients +- :py:meth:`python_speech_features.fbank` - Filterbank Energies +- :py:meth:`python_speech_features.logfbank` - Log Filterbank Energies +- :py:meth:`python_speech_features.ssc` - Spectral Subband Centroids + +To use MFCC features:: + + from python_speech_features import mfcc + from python_speech_features import logfbank + import scipy.io.wavfile as wav + + (rate,sig) = wav.read("file.wav") + mfcc_feat = mfcc(sig,rate) + fbank_feat = logfbank(sig,rate) + + print(fbank_feat[1:3,:]) + +From here you can write the features to a file etc. + +Functions provided in python_speech_features module +------------------------------------- + +.. automodule:: python_speech_features.base + :members: + + +Functions provided in sigproc module +------------------------------------ +.. automodule:: python_speech_features.sigproc + :members: + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`search` + diff --git a/third_party/python_kaldi_features/english.wav b/third_party/python_kaldi_features/english.wav new file mode 100644 index 0000000000000000000000000000000000000000..bb28291f69123209e6b7cc46b584d0a1f2c7bb16 GIT binary patch literal 35824 zcmW(-19%-<7d__;F5;$cYTIsV+qP}n#;aa!o2kuL+cwiSxVd*`4*u2u%hxtZlY3|O z*;sq+wWo2d>eau_A*4m6=GD6Q8=5Jc5JK@Wu{=H*M+hU~q-&?4oi5>L{(fI6ljx)@ zanxK@n;cUq$Q@E!`BhD_L@mL0SE}OVt4d8ukk{(B(nv6gz(>4FQenhH!tlHCN+(|8 z!XK`La*+^X;Dg~iLHLPC(w9^u|ESsOf!akDkYDNoergywNH&vURQ@hC#(vHNC8hF*kq%hvICE2Ams!?Q= zDyvG8HTcO2B!fy&8ObZzQ%Us=SN%xcQ4>f7QjR=VSJY8*OO;S7NqsUFcie>JBfVHj zZ7aE?dMQFW&~TDjja8rV8ih!IvRjQIQ%P~so=hYC$T@Y1d?sI1N7$q+`Hx&z$JBdu zO)XKu1JzHKP!r_AjI;tT_Nz#xH>YBWxmXeyPJFc#)>OsTEH93|Rr*&ycBfHj; zOrtJxLvs^aM0wSltitszz-gS9E!bx*MjaAA!?9|nMk8f%X*}n z+9}GbIb;pFsUFKysy`h>o0I8k5~)Snk{39Ik?IXRr67Jcqk4&ZyrqiL!D@rdODd?R zsw(-0Ynn}Z(m!e|{IWE;Kr54+RKjXUXa_o*bz&uyBXX!^sMy&H z*|@(p#6x>iliH*PeM_HeK{P+kcmq4h4$)8=LfQ}mSN}lPRpm(*_}M*~9p`;it%fJp 
zAaN?3%zzgy!#Q^)!|8t{kyNMm$!W5JoTpbwb$LP_kr8T*tfj`tukxMhjBD?ys*}8G zy-Y)P;Ws1Hag|j?lj$UjDy*KtkE)W~q^G(=O4FXC529#4sZA%7mujE-iOBh*zN?w4 z0KB*uu~ZSdQ)O0R_>FtjLJnw6&XT(%GfhV=Qis%`iSX@_qyRkV9`5Rjs)W4ol{6>Wkpaq+ zz3|Risv5o*N4CTA8X160a1F6@h2$Vli4WhsjjtWmj^sjCsEd=@qi*3Gmf*@K!{4*O z+g7NBD!-blE+K-_AoE;Q0Yo^(DYsC$V3%?@$7#6NLZl|?NZe!_@=Qxo2Dzj>?xQ6# z(pf}%E!go5IgjjAg>--q6ed4a5h8Jy8P!s?84*|x|1MQ2;i32CeYrrbLMD0)TlXMC z$t(3qWkw#FLu!+5Bs20=ZZZ=mm>1FVPQ4{3Nq5Bf3)xByB>&*Vi>c=7iJFOg`9lqZ z9}FQ2NhUHHmOibvA;UvgJ#19OG!a`l1x-(kj-8qVkhADX23?{)IH*& zb>L4oWH~aI-X=lHtNN0$G=xr4C**CFmM*4l>Vt1ABs=JCnuhKrzez1RpKhcKr`3qG zhKKzj-|*QywM8D5YmqrMvOxWd_cGw+$;o>}{a{22p~J~DrN1hqTB<6zhbHQ(3WZ<(hSwxRmD;OPk!++D8H9XTT$Lgh$yMC*LAe25a#d|n zHPm^zPUcb{RAn+r6_k6#MVWzgMkHmFRbhw0G#9JP#<31;Dx1yrvszjweVg7uOGEeL zYQt$SdYGKSwa&$TtVLG2i7JAM09*OVYP=$Yx85a1=yLdJHh95K)T5SUGCb~pDvCQ! zf&6e4`D(55!D`b~Fg|+Vw4b34)KZbC3HenkbxPI2dDkMbY7_2tF+9vb+=Rp9%c*xV zQAVlZi1S^zqMV4rp=ukV_l(Mb6Bw>;$z;-y$<|?$QKzAkN4C$Acl?PCSFw|n^4bks(y&*vWUSJ>Vdo| zlc{F-sICUUGBfb|v9L%bc#?&PTddl^GkPG7ub`SQ#rrImd*yGL2e0DBPrjDD(9i=a-%iZJn*4-rFUN_b5X{>eV&ER;j&2Ub^K@5=5?6|~b+)!o}Q z$}DAuo8z>7G>_WE2icbb7XyW@L)LG5k28j!7G8K>B|41V(x@@pXlm5gdueyrK=?v- z)E*9hY$qy+I6hH?%YWo^`CP50f^A^)5nKD%ac!}7pLu9g)QwfDq56f`T_*?2`S{sL zoa`A~Z4{}8>|UFsBe!wis}XTWP$5R+gk37VidKVFN5pw2#GZ{RxEyhn7Bw>^^2K!GCAML*NP}PUYv9?InA9j{DbI$-LX_fl zIOjXWcTq@|L1k~H7O98owTgueYmgh{HOWby%FQCRILP1d3Zjj8#Q!)Iof*zZ?iU;6 z0MX64Vh`qNRXrNd?rJsNM?#h+yB*Obq_g{=@ki^gRnzv9hhnE~`7b7(j_2_$d^E7x z&M!8qU9^^VU(amxGp3l8-PJrt+*izD`dhk5T^C#V8K<;U)VX6|IV-kx>QH zhm7jO^qKkqy}n*v|EW#Xk`O-y=^M3Lt`^xugg7g!)2}R=r2^LMEsLP4D{=yn^G799 zZRKOJSIn17fMN^K^E8HjMh9>MQJ|6a$R|EESEj{zmf}l!3sF_Bk?B=UctvG(S+zj+ znTIZ9J3OVlEGD}0=T1{6r8C+tY@f1oJFT66L1E_%?U6 z9?f#I7j&NNX%F@{_HB*t_^0yk)N%gA&sG7IT)SvIFzdSNy3)8xxC?m81by(Xa%VQC z)3maYQ^We{AL4)MzZ@uLb+8;u*%x>YwS$({7n*I{_uUsQQ^jlKF8wu8)<5h5)= z1*<)=z9ZVM@_{NVYpbU-D(ggFrDrt18He?(EWPrJZsHSH&RDygUC#N+SIDudDfvi_ zQ^~q%Q?zI;2a6>w@pCqB#=kproz2d7XAok4xa^JY<1Hyk$5Ic?L_8{5?i6ouUS;g` 
z*1CWdSYk!mv+NR1L9tGqV@c+*unwt@r>~i2MEE#Yc5MhPM;a@ab1tb-;*|J^xSzk$ z{Cf1Kg3s+pvdkFn>E&JKt?TXRF}yW{E(h)L_H=dE7N}Rwg1~kkNj#izJdyc-1v=Oh z9ZhJe9X0i>rsi4Zec(;>G;rrNm*_tBk(89N&KkR={msg3)d`dj^tT2%UF9}XpJmev zxfXbbggC*=JwuHmbiIlZMZ{F6p0y>g)9(tr3GA{?Ii+PKnp-br&T^%2?K0BqhiQ8< zL=BZS#Xcvgsd2d;Qq+w08j@k;AiDp!%MFL;Lv+&%`b!%MUm|yvK2QJ$W zogHF>N}!h6AiR9q2ALM6TO1MYT0zT@V{*Q@WvxwI6aO%-TwJC<|NhGIXJld}`#h;- z+TQ*l2SXZ%912SBW!@a#Ri3J@oZ41-(2n*uN~{*YI4)QGw#1@=Ce9(TTs5X`wNb`8 zS3i%>yDBI$XrHHwi)(@uk#SC6d!7|(tqw#7dIl<4%j``2n#fD0vIF`K^Q3FPySz8n z+aRc;dy~F^6co3e4VFLf%U|4|F|apK+WzEp6~Jojs=m|=fuH6vJLq-UWtB(9a^bXa z=GmXE`&MH+uhWiS5N%X|G-k=QfvAAGnl4`R*1Vxp(Kf9?0hbkG4|2NkouZ_wL0_1I z!n>p$oas}#LdgcZYLE_MxwFrH?f;W-GQMN{r1(Hw>p#uoUL;a`1es)J4;~gaIs9u_ z!;n_qy6#}tNb|Bjm7W!u?A-pAi525Z#X8Tk1yeCWA`Vs%+|rB!aF3(lx%R=%%IIK z)7Yg|Wf_QD+_CJWk%^b%_rxuZ%bUdF)kMFEQJ{H}OZ} z3f~%k%RoA-vt7^$6_e#^(nPCda`!NAHt%wGeq%JfDbCxqtoMOlR$hCX9dG;WfldiN zNJOfiWCeSs71G;kXX#v(PgHTwoUL)Db$ zgVM7$d?Bo{A*n>-vG@k@v*UlnABvAoDCJucXe+j}vhE5Yx5G~*b0R*69`|lCUuuk% zBT{5{S_H29J|@KDH_|2CNR08XaKI*L5w0bkltGPy?t1Tf{sCUeYR=YMgReNonf=5+ zC@IDl>-(HEEpXj#D!!;fELdM*gt+#(yLf*D9S;r;KIj=|v?e{A82>NdKHs*a6aI~X zZPsEtH_s=BlOL?S(ZsdZ{llHX9WbIa56L8`bKB}+Ewa+u%j^WZmh-?l%2UhNY7I@J zebe@7OW9-+EDt*2_DpN4b-{XMEwDaWbL|ZLmN-E=nSVkXrp}+CP5R;~3kAPp%XvNP zus>(gfW+wu6%x89v`NU8upyzFZ+KuV&q%YHB&c~*86IUhOv<8aG^pBpyJ<>ZqsAEtq?UeM9tfx$n#cU&vANY&9U;Lqdxl(@n7CaFiD zx%J(i!57Nf^sUy%tmp3HsqG=|SR+XLrD_Uc_pwd|)&@QYCR!!!2ljp^miLr}$!8j= z)zhA`DRj14Chj@|QOOoq^Q_%g1v{fNkFQomjg}!_Q;bZPCtbr7n}g45TSP6pGIIBS ze#5`dH!*Q&LQH(k1Uqq~Kh8cRzp_5A*q~iusUw2IMet{LSK~eFKzow{s+)LX=L~G~ z#Uw-|Tu4~$%Nz)Gvd9{=qn^=~!IQ=t>q&G!b^S5ZnGKBM`T|yutQDW^c>&_@o^&SZ ztADap!bu^}Ez$nkX``_Fz2~*}ruUX-iF>NqN-s&Di(R(k|Cuy6DW5-eppNy)ZpCZJ zbR>+W(a#yeEasYL<~15={pnJ5SCru|?Kf6q#K11AD{|yv9w8e5bG$-7zloeg|2cw@$mdoXSQot zSlX18(^OA&H9W*UO1-oC`|tV}TZJ6kId2yVFyFlRa`B%N^7tz|S=0`e%xvk&8oVuJ zamc%%AMX1`ZfzP326y)X?86RG$zG6jJK=2n+Jp>A-K^al%pfhQ_0`)OSDJ5gdbg_ zHwUz(q(&PGIe29hFn6@Lr= 
z^FTpoFyH62u6JGfCS&KzFI#+jG_q>~fn}oCoj`DVPE!S7FO>`1HMu*Zk z^^K>uR{4r09E~rK_%*4t-CKY;q_OOP)=JN3C}Xt=)anXy9WbNJRK`H9BpogHwa98ow$?a&)PUjK#fL|*N zF8d!coKyyXFqZ5j=g2ZrhzwKvB+ zbY}BkqBLo!M|yjQ=Sndk6h@O*CvRlk`bi0z(+Nx|naOR5M;LY08g|sIqI{0K;9T-rVvblLzJhyLhQ9k4_|^XSmQuqWZ-oS#Rn<~^ ze;}3h&Hllcin6?dbKz)z6uE+%8Wk&lUF@*GjXE(OD1CbLpn;(teVQB8#1r6zn^Z_{Ucy@Y*gej>`^U zpf`hWt4@>HX}!AH%yrLo$(7p`W$x79vM(e6Mvd@BP95i(bBR9^A?iPs7wkC&gOd+k zd2Q;WJ-~yMWJBqAvKWlacyJJJWDB)jJqN2Z9?aiGxm+%mM`eP{2F8M^gR-)GBo=_J zdM=uygV(@k9mV&Ki8?|<_m)Cr25a-3f8|-lLoryrXIb3Ap*xe6NNz=356)udBZc`= zJIFc0`+?Q3A)@R)Ny`&<#NSQ$C+VA&Ph=!pSd8A#9PE-V$2Hb<+`Mn3HLmHObxUuh zuV$B25q>W)G3k)6Yf{cY7rPhuom#Rsu3!UZJHzP+t(LLH?CM(ND(V_wRyS^IX<1kD zL9Q1M`3!!L7ZESSBycTTWG!T~Qs7;el7?)R#`HN_W#rcR%7e+qI`IBWkQrWrpGzyZ zi5QVb-U17^Qtbz;T~mdlCJdEPh@zv2iYTyGZ@_vzkWJ(|K|~DR=MT>>%EKbp`FWmR zRFTJttv~cm3il-c6PYIbm#4f|NKSHUI2-vOISO-@3(jGGe8RoB$oT1rnFCLqbZS36 zp_MVJm_N*H=n9{kE6mPjO|zX@$6RO})(#Sfx3``p1$;Y`+6UU%jKAj&KftGRm*_25 zkSMJOs{R$%6IWZ;TyvyRK)=A6)8r&XRh7F%VR4@S3wwOw14RnVcc!W=V8#2B5oh!>@0mqilU%!9n_T5wk>)*Z8TrgN zSeyKv{3FpZ4R@q-#NqY;dnLMHL*66Hv@^y>GoR~`S<8$uG8qH)<=Q69$gX1&`c@th zllU&Dt@F%z!}Ea4uSoV{2K0e^CXuuwy+k8fKK6<>qKnB3FufGhlpC1CypWDef$3E$ zWQB+F9K7j?l+vZ{$UgF?7!5ABpLh%|Y@3`WzaevW;*q>OKf+6jx#FJ422Y)?_R!a%ni1T3FN-`A*L&nHm8qNSAN;QC1F_Hm$;3IvjP#kjvR>DTSGdZz0iF6 zHDkQl+*RHE)BVRC?OqHk^atkOZEvx{?TSt=PWUUkomD$f0ciJ@Gec&g`LvOGoF1%R zy@m@V}n^WrR`wwj71U$uSi%%SHZPDktx+2TseQYa<=A@Q^@J5A1uNmf~3 zr+9aK(ZrAbS6t^sS zB=7KcP8p{+? 
zKT-9cfwLVVeu`?C;{1j^LPSXcSrT3{6WO7oD1vHpnS7x0wHUKTkP~_$YS7$xTC&#Q^$%l0xI{e_qn&)#D}SEAL2DFv-6}Yl@gh!qm&M5@^n(5LHt>c?Mh@ek z9;Nrx4uBJnr?uHZR*&0*sqWuw1?DB~@~utU5*T8)1{?ar z=C&6dYeV?t9V)d!W{|t0J2R{hte?^f=nM7c`V`H>KB@kqtJBZ^$8P3C@}hjX^U=Oz zk98*Uf5inEPTFGL-dlU3z1D7Pf3#wHC%vEER1aw7wDar`JH|NeK&FDd?#mDHsiFd| zznz%MH6Fz|pM#9nm~N-TX-OJI>(ghnF6+-q16>rOwJ^`#fz!_*ZE;of7Ei!_6CTW) z^Gm!eaMyD1+g|a2U*o3e56_s4dBQAkzc1BbvWa~(GX|Fp%M{wt(^s#?3ThjS+^+g& zU9G2T&okQ-16%y@{%lqbyQ7`czHHsF>exM;*LLBAqqBw&pZE(%0X@}UYZI|=Sv3Yix19KZ6CNw%t$*OAg^)I?%9M`Mp zx3tb$er=&PORr~K)!VaZW%C&RPJBf!>@5C;ZJXK^oaTJ2m?itFY@`!e1kSb{O~a0` z5||r&X2I+MW`%jdivOLpjsy;`4+Oo*iFfkye8`Wfc_yBc>pYCN<4^b$QBr=BE!9MI zNTsApbltlw^mXVGRp&NIMQy2~h*Z zDrc}AY0n3HumG9(IG>Iz7=V&yHfct~*a22p%b;Cm$-xTf^aW~e4Jb_J%4Nb2L&yyhfxoLSnu zg?kAx-stD_XuY;((P89|+$t=7gYSi>?sPucUBDeavgbOjcs}t^n;3t`xO-Imu&_LXQ)}n}-CxsY*ocSLg!|UTUPJ!p_gI9{;6ZlL%j$h_O z#WMMbDDAc@t2fqN-<+zaHQ%`Ncwc%Bx>g!OJH!^!wW_8}iHUl8vEQj|UkI!Q<6gvC zWiNLQIRjx~iJCG98TbU<#`gFA>J02njrcBji)1faLUzLE9BKqMm6({Il`P{bkViFWMT>r=U1HPj&gLK4Ib}8#fpJiY#U~;U!X4-kJ(5D=$`gLxmAvQRNXK`Y=#Ny zZy>B*=+VFNJR&c+|M9#SGV5_3DN=~F;;5>qoicm4F6+zKTNYvVbZ>HZcAYlT8>{sg ztpeK&_GK}qv7OZ-@zNP%7qH*jJDrsLvg37Pz?!;|hyD`_fCdLb2a}eTN9TBfz1NoO zhxIndhdZ_LT3gI&m^Oz!gH|aGX@UNrw=5+8h;<^n*vOl}W456a9D&TAPL9Tudx82x z3TV^x)cPm3mOavv8I6sadIGD8DfBLS30jq4=mRdw_HvU*xq7eu7o zjTvZPsK@F-SJp=L#=k9~UYP^ELQeHa?m)jX3W%bu%n0S(Y)p`sh|A&#=8IINr}wp& z#w+07V762v=2o+^nZp>T*VNBwYguhNn+W2jqewKSDAV{mCkx*XJbas%;YZ4H_9*agS-lrz&}8d>*Z;D9i%d#hg~K+0DZ29Mb_f8Y>1wp zBAF~N8{v1y$(-ai>t|d7|96GdmqLrnjq1MSRIOAFJ0n{4n(YXc7JaPnjkH6N~0aagR_}X##3^;tB+z&f1Ku0!Q zPQrA3nrtAm!aJhCT(p!8(Wk7H{mDyK$XID~(o*5-efoB@5vH1HbdMgY7uRmn8RR&r zOT($H3d>KtCLe|PEC4oWD6DZ9QGbXR2ENRSeA);)p$TA3Mv$d+6uZOru!(E|Iw-G} zN6VrGX%|^zc99l_T4^lKHaXc31;vDSqQZ$jFP+}5#o1UdJp#L-2 zEtXG<1U@gvPS76U3gYzo~tC36!#=m-w-dOWp&tU zKD^{N?6C>GS#Nm2LVUJJHj~+98g$F`fYH8*g813>vb0Jf?X(NVdZQ(|2b2TKcd7fQ64^-7CM?w(BzgQ z7Kvn^SY~*`G&YKDVZT`@vc@@9g#8OG))J`4pmBv}HW&DW0`f9650``+I*g3)TR#x; 
zzli#dWU97VZ^vw?Ny;$8=xCJH9zvJ%hAf~-P&{ozmwrz5f#xSjJ`}^yfsF-riiKxv z1InDJazVKhz*lKukJa#%kJ5`wGa25H3_6Q$a*sTQiF;YOMNCATH-|-MLwmYZ#>qZp zJ_|DTn6->y+IE&xuZ*rUx4B-Q0Usa4rqX=S%p|B2s8YYg9)1yZIuWzks!%MUqjvs) zY0d}5#Sm!b!k|%nMUvwLqu58*LTir9F$Zd{Vyqr(#!7&b{06<=CN&86SW4N5_3ko< z{0fHYHBM(Hddqvz2ZcaA`I6)WcXgjVry1y0+Fq-tpVq?JE;58%B?<71X+Ty#;6?Xj zPnjEcy#?pG1$}xb^qT+T^KkgzSEWH?+YvtV41CU4baUn8ckvclj48P9dWh2B=+A;= zEPOL76fR?AUqsnz=swHRky?yV+x5o?Xzy5*-rOt&hU&C_hRvcEf%c9->7%LE@{(Yp zGT(tI?MvqhKac)quSnwe_#u%8s8W&CP-nVm0Xl^;c*c7~{5tdv&*(^+gLG zjh0a{P}n?{4EOXKy1vQ4Efdivlz}?yq&g2~ijdu86MmZZg?uZ@iiPMCf`MyCLC;b_mPO?XLoanpzEnZdH%O==3s>#)!LGBnchTQs~ffLE}3E$~_&-Y#Y?YwdyQ1cl}TcBf!m-hHkAG z>!MZ0l*fa7(T1G|W_4&QG6gEULx_$ussS`d$H2TrL1*H?I^9$b==wb1h5E}CvY~pV zZbJuINiG&Wz;HGN^LG?>_?tf)qWV0NpTT@YAtN6~AJ?lXh3T zV{~-2Hpl8e*i3Dgp3zvUm(#-OP*nhI*&k66ezslI6O~|@a;P!;#00q){aYLI1a)P; zV$d!&K@LoYp6dkEPv4+H-$}N>&)bpeq$Jd5?V!g@BAIA5>c{^yfwrtTG`9V~Rvks< zJ%+k?0O$Q#{zCpsM895#9A^J&gTN-IhL*`@!?gKq7;@TaXqvO2cKX0yHAQ}mM7L1@ zn%gh(w3>@Kz+kch`pA^3HZ)3qnb1D+0+@w`LX!oNZ?mGFOh?sSiL9{$aaRPq;X5!s zhw$Df|IZO+#P=FN4fupCWkqyp)Nu7SH|jrGCC$*c=zaAQE202@rh>s=R% zP-Xj~7kmxX&=EC=%pw`cY9O8g$T#OmZs=hLLdDq#s=CzB2YrCPIRV<|m6%}cBhT^w z?V$;8K!3nSZ&0UqBR`(U&q`pVeBk7ogQ?02D-S?+zRcdSyU+l(rybY=ZJsucT_;_D zJ5s>UVt}(=;LaAwPcjuWvz_4;jqo1hp~S4Mge-!ecny`?ZRy}@2LkIAM|}3g9h3x1 zxEq!I8R8@(w5BES)BmAv4@3m-K=j;2%{&B4FHwo)9~PuvGeo_es%1-20(@8}bk6}1FZP3JuK-_Z1RkskrW1cP+e=gh;KJ4Q#vA>rKkJx-WR}XwUq=h z84bn!DzM;raIzUtb%T%_i=i&Xz+28p4|Waqp>h>cu^_`MG z(XD=#TY(Jvp@s}lGiY_Ky8b{L&U(pgBpdb z8-i#l3{AF9=aRMRKlofz%n#n9IzN^}P+6t{6Q9I6DmfSaIR;fD2Jv@A76Ou*hYYq8 z9nJ}ngbIEaYPg(GWv9T5<1@*}UTSU4(XK|Wjm8jtCy;j~t(hiS932eB;4@(D>0%yu zjV`DbMddy)t)s*fu@n4SZ7BL@gYB&jwl*ECpAD*eFr3I3MWN=+iw-LoQ?mE;6J1a1 zpeug_tNC&Lcc88R3DxE~)WF_|&lu$WcgWlS$=1kMJLv;V6|b{z^grO&2lQWBknV!c z`w}v3Ms)_t@GsCY&xTsx#+JZ#Xg;q%iCYWr$*?Dq8Cvk!sIRqAjoYCLQ!tNj(CthF z!p*NL!n=xrO^iZTCx{>muXYU8x;}7ADd5`kvH&vwOLdqm$LVx3j+x0^?~S>7H_eai zQUo}>AZq~S{A*x|rudz~zyrPTK6y~#uM6`3N|qTIKA1d4?MMdJ_AT~A;=p@e#O}me 
zG7!58ZSk)cD&ZsaC;dhb(q`DMI6#)dS`$bQWZVa+rw(Fy3A|<}t~ySpL9|n-k9xAt zEFJ3!X0S4CNm~QEo=3+10Hl@yXD|#?q_W5{8uoPV0M}dsdR_vb`2r2-aYXcE;P@G^ z!zRp@&ZEoOEUzP~BG4tK!?sBsRUcaPDzNZ3eE$@F_9z(Pq!h91Lal zTX^huPfZ9`o*Qf=}Jp;~JhKltH8x);MZ$xJ{OyhPVBjiC<&k6;5b>!=__$m{6r(@{* zM*&m+!W3pKG{argdgSwKxQ-Fv+dHBvt;R%ZJS;L0{9D+JqL1TIymB^_{06;T`0;%w3ZRTaW}4ui`1Hu8^#%mb1g9c?P$ znR-BpGszBgRy&ZP7eUoO4qG~7;Z0M>e0*mYcDY(%_JK_t%q9n7vuY+7>_|+Ymf$Wa z5YJKgPcSO#QXq{ZuviCd3DrQpJ%!V$ij3YGHrR#jlX=)r=z}QS0^d4^EsjZ0ZJ)(w zTaj%#pq4hpYt(@?t02aTLC@V9pH~H%I*IwyX;P0KqC06HS^?Lx0J(AyEkiSb%Pj(S zYAG}dln#QI{jg#kLZDx;NHFf%PvG@G{Wgz#CsgazwPib9PfAnle+%!m7%Cs>kwz7a6VJu zM@MkK+u-#RkfRpix5uK&H-Ufrz3OT>#R~BIhOpyg+}nL*^aUg(T}F4%?yyV+)R8gx z(~PEqZ9K3|cf?dUoes;?1CL(|SS*~}Mc&?`{_ZRdzz&HI6H*EvDThk;_kCL9YMbJ- zR=^h>@l&VJIWm$KaaDxmCwUM%6=C6)Kw%59ZMg=vSptt3Lb?FKH$$AYgExh4EI9NA?DEb$n5@(B2%VW_SLaVKwJuMM!;EL__@SYSD9HC6qC ze7X|Wm4*Mg1rr=fQlTl<4@KA5$pl?h{dAd>yC#NU|?lT}ckT)6VTnDTGE&H;a(3+)8OZ{7y>FO2@%!B1p> z@9bAiNGN?o8?(D?wpLBwpu6;xT0HLP0xHN~KA|e?atfT)EHGTzWnEyf1HgtYz(3{! zZb*fEYO5feW-;Uqj(GVFzevE25k=J0MpoE`GfluAUMzW!^Nm4IxdOSP26&JY@c+E( z2VQ*>I_twgF%h`ZdB_jDaj!X1_Xkm(RYZ;SVsl^*Z2`|+j7;N4wb_hQt%D37hFz9) z@P_Hw`pJQs&=!$c89S3f<_lHae<<@QbpDseOpbcjqB zeTT35z>eS07f(he%8I%3YvA*P;DxSAzbuEm_z+uAEfC#4G60==K9(OdimP-stw9rz zo&WA;v_f9bh>ya+K2_mk-4L&j(1mBid6j{6Y9n7~f)5>r725pYKH;cgLFg(AAiGXN z<#>*H8^_ALVR5s%K*fu6)Kec*obz*dc5_tdCcAHV}8(GJ=keU-7@_@%$s zwxO48K#Hj1@)cBS23U*dvgqIaR`n8=8iw=Eh|K3h&A(16fQPC<-y=s{M0Wp)`;5gb zX9n2q%BV6I;5V`4B~ac@MA~I+yZzNcTtn~O1f0nnFqfCXBh~_^+#YB94s6UE}? 
zPlw!e7;)DCwKE9)>MwXs7^={i|96@cu~-y&uP9K!b;MUW{KN(L@d9X*j^NaSpgm}V z+%+AZ(h`53s{`aTEw0Vemm3$2YQ}KAqLv8U6{Mbv-@FB;Y$50{Zpk5FL_Q$5YyxgL zh|?H{T%!Zwh9ZYgL|%zO#{C!gdkuK7R$$RQs3^lodUOl-=tJCNYsz7rmWaz(*zgNp z>lfJhE*y z)RhXbbXIuNG-SYJsDY#K9>>tbj3d#A|DEug5On8HV9{WBUprXzHc&+bI2;{W;Wv8q zOSq%az<;6Y6_DCac=TTM-5-zz^8p>s#ubjoJKo0TcYUDsQ`%>JoiWBpX;jj4X^&_N zvQ-Wi(fp3{!g zF*}AjRs?LE9jrS+MhU_;eL?)2$IH@!&}Z;m-c<$uEYb z3LvwOQdtQ{_jw9Bi(oUOdCUklu4{GK60%OF7rPxF^z~hxhJ1!Nji()q2E#cKSh^7= zTjkLU4h53<%QJQZL-7&ObqSa@oQwoZ+X;+XJ}_$4&>zN-!89FP0JsG&3`-r#>VA$KRP&S)BK&ESqezloQ4i+~8 zzkg3QLAGuHe0>E764N$d?wZJo5_{yw;LSTxujirWSHza0gTDU_7``_^_F0hG76U0g z17dg%J>)dxABC!a1s`|8q$}`3*g-)b60hcg6$oX;w2pdbFKG77aOaq;5Z*+@6u+nO99Y27TGQ$(vp#CM2 zKeV9MK@T_f>tT9s?13!BCSn07TPEA-?b-HP=ObSwvyg6>@Q=~B_Fb!}<-uv_?K0pQPPrf5hA3@yzz@#fBc#)rK z1zoQ#GTg52u3D~RW?kc&c9GUrX~ZAGtPCwxWk93f(Vqsu=+CYo>4?4d( z@XZKd@B%EoHj!9=##HJn#~jv=Gu7XHVMvXiXi zuuEz2NOmT_XdA7H{;ysN8|Sf@&$myWkAqMIIsioIEE6+wvfOr#PUi>X_BcV~AQ_axVSqnv(}{*aaUTWek* zM_@!?k(JkZ&QHo-K$69^tJ+!2T7y|A4aT-=dEn*xK;B{Cu!_-vv_E}?8Vd#u8q`jr z7MMa?yq5|yi$Scub_Kegq1tWsk{-wGY^975xuJW`3~sI{u4IGigKWQt-oiX~AMFez znFU@M4rZByPyG-5SY71Qf$BZbYF6wj=SDQ%z@%S5HChNr;0BnV1(?W8#^kq{+Jy}D z9dql)A`EQkWtkQ|!YpKf^@ybrh^V|kM_&*>Pw~#Nz!*vJq8H#U&Y@;k2bQ~tUG;G+ zmA)CeHtJsLdSQ+>7HKKibG1dxat>RU1KR>wtRD7PXM{+hhLUUaI4h$Kz*cT!wi%ss zHz30lU^QA`k~|XVrY-vRgRoX%Iax%D>S7!2`Lleeu9NTd2b-ro*7j&wwH~ZLIJ7Bh z6PVWN;1hdcf?gh1RSwtv7;3kMq!FstVN9H|>mRj&+EvW1B%Xe71=`7%&U>c}6t2_3 zyG>Ag$qhP(T||Y8V2u!G`+;TJgV{QZbH5KgK!4zXkGSWnsHFoiRoH;&E{RHY8C*eG z^o@*kRVSdJTZ9eCC@{+n<#9Ea&eX!q*6u=H?(OVN;dyT+Y1L>pnaNocxRjJVXH1n(OGqcV^U}`eWaPtz zkef*fwnMwDmxn5Hi9TG*$!_5JCYxk}*bEih9Z?MNUH}zy+5fxc)X1fKup3P=#~a4( zv2kLHO~tT)m-fGTJ|13R}MnGTc=&p8yaPd9pJ;ev7Gd;*5f1k1_vkB4Yn8v z%=J`u#=d$JOmSMsI%GMUp*JmOH@9tOsi}8g&5}NtPEq z`EP6ow}eWps|saaW4wDzP|uLlA>)Fpc}u${YhP6v{wz=DeZNESJlcmIMeg-P+G9ooJa7nPoDJ@Jtq3K!) 
zR-LxSQ=Z-dH}Apu&O_B`huks_m~A8wNIlHtv&lio42{)S)g7qnHJ-DQ8qYBrFP~s@ zx;QorW~19&i!5;s2yZ`XLuY8Tm>AAOxzDKrwf#F0s#cTf?4n-Q8xYTD*zvz(D% zyH28Ejgqzq)Cm+0@W4R34sS2>;aM#m*cnW2&q24+ne_mgy~Q#?AJ|VT3ysq;JjZC7 ze1_ezRydkr$XbC3Y_=!+yWOpp^jTT@0jm3w1vNsI~$AgyZR1<$+zg|G)2j zkJt$YOVJD&>@BkB2Vk0Gu!AOwi37+r;qbt1z_EkT=M0sHpeC(`-QYre1<#AEgBZD2 z6{A_SHO3tG$)G^U&Cs?XKfUW+$@H~km~h)o{55?lakH@cbAbGD4`DgL%s{D6VgaFVIJB#%!|(xT&)0 z4y+@@E>yqt(Av8&SzQSq>;~)~h52a%I!!xmp7Ev%{Sa0xY;!R4NWCKcB!=4y{Zo7% z-+SK-|8lF7(+3+cU!hFBjb}~mlM~^;uk|3;J$EkeDQ{cvO7|6G0L!5I@YA+w$6J~0 zC3bme;AV0^4;9a#=Kaohi9~su*esQC*}UlTxoW$@&B=NUCRZiluZ1xgtc|;Qik{`N zOaUF+cm5Byt8$CHatgN5YOo^OQsku=c8`7n(wPAaR0}A4F}R@r!1@+YD=;|H4-tPs;e}e{L;=B6t{&b-FkOp}ZU}DA~hm8)sanJqg|xL5scn z-Rq1pEQh+tyF1~~A0Kw!II+$ZXB?g!5wLICx9wrhEAGb=N;2tP%onbP?l{*&v$1g! z^|?5-;$wk_IA#Fr;pn z^Bc_PA9#9Nq`Q5AVDbhjrv@0_*H1G|Jxpbh-R zZ>p2GvUQL?}g=bA@sdH*S@pOuNq@bEAemQ6C@78wXM8M+KL=&Zk21CXzPdnmj7np zh_%&D=6ti;+Ec7xYk(E%Xfg|p)JH%``P(zwo8P<9UEDmZ&7f}LLX=j<>#v6*bh=Z) zu5En@Bn0YPm2jHH<#kd_a~pq*!X`I{7>D#YptLm5s%+7umPW6lg|MQesI0~h+aIl? 
zR&#r%^B8;Ro7D+C5pFH;!YF*~0VCI+ErS-nH#>}J8lLzk^NH@zNXI%wuun4-dq?xo z*I$8lr!5p3q2L>$ai>*q-^ryH_He**ycIgC^ii1`8RQHpqFph225k<#88#<$L{LHZ zbiEKYu0VO3h9`r%cC!_2d96OyYI_8*M?qRiyRGjwJmv$VJ~mAA>RYt*KuSe` zni^>bproR7KVo2|lfhYGudqvFV?P~sZfBD6m_(gMRZW9gY!jN94M101i0uJ3uSx2v z@8SY4&C~EU$T)?dt1J(d!cr(CtH1&&F;9PqP0iY12Im4_6_?@AkrKp30`!I#kgtoA zC-k6R!1GVYvalv$TY_JCwwlGY8`vq^YURXE_k@7g%57^-E<71xj#b=xZuN5h72A}b zPSrgx!Fk9SrI%@XpBj4(*z%KGsUJ+W0 z1lbJJB?2YWMP#STsJj#RRbCDCvMhS`w3xMQ25Q)Zn)sEjG_x%B*OfGzGG(oYsHV zU1u)(<=UP}zN``5B1ZTRc*kp%l|y0;SFzWbz0HecAkE+pYs~6&<~XAYqgN5_yl!WJ z?ebDtsP*;C@D%WLVjND&O{Hp7^ykFA%!gyr4%7gh+!Q;;YH5toeR@F8gPP-=Gn1-j z0og%L^hRCqDR@sNe2!k+8#2T)!V*x5z4lf#Y|&_1@}k$?Z~bWx0>z1PL&X1d)H+kF zY*u9FwQrBy$U#w)qH;v_i;VYo@m!GKyKAk9di!9d)CQ@o zgKDU4C@FMA_wc?&tv=Q>tG442EU|<#NSp8d#~0~8?Thg}@F?1N`M$8-Y0K_(-fUr3 zHtU#Qp+N{3Nko$vvo?JBch)Xvi_lN9;VTSdXUOcCuWbWM+l|I`rhH2JO^OBWj~7lk zN6w$MWa#^T>Uw&su`pON`g0UWOCHT=vvX| zqgq7H_I;&AO9Py;=F(8P;I7mz!EB)>p>3fOM1{||E|o@G5yQ?i!QCeoQr0pGn|yiw zIs6H}2HrB-YPpIy3)E`}YT{O?T0Ai8`>>9V7(=B{5nBlTDItJ6>j zjMYqayc(xwz<73*QbCz0pJSbO5=^Hs%9gT5Q$4rdSTASPVl_2#4zqf*v5Tg_-a0M~ zg-Lz^tU9x}oSZVj1>?0pp-9+d{$h?Wmzf{UA1yzbvUL8h*!uZ zy*QX>VY{8Hi-(n#9=|WiSKXh-Kiuc_-cx77;A%j%xRAA+jKKvZGit4Va5f5Sg+0YQXK=bHEG>4^+b{IS!Az;^ z0&h}#hb|i}>;~>NVUM^(Dj^q_pQ6xwicWJT%GJ8kLAo24+@8)=YnHjy_{#7brHrSB zZMvwy-;xJBrJCAmU14^9LC&_B-2bZGkD9up^htT(%@p-bOu5*_F-M}yMGf=U_e__I z3$^U=#@^6nJT*pz3WVka>j$R?!wA*g&`AA)ansV={$eTRv*vi8`CQ*sUt8ZGZ<6*- z$qkSEHr*wYT7Q8(&uV1)jUxKK&~KqZp+7@g^cH4qJFojd7!G4{yHZeXgf8<}wYMs( zJMqtYpsn|O=UIs_iXzEEPL#sa^=_dj!9JlZ`dOo^b--Tgl!D2WNxUakr;eXbUUghP z0rN9K8bbwk#A#|5w|d~=@EKj!2lEX1^7J%LRAc*p46^@V_ zan)@uyV|&js?pP8md3czBcqo18!-wm-SO5a{YbDwumXNtk3+peYA7aDKGZ0*Ce)GZ zxoXj~kh-evQQyq;XYt4Tx_NVZepde>@9n`VO^?gy1IE6sHPpBtdKp|Byc%p7YOAB( zvvZ<1{a%_Vmr&{}6_ql|R3)giRePz!)SK!YG~h)&ca?Hd83FHGYEdh+DRe$`Kp$tw zXc;qtD6MlwyK99@;%TyqpgdC9gVN-MG!1!Y0^Vww@JO0&-ZisZ<5>y4c)hpAS~bxC zmc})!D(m48nRJBR*$&y`sTDd<&y^HAD1Ck9qesNXqzlE=j_w$7%#%YIE&OU{F^A%! 
z(MKO-Ofs73%Yw~PF9co%wx?zfl`}kcZo!rgtNFa!dh(UfGVq z^oH=tNwIDk2SeF|qf>pszM&{%l6l>VaBR9I8NoQZinXbcyC}O=)syV`;3?#JrEXIt zd|MvKy~W4wGE~(Yt@36k{I$NA z(qYTX7-xKIwnkSy1#aj_ zGW}xiQ{je`Nx7)J2ebY|&MB=D;u(X6)SKyM{2#rER4@+w%t?Pd)p&x<>cZxJa2bX zsc!znm*$<(z&M19Q!XQqSo7#7* zHOvBj>5YO92mVtV-6!5W0OWP7U&t7EK{3qImIClLqTS%P2ctq$^k9~}`B9ks+~ zYGc#_YBX5sW4V(w5!CFa`AE+YTA#Wha4Rq~cvfF;ma{gRZwmoJ<>pch+V|{u*g2@zmys(>HM5xqs6vXN4o$U+g9W`6ijggL zhb24;Cjdq3jW@*ao(>G;+ZTDc)S zm|EaWbcD=fx0#blyED(_7M=|?sORE@n<(6()S+s6btWUJsf*Po>MPYz66Euu4>$9C zP7-^6Z~HzG|CTuo7tNREHtUFe(CH1jyn)`|A$a6V@Eson;{%UH`cA9q3qOFZ@s+zR zP4#ewbyX1;)<1B3>0%W_tyqstvN&qyRd#-|$wHD(tL*FMTjeRQRz)xTJ1%asy?=Xd zsDtHJ;sQE!9jQ!jv%=<}CEa1H)Gvolh1Tjh&4tz|6naq8KrDR}E zTt$ankNL4i`BgFGLg=(ie51$G&lTK*RJoJv!(@9W$o4Amq{@@s90w0N>C6&j*fod9 z5bxm`^sBN`>81RG;&!|Am9!ey-yG;&SI!RS3aFl9?nE6t*g%1dcQkirJ8tpt^;SO3ohRnhS3?65buaL6G7$6p!xZU&wgePE*ASqkO(tEK_ z#ljfK53j!j+@SA3K|YF8=z^VB(a+1^)z2Qso7IjfKF%ce0ZJZgZIx*DzGR^`A z&_7m&8F4wS3Yd!j`(04pzTzojK|Z({Q^g^$xL@IJ(Va@F8NI;Q?0U=S!&Ju0=i2k+8s?&@u9gD{=46=6tpqSQ1K^pDe@b zO!X?->zV7@t=4uO_$=?yEX;@P7Vao^KrMVPI{WLFAjrF&u)}jc*m0wYXno;aEyHo(DcE!zx{5^5whr{nCZ_QT za==&KNY}e9m~-Y2dKs0^wqZz+w;Tm+6)ux2bcvJ@qW|cna+Vv zmINoMDPw*o>J!z`ScmY%{$%V&a_aU_uF~?M~C( z>(8vr1U^!O=&Hj6i(~bEbPl+?#a4=@-Bv}Vs8o)qY)aFVMD>b#MF~nFVGKF*aq7Kf zkPk(OcVekvcj6uKyZr(@>kbhk3;wS@;!icYDFW;C1vTv=6s$kPhbv6C!&J;H}m$g(-f3B0~*bHaJ9m`y^`>W zJg~GUz|tuV^CrwB4Z`K9&Hrmg?fnKm;Bb0j-RaZygFV}t_2VPYJqj~jCyKWjoUG89oI~DzC8`M+rsEhB|SwVht(5rk!hp03wuQPK~g9khcu3IT)_C;pN zsWe`SDR!2o(zE#lo?@Vkc_e7mjv=_S1Jc-4VdiZ{`Yju{A5QUr7q>tZuhs^++pU0Z(Os}vcyub4F zDys5(MtGi&(HtaGhdZD)kLZ=}WA)tPX@)f)<>)R}6lRF&2=pl^VYS}%>6`Yj5f@Az{`difb(>)wPp(u6Z0PLMmCgr8T5E_ij+0^1px z2JDRu=;n9lJ$v(OAS~fk^yCim+5_YG7PBS}wnYbKN_ILo_uw3VViz&sh@Ay}nhyKw z7gkaN>v0&PGZfD8aCnjZc&`pzVL5mL1zrB|NPalKNl>m*Xx6!`L{FznVu(kDp0#@oQMkec6ZdF)x=>88m}s9mn%7!M>4^ zlPwguL?6>KhLw!N6rRW-=0_?VoC}QgMCQ<3#^ebR%i?7+M+6vTQOp-V)$uj{?I0t3 zmY)v0?C+u1vz2GMf+w~Vw(UID;8eQqBZ)n|S?P7)A%(}~OU5A!qZ7l3c!(D%@J>ST 
zw?44$65X3ZVJTjA<-BM=(#Z|LS!*!QOY^iZil2xDBgAZQZoBhry0YIDgN3{c1@M=Q z#vLa&YhNHjc67=U$(GPz8O7C?CAR#{dI_jo3@bo9xTmX|@TwES^G`S=W|qCmcp}Vx`3bzC4%|~nh)d&U-4YjbPl@nEZ@@%f zM^-TfUQH|1iC!XQI%a8S*3v$@4z*Eyt;0CEE5PyROKHzhZp$!a92c zS1q>~!MQTM;N^{j{riC3^EqSt6^zhT@N9kXK%2wYYR25k$DULZX4pkg&f@d{w!q}N z%Z!-Ld^*f%U8n0BK3!lLzy9RDSHg^T7QBsfQuz6{%W8Bd@$Ion{?{%U^e0Ia?R zthv$bbKks|kU8|m$wh3xD8xyp<-1B((77?n1i8C3 zNqolrB*U8AAZ{dveg=QMNkm%)>!%jo|9ND|lD!xuaVpHL)$AkL*;{@CqsRcGaEMTt zoU4q~L+TFCIzSdM03O?0VILg+DX2LQfa`8zPk#yP>nJPsBOI^v?4keRk2C{TXCY?H zV&Mp<+T37Qe#lOphnTeqM#B-*7yqz+f8vQJ(Gv}`&_^%=@yz8fxX(QB`#KO|CZl#Z z#=iaq^E4BuS?y=WbcJ=%jMy`sD?G+Ln#}jvs4QRcx!2gcPLMZkBHNqnj3&BHg1NJt zUH7z83=YF8aRKbMD_U*O9x|CrGS0V()CFnZD5aVU?%|1kzW{k~sL1f2kkH+yN{gMt;SSGkwLHQzGhIFDyMp)Ll1UoAu zqq>J!c#YbxJK0G+GMQuW6c!MnS`zs$GRB)=XegXe(h^2;A-*o*dY>~!9bj?vbTq1m zt-N-jU%1V4`e;Yfb!$MDJez$i3+s5K_)%V?rFz3&>chPGwUzQ1F&~Wa%xD;{qC;*X zK65iW9jtWb*T!@GGM&?X#$0nL$lf_;3rd9*WHfb!5D{~_uv07xM)()}(~at3rLXM3 z^^)P+{ta8iK^@Q;4oMLw!G34;Mrode%g$7*F_8@x!OpR@1u$6Jgtb}W36F8&)y6c47 zVzd&_Y+s?s^^ubz621M^ic)}nj_dePe!nCKj&pyt&grLvvBCDi=AkKi&}eVXwiBFg zHU@$M z|AGJ6N&CL@62G6pVi4BkZ!lPX=IaTnt4H#B**$Suer1ff(3xPxgZ3Xchr#HX zkE7on>lj$oY#bZA!xufo=`$VW74j50x7X7zFa2wc47?2zO-v-Rm?YpXx@wF-$xtRZy3B0}%zHJ<<0fByW;DTbK_U zZkn@_T&}z@-dT;0Y+P#FK=#zr!M|W^+_4IiRhI^7N)!i5+2ll&x|@`Wpl=_v@i0y! 
zw5xJ0QF0ekTl!FzU&n1Eqy64oYb?~8>L2xH=0|I|gOdhoKrc0OPBN=~@+bLArIJz` zH>h;VI^1m%#o@4FKar*85oh3*)51P!nnqQ)b5+r2f1kGZ7a{wKbIUPL-bqtIq5q^q zvypr%5nkW#?tSoro_rW{VG9vpA9 zv=y?}667j7gfFQsno0TOn)uw6)J|!U_%&=&7s~;mpK}O2qqN--eh3Ua`@UJuD8@Mp zmGMCD;&ee-Yf*vR;Oj7)zqZJVQdQ}OGhBKU>P@8-_>W)1s&wtC?j-Sulq7C)XIVec zZWqo!iTHj)vvmQ7jgwgQl{$)&>Fd{GU{W=NS zK8kZu?&8|B2~Na#r@4I)7t+a2Z$ZGRB&#seZg2YNTn%zl#Y*)5FN;<1Vr|7f+XTFa zAchCqa_awgX-u5}SzNLsvR@fgj8Dlq?pfEZCU!P@w@=}NjdOd$r^*GJbQSySpUjg{ zu)^}TNNFM-5x!`H%yLn@gZ?%i`nIM(l#N=z7)zASo(QR zO5tE%!?yCXoAiWt@Jxu7PD|zFyK*wjV-rC;pwL5Z>&=-$6g}J$fn(a@Vdz4!blkDKdZ?Gyl8IU;s~!BtPbBgZRCl+#MGau>GXA95>cm@vaBX0PUKhI}|jTxA6g zaPBf|vr1*eFNHYrk&{AQ{^vU?)rBy>R)JDaa2C3wg{vyfy%|IbG3%v zMc{n1JcpD|;#~JB$nhY!Xp`-MoXf5Z37@&e?isq)8SzuAN&U7_iNaxS z5a%u(r@MJsOm*@0qdz*I&is2i(m9;j^rVX7z!PxCi}T?U=8~6!R3(tHOYG-kz$Pz| zH^#c(kf|rq#T!Vz--Y_17k_u1?7?x`kt4g*^h3!G$~yb(&9DK&stFwrgNn`zcIl(^ z2&&;lSr^PZmYuo=8P*!-n(L8TsRjLWqL0TuiazFRuXGVIpx>In4jnJ-v~L*CgUbSa zQ*s4frIy!sm`CjFuo?S`yWpy|lCR4p)%2dH-hi*M?}fHUelNV`UPPRtM0>8a!W?6? 
z)>nmIhYA>b%o_GBXDUyjsa$|H(_N{hOjq*aLA6MYQD4esq$a{Drv-lFTj^_UC8s!J z{{^3|E!kuzXBa1Uic+k+R9+~@%j>1jczhHk{~VjzW?;0d>eXnNUAvg#X0g9ii4QX&G&Q_qHSncZ!{ZB3T)Nt!EP z#QQE;z3myoi8`OWy|s8bQ9!K)=YEM*$y#adHje6_LyJQ(dNbppIo7U(3(g+tGd$dT zbZssvCDkLU2p@O3l0$AH4ssvTOP8z*rb_=l#p;67lt#X`hz`w8SliR&hMdgwA5~sU zX(cu4OVFyN=o{*}#c?Y=WZks7!#n?*U2-y!Z!NXbVk(FtV0^R4D34P+cEY3QFiu>p z$Z;pYAzEY?VwMzVcf-?DSHFAgO!cHty4J1w#RR+s{EUrQ(3KE^sM&P^0)PcwA*qXv7KAO zsc!#nm9mza|KQ;1hGvD%h3@H-&E|G)_``ooZ{Y) zrRP})PnE79q7%U@R5ZeEVIxmNA5sbYtFQZzY~=&1I~=hR?d2fl%|M&lfjMP^ncoPn zK>>xsaVHPzpdn&)CEi;gs&ed!n4A$))otPgDz_YP*3P*LtZGJ6Tw6mads6NNB12P+ zZ|(j(xtZu3a-t>Zp>)$Wcp(sQ29=~`kRQW`uI@Nw2bn;%E_;SVLe?LSzEo5;Q*u$-ZW=pdny$%m7(f&k%gU%@TS9BGx zB?mX63rbJw-P@8P^`T;)M}D2xtpRuB5!m}xxWn6-2bMFAoF*rz)<~+oXXpoxQkh<- z4j4lIHlO)$j+K&3cKi!->M>Pnp+!Vgj2RqTB&wBX z0bX72gjejKErm|@VIzzFbMS27Y|7Zc55b4}WGl((3M0K7{(`M}a{s7VyvKc(?~u2b z_PgAik^J51V0Wj3`!hbx&-GmT793AM>3^DwZCGvM4e2KRl0mT4v!e^m&!`Mh%PV`O z3}SEh9yz^()9Mmj;IqRRYhhOepPU32u9#Ss+~~BN1$9qB%Y82Tltt!q)iPlfwFpY_ull=u|7%SYrd&&ceSF{5_b3t2PGdG%p;_>n&k zAmh6ND{~dx-;B6pCdhG~(vjz5uEmUuc&YlOmt?-3rHkTb_cyDdu{3l$wOZhDN@~Cp z8en{B7jq}!_BL8_QSz{>Ykps8|8-x4_j~oU6cje2`SEj>$uanPt&O4jk6aX%Ib;opA7`FfRt$Q~C2gDxx;%IEoNcK2x0)!1W~x=e2L$Q$iUS+G8VH#%Qqv zBiTu$VpQ~AX&0vxeq}#1YU<5{JpyA=1_$N^%W#rH4Qj&nIH+x;Zt1B^(@uLun3;9G zuhcej1)wpX4kde zm@SzjWx%+Ex+(w$6{E)abVQ>kVE!S=zHYzKpTGto-5!|wCR)>3wyX><)1 z+J3sPHRxQ`MhQ|C1n^g(1KjJLzV%TT;Us#BUF6SkE-ub^;-(_;qc?}If`AEcxY z>`l$4?=*jK@QM&i!w{p=R&r>Sz3Y94eT%)DwT4Q7Zs;JK91k*2bk2%9qigymPD6RF zZ!#0@EA9%hh+IQCN1oDJ-3TAa#|eiK>UFt|v_*K~RIvkQ99*{7hHGRrhnn3j%U> z!%hHq?~X^t*Hpe^QI?Hjr7fmzKW&x3$0*#xo5R|vLA0*Jx~hhaxs6>`adGUg+3?H--{OASXZ1p+q;+FZyaEw#t zK*r=}kTsj=T%KyEBeD0eQ-KPhKZr{LtEVEq0sX`NkiJgQuVdek-By*;39IQ|X(FdG z+Ov(#p?txifki1@11*9F^yStO_X~1vuhLqntISj4vYhRxqyU z9%F^^4XlAJ;0^WRx&0-TQ);X4)tp*Tovfz7c#M!wh~EmIaVB14T{Gk1qs=qxv4*~& z0@_T4R)CzMle7*+@IW{mWnl8);6n9L&_0Vx^h)cP^`4GceRiPZb|i|^MowNZ$SW{i zLuC9HoQ~jy_|=PhF4 zs&rThse|!Ry{cAIFH!xZpgN0|jtHgQv3AfrXh_C8y*K=nWb=@<(!PvqdS{$ht9vSY 
zn|XSu>*O=w4{M}qoN74Neaudm2bNPUxW~U*1+xRirIpoIs9DtO_V8d`w_L2ws>4|Hcu z1dZk9JZp!&9Sooms^i>r^IwR48IyfdX2~Pobx*=hXl`f24gNWk$O$Dk-nylC3|&ff z1EW%R2Y-USdEPeML7d8zN`3c@@>=Pw-Ql#l(cXWx+)4+rk$V{~E-YD2DBXdQBne(q zN$#?<_1fOb$yU|mL5dH)>rG`dPU_F7l{+hK~lZ9JO694y2_3AeF&#T6p?$V z$Gp=bt3-bnxz|%s-UIgO2uI1NL8r{IdYhmXD4g0Um|IUW{)C@95JlZq9A36bUE~Ng zsIByt^TiQYqTxp`q#8XA)*7Ny5O13Bdb7iT%WICZF4&pj71e>AG#+Mef4PuiD9hBN zYE8AMvR%p~UZiV2hN|rznfF*ch|^p3L5Ux_)j;vzFbf(`W0aEnz>`V9P1Hd9+Z5-K znkbhCxG$(`df>^>22RHbry>pl52=$^kmI+dgE<=9|5v&LSE&u3(-F)ApW!U^QXjgg z=fPQ~knLUq4;q0Ie>UjD}_=WOw90)x+8)Ed`$Ic>1^_rKND(CQCnZ0$*Euj(L%& ze~`1oZjl`%!vDAfYkeRXWi)loE2#rcm0>SWvJaeeP3eI&81~m_p^f_^eeY9l9ensI zp*yXI9JZ%j(*2FxtUB7}OVqXaE0JqflUhl8a3LCl-na>FO!4%cO3~*^PcHcnDD^`- z6Npe@#{er`Km|Sv{ckO}47cfj&%jym2^e=idLmsw9Inxgh++hLh$obMz7Y}M_!nvW zrMq}X&zFy>Uuol%kWj`ZOAj5_Uz^*Ro%^kTaTBaRH}1IeoG;P!Z^L=efy0}~j5#Z3 z0xy{ZUv!*Y3=Q%VFphsfUJts8`w)a zblwU&Mc_0xu`fB3SZ8HWl5eGNJC~EsR!MR4PsGDrc+70Xt1gRp2hMC4`d(Y;2+txL zaBPVRz5zX*Z^7EL(&hdUe#KkPeVK-)s;3yky6p#FW{Q-I8nv@DMf?+dx-E)|C)}+c zmEsz_5`SQ1%7B-Yb+6I88wgwDE1ttOdR23Am#IOJ_e zz1Rbni3ePdQ=f=E&v^xp)Gr)%%G!NVE=W!j*b7!3-YL3EIjv41q6@l;(|NQbCP{OVSV& zA{8d|T%K2uc^H8QSaq0RM?mx2;1#l&&-ok0ZEJGg+A!A^)4Lo-*X0eUok4$iAWtZc zuY2jAo4ohoG#e4X^OOyyTt2KYNpzc9s7BdB*G! zpYEp*nvLttj^|AtbRX4-^?j!?%@9}r zk4zmkjU!{?|znFr_#PVOAq~DM(Hk}|CH-^$MqT9F}X4C5RIEw4)nRXdF7&8 zUl2ZHNk*+SpB4V3@aL6C`%^)FA}d#$9(8vNleftmel and mel 0: + nframes,ncoeff = numpy.shape(cepstra) + n = numpy.arange(ncoeff) + lift = 1 + (L/2.)*numpy.sin(numpy.pi*n/L) + return lift*cepstra + else: + # values of L <= 0, do nothing + return cepstra + +def delta(feat, N): + """Compute delta features from a feature vector sequence. + + :param feat: A numpy array of size (NUMFRAMES by number of features) containing features. Each row holds 1 feature vector. + :param N: For each frame, calculate delta features based on preceding and following N frames + :returns: A numpy array of size (NUMFRAMES by number of features) containing delta features. 
Each row holds 1 delta feature vector. + """ + if N < 1: + raise ValueError('N must be an integer >= 1') + NUMFRAMES = len(feat) + denominator = 2 * sum([i**2 for i in range(1, N+1)]) + delta_feat = numpy.empty_like(feat) + padded = numpy.pad(feat, ((N, N), (0, 0)), mode='edge') # padded version of feat + for t in range(NUMFRAMES): + delta_feat[t] = numpy.dot(numpy.arange(-N, N+1), padded[t : t+2*N+1]) / denominator # [t : t+2*N+1] == [(N+t)-N : (N+t)+N+1] + return delta_feat diff --git a/third_party/python_kaldi_features/python_speech_features/base_orig.py b/third_party/python_kaldi_features/python_speech_features/base_orig.py new file mode 100644 index 000000000..3efaec190 --- /dev/null +++ b/third_party/python_kaldi_features/python_speech_features/base_orig.py @@ -0,0 +1,190 @@ +# calculate filterbank features. Provides e.g. fbank and mfcc features for use in ASR applications +# Author: James Lyons 2012 +from __future__ import division +import numpy +from python_speech_features import sigproc +from scipy.fftpack import dct + +def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13, + nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True, + winfunc=lambda x:numpy.ones((x,))): + """Compute MFCC features from an audio signal. + + :param signal: the audio signal from which to compute features. Should be an N*1 array + :param samplerate: the samplerate of the signal we are working with. + :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds) + :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds) + :param numcep: the number of cepstrum to return, default 13 + :param nfilt: the number of filters in the filterbank, default 26. + :param nfft: the FFT size. Default is 512. + :param lowfreq: lowest band edge of mel filters. In Hz, default is 0. + :param highfreq: highest band edge of mel filters. 
In Hz, default is samplerate/2 + :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97. + :param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22. + :param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy. + :param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming + :returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector. + """ + feat,energy = fbank(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,preemph,winfunc) + feat = numpy.log(feat) + feat = dct(feat, type=2, axis=1, norm='ortho')[:,:numcep] + feat = lifter(feat,ceplifter) + if appendEnergy: feat[:,0] = numpy.log(energy) # replace first cepstral coefficient with log of frame energy + return feat + +def fbank(signal,samplerate=16000,winlen=0.025,winstep=0.01, + nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97, + winfunc=lambda x:numpy.ones((x,))): + """Compute Mel-filterbank energy features from an audio signal. + + :param signal: the audio signal from which to compute features. Should be an N*1 array + :param samplerate: the samplerate of the signal we are working with. + :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds) + :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds) + :param nfilt: the number of filters in the filterbank, default 26. + :param nfft: the FFT size. Default is 512. + :param lowfreq: lowest band edge of mel filters. In Hz, default is 0. + :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2 + :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97. 
+ :param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming + :returns: 2 values. The first is a numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector. The + second return value is the energy in each frame (total energy, unwindowed) + """ + highfreq= highfreq or samplerate/2 + signal = sigproc.preemphasis(signal,preemph) + frames = sigproc.framesig(signal, winlen*samplerate, winstep*samplerate, winfunc) + pspec = sigproc.powspec(frames,nfft) + energy = numpy.sum(pspec,1) # this stores the total energy in each frame + energy = numpy.where(energy == 0,numpy.finfo(float).eps,energy) # if energy is zero, we get problems with log + + fb = get_filterbanks(nfilt,nfft,samplerate,lowfreq,highfreq) + feat = numpy.dot(pspec,fb.T) # compute the filterbank energies + feat = numpy.where(feat == 0,numpy.finfo(float).eps,feat) # if feat is zero, we get problems with log + + return feat,energy + +def logfbank(signal,samplerate=16000,winlen=0.025,winstep=0.01, + nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97): + """Compute log Mel-filterbank energy features from an audio signal. + + :param signal: the audio signal from which to compute features. Should be an N*1 array + :param samplerate: the samplerate of the signal we are working with. + :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds) + :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds) + :param nfilt: the number of filters in the filterbank, default 26. + :param nfft: the FFT size. Default is 512. + :param lowfreq: lowest band edge of mel filters. In Hz, default is 0. + :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2 + :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97. 
+ :returns: A numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector. + """ + feat,energy = fbank(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,preemph) + return numpy.log(feat) + +def ssc(signal,samplerate=16000,winlen=0.025,winstep=0.01, + nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97, + winfunc=lambda x:numpy.ones((x,))): + """Compute Spectral Subband Centroid features from an audio signal. + + :param signal: the audio signal from which to compute features. Should be an N*1 array + :param samplerate: the samplerate of the signal we are working with. + :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds) + :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds) + :param nfilt: the number of filters in the filterbank, default 26. + :param nfft: the FFT size. Default is 512. + :param lowfreq: lowest band edge of mel filters. In Hz, default is 0. + :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2 + :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97. + :param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming + :returns: A numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector. 
+ """ + highfreq= highfreq or samplerate/2 + signal = sigproc.preemphasis(signal,preemph) + frames = sigproc.framesig(signal, winlen*samplerate, winstep*samplerate, winfunc) + pspec = sigproc.powspec(frames,nfft) + pspec = numpy.where(pspec == 0,numpy.finfo(float).eps,pspec) # if things are all zeros we get problems + + fb = get_filterbanks(nfilt,nfft,samplerate,lowfreq,highfreq) + feat = numpy.dot(pspec,fb.T) # compute the filterbank energies + R = numpy.tile(numpy.linspace(1,samplerate/2,numpy.size(pspec,1)),(numpy.size(pspec,0),1)) + + return numpy.dot(pspec*R,fb.T) / feat + +def hz2mel(hz): + """Convert a value in Hertz to Mels + + :param hz: a value in Hz. This can also be a numpy array, conversion proceeds element-wise. + :returns: a value in Mels. If an array was passed in, an identical sized array is returned. + """ + return 2595 * numpy.log10(1+hz/700.) + +def mel2hz(mel): + """Convert a value in Mels to Hertz + + :param mel: a value in Mels. This can also be a numpy array, conversion proceeds element-wise. + :returns: a value in Hertz. If an array was passed in, an identical sized array is returned. + """ + return 700*(10**(mel/2595.0)-1) + +def get_filterbanks(nfilt=20,nfft=512,samplerate=16000,lowfreq=0,highfreq=None): + """Compute a Mel-filterbank. The filters are stored in the rows, the columns correspond + to fft bins. The filters are returned as an array of size nfilt * (nfft/2 + 1) + + :param nfilt: the number of filters in the filterbank, default 20. + :param nfft: the FFT size. Default is 512. + :param samplerate: the samplerate of the signal we are working with. Affects mel spacing. + :param lowfreq: lowest band edge of mel filters, default 0 Hz + :param highfreq: highest band edge of mel filters, default samplerate/2 + :returns: A numpy array of size nfilt * (nfft/2 + 1) containing filterbank. Each row holds 1 filter. 
def lifter(cepstra, L=22):
    """Apply a sinusoidal cepstral lifter to a matrix of cepstra.

    Each coefficient ``n`` is scaled by ``1 + (L/2) * sin(pi * n / L)``, which
    increases the magnitude of the higher-order DCT coefficients.

    :param cepstra: the matrix of mel-cepstra, numframes * numcep in size. A single
        1-D vector of numcep coefficients is also accepted.
    :param L: the liftering coefficient to use. Default is 22. L <= 0 disables lifter.
    :returns: the liftered cepstra, same shape as the input. When L <= 0 the input
        object itself is returned unchanged.
    """
    if L <= 0:
        # values of L <= 0, do nothing
        return cepstra
    # The weights depend only on the coefficient index (last axis), so they
    # broadcast across every frame.
    ncoeff = numpy.shape(cepstra)[-1]
    n = numpy.arange(ncoeff)
    lift = 1 + (L / 2.) * numpy.sin(numpy.pi * n / L)
    return lift * cepstra
def framesig(sig, frame_len, frame_step, dither=1.0, preemph=0.97, remove_dc_offset=True, wintype='hamming', stride_trick=True):
    """Frame a signal into overlapping frames and apply per-frame conditioning.

    Only complete frames are produced: trailing samples that do not fill a whole
    frame are dropped (no zero padding). Each frame is dithered, optionally has its
    mean removed, is pre-emphasised and finally multiplied by the analysis window.

    :param sig: the audio signal to frame (1-D sequence of samples).
    :param frame_len: length of each frame measured in samples (rounded half up).
    :param frame_step: number of samples after the start of the previous frame that the next frame should begin (rounded half up).
    :param dither: scale of the Gaussian noise added to every frame. 0 adds no noise.
    :param preemph: preemphasis coefficient applied to every frame. 0 is no filter.
    :param remove_dc_offset: if true, subtract each frame's mean before preemphasis.
    :param wintype: 'povey' selects the Povey window (a raised cosine to the power 0.85); any other value selects the Hamming window.
    :param stride_trick: use stride trick to compute the rolling window faster.
    :returns: 2 values. The first is the array of windowed frames, size NUMFRAMES by frame_len.
        The second holds the same frames after dither/DC removal but before preemphasis
        and windowing. Both have 0 rows when the signal is shorter than one frame.
    """
    sig = numpy.asarray(sig)
    slen = len(sig)
    frame_len = int(round_half_up(frame_len))
    frame_step = int(round_half_up(frame_step))

    # check kaldi/src/feat/feature-window.h
    # A signal shorter than one frame yields no frames; previously this case
    # crashed with a negative-dimension / out-of-range indexing error.
    if slen < frame_len:
        return (numpy.zeros((0, frame_len), dtype=numpy.float32),
                numpy.zeros((0, frame_len)))
    numframes = 1 + (slen - frame_len) // frame_step

    # Keep only the samples that are covered by complete frames.
    padsignal = sig[:(numframes - 1) * frame_step + frame_len]

    # String comparison must use ==; `is` only worked by interning accident.
    if wintype == 'povey':
        angle_step = 2 * numpy.pi / (frame_len - 1)
        win = (0.5 - 0.5 * numpy.cos(angle_step * numpy.arange(frame_len))) ** 0.85
    else:  # the hamming window
        win = numpy.hamming(frame_len)

    if stride_trick:
        frames = rolling_window(padsignal, window=frame_len, step=frame_step)
    else:
        frame_starts = numpy.arange(0, numframes * frame_step, frame_step)
        indices = frame_starts[:, None] + numpy.arange(0, frame_len)
        frames = padsignal[indices]

    # astype() copies, so the in-place helpers below never touch the caller's signal
    # (the strided view aliases it).
    frames = frames.astype(numpy.float32)
    raw_frames = numpy.zeros(frames.shape)
    for frm in range(frames.shape[0]):
        frames[frm, :] = do_dither(frames[frm, :], dither)  # dither
        if remove_dc_offset:
            frames[frm, :] = do_remove_dc_offset(frames[frm, :])  # remove dc offset
        raw_frames[frm, :] = frames[frm, :]
        frames[frm, :] = do_preemphasis(frames[frm, :], preemph)  # preemphasize

    # `win` has frame_len entries and broadcasts over the frame axis.
    return frames * win, raw_frames
def magspec(frames, NFFT):
    """Compute the magnitude spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1).

    :param frames: the array of frames. Each row is a frame.
    :param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded.
        If NFFT < frame_len, each frame is truncated to NFFT samples and a warning is logged.
    :returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the magnitude spectrum of the corresponding frame.
    """
    frame_len = numpy.shape(frames)[1]
    if frame_len > NFFT:
        # logging.warn is a deprecated alias; logging.warning is the supported call.
        logging.warning(
            'frame length (%d) is greater than FFT size (%d), frame will be truncated. Increase NFFT to avoid.',
            frame_len, NFFT)
    complex_spec = numpy.fft.rfft(frames, NFFT)
    return numpy.absolute(complex_spec)
def do_preemphasis(signal, coeff=0.97):
    """Perform preemphasis on the input signal.

    Every sample has ``coeff`` times its predecessor subtracted. The first sample
    has no predecessor, so it is scaled by ``(1 - coeff)`` instead.

    :param signal: The signal to filter (1-D array of samples).
    :param coeff: The preemphasis coefficient. 0 is no filter, default is 0.97.
    :returns: the filtered signal as a new array of the same length.
    """
    signal = numpy.asarray(signal)
    if signal.size == 0:
        # Nothing to filter; indexing signal[0] below would raise IndexError.
        return signal.copy()
    return numpy.append((1 - coeff) * signal[0], signal[1:] - coeff * signal[:-1])
+# Author: James Lyons 2012 +import decimal + +import numpy +import math +import logging + + +def round_half_up(number): + return int(decimal.Decimal(number).quantize(decimal.Decimal('1'), rounding=decimal.ROUND_HALF_UP)) + + +def rolling_window(a, window, step=1): + # http://ellisvalentiner.com/post/2017-03-21-np-strides-trick + shape = a.shape[:-1] + (a.shape[-1] - window + 1, window) + strides = a.strides + (a.strides[-1],) + return numpy.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)[::step] + + +def framesig(sig, frame_len, frame_step, winfunc=lambda x: numpy.ones((x,)), stride_trick=True): + """Frame a signal into overlapping frames. + + :param sig: the audio signal to frame. + :param frame_len: length of each frame measured in samples. + :param frame_step: number of samples after the start of the previous frame that the next frame should begin. + :param winfunc: the analysis window to apply to each frame. By default no window is applied. + :param stride_trick: use stride trick to compute the rolling window and window multiplication faster + :returns: an array of frames. Size is NUMFRAMES by frame_len. 
+ """ + slen = len(sig) + frame_len = int(round_half_up(frame_len)) + frame_step = int(round_half_up(frame_step)) + if slen <= frame_len: + numframes = 1 + else: + numframes = 1 + int(math.ceil((1.0 * slen - frame_len) / frame_step)) + + padlen = int((numframes - 1) * frame_step + frame_len) + + zeros = numpy.zeros((padlen - slen,)) + padsignal = numpy.concatenate((sig, zeros)) + if stride_trick: + win = winfunc(frame_len) + frames = rolling_window(padsignal, window=frame_len, step=frame_step) + else: + indices = numpy.tile(numpy.arange(0, frame_len), (numframes, 1)) + numpy.tile( + numpy.arange(0, numframes * frame_step, frame_step), (frame_len, 1)).T + indices = numpy.array(indices, dtype=numpy.int32) + frames = padsignal[indices] + win = numpy.tile(winfunc(frame_len), (numframes, 1)) + + return frames * win + + +def deframesig(frames, siglen, frame_len, frame_step, winfunc=lambda x: numpy.ones((x,))): + """Does overlap-add procedure to undo the action of framesig. + + :param frames: the array of frames. + :param siglen: the length of the desired signal, use 0 if unknown. Output will be truncated to siglen samples. + :param frame_len: length of each frame measured in samples. + :param frame_step: number of samples after the start of the previous frame that the next frame should begin. + :param winfunc: the analysis window to apply to each frame. By default no window is applied. + :returns: a 1-D signal. 
def magspec(frames, NFFT):
    """Compute the magnitude spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1).

    :param frames: the array of frames. Each row is a frame.
    :param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded.
        If NFFT < frame_len, each frame is truncated to NFFT samples and a warning is logged.
    :returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the magnitude spectrum of the corresponding frame.
    """
    frame_len = numpy.shape(frames)[1]
    if frame_len > NFFT:
        # logging.warn is a deprecated alias; logging.warning is the supported call.
        logging.warning(
            'frame length (%d) is greater than FFT size (%d), frame will be truncated. Increase NFFT to avoid.',
            frame_len, NFFT)
    complex_spec = numpy.fft.rfft(frames, NFFT)
    return numpy.absolute(complex_spec)
+ :returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the power spectrum of the corresponding frame. + """ + return 1.0 / NFFT * numpy.square(magspec(frames, NFFT)) + + +def logpowspec(frames, NFFT, norm=1): + """Compute the log power spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1). + + :param frames: the array of frames. Each row is a frame. + :param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded. + :param norm: If norm=1, the log power spectrum is normalised so that the max value (across all frames) is 0. + :returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the log power spectrum of the corresponding frame. + """ + ps = powspec(frames, NFFT); + ps[ps <= 1e-30] = 1e-30 + lps = 10 * numpy.log10(ps) + if norm: + return lps - numpy.max(lps) + else: + return lps + + +def preemphasis(signal, coeff=0.95): + """perform preemphasis on the input signal. + + :param signal: The signal to filter. + :param coeff: The preemphasis coefficient. 0 is no filter, default is 0.95. + :returns: the filtered signal. 
class test_case(unittest.TestCase):
    """Tests for the framing helpers in ``python_speech_features.sigproc``."""

    def test_frame_sig(self):
        """Indexed and strided framing must yield identical frames."""
        n = 10000124
        frame_len = 37
        frame_step = 13
        x = np.random.rand(n)
        # dither=0.0: framesig adds random noise by default, which would make the
        # two calls incomparable.
        t0 = time.time()
        frames_old, raw_old = sigproc.framesig(
            x, frame_len=frame_len, frame_step=frame_step, dither=0.0, stride_trick=False)
        t1 = time.time()
        frames_new, raw_new = sigproc.framesig(
            x, frame_len=frame_len, frame_step=frame_step, dither=0.0, stride_trick=True)
        t_new = time.time() - t1
        t_old = t1 - t0
        # framesig returns (windowed frames, raw frames); compare both outputs.
        self.assertTupleEqual(frames_old.shape, frames_new.shape)
        np.testing.assert_array_equal(frames_old, frames_new)
        self.assertTupleEqual(raw_old.shape, raw_new.shape)
        np.testing.assert_array_equal(raw_old, raw_new)
        # Wall-clock time is reported only; asserting on it makes the test flaky.
        print('stride_trick run time %3.2f sec, indexed run time %3.2f sec' % (t_new, t_old))

    def test_rolling(self):
        """rolling_window returns every ``step``-th window of length ``window``."""
        x = np.arange(10)
        y = sigproc.rolling_window(x, window=4, step=3)
        y_expected = np.array([[0, 1, 2, 3],
                               [3, 4, 5, 6],
                               [6, 7, 8, 9]]
                              )
        np.testing.assert_array_equal(y, y_expected)