From 750e0bdddde3c79841bdf70a926b89dc64c0ebbf Mon Sep 17 00:00:00 2001 From: YangZhou Date: Mon, 19 Sep 2022 06:49:15 +0000 Subject: [PATCH] fix coding style && mv audio docs --- audio/docs/Makefile | 19 -- audio/docs/README.md | 24 -- audio/docs/images/paddle.png | Bin 5043 -> 0 bytes audio/docs/make.bat | 35 --- audio/docs/source/_static/custom.css | 5 - audio/docs/source/_templates/module.rst_t | 9 - audio/docs/source/_templates/package.rst_t | 57 ---- audio/docs/source/_templates/toc.rst_t | 8 - audio/docs/source/conf.py | 181 ------------- audio/docs/source/index.rst | 22 -- audio/paddleaudio/__init__.py | 2 +- audio/paddleaudio/backends/__init__.py | 7 +- .../paddleaudio/backends/soundfile_backend.py | 82 +++--- audio/paddleaudio/backends/sox_io_backend.py | 77 +++--- audio/paddleaudio/backends/utils.py | 6 +- audio/paddleaudio/utils/__init__.py | 4 +- audio/tests/backends/soundfile/common.py | 6 +- audio/tests/backends/soundfile/info_test.py | 79 +++--- audio/tests/backends/soundfile/load_test.py | 185 +++++++------ audio/tests/backends/soundfile/save_test.py | 136 +++++----- audio/tests/common_utils/__init__.py | 23 +- audio/tests/common_utils/wav_utils.py | 38 +-- .../api}/paddleaudio.backends.common.rst | 0 .../api}/paddleaudio.backends.no_backend.rst | 0 .../source/api}/paddleaudio.backends.rst | 0 ...paddleaudio.backends.soundfile_backend.rst | 0 .../paddleaudio.backends.sox_io_backend.rst | 0 .../api}/paddleaudio.backends.utils.rst | 0 .../api}/paddleaudio.compliance.kaldi.rst | 0 .../api}/paddleaudio.compliance.librosa.rst | 0 .../source/api}/paddleaudio.compliance.rst | 0 .../api}/paddleaudio.datasets.dataset.rst | 0 .../api}/paddleaudio.datasets.esc50.rst | 0 .../api}/paddleaudio.datasets.gtzan.rst | 0 .../api}/paddleaudio.datasets.hey_snips.rst | 0 .../api}/paddleaudio.datasets.rirs_noises.rst | 0 .../source/api}/paddleaudio.datasets.rst | 0 .../source/api}/paddleaudio.datasets.tess.rst | 0 .../api}/paddleaudio.datasets.urban_sound.rst | 0 .../api}/paddleaudio.datasets.voxceleb.rst | 0 .../api}/paddleaudio.features.layers.rst | 0 .../source/api}/paddleaudio.features.rst | 0 .../paddleaudio.functional.functional.rst | 0 .../source/api}/paddleaudio.functional.rst | 0 .../api}/paddleaudio.functional.window.rst | 0 .../source/api}/paddleaudio.io.rst | 0 .../source/api}/paddleaudio.metric.eer.rst | 0 .../source/api}/paddleaudio.metric.rst | 0 .../source/api}/paddleaudio.rst | 0 .../source/api}/paddleaudio.sox_effects.rst | 0 examples/voxceleb/sv0/local/data_prepare.py | 2 +- .../make_rirs_noise_csv_dataset_from_json.py | 2 +- .../local/make_vox_csv_dataset_from_json.py | 2 +- paddlespeech/audio/__init__.py | 9 - paddlespeech/audio/_extension.py | 12 +- paddlespeech/audio/backends/sox_io_backend.py | 86 +++--- paddlespeech/audio/backends/utils.py | 6 +- paddlespeech/audio/sox_effects/__init__.py | 15 +- paddlespeech/audio/sox_effects/sox_effects.py | 42 +-- paddlespeech/audio/utils/sox_utils.py | 8 +- paddlespeech/cli/kws/infer.py | 6 +- paddlespeech/cli/vector/infer.py | 4 +- paddlespeech/cls/exps/panns/deploy/predict.py | 3 +- paddlespeech/cls/exps/panns/export_model.py | 2 +- paddlespeech/cls/exps/panns/predict.py | 2 +- paddlespeech/cls/exps/panns/train.py | 4 +- paddlespeech/cls/models/panns/panns.py | 2 +- paddlespeech/kws/exps/mdtc/train.py | 4 +- .../frontend/featurizer/audio_featurizer.py | 3 +- paddlespeech/s2t/models/u2/u2.py | 2 +- paddlespeech/s2t/models/u2_st/u2_st.py | 2 +- .../engine/vector/python/vector_engine.py | 2 +- paddlespeech/server/util.py | 2 +- .../vector/exps/ecapa_tdnn/extract_emb.py | 4 +- paddlespeech/vector/exps/ecapa_tdnn/test.py | 2 +- paddlespeech/vector/exps/ecapa_tdnn/train.py | 2 +- paddlespeech/vector/io/dataset.py | 3 +- paddlespeech/vector/io/dataset_from_json.py | 1 - tests/unit/audio/backends/sox_io/load_test.py | 39 +-- tests/unit/audio/backends/sox_io/save_test.py | 104 ++++---- .../unit/audio/backends/sox_io/smoke_test.py | 168 ++++++------ .../audio/backends/sox_io/sox_effect_test.py | 249 ++++++++++-------- tests/unit/audio/features/base.py | 1 - tests/unit/audio/features/test_istft.py | 2 +- tests/unit/audio/features/test_kaldi_feat.py | 10 +- .../audio/features/test_log_melspectrogram.py | 2 +- tests/unit/audio/features/test_spectrogram.py | 2 +- tests/unit/audio/features/test_stft.py | 2 +- tests/unit/common_utils/__init__.py | 28 +- tests/unit/common_utils/case_utils.py | 13 +- tests/unit/common_utils/wav_utils.py | 38 +-- 91 files changed, 781 insertions(+), 1116 deletions(-) delete mode 100644 audio/docs/Makefile delete mode 100644 audio/docs/README.md delete mode 100644 audio/docs/images/paddle.png delete mode 100644 audio/docs/make.bat delete mode 100644 audio/docs/source/_static/custom.css delete mode 100644 audio/docs/source/_templates/module.rst_t delete mode 100644 audio/docs/source/_templates/package.rst_t delete mode 100644 audio/docs/source/_templates/toc.rst_t delete mode 100644 audio/docs/source/conf.py delete mode 100644 audio/docs/source/index.rst rename {audio/docs/source/source => docs/source/api}/paddleaudio.backends.common.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.backends.no_backend.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.backends.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.backends.soundfile_backend.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.backends.sox_io_backend.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.backends.utils.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.compliance.kaldi.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.compliance.librosa.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.compliance.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.datasets.dataset.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.datasets.esc50.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.datasets.gtzan.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.datasets.hey_snips.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.datasets.rirs_noises.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.datasets.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.datasets.tess.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.datasets.urban_sound.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.datasets.voxceleb.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.features.layers.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.features.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.functional.functional.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.functional.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.functional.window.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.io.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.metric.eer.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.metric.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.rst (100%) rename {audio/docs/source/source => docs/source/api}/paddleaudio.sox_effects.rst (100%) diff --git a/audio/docs/Makefile b/audio/docs/Makefile deleted file mode 100644 index 69fe55ecf..000000000 --- a/audio/docs/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line. -SPHINXOPTS = -SPHINXBUILD = sphinx-build -SOURCEDIR = source -BUILDDIR = build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file diff --git a/audio/docs/README.md b/audio/docs/README.md deleted file mode 100644 index 20626f52b..000000000 --- a/audio/docs/README.md +++ /dev/null @@ -1,24 +0,0 @@ -# Build docs for PaddleAudio - -Execute the following steps in **current directory**. - -## 1. Install - -`pip install Sphinx sphinx_rtd_theme` - - -## 2. Generate API docs - -Generate API docs from doc string. - -`sphinx-apidoc -fMeT -o source ../paddleaudio ../paddleaudio/utils --templatedir source/_templates` - - -## 3. Build - -`sphinx-build source _html` - - -## 4. Preview - -Open `_html/index.html` for page preview. diff --git a/audio/docs/images/paddle.png b/audio/docs/images/paddle.png deleted file mode 100644 index bc1135abfab7aa48f29392da4bca614f688314af..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5043 zcmV;k6HM%hP)Px|ZAnByRCodHoe8iVMY+cbn;1d107fAM5|&a3LJ*P7Ld`R*M!^N#5=2ErP!zR< zA_!7S^daytPsEDif&wLr7zGqC2&iDt(hzpCh!7y4kbpu=uD}1`&Y@4AUS`hpOwPT| zSM|^Ie0%py&-8Tnxf!v>Fr=Zw{S0Rd&xAY^R)hjanbHz;NnVFVESY<8$iOrM7j>K$_5K zXo$4HaOzl3MCEPJ8B=PM^~U?QW)Un|Tr_|2ZIQ zL`@4+*8+U_enJoG)_deg@tDHFZ@+|05huw)o#i_Y{lQIz@k73U>l~eN&sZ4{+J!;pwTdT8IiM z_F949&r`7hpB($0k!@nh|Hb)Yuq0$_+2lSYrfvj1J?$GJ^P#7lcgi38m!h~wraX95 z9km000k+RrvQ($KX(pv|X8BQlPq28gQ|Y_f@CUXbZSq&zZ8rRaE!k=CFG2p0#ovtc zF&0nXtUj87K<#7GZ}D%(v~|pLUB~(4lXqRxv`tec;myE5L5TDb?<9;~0sa**eiqqm z@F9?QM}{+&$;8*GAViNMblos5H^_7|gh-o&&jtIKam$wRcT6Z~N!f)(@*~!0@Vq1D zAAEk$-3_>Yv2Zo~jjf?FyXbtF`c~36mbYUe(_2dGg8v3p&Y_pWPjw063=MP602Ah_$2-)13St~P*iE3jt?KMrp%|Y2;+*zAzZriCg*OmmVaA9Zi8}?I zD*}4D;kU=*ymfy(23(!tCh~b>-XQqe2wh!!O)i-9 z3@2OT>j{(TE8|D^M%Qehp|8o5*4xYAeFy(X4c2Fs!Ox0!QJ9yVvPzrRiR>sfe`6cy0tGVvMC|s0y4e1LNk&bDWDtsh3};u2G$AoH-Z$Tw{>kq!l55w z9#|w1e0|T;0ADNON4og=qQ{qyJ!{ZTLqU`Dji_OyW@`%(HkxR%Db(Q$`7fkQ7Mj8g z(ZwN$7>)vaSa?0>`+>v3*T91y4}CHeE0f`9&m7KzlML9u2H1IP=rRd>1nBQt#AZoR zX?>fz-X2-HNsq+4NkJX-7kIfxJLrDJ;X^FV?2hgwY3z;%uGvM{U1WDGbtp|<>}tm5 zd@R_&P97t(V`)}$Thk}!F9Fkm z9&YMz-nj>4Yl3;eCf!dOBGyX;)l)e{*Qd;gKzh6Rp14^a9}LArNz<~fo${Hzj)#PN z6qVsm!>`cWOEYNpH$>4eQGH@u3S6`pragJk_J>T%3cCW$WZfRDAn zQQ-H$N3D)!yFgh(!y|#6=MUl8-%l-b_{Rbl)$+jWr~_GS4`NT^$sI-sPKdb`&|AP{03F! z@Ef{G|KW=ZUyZJ{sSq7qMFSG>Wk8GMKGGC?Wkk^b^8|hperveH@oJAwjI9T3uAyX+ z3gUA-xX%;Z*|Fpw{eyw;p?@o~?FRKU?DX9aUoDbr5?zwn9E658hzF)73FdHu?}-W! zpz#hfHTbCk8;O>H;G(9>Z(bh0zpo%3nw{5AQ9a^q0+xH?o-TjLw8f|wT87bkisls4 zmrtNh?;~%=8mv#~TI=4`weW4By8~I}L-VdSAZX}~fq39=RVIe-4HP6m6YZqY)pvrv zWkTfLzGVCB<($AL2ZG<+i_bhZfw&q{x7MIIUJ=O6Cz^v`$2>XY)xn=0+8VOODA=(S z4+k@1P8@Q*!3u5Z%3HUf9zjQfrH+J5mUsJ7(0KtN!8GLak*i~s+P+jEHLohkPW2@W zzmkrHU(Jp^;)(kWC5vnV@C}`8)2Crv4)*p|v^Er!cZ+&#L?9Ad{4X|r;b;|A2}TjLZgl>Z4KYA%}$JC>y5>^ z>UaXs=gEcOI7p?gq%O_BVQ>Jt30NVb}y^lo2DkqrJd6jej`=ngv$ilw$M+5UPt zCx=+u1=rttjCqi+k>Dp;xbGz|*%1G{KyI06Hia#K>)X`qSebs&qo(n-*xfM&@2gXl zL&1}LK?nJl@+w^v(J3qbIu5oEIQg|HqtP_XiG2?g-rHuzh?U7f!;#SL{sH83Vt#s% zf5v0;E3Q-P0Yq-cq4O4p-v6U>t%F|&OT*7oi9b`p$T4-y=WzI4FoPHmrgmP)AC=n& z#Hg~TAc4n#Ya$6~NY}B;p7UC^FZmji_(y=pL5RxklJ5#nu-yXz+Z_p=$sWB0@LTI! zUCt9Q@a=kIX;SP9pkwz2=c(RUx8duPV2fU=(0*g#U<3}{2DH7uEKQ3zxFLHAN{`qY zvrdTE#Kg*96D8UZI|alp^yTF60D3JVhw&6-7ozst3Vls-915cQ4MPr*X)SCFI1uFd z$w3ucXMArf{g0BrBCi%v*28KpCld!**T#j zT2!|=KtGB38^`iQgS`vXMU%6P38*!r*iYV5=k*j0?YGs;NRW>0OXyCE=vN`WIBU05 zN0J;1^!9eqyX`nA?dMV)0nzgGU zr>=c=1eqQQv;=v3`x4~?Bl@YuTD09MYqwNKeCkG9=1KNgUmWx|28%s*Vr%xy*(rTK z>)?~q0ZPt6N2HFg?-IQtFX!;~)Xp9L9)`CK<5&HBT5c3$yOWBK}!X)jOTOyW;1qHh94V>YCqT}hWv@LZm{9T04*J1k;u$9Aa9@bRiUgT25_fO#;;*ure=|N^b z^m}#tXX>DbbsD1Lv8nnWriql!3pwW*itdpvV*hxe?$-kseXmTGI2;AO30AnAM|j@F zW$a!GHc5c@dvZ9kOuZ|yCDDC7$(rolDhXb~DaozY?Mo$6FJ(%-cQr4JQPw0xk|}-s z>;UvTtRm#?Sfwg#7?aFoPeRG3+npcvMlN}ZvDNmD?MrzaR;@$hjwi;HNSyZerHI~8 z6Z@{$FBrzmOHmp8+=HKY(9Sz@)MtXQ3#gQcH<|!?>#l1B(l^|}7O(3R`zv7D6E|9> zIgUEr*=0}_gGBUX3Qe~n9%yf0sw$77e#vR35(`kr_NAi!`&FO%jsd=5+8e9Po4DJ7 zW5L-#v!3(7vmm3=Zw(yjrz0+!rMe*qzOj&h(Pa#8E?B3^_UZ6VFs-wMC^jqC;jfMp zlGS&Bc|l$IA6-W-b;qEhGhCI0r&+q%iMZN%qwgo7)I=Kg`Z1)7^v7VvsB#=*9ZP>7 zcjmp(Oa0zmTE4sm}^sg`FXzwl+a$67ho=92hCwCbR0|u3w zkX6ueJZPv^k-B^s_;%;tj!bK;EutLlSW-k!LscJ5f5BS?{}OQ|SnYp(sc6;6IktUC zinRPfCJCGvNurLwz7$EJGV!Ydo~P43W0Btq7K2rwZqnlF z5?@gA(>K^SpoM5H+P@9>zRR@9`yKdO$gwbC_ayPOApKjgp~Jx>c$3h%k)sy1ZwGf{ z*L%C+Y%TBr2+`RD>MJ7IDWQasX*jV>UnQL|8R|AIQvcO0KFL>I5<3(4{>HkUw>JL1 z)#VF*t~XX@i@r8o$Np3TDlO7p*jbsap5jI6?HFtuFw(Xddw*R(y*82e!VPAipXx8D zEt~oS{i*(t$s%e)@8Bt6!*#qihi+5_KXXyq59GNo)<;io(-!s8v0^u<{`!)J z6MaKGN!~y*dqooZYD7My#at6jsoyfBD-llqYQ#HH&!lz4-(C3K1$-wQzEfJLV{wD- zi+ODSTtDtu@a;CpT0?(4MC0V)PWm=^aF|u{4(g-tsY8MOifwYj?=o-{j^7M^ohLw{ zj)lY6+v<7}(37!mh=)O*?MuV@ZIxXNuFb=gcLvjep2S0ywEO;BK*PR?;EFmbLL6<+ z+n-n({hSN#^TqYes&wgdgYE!R7b9eI0D-URGG2`g@}5cfj|EcDZ>wHS-U9)hVJV_* zHlt#r!N#E18RArJqo1`u>T*V&+UKWP=!d{U^d4~O#d!GH2c0V&dLP8+YzIHe!si&2 zwgp>)mH7El)W}r8L9>0yY>=khv_RPwpi^xDn(a$v8?v!)TEJ`pI#siM$!w6O+_XU1 z7NApU`_dC2MD2}^W#gr?C)r~{2_qY}+m}+cVdJO57T^XvE^>oDjeNrjQz-13eDv3s z?v3PqUSY?LLDK>$T7Vm~*}jybAsZh}3z#iH$9iYvsjpw7YPzwnh@5VT{=kUcA*{7q51te%-b*Sr&gguX`O9pAn#SkP6G`!El^nt{0HuA9HmP~`H}zt002ov JPDHLkV1n`1NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% - -:end -popd diff --git a/audio/docs/source/_static/custom.css b/audio/docs/source/_static/custom.css deleted file mode 100644 index bb65c51a9..000000000 --- a/audio/docs/source/_static/custom.css +++ /dev/null @@ -1,5 +0,0 @@ -.wy-nav-content { - max-width: 80%; -} -.table table{ background:#b9b9b9} -.table table td{ background:#FFF; } diff --git a/audio/docs/source/_templates/module.rst_t b/audio/docs/source/_templates/module.rst_t deleted file mode 100644 index d9a50e6b9..000000000 --- a/audio/docs/source/_templates/module.rst_t +++ /dev/null @@ -1,9 +0,0 @@ -{%- if show_headings %} -{{- basename | e | heading }} - -{% endif -%} -.. automodule:: {{ qualname }} -{%- for option in automodule_options %} - :{{ option }}: -{%- endfor %} - diff --git a/audio/docs/source/_templates/package.rst_t b/audio/docs/source/_templates/package.rst_t deleted file mode 100644 index 7239c11b7..000000000 --- a/audio/docs/source/_templates/package.rst_t +++ /dev/null @@ -1,57 +0,0 @@ -{%- macro automodule(modname, options) -%} -.. automodule:: {{ modname }} -{%- for option in options %} - :{{ option }}: -{%- endfor %} -{%- endmacro %} - -{%- macro toctree(docnames) -%} -.. toctree:: - :maxdepth: {{ maxdepth }} -{% for docname in docnames %} - {{ docname }} -{%- endfor %} -{%- endmacro %} - -{%- if is_namespace %} -{{- [pkgname, "namespace"] | join(" ") | e | heading }} -{% else %} -{{- pkgname | e | heading }} -{% endif %} - -{%- if is_namespace %} -.. py:module:: {{ pkgname }} -{% endif %} - -{%- if modulefirst and not is_namespace %} -{{ automodule(pkgname, automodule_options) }} -{% endif %} - -{%- if subpackages %} -Subpackages ------------ - -{{ toctree(subpackages) }} -{% endif %} - -{%- if submodules %} -Submodules ----------- -{% if separatemodules %} -{{ toctree(submodules) }} -{% else %} -{%- for submodule in submodules %} -{% if show_headings %} -{{- submodule | e | heading(2) }} -{% endif %} -{{ automodule(submodule, automodule_options) }} -{% endfor %} -{%- endif %} -{%- endif %} - -{%- if not modulefirst and not is_namespace %} -Module contents ---------------- - -{{ automodule(pkgname, automodule_options) }} -{% endif %} diff --git a/audio/docs/source/_templates/toc.rst_t b/audio/docs/source/_templates/toc.rst_t deleted file mode 100644 index f0877eeb2..000000000 --- a/audio/docs/source/_templates/toc.rst_t +++ /dev/null @@ -1,8 +0,0 @@ -{{ header | heading }} - -.. toctree:: - :maxdepth: {{ maxdepth }} -{% for docname in docnames %} - {{ docname }} -{%- endfor %} - diff --git a/audio/docs/source/conf.py b/audio/docs/source/conf.py deleted file mode 100644 index 09c4f312f..000000000 --- a/audio/docs/source/conf.py +++ /dev/null @@ -1,181 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Configuration file for the Sphinx documentation builder. -# -# This file does only contain a selection of the most common options. For a -# full list see the documentation: -# http://www.sphinx-doc.org/en/master/config -# -- Path setup -------------------------------------------------------------- -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -import os -import sys -sys.path.insert(0, os.path.abspath('../..')) - -# -- Project information ----------------------------------------------------- - -project = 'PaddleAudio' -copyright = '2022, PaddlePaddle' -author = 'PaddlePaddle' - -# The short X.Y version -version = '' -# The full version, including alpha/beta/rc tags -release = '0.2.0' - -# -- General configuration --------------------------------------------------- - -# If your documentation needs a minimal Sphinx version, state it here. -# -# needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.intersphinx', - 'sphinx.ext.mathjax', - 'sphinx.ext.viewcode', - 'sphinx.ext.napoleon', -] - -napoleon_google_docstring = True - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# -# source_suffix = ['.rst', '.md'] -source_suffix = '.rst' - -# The master toctree document. -master_doc = 'index' - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = None - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = [] - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = None - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# - -import sphinx_rtd_theme -html_theme = 'sphinx_rtd_theme' -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] -smartquotes = False - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -# -# html_theme_options = {} - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] -html_logo = '../images/paddle.png' -html_css_files = [ - 'custom.css', -] - -# Custom sidebar templates, must be a dictionary that maps document names -# to template names. -# -# The default sidebars (for documents that don't match any pattern) are -# defined by theme itself. Builtin themes are using these templates by -# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', -# 'searchbox.html']``. -# -# html_sidebars = {} - -# -- Options for HTMLHelp output --------------------------------------------- - -# Output file base name for HTML help builder. -htmlhelp_basename = 'PaddleAudiodoc' - -# -- Options for LaTeX output ------------------------------------------------ - -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - # - # 'papersize': 'letterpaper', - - # The font size ('10pt', '11pt' or '12pt'). - # - # 'pointsize': '10pt', - - # Additional stuff for the LaTeX preamble. - # - # 'preamble': '', - - # Latex figure (float) alignment - # - # 'figure_align': 'htbp', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). -latex_documents = [ - (master_doc, 'PaddleAudio.tex', 'PaddleAudio Documentation', 'PaddlePaddle', - 'manual'), -] - -# -- Options for manual page output ------------------------------------------ - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [(master_doc, 'paddleaudio', 'PaddleAudio Documentation', [author], - 1)] - -# -- Options for Texinfo output ---------------------------------------------- - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - (master_doc, 'PaddleAudio', 'PaddleAudio Documentation', author, - 'PaddleAudio', 'One line description of project.', 'Miscellaneous'), -] - -# -- Options for Epub output ------------------------------------------------- - -# Bibliographic Dublin Core info. -epub_title = project - -# The unique identifier of the text. This can be a ISBN number -# or the project homepage. -# -# epub_identifier = '' - -# A unique identification for the text. -# -# epub_uid = '' - -# A list of files that should not be packed into the epub file. -epub_exclude_files = ['search.html'] - -# -- Extension configuration ------------------------------------------------- - -# -- Options for intersphinx extension --------------------------------------- - -# Example configuration for intersphinx: refer to the Python standard library. -intersphinx_mapping = {'https://docs.python.org/': None} diff --git a/audio/docs/source/index.rst b/audio/docs/source/index.rst deleted file mode 100644 index 26963308e..000000000 --- a/audio/docs/source/index.rst +++ /dev/null @@ -1,22 +0,0 @@ -.. PaddleAudio documentation master file, created by - sphinx-quickstart on Tue Mar 22 15:57:16 2022. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -Welcome to PaddleAudio's documentation! -======================================= - -.. toctree:: - :maxdepth: 1 - - Index - - -API References --------------- - -.. toctree:: - :maxdepth: 2 - :titlesonly: - - paddleaudio \ No newline at end of file diff --git a/audio/paddleaudio/__init__.py b/audio/paddleaudio/__init__.py index 381f7e681..38957cbe9 100644 --- a/audio/paddleaudio/__init__.py +++ b/audio/paddleaudio/__init__.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from . import backends from . import compliance from . import datasets from . import features @@ -18,4 +19,3 @@ from . import functional from . import io from . import metric from . import sox_effects -from . import backends diff --git a/audio/paddleaudio/backends/__init__.py b/audio/paddleaudio/backends/__init__.py index 8fcd0765f..735fd59ed 100644 --- a/audio/paddleaudio/backends/__init__.py +++ b/audio/paddleaudio/backends/__init__.py @@ -11,16 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from . import utils from .soundfile_backend import depth_convert -from .soundfile_backend import soundfile_load from .soundfile_backend import normalize from .soundfile_backend import resample +from .soundfile_backend import soundfile_load from .soundfile_backend import soundfile_save from .soundfile_backend import to_mono - -from . import utils from .utils import get_audio_backend from .utils import list_audio_backends from .utils import set_audio_backend -utils._init_audio_backend() \ No newline at end of file +utils._init_audio_backend() diff --git a/audio/paddleaudio/backends/soundfile_backend.py b/audio/paddleaudio/backends/soundfile_backend.py index e1546fedd..ae7b5b52d 100644 --- a/audio/paddleaudio/backends/soundfile_backend.py +++ b/audio/paddleaudio/backends/soundfile_backend.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import os import warnings from typing import Optional @@ -204,6 +203,7 @@ def soundfile_save(y: np.ndarray, sr: int, file: os.PathLike) -> None: wavfile.write(file, sr, y_out) + def soundfile_load( file: os.PathLike, sr: Optional[int]=None, @@ -256,9 +256,13 @@ def soundfile_load( y = depth_convert(y, dtype) return y, r + #the code below token form: https://github.com/pytorch/audio/blob/main/torchaudio/backend/soundfile_backend.py with modificaion. -def _get_subtype_for_wav(dtype: paddle.dtype, encoding: str, bits_per_sample: int): + +def _get_subtype_for_wav(dtype: paddle.dtype, + encoding: str, + bits_per_sample: int): if not encoding: if not bits_per_sample: subtype = { @@ -315,7 +319,10 @@ def _get_subtype_for_sphere(encoding: str, bits_per_sample: int): raise ValueError(f"sph does not support {encoding}.") -def _get_subtype(dtype: paddle.dtype, format: str, encoding: str, bits_per_sample: int): +def _get_subtype(dtype: paddle.dtype, + format: str, + encoding: str, + bits_per_sample: int): if format == "wav": return _get_subtype_for_wav(dtype, encoding, bits_per_sample) if format == "flac": @@ -328,7 +335,8 @@ def _get_subtype(dtype: paddle.dtype, format: str, encoding: str, bits_per_sampl return "PCM_S8" if bits_per_sample == 8 else f"PCM_{bits_per_sample}" if format in ("ogg", "vorbis"): if encoding or bits_per_sample: - raise ValueError("ogg/vorbis does not support encoding/bits_per_sample.") + raise ValueError( + "ogg/vorbis does not support encoding/bits_per_sample.") return "VORBIS" if format == "sph": return _get_subtype_for_sphere(encoding, bits_per_sample) @@ -336,16 +344,16 @@ def _get_subtype(dtype: paddle.dtype, format: str, encoding: str, bits_per_sampl return "PCM_16" raise ValueError(f"Unsupported format: {format}") + def save( - filepath: str, - src: paddle.Tensor, - sample_rate: int, - channels_first: bool = True, - compression: Optional[float] = None, - format: Optional[str] = None, - encoding: Optional[str] = None, - bits_per_sample: Optional[int] = None, -): + filepath: str, + src: paddle.Tensor, + sample_rate: int, + channels_first: bool=True, + compression: Optional[float]=None, + format: Optional[str]=None, + encoding: Optional[str]=None, + bits_per_sample: Optional[int]=None, ): """Save audio data to file. Note: @@ -441,11 +449,11 @@ def save( if compression is not None: warnings.warn( '`save` function of "soundfile" backend does not support "compression" parameter. ' - "The argument is silently ignored." - ) + "The argument is silently ignored.") if hasattr(filepath, "write"): if format is None: - raise RuntimeError("`format` is required when saving to file object.") + raise RuntimeError( + "`format` is required when saving to file object.") ext = format.lower() else: ext = str(filepath).split(".")[-1].lower() @@ -455,8 +463,7 @@ def save( if bits_per_sample == 24: warnings.warn( "Saving audio with 24 bits per sample might warp samples near -1. " - "Using 16 bits per sample might be able to avoid this." - ) + "Using 16 bits per sample might be able to avoid this.") subtype = _get_subtype(src.dtype, ext, encoding, bits_per_sample) # sph is a extension used in TED-LIUM but soundfile does not recognize it as NIST format, @@ -467,7 +474,13 @@ def save( if channels_first: src = src.t() - soundfile.write(file=filepath, data=src, samplerate=sample_rate, subtype=subtype, format=format) + soundfile.write( + file=filepath, + data=src, + samplerate=sample_rate, + subtype=subtype, + format=format) + _SUBTYPE2DTYPE = { "PCM_S8": "int8", @@ -478,14 +491,14 @@ _SUBTYPE2DTYPE = { "DOUBLE": "float64", } + def load( - filepath: str, - frame_offset: int = 0, - num_frames: int = -1, - normalize: bool = True, - channels_first: bool = True, - format: Optional[str] = None, -) -> Tuple[paddle.Tensor, int]: + filepath: str, + frame_offset: int=0, + num_frames: int=-1, + normalize: bool=True, + channels_first: bool=True, + format: Optional[str]=None, ) -> Tuple[paddle.Tensor, int]: """Load audio data from file. Note: @@ -564,7 +577,7 @@ def load( waveform = paddle.to_tensor(waveform) if channels_first: - waveform = paddle.transpose(waveform, perm=[1,0]) + waveform = paddle.transpose(waveform, perm=[1, 0]) return waveform, sample_rate @@ -588,7 +601,8 @@ _SUBTYPE_TO_BITS_PER_SAMPLE = { "ALAW": 8, # A-Law encoded. See https://en.wikipedia.org/wiki/G.711#Types "IMA_ADPCM": 0, # IMA ADPCM. "MS_ADPCM": 0, # Microsoft ADPCM. - "GSM610": 0, # GSM 6.10 encoding. (Wikipedia says 1.625 bit depth?? https://en.wikipedia.org/wiki/Full_Rate) + "GSM610": + 0, # GSM 6.10 encoding. (Wikipedia says 1.625 bit depth?? https://en.wikipedia.org/wiki/Full_Rate) "VOX_ADPCM": 0, # OKI / Dialogix ADPCM "G721_32": 0, # 32kbs G721 ADPCM encoding. "G723_24": 0, # 24kbs G723 ADPCM encoding. @@ -606,16 +620,17 @@ _SUBTYPE_TO_BITS_PER_SAMPLE = { "ALAC_32": 32, # Apple Lossless Audio Codec (32 bit). } + def _get_bit_depth(subtype): if subtype not in _SUBTYPE_TO_BITS_PER_SAMPLE: warnings.warn( f"The {subtype} subtype is unknown to PaddleAudio. As a result, the bits_per_sample " "attribute will be set to 0. If you are seeing this warning, please " "report by opening an issue on github (after checking for existing/closed ones). " - "You may otherwise ignore this warning." - ) + "You may otherwise ignore this warning.") return _SUBTYPE_TO_BITS_PER_SAMPLE.get(subtype, 0) + _SUBTYPE_TO_ENCODING = { "PCM_S8": "PCM_S", "PCM_16": "PCM_S", @@ -629,12 +644,14 @@ _SUBTYPE_TO_ENCODING = { "VORBIS": "VORBIS", } + def _get_encoding(format: str, subtype: str): if format == "FLAC": return "FLAC" return _SUBTYPE_TO_ENCODING.get(subtype, "UNKNOWN") -def info(filepath: str, format: Optional[str] = None) -> AudioInfo: + +def info(filepath: str, format: Optional[str]=None) -> AudioInfo: """Get signal information of an audio file. Note: @@ -657,5 +674,4 @@ def info(filepath: str, format: Optional[str] = None) -> AudioInfo: sinfo.frames, sinfo.channels, bits_per_sample=_get_bit_depth(sinfo.subtype), - encoding=_get_encoding(sinfo.format, sinfo.subtype), - ) \ No newline at end of file + encoding=_get_encoding(sinfo.format, sinfo.subtype), ) diff --git a/audio/paddleaudio/backends/sox_io_backend.py b/audio/paddleaudio/backends/sox_io_backend.py index 8dabe75f5..1c2d5f655 100644 --- a/audio/paddleaudio/backends/sox_io_backend.py +++ b/audio/paddleaudio/backends/sox_io_backend.py @@ -1,17 +1,17 @@ -from pathlib import Path -from typing import Callable -from typing import Optional, Tuple, Union +import os +from typing import Optional +from typing import Tuple import paddle import paddleaudio from paddle import Tensor -from .common import AudioInfo -import os +from paddleaudio._internal import module_utils as _mod_utils -from paddleaudio._internal import module_utils as _mod_utils +from .common import AudioInfo #https://github.com/pytorch/audio/blob/main/torchaudio/backend/sox_io_backend.py + def _fail_info(filepath: str, format: Optional[str]) -> AudioInfo: raise RuntimeError("Failed to fetch metadata from {}".format(filepath)) @@ -22,73 +22,78 @@ def _fail_info_fileobj(fileobj, format: Optional[str]) -> AudioInfo: # Note: need to comply TorchScript syntax -- need annotation and no f-string def _fail_load( - filepath: str, - frame_offset: int = 0, - num_frames: int = -1, - normalize: bool = True, - channels_first: bool = True, - format: Optional[str] = None, -) -> Tuple[Tensor, int]: + filepath: str, + frame_offset: int=0, + num_frames: int=-1, + normalize: bool=True, + channels_first: bool=True, + format: Optional[str]=None, ) -> Tuple[Tensor, int]: raise RuntimeError("Failed to load audio from {}".format(filepath)) def _fail_load_fileobj(fileobj, *args, **kwargs): raise RuntimeError(f"Failed to load audio from {fileobj}") + _fallback_info = _fail_info _fallback_info_fileobj = _fail_info_fileobj _fallback_load = _fail_load _fallback_load_filebj = _fail_load_fileobj + @_mod_utils.requires_sox() def load( filepath: str, - frame_offset: int = 0, + frame_offset: int=0, num_frames: int=-1, - normalize: bool = True, - channels_first: bool = True, + normalize: bool=True, + channels_first: bool=True, format: Optional[str]=None, ) -> Tuple[Tensor, int]: if hasattr(filepath, "read"): ret = paddleaudio._paddleaudio.load_audio_fileobj( - filepath, frame_offset, num_frames, normalize, channels_first, format - ) + filepath, frame_offset, num_frames, normalize, channels_first, + format) if ret is not None: audio_tensor = paddle.to_tensor(ret[0]) return (audio_tensor, ret[1]) - return _fallback_load_fileobj(filepath, frame_offset, num_frames, normalize, channels_first, format) + return _fallback_load_fileobj(filepath, frame_offset, num_frames, + normalize, channels_first, format) filepath = os.fspath(filepath) ret = paddleaudio._paddleaudio.sox_io_load_audio_file( - filepath, frame_offset, num_frames, normalize, channels_first, format - ) + filepath, frame_offset, num_frames, normalize, channels_first, format) if ret is not None: audio_tensor = paddle.to_tensor(ret[0]) return (audio_tensor, ret[1]) - return _fallback_load(filepath, frame_offset, num_frames, normalize, channels_first, format) + return _fallback_load(filepath, frame_offset, num_frames, normalize, + channels_first, format) @_mod_utils.requires_sox() -def save(filepath: str, - src: Tensor, - sample_rate: int, - channels_first: bool = True, - compression: Optional[float] = None, - format: Optional[str] = None, - encoding: Optional[str] = None, - bits_per_sample: Optional[int] = None, -): +def save( + filepath: str, + src: Tensor, + sample_rate: int, + channels_first: bool=True, + compression: Optional[float]=None, + format: Optional[str]=None, + encoding: Optional[str]=None, + bits_per_sample: Optional[int]=None, ): src_arr = src.numpy() if hasattr(filepath, "write"): paddleaudio._paddleaudio.save_audio_fileobj( - filepath, src_arr, sample_rate, channels_first, compression, format, encoding, bits_per_sample - ) + filepath, src_arr, sample_rate, channels_first, compression, format, + encoding, bits_per_sample) return filepath = os.fspath(filepath) paddleaudio._paddleaudio.sox_io_save_audio_file( - filepath, src_arr, sample_rate, channels_first, compression, format, encoding, bits_per_sample - ) + filepath, src_arr, sample_rate, channels_first, compression, format, + encoding, bits_per_sample) + @_mod_utils.requires_sox() -def info(filepath: str, format: Optional[str] = None,) -> AudioInfo: +def info( + filepath: str, + format: Optional[str]=None, ) -> AudioInfo: if hasattr(filepath, "read"): sinfo = paddleaudio._paddleaudio.get_info_fileobj(filepath, format) if sinfo is not None: diff --git a/audio/paddleaudio/backends/utils.py b/audio/paddleaudio/backends/utils.py index 4a7e51c02..83c1a71ca 100644 --- a/audio/paddleaudio/backends/utils.py +++ b/audio/paddleaudio/backends/utils.py @@ -1,6 +1,5 @@ """Defines utilities for switching audio backends""" #code is from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/utils.py - import warnings from typing import List from typing import Optional @@ -8,7 +7,9 @@ from typing import Optional import paddleaudio from paddleaudio._internal import module_utils as _mod_utils -from . import no_backend, soundfile_backend, sox_io_backend +from . import no_backend +from . import soundfile_backend +from . import sox_io_backend __all__ = [ "list_audio_backends", @@ -55,6 +56,7 @@ def set_audio_backend(backend: Optional[str]): for func in ["save", "load", "info"]: setattr(paddleaudio, func, getattr(module, func)) + def _init_audio_backend(): backends = list_audio_backends() if "soundfile" in backends: diff --git a/audio/paddleaudio/utils/__init__.py b/audio/paddleaudio/utils/__init__.py index b10731d46..e66d1ab49 100644 --- a/audio/paddleaudio/utils/__init__.py +++ b/audio/paddleaudio/utils/__init__.py @@ -21,7 +21,7 @@ from .env import USER_HOME from .error import ParameterError from .log import Logger from .log import logger -from .time import seconds_to_hms -from .time import Timer from .numeric import depth_convert from .numeric import pcm16to32 +from .time import seconds_to_hms +from .time import Timer diff --git a/audio/tests/backends/soundfile/common.py b/audio/tests/backends/soundfile/common.py index 42a07e1f0..1aaed913e 100644 --- a/audio/tests/backends/soundfile/common.py +++ b/audio/tests/backends/soundfile/common.py @@ -1,8 +1,8 @@ import itertools from unittest import skipIf -from parameterized import parameterized from paddleaudio._internal.module_utils import is_module_available +from parameterized import parameterized def name_func(func, _, params): @@ -31,7 +31,8 @@ def skipIfFormatNotSupported(fmt): def parameterize(*params): - return parameterized.expand(list(itertools.product(*params)), name_func=name_func) + return parameterized.expand( + list(itertools.product(*params)), name_func=name_func) def fetch_wav_subtype(dtype, encoding, bits_per_sample): @@ -54,4 +55,3 @@ def fetch_wav_subtype(dtype, encoding, bits_per_sample): if subtype: return subtype raise ValueError(f"wav does not support ({encoding}, {bits_per_sample}).") - diff --git a/audio/tests/backends/soundfile/info_test.py b/audio/tests/backends/soundfile/info_test.py index 94f167ed9..ffaccebb1 100644 --- a/audio/tests/backends/soundfile/info_test.py +++ b/audio/tests/backends/soundfile/info_test.py @@ -1,37 +1,37 @@ #this code is from: https://github.com/pytorch/audio/blob/main/test/torchaudio_unittest/backend/soundfile/info_test.py - import tarfile -import warnings import unittest +import warnings from unittest.mock import patch import paddle -from paddleaudio._internal import module_utils as _mod_utils +import soundfile +from common import parameterize +from common import skipIfFormatNotSupported from paddleaudio.backends import soundfile_backend -from tests.backends.common import get_bits_per_sample, get_encoding -from tests.common_utils import ( - get_wav_data, - nested_params, - save_wav, - TempDirMixin, -) -from common import parameterize, skipIfFormatNotSupported - -import soundfile +from tests.backends.common import get_bits_per_sample +from tests.backends.common import get_encoding +from tests.common_utils import get_wav_data +from tests.common_utils import nested_params +from tests.common_utils import save_wav +from tests.common_utils import TempDirMixin class TestInfo(TempDirMixin, unittest.TestCase): @parameterize( ["float32", "int32"], [8000, 16000], - [1, 2], - ) + [1, 2], ) def test_wav(self, dtype, sample_rate, num_channels): """`soundfile_backend.info` can check wav file correctly""" duration = 1 path = self.get_temp_path("data.wav") - data = get_wav_data(dtype, num_channels, normalize=False, num_frames=duration * sample_rate) + data = get_wav_data( + dtype, + num_channels, + normalize=False, + num_frames=duration * sample_rate) save_wav(path, data, sample_rate) info = soundfile_backend.info(path) assert info.sample_rate == sample_rate @@ -62,32 +62,31 @@ class TestInfo(TempDirMixin, unittest.TestCase): #@parameterize([8000, 16000], [1, 2]) #@skipIfFormatNotSupported("OGG") #def test_ogg(self, sample_rate, num_channels): - #"""`soundfile_backend.info` can check ogg file correctly""" - #duration = 1 - #num_frames = sample_rate * duration - ##data = torch.randn(num_frames, num_channels).numpy() - #data = paddle.randn(shape=[num_frames, num_channels]).numpy() - #print(len(data)) - #path = self.get_temp_path("data.ogg") - #soundfile.write(path, data, sample_rate) - - #info = soundfile_backend.info(path) - #print(info) - #assert info.sample_rate == sample_rate - #print("info") - #print(info.num_frames) - #print("jiji") - #print(sample_rate*duration) - ##assert info.num_frames == sample_rate * duration - #assert info.num_channels == num_channels - #assert info.bits_per_sample == 0 - #assert info.encoding == "VORBIS" + #"""`soundfile_backend.info` can check ogg file correctly""" + #duration = 1 + #num_frames = sample_rate * duration + ##data = torch.randn(num_frames, num_channels).numpy() + #data = paddle.randn(shape=[num_frames, num_channels]).numpy() + #print(len(data)) + #path = self.get_temp_path("data.ogg") + #soundfile.write(path, data, sample_rate) + + #info = soundfile_backend.info(path) + #print(info) + #assert info.sample_rate == sample_rate + #print("info") + #print(info.num_frames) + #print("jiji") + #print(sample_rate*duration) + ##assert info.num_frames == sample_rate * duration + #assert info.num_channels == num_channels + #assert info.bits_per_sample == 0 + #assert info.encoding == "VORBIS" @nested_params( [8000, 16000], [1, 2], - [("PCM_24", 24), ("PCM_32", 32)], - ) + [("PCM_24", 24), ("PCM_32", 32)], ) @skipIfFormatNotSupported("NIST") def test_sphere(self, sample_rate, num_channels, subtype_and_bit_depth): """`soundfile_backend.info` can check sph file correctly""" @@ -127,7 +126,8 @@ class TestInfo(TempDirMixin, unittest.TestCase): with warnings.catch_warnings(record=True) as w: info = soundfile_backend.info("foo") assert len(w) == 1 - assert "UNSEEN_SUBTYPE subtype is unknown to PaddleAudio" in str(w[-1].message) + assert "UNSEEN_SUBTYPE subtype is unknown to PaddleAudio" in str( + w[-1].message) assert info.bits_per_sample == 0 @@ -195,5 +195,6 @@ class TestFileObject(TempDirMixin, unittest.TestCase): """Query compressed audio via file-like object works""" self._test_tarobj("flac", "PCM_16", 16) + if __name__ == '__main__': unittest.main() diff --git a/audio/tests/backends/soundfile/load_test.py b/audio/tests/backends/soundfile/load_test.py index d315703cb..db2f28458 100644 --- a/audio/tests/backends/soundfile/load_test.py +++ b/audio/tests/backends/soundfile/load_test.py @@ -1,37 +1,31 @@ #this code is from: https://github.com/pytorch/audio/blob/main/test/torchaudio_unittest/backend/soundfile/load_test.py - import os import tarfile import unittest from unittest.mock import patch -import numpy as np -from parameterized import parameterized +import numpy as np import paddle -from paddleaudio._internal import module_utils as _mod_utils +import soundfile +from common import dtype2subtype +from common import parameterize +from common import skipIfFormatNotSupported from paddleaudio.backends import soundfile_backend -from tests.backends.common import get_bits_per_sample, get_encoding -from tests.common_utils import ( - get_wav_data, - load_wav, - nested_params, - normalize_wav, - save_wav, - TempDirMixin, -) - -from common import dtype2subtype, parameterize, skipIfFormatNotSupported +from parameterized import parameterized -import soundfile +from tests.common_utils import get_wav_data +from tests.common_utils import load_wav +from tests.common_utils import normalize_wav +from tests.common_utils import save_wav +from tests.common_utils import TempDirMixin def _get_mock_path( - ext: str, - dtype: str, - sample_rate: int, - num_channels: int, - num_frames: int, -): + ext: str, + dtype: str, + sample_rate: int, + num_channels: int, + num_frames: int, ): return f"{dtype}_{sample_rate}_{num_channels}_{num_frames}.{ext}" @@ -87,9 +81,8 @@ class SoundFileMock: self._params["num_channels"], normalize=False, num_frames=self._params["num_frames"], - channels_first=False, - ).numpy() - return data[self._start : self._start + frames] + channels_first=False, ).numpy() + return data[self._start:self._start + frames] def __enter__(self): return self @@ -99,13 +92,17 @@ class SoundFileMock: class MockedLoadTest(unittest.TestCase): - def assert_dtype(self, ext, dtype, sample_rate, num_channels, normalize, channels_first): + def assert_dtype(self, ext, dtype, sample_rate, num_channels, normalize, + channels_first): """When format is WAV or NIST, normalize=False will return the native dtype Tensor, otherwise float32""" num_frames = 3 * sample_rate path = _get_mock_path(ext, dtype, sample_rate, num_channels, num_frames) - expected_dtype = paddle.float32 if normalize or ext not in ["wav", "nist"] else getattr(paddle, dtype) + expected_dtype = paddle.float32 if normalize or ext not in [ + "wav", "nist" + ] else getattr(paddle, dtype) with patch("soundfile.SoundFile", SoundFileMock): - found, sr = soundfile_backend.load(path, normalize=normalize, channels_first=channels_first) + found, sr = soundfile_backend.load( + path, normalize=normalize, channels_first=channels_first) assert found.dtype == expected_dtype assert sample_rate == sr @@ -114,44 +111,47 @@ class MockedLoadTest(unittest.TestCase): [8000, 16000], [1, 2], [True, False], - [True, False], - ) - def test_wav(self, dtype, sample_rate, num_channels, normalize, channels_first): + [True, False], ) + def test_wav(self, dtype, sample_rate, num_channels, normalize, + channels_first): """Returns native dtype when normalize=False else float32""" - self.assert_dtype("wav", dtype, sample_rate, num_channels, normalize, channels_first) + self.assert_dtype("wav", dtype, sample_rate, num_channels, normalize, + channels_first) @parameterize( ["int32"], [8000, 16000], [1, 2], [True, False], - [True, False], - ) - def test_sphere(self, dtype, sample_rate, num_channels, normalize, channels_first): + [True, False], ) + def test_sphere(self, dtype, sample_rate, num_channels, normalize, + channels_first): """Returns float32 always""" - self.assert_dtype("sph", dtype, sample_rate, num_channels, normalize, channels_first) + self.assert_dtype("sph", dtype, sample_rate, num_channels, normalize, + channels_first) @parameterize([8000, 16000], [1, 2], [True, False], [True, False]) def test_ogg(self, sample_rate, num_channels, normalize, channels_first): """Returns float32 always""" - self.assert_dtype("ogg", "int16", sample_rate, num_channels, normalize, channels_first) + self.assert_dtype("ogg", "int16", sample_rate, num_channels, normalize, + channels_first) @parameterize([8000, 16000], [1, 2], [True, False], [True, False]) def test_flac(self, sample_rate, num_channels, normalize, channels_first): """`soundfile_backend.load` can load ogg format.""" - self.assert_dtype("flac", "int16", sample_rate, num_channels, normalize, channels_first) + self.assert_dtype("flac", "int16", sample_rate, num_channels, normalize, + channels_first) class LoadTestBase(TempDirMixin, unittest.TestCase): def assert_wav( - self, - dtype, - sample_rate, - num_channels, - normalize, - channels_first=True, - duration=1, - ): + self, + dtype, + sample_rate, + num_channels, + normalize, + channels_first=True, + duration=1, ): """`soundfile_backend.load` can load wav format correctly. Wav data loaded with soundfile backend should match those with scipy @@ -163,22 +163,22 @@ class LoadTestBase(TempDirMixin, unittest.TestCase): num_channels, normalize=normalize, num_frames=num_frames, - channels_first=channels_first, - ) + channels_first=channels_first, ) save_wav(path, data, sample_rate, channels_first=channels_first) - expected = load_wav(path, normalize=normalize, channels_first=channels_first)[0] - data, sr = soundfile_backend.load(path, normalize=normalize, channels_first=channels_first) + expected = load_wav( + path, normalize=normalize, channels_first=channels_first)[0] + data, sr = soundfile_backend.load( + path, normalize=normalize, channels_first=channels_first) assert sr == sample_rate np.testing.assert_array_almost_equal(data.numpy(), expected.numpy()) def assert_sphere( - self, - dtype, - sample_rate, - num_channels, - channels_first=True, - duration=1, - ): + self, + dtype, + sample_rate, + num_channels, + channels_first=True, + duration=1, ): """`soundfile_backend.load` can load SPHERE format correctly.""" path = self.get_temp_path("reference.sph") num_frames = duration * sample_rate @@ -187,9 +187,9 @@ class LoadTestBase(TempDirMixin, unittest.TestCase): num_channels, num_frames=num_frames, normalize=False, - channels_first=False, - ) - soundfile.write(path, raw, sample_rate, subtype=dtype2subtype(dtype), format="NIST") + channels_first=False, ) + soundfile.write( + path, raw, sample_rate, subtype=dtype2subtype(dtype), format="NIST") expected = normalize_wav(raw.t() if channels_first else raw) data, sr = soundfile_backend.load(path, channels_first=channels_first) assert sr == sample_rate @@ -197,13 +197,12 @@ class LoadTestBase(TempDirMixin, unittest.TestCase): np.testing.assert_array_almost_equal(data.numpy(), expected.numpy()) def assert_flac( - self, - dtype, - sample_rate, - num_channels, - channels_first=True, - duration=1, - ): + self, + dtype, + sample_rate, + num_channels, + channels_first=True, + duration=1, ): """`soundfile_backend.load` can load FLAC format correctly.""" path = self.get_temp_path("reference.flac") num_frames = duration * sample_rate @@ -212,15 +211,13 @@ class LoadTestBase(TempDirMixin, unittest.TestCase): num_channels, num_frames=num_frames, normalize=False, - channels_first=False, - ) + channels_first=False, ) soundfile.write(path, raw, sample_rate) expected = normalize_wav(raw.t() if channels_first else raw) data, sr = soundfile_backend.load(path, channels_first=channels_first) assert sr == sample_rate #self.assertEqual(data, expected, atol=1e-4, rtol=1e-8) np.testing.assert_array_almost_equal(data.numpy(), expected.numpy()) - class TestLoad(LoadTestBase): @@ -231,41 +228,43 @@ class TestLoad(LoadTestBase): [8000, 16000], [1, 2], [False, True], - [False, True], - ) - def test_wav(self, dtype, sample_rate, num_channels, normalize, channels_first): + [False, True], ) + def test_wav(self, dtype, sample_rate, num_channels, normalize, + channels_first): """`soundfile_backend.load` can load wav format correctly.""" - self.assert_wav(dtype, sample_rate, num_channels, normalize, channels_first) + self.assert_wav(dtype, sample_rate, num_channels, normalize, + channels_first) @parameterize( ["int32"], [16000], [2], - [False], - ) + [False], ) def test_wav_large(self, dtype, sample_rate, num_channels, normalize): """`soundfile_backend.load` can load large wav file correctly.""" two_hours = 2 * 60 * 60 - self.assert_wav(dtype, sample_rate, num_channels, normalize, duration=two_hours) + self.assert_wav( + dtype, sample_rate, num_channels, normalize, duration=two_hours) @parameterize(["float32", "int32"], [4, 8, 16, 32], [False, True]) def test_multiple_channels(self, dtype, num_channels, channels_first): """`soundfile_backend.load` can load wav file with more than 2 channels.""" sample_rate = 8000 normalize = False - self.assert_wav(dtype, sample_rate, num_channels, normalize, channels_first) + self.assert_wav(dtype, sample_rate, num_channels, normalize, + channels_first) #@parameterize(["int32"], [8000, 16000], [1, 2], [False, True]) #@skipIfFormatNotSupported("NIST") #def test_sphere(self, dtype, sample_rate, num_channels, channels_first): - #"""`soundfile_backend.load` can load sphere format correctly.""" - #self.assert_sphere(dtype, sample_rate, num_channels, channels_first) + #"""`soundfile_backend.load` can load sphere format correctly.""" + #self.assert_sphere(dtype, sample_rate, num_channels, channels_first) #@parameterize(["int32"], [8000, 16000], [1, 2], [False, True]) #@skipIfFormatNotSupported("FLAC") #def test_flac(self, dtype, sample_rate, num_channels, channels_first): - #"""`soundfile_backend.load` can load flac format correctly.""" - #self.assert_flac(dtype, sample_rate, num_channels, channels_first) + #"""`soundfile_backend.load` can load flac format correctly.""" + #self.assert_flac(dtype, sample_rate, num_channels, channels_first) class TestLoadFormat(TempDirMixin, unittest.TestCase): @@ -291,21 +290,17 @@ class TestLoadFormat(TempDirMixin, unittest.TestCase): #self.assertEqual(found, expected) np.testing.assert_array_almost_equal(found, expected) - @parameterized.expand( - [ - ("WAV",), - ("wav",), - ] - ) + @parameterized.expand([ + ("WAV", ), + ("wav", ), + ]) def test_wav(self, format_): self._test_format(format_) - @parameterized.expand( - [ - ("FLAC",), - ("flac",), - ] - ) + @parameterized.expand([ + ("FLAC", ), + ("flac", ), + ]) @skipIfFormatNotSupported("FLAC") def test_flac(self, format_): self._test_format(format_) @@ -356,7 +351,6 @@ class TestFileObject(TempDirMixin, unittest.TestCase): #self.assertEqual(expected, found) np.testing.assert_array_almost_equal(found.numpy(), expected) - def test_tarfile_wav(self): """Loading audio via file-like object works""" self._test_tarfile("wav") @@ -365,5 +359,6 @@ class TestFileObject(TempDirMixin, unittest.TestCase): """Loading audio via file-like object works""" self._test_tarfile("flac") + if __name__ == '__main__': unittest.main() diff --git a/audio/tests/backends/soundfile/save_test.py b/audio/tests/backends/soundfile/save_test.py index 28f0e5c79..50c21a673 100644 --- a/audio/tests/backends/soundfile/save_test.py +++ b/audio/tests/backends/soundfile/save_test.py @@ -2,23 +2,18 @@ import io import unittest from unittest.mock import patch -from paddleaudio._internal import module_utils as _mod_utils -from paddleaudio.backends import soundfile_backend -from tests.common_utils import ( - get_wav_data, - load_wav, - nested_params, - normalize_wav, - save_wav, - TempDirMixin, -) - -from common import fetch_wav_subtype, parameterize, skipIfFormatNotSupported - -import paddle import numpy as np - +import paddle import soundfile +from common import fetch_wav_subtype +from common import parameterize +from common import skipIfFormatNotSupported +from paddleaudio.backends import soundfile_backend + +from tests.common_utils import get_wav_data +from tests.common_utils import load_wav +from tests.common_utils import nested_params +from tests.common_utils import TempDirMixin class MockedSaveTest(unittest.TestCase): @@ -41,10 +36,10 @@ class MockedSaveTest(unittest.TestCase): ("ULAW", 8), ("ALAW", None), ("ALAW", 8), - ], - ) + ], ) @patch("soundfile.write") - def test_wav(self, dtype, sample_rate, num_channels, channels_first, enc_params, mocked_write): + def test_wav(self, dtype, sample_rate, num_channels, channels_first, + enc_params, mocked_write): """soundfile_backend.save passes correct subtype to soundfile.write when WAV""" filepath = "foo.wav" input_tensor = get_wav_data( @@ -52,8 +47,7 @@ class MockedSaveTest(unittest.TestCase): num_channels, num_frames=3 * sample_rate, normalize=dtype == "float32", - channels_first=channels_first, - ) + channels_first=channels_first, ) input_tensor = paddle.transpose(input_tensor, [1, 0]) encoding, bits_per_sample = enc_params @@ -63,33 +57,32 @@ class MockedSaveTest(unittest.TestCase): sample_rate, channels_first=channels_first, encoding=encoding, - bits_per_sample=bits_per_sample, - ) + bits_per_sample=bits_per_sample, ) # on +Py3.8 call_args.kwargs is more descreptive args = mocked_write.call_args[1] assert args["file"] == filepath assert args["samplerate"] == sample_rate - assert args["subtype"] == fetch_wav_subtype(dtype, encoding, bits_per_sample) + assert args["subtype"] == fetch_wav_subtype(dtype, encoding, + bits_per_sample) assert args["format"] is None - tensor_result = paddle.transpose(input_tensor, [1, 0]) if channels_first else input_tensor + tensor_result = paddle.transpose( + input_tensor, [1, 0]) if channels_first else input_tensor #self.assertEqual(args["data"], tensor_result.numpy()) - np.testing.assert_array_almost_equal(args["data"].numpy(), tensor_result.numpy()) - - + np.testing.assert_array_almost_equal(args["data"].numpy(), + tensor_result.numpy()) @patch("soundfile.write") def assert_non_wav( - self, - fmt, - dtype, - sample_rate, - num_channels, - channels_first, - mocked_write, - encoding=None, - bits_per_sample=None, - ): + self, + fmt, + dtype, + sample_rate, + num_channels, + channels_first, + mocked_write, + encoding=None, + bits_per_sample=None, ): """soundfile_backend.save passes correct subtype and format to soundfile.write when SPHERE""" filepath = f"foo.{fmt}" input_tensor = get_wav_data( @@ -97,11 +90,11 @@ class MockedSaveTest(unittest.TestCase): num_channels, num_frames=3 * sample_rate, normalize=False, - channels_first=channels_first, - ) + channels_first=channels_first, ) input_tensor = paddle.transpose(input_tensor, [1, 0]) - expected_data = paddle.transpose(input_tensor, [1, 0]) if channels_first else input_tensor + expected_data = paddle.transpose( + input_tensor, [1, 0]) if channels_first else input_tensor soundfile_backend.save( filepath, @@ -109,8 +102,7 @@ class MockedSaveTest(unittest.TestCase): sample_rate, channels_first, encoding=encoding, - bits_per_sample=bits_per_sample, - ) + bits_per_sample=bits_per_sample, ) # on +Py3.8 call_args.kwargs is more descreptive args = mocked_write.call_args[1] @@ -120,7 +112,8 @@ class MockedSaveTest(unittest.TestCase): assert args["format"] == "NIST" else: assert args["format"] is None - np.testing.assert_array_almost_equal(args["data"].numpy(), expected_data.numpy()) + np.testing.assert_array_almost_equal(args["data"].numpy(), + expected_data.numpy()) #self.assertEqual(args["data"], expected_data) @nested_params( @@ -139,45 +132,57 @@ class MockedSaveTest(unittest.TestCase): ("ALAW", 16), ("ALAW", 24), ("ALAW", 32), - ], - ) - def test_sph(self, fmt, dtype, sample_rate, num_channels, channels_first, enc_params): + ], ) + def test_sph(self, fmt, dtype, sample_rate, num_channels, channels_first, + enc_params): """soundfile_backend.save passes default format and subtype (None-s) to soundfile.write when not WAV""" encoding, bits_per_sample = enc_params self.assert_non_wav( - fmt, dtype, sample_rate, num_channels, channels_first, encoding=encoding, bits_per_sample=bits_per_sample - ) + fmt, + dtype, + sample_rate, + num_channels, + channels_first, + encoding=encoding, + bits_per_sample=bits_per_sample) @parameterize( ["int32"], [8000, 16000], [1, 2], [False, True], - [8, 16, 24], - ) - def test_flac(self, dtype, sample_rate, num_channels, channels_first, bits_per_sample): + [8, 16, 24], ) + def test_flac(self, dtype, sample_rate, num_channels, channels_first, + bits_per_sample): """soundfile_backend.save passes default format and subtype (None-s) to soundfile.write when not WAV""" - self.assert_non_wav("flac", dtype, sample_rate, num_channels, channels_first, bits_per_sample=bits_per_sample) + self.assert_non_wav( + "flac", + dtype, + sample_rate, + num_channels, + channels_first, + bits_per_sample=bits_per_sample) @parameterize( ["int32"], [8000, 16000], [1, 2], - [False, True], - ) + [False, True], ) def test_ogg(self, dtype, sample_rate, num_channels, channels_first): """soundfile_backend.save passes default format and subtype (None-s) to soundfile.write when not WAV""" - self.assert_non_wav("ogg", dtype, sample_rate, num_channels, channels_first) + self.assert_non_wav("ogg", dtype, sample_rate, num_channels, + channels_first) class SaveTestBase(TempDirMixin, unittest.TestCase): def assert_wav(self, dtype, sample_rate, num_channels, num_frames): """`soundfile_backend.save` can save wav format.""" path = self.get_temp_path("data.wav") - expected = get_wav_data(dtype, num_channels, num_frames=num_frames, normalize=False) + expected = get_wav_data( + dtype, num_channels, num_frames=num_frames, normalize=False) soundfile_backend.save(path, expected, sample_rate) found, sr = load_wav(path, normalize=False) assert sample_rate == sr @@ -192,7 +197,8 @@ class SaveTestBase(TempDirMixin, unittest.TestCase): """ num_frames = sample_rate * 3 path = self.get_temp_path(f"data.{fmt}") - expected = get_wav_data(dtype, num_channels, num_frames=num_frames, normalize=False) + expected = get_wav_data( + dtype, num_channels, num_frames=num_frames, normalize=False) soundfile_backend.save(path, expected, sample_rate) sinfo = soundfile.info(path) assert sinfo.format == fmt.upper() @@ -220,16 +226,14 @@ class TestSave(SaveTestBase): @parameterize( ["float32", "int32"], [8000, 16000], - [1, 2], - ) + [1, 2], ) def test_wav(self, dtype, sample_rate, num_channels): """`soundfile_backend.save` can save wav format.""" self.assert_wav(dtype, sample_rate, num_channels, num_frames=None) @parameterize( ["float32", "int32"], - [4, 8, 16, 32], - ) + [4, 8, 16, 32], ) def test_multiple_channels(self, dtype, num_channels): """`soundfile_backend.save` can save wav with more than 2 channels.""" sample_rate = 8000 @@ -238,8 +242,7 @@ class TestSave(SaveTestBase): @parameterize( ["int32"], [8000, 16000], - [1, 2], - ) + [1, 2], ) @skipIfFormatNotSupported("NIST") def test_sphere(self, dtype, sample_rate, num_channels): """`soundfile_backend.save` can save sph format.""" @@ -247,8 +250,7 @@ class TestSave(SaveTestBase): @parameterize( [8000, 16000], - [1, 2], - ) + [1, 2], ) @skipIfFormatNotSupported("FLAC") def test_flac(self, sample_rate, num_channels): """`soundfile_backend.save` can save flac format.""" @@ -256,8 +258,7 @@ class TestSave(SaveTestBase): @parameterize( [8000, 16000], - [1, 2], - ) + [1, 2], ) @skipIfFormatNotSupported("OGG") def test_ogg(self, sample_rate, num_channels): """`soundfile_backend.save` can save ogg/vorbis format.""" @@ -318,5 +319,6 @@ class TestFileObject(TempDirMixin, unittest.TestCase): """Saving audio via file-like object works""" self._test_fileobj("OGG") + if __name__ == '__main__': unittest.main() diff --git a/audio/tests/common_utils/__init__.py b/audio/tests/common_utils/__init__.py index 32b785124..efa206a89 100644 --- a/audio/tests/common_utils/__init__.py +++ b/audio/tests/common_utils/__init__.py @@ -1,17 +1,12 @@ -from .wav_utils import get_wav_data, load_wav, save_wav, normalize_wav -from .parameterized_utils import nested_params -from .case_utils import ( - TempDirMixin, - name_func -) +from .case_utils import name_func +from .case_utils import TempDirMixin +from .parameterized_utils import nested_params +from .wav_utils import get_wav_data +from .wav_utils import load_wav +from .wav_utils import normalize_wav +from .wav_utils import save_wav __all__ = [ - "get_wav_data", - "load_wav", - "save_wav", - "normalize_wav", - "get_sinusoid", - "name_func", - "nested_params", - "TempDirMixin" + "get_wav_data", "load_wav", "save_wav", "normalize_wav", "get_sinusoid", + "name_func", "nested_params", "TempDirMixin" ] diff --git a/audio/tests/common_utils/wav_utils.py b/audio/tests/common_utils/wav_utils.py index 25d0b1971..5cae6d8e6 100644 --- a/audio/tests/common_utils/wav_utils.py +++ b/audio/tests/common_utils/wav_utils.py @@ -1,8 +1,8 @@ from typing import Optional -import scipy.io.wavfile import paddle -import numpy as np +import scipy.io.wavfile + def normalize_wav(tensor: paddle.Tensor) -> paddle.Tensor: if tensor.dtype == paddle.float32: @@ -23,13 +23,12 @@ def normalize_wav(tensor: paddle.Tensor) -> paddle.Tensor: def get_wav_data( - dtype: str, - num_channels: int, - *, - num_frames: Optional[int] = None, - normalize: bool = True, - channels_first: bool = True, -): + dtype: str, + num_channels: int, + *, + num_frames: Optional[int]=None, + normalize: bool=True, + channels_first: bool=True, ): """Generate linear signal of the given dtype and num_channels Data range is @@ -53,25 +52,26 @@ def get_wav_data( # paddle linspace not support uint8, int8, int16 #if dtype == "uint8": # base = paddle.linspace(0, 255, num_frames, dtype=dtype_) - #dtype_np = getattr(np, dtype) - #base_np = np.linspace(0, 255, num_frames, dtype_np) - #base = paddle.to_tensor(base_np, dtype=dtype_) + #dtype_np = getattr(np, dtype) + #base_np = np.linspace(0, 255, num_frames, dtype_np) + #base = paddle.to_tensor(base_np, dtype=dtype_) #elif dtype == "int8": # base = paddle.linspace(-128, 127, num_frames, dtype=dtype_) - #dtype_np = getattr(np, dtype) - #base_np = np.linspace(-128, 127, num_frames, dtype_np) - #base = paddle.to_tensor(base_np, dtype=dtype_) + #dtype_np = getattr(np, dtype) + #base_np = np.linspace(-128, 127, num_frames, dtype_np) + #base = paddle.to_tensor(base_np, dtype=dtype_) if dtype == "float32": base = paddle.linspace(-1.0, 1.0, num_frames, dtype=dtype_) elif dtype == "float64": base = paddle.linspace(-1.0, 1.0, num_frames, dtype=dtype_) elif dtype == "int32": - base = paddle.linspace(-2147483648, 2147483647, num_frames, dtype=dtype_) + base = paddle.linspace( + -2147483648, 2147483647, num_frames, dtype=dtype_) #elif dtype == "int16": # base = paddle.linspace(-32768, 32767, num_frames, dtype=dtype_) - #dtype_np = getattr(np, dtype) - #base_np = np.linspace(-32768, 32767, num_frames, dtype_np) - #base = paddle.to_tensor(base_np, dtype=dtype_) + #dtype_np = getattr(np, dtype) + #base_np = np.linspace(-32768, 32767, num_frames, dtype_np) + #base = paddle.to_tensor(base_np, dtype=dtype_) else: raise NotImplementedError(f"Unsupported dtype {dtype}") data = base.tile([num_channels, 1]) diff --git a/audio/docs/source/source/paddleaudio.backends.common.rst b/docs/source/api/paddleaudio.backends.common.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.backends.common.rst rename to docs/source/api/paddleaudio.backends.common.rst diff --git a/audio/docs/source/source/paddleaudio.backends.no_backend.rst b/docs/source/api/paddleaudio.backends.no_backend.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.backends.no_backend.rst rename to docs/source/api/paddleaudio.backends.no_backend.rst diff --git a/audio/docs/source/source/paddleaudio.backends.rst b/docs/source/api/paddleaudio.backends.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.backends.rst rename to docs/source/api/paddleaudio.backends.rst diff --git a/audio/docs/source/source/paddleaudio.backends.soundfile_backend.rst b/docs/source/api/paddleaudio.backends.soundfile_backend.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.backends.soundfile_backend.rst rename to docs/source/api/paddleaudio.backends.soundfile_backend.rst diff --git a/audio/docs/source/source/paddleaudio.backends.sox_io_backend.rst b/docs/source/api/paddleaudio.backends.sox_io_backend.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.backends.sox_io_backend.rst rename to docs/source/api/paddleaudio.backends.sox_io_backend.rst diff --git a/audio/docs/source/source/paddleaudio.backends.utils.rst b/docs/source/api/paddleaudio.backends.utils.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.backends.utils.rst rename to docs/source/api/paddleaudio.backends.utils.rst diff --git a/audio/docs/source/source/paddleaudio.compliance.kaldi.rst b/docs/source/api/paddleaudio.compliance.kaldi.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.compliance.kaldi.rst rename to docs/source/api/paddleaudio.compliance.kaldi.rst diff --git a/audio/docs/source/source/paddleaudio.compliance.librosa.rst b/docs/source/api/paddleaudio.compliance.librosa.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.compliance.librosa.rst rename to docs/source/api/paddleaudio.compliance.librosa.rst diff --git a/audio/docs/source/source/paddleaudio.compliance.rst b/docs/source/api/paddleaudio.compliance.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.compliance.rst rename to docs/source/api/paddleaudio.compliance.rst diff --git a/audio/docs/source/source/paddleaudio.datasets.dataset.rst b/docs/source/api/paddleaudio.datasets.dataset.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.datasets.dataset.rst rename to docs/source/api/paddleaudio.datasets.dataset.rst diff --git a/audio/docs/source/source/paddleaudio.datasets.esc50.rst b/docs/source/api/paddleaudio.datasets.esc50.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.datasets.esc50.rst rename to docs/source/api/paddleaudio.datasets.esc50.rst diff --git a/audio/docs/source/source/paddleaudio.datasets.gtzan.rst b/docs/source/api/paddleaudio.datasets.gtzan.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.datasets.gtzan.rst rename to docs/source/api/paddleaudio.datasets.gtzan.rst diff --git a/audio/docs/source/source/paddleaudio.datasets.hey_snips.rst b/docs/source/api/paddleaudio.datasets.hey_snips.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.datasets.hey_snips.rst rename to docs/source/api/paddleaudio.datasets.hey_snips.rst diff --git a/audio/docs/source/source/paddleaudio.datasets.rirs_noises.rst b/docs/source/api/paddleaudio.datasets.rirs_noises.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.datasets.rirs_noises.rst rename to docs/source/api/paddleaudio.datasets.rirs_noises.rst diff --git a/audio/docs/source/source/paddleaudio.datasets.rst b/docs/source/api/paddleaudio.datasets.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.datasets.rst rename to docs/source/api/paddleaudio.datasets.rst diff --git a/audio/docs/source/source/paddleaudio.datasets.tess.rst b/docs/source/api/paddleaudio.datasets.tess.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.datasets.tess.rst rename to docs/source/api/paddleaudio.datasets.tess.rst diff --git a/audio/docs/source/source/paddleaudio.datasets.urban_sound.rst b/docs/source/api/paddleaudio.datasets.urban_sound.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.datasets.urban_sound.rst rename to docs/source/api/paddleaudio.datasets.urban_sound.rst diff --git a/audio/docs/source/source/paddleaudio.datasets.voxceleb.rst b/docs/source/api/paddleaudio.datasets.voxceleb.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.datasets.voxceleb.rst rename to docs/source/api/paddleaudio.datasets.voxceleb.rst diff --git a/audio/docs/source/source/paddleaudio.features.layers.rst b/docs/source/api/paddleaudio.features.layers.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.features.layers.rst rename to docs/source/api/paddleaudio.features.layers.rst diff --git a/audio/docs/source/source/paddleaudio.features.rst b/docs/source/api/paddleaudio.features.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.features.rst rename to docs/source/api/paddleaudio.features.rst diff --git a/audio/docs/source/source/paddleaudio.functional.functional.rst b/docs/source/api/paddleaudio.functional.functional.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.functional.functional.rst rename to docs/source/api/paddleaudio.functional.functional.rst diff --git a/audio/docs/source/source/paddleaudio.functional.rst b/docs/source/api/paddleaudio.functional.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.functional.rst rename to docs/source/api/paddleaudio.functional.rst diff --git a/audio/docs/source/source/paddleaudio.functional.window.rst b/docs/source/api/paddleaudio.functional.window.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.functional.window.rst rename to docs/source/api/paddleaudio.functional.window.rst diff --git a/audio/docs/source/source/paddleaudio.io.rst b/docs/source/api/paddleaudio.io.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.io.rst rename to docs/source/api/paddleaudio.io.rst diff --git a/audio/docs/source/source/paddleaudio.metric.eer.rst b/docs/source/api/paddleaudio.metric.eer.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.metric.eer.rst rename to docs/source/api/paddleaudio.metric.eer.rst diff --git a/audio/docs/source/source/paddleaudio.metric.rst b/docs/source/api/paddleaudio.metric.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.metric.rst rename to docs/source/api/paddleaudio.metric.rst diff --git a/audio/docs/source/source/paddleaudio.rst b/docs/source/api/paddleaudio.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.rst rename to docs/source/api/paddleaudio.rst diff --git a/audio/docs/source/source/paddleaudio.sox_effects.rst b/docs/source/api/paddleaudio.sox_effects.rst similarity index 100% rename from audio/docs/source/source/paddleaudio.sox_effects.rst rename to docs/source/api/paddleaudio.sox_effects.rst diff --git a/examples/voxceleb/sv0/local/data_prepare.py b/examples/voxceleb/sv0/local/data_prepare.py index 03d054004..b4486b6f0 100644 --- a/examples/voxceleb/sv0/local/data_prepare.py +++ b/examples/voxceleb/sv0/local/data_prepare.py @@ -14,9 +14,9 @@ import argparse import paddle +from paddleaudio.datasets.voxceleb import VoxCeleb from yacs.config import CfgNode -from paddleaudio.datasets.voxceleb import VoxCeleb from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.augment import build_augment_pipeline from paddlespeech.vector.training.seeding import seed_everything diff --git a/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py b/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py index 9aa8a2ebe..11908fe63 100644 --- a/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py +++ b/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py @@ -21,9 +21,9 @@ import os from typing import List import tqdm +from paddleaudio.backends import soundfile_load as load_audio from yacs.config import CfgNode -from paddleaudio.backends import soundfile_load as load_audio from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.utils.vector_utils import get_chunks diff --git a/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py b/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py index c39dc66df..ebeb598a4 100644 --- a/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py +++ b/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py @@ -22,9 +22,9 @@ import os import random import tqdm +from paddleaudio.backends import soundfile_load as load_audio from yacs.config import CfgNode -from paddleaudio.backends import soundfile_load as load_audio from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.utils.vector_utils import get_chunks diff --git a/paddlespeech/audio/__init__.py b/paddlespeech/audio/__init__.py index ad06603a7..102de76ec 100644 --- a/paddlespeech/audio/__init__.py +++ b/paddlespeech/audio/__init__.py @@ -11,17 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - from . import _extension -from . import compliance -from . import datasets -from . import features -from . import functional -from . import io -from . import metric from . import sox_effects from . import streamdata from . import text from . import transform -from .backends import load -from .backends import save diff --git a/paddlespeech/audio/_extension.py b/paddlespeech/audio/_extension.py index ac82c06e5..c5e928403 100644 --- a/paddlespeech/audio/_extension.py +++ b/paddlespeech/audio/_extension.py @@ -1,15 +1,12 @@ -import os -import warnings -from pathlib import Path - -from ._internal import module_utils as _mod_utils # noqa: F401 - - import contextlib import ctypes import os import sys import types +import warnings +from pathlib import Path + +from ._internal import module_utils as _mod_utils # noqa: F401 # Query `hasattr` only once. _SET_GLOBAL_FLAGS = hasattr(sys, 'getdlopenflags') and hasattr(sys, @@ -68,6 +65,7 @@ class _Ops(types.ModuleType): _LIB_DIR = Path(__file__).parent / "lib" + def _get_lib_path(lib: str): suffix = "pyd" if os.name == "nt" else "so" path = _LIB_DIR / f"{lib}.{suffix}" diff --git a/paddlespeech/audio/backends/sox_io_backend.py b/paddlespeech/audio/backends/sox_io_backend.py index fff9e2069..a5cc68988 100644 --- a/paddlespeech/audio/backends/sox_io_backend.py +++ b/paddlespeech/audio/backends/sox_io_backend.py @@ -1,17 +1,17 @@ -from pathlib import Path -from typing import Callable -from typing import Optional, Tuple, Union +import os +from typing import Optional +from typing import Tuple import paddle from paddle import Tensor -from .common import AudioMetaData -import os -from paddlespeech.audio._internal import module_utils as _mod_utils -from paddlespeech.audio import _paddleaudio as paddleaudio +from .common import AudioMetaData +from paddlespeech.audio import _paddleaudio as paddleaudio +from paddlespeech.audio._internal import module_utils as _mod_utils #https://github.com/pytorch/audio/blob/main/torchaudio/backend/sox_io_backend.py + def _fail_info(filepath: str, format: Optional[str]) -> AudioMetaData: raise RuntimeError("Failed to fetch metadata from {}".format(filepath)) @@ -22,73 +22,77 @@ def _fail_info_fileobj(fileobj, format: Optional[str]) -> AudioMetaData: # Note: need to comply TorchScript syntax -- need annotation and no f-string def _fail_load( - filepath: str, - frame_offset: int = 0, - num_frames: int = -1, - normalize: bool = True, - channels_first: bool = True, - format: Optional[str] = None, -) -> Tuple[Tensor, int]: + filepath: str, + frame_offset: int=0, + num_frames: int=-1, + normalize: bool=True, + channels_first: bool=True, + format: Optional[str]=None, ) -> Tuple[Tensor, int]: raise RuntimeError("Failed to load audio from {}".format(filepath)) def _fail_load_fileobj(fileobj, *args, **kwargs): raise RuntimeError(f"Failed to load audio from {fileobj}") + _fallback_info = _fail_info _fallback_info_fileobj = _fail_info_fileobj _fallback_load = _fail_load _fallback_load_filebj = _fail_load_fileobj + @_mod_utils.requires_sox() def load( filepath: str, - frame_offset: int = 0, + frame_offset: int=0, num_frames: int=-1, - normalize: bool = True, - channels_first: bool = True, + normalize: bool=True, + channels_first: bool=True, format: Optional[str]=None, ) -> Tuple[Tensor, int]: if hasattr(filepath, "read"): - ret = paddleaudio.load_audio_fileobj( - filepath, frame_offset, num_frames, normalize, channels_first, format - ) + ret = paddleaudio.load_audio_fileobj(filepath, frame_offset, num_frames, + normalize, channels_first, format) if ret is not None: audio_tensor = paddle.to_tensor(ret[0]) return (audio_tensor, ret[1]) - return _fallback_load_fileobj(filepath, frame_offset, num_frames, normalize, channels_first, format) + return _fallback_load_fileobj(filepath, frame_offset, num_frames, + normalize, channels_first, format) filepath = os.fspath(filepath) - ret = paddleaudio.sox_io_load_audio_file( - filepath, frame_offset, num_frames, normalize, channels_first, format - ) + ret = paddleaudio.sox_io_load_audio_file(filepath, frame_offset, num_frames, + normalize, channels_first, format) if ret is not None: audio_tensor = paddle.to_tensor(ret[0]) return (audio_tensor, ret[1]) - return _fallback_load(filepath, frame_offset, num_frames, normalize, channels_first, format) + return _fallback_load(filepath, frame_offset, num_frames, normalize, + channels_first, format) @_mod_utils.requires_sox() -def save(filepath: str, - src: Tensor, - sample_rate: int, - channels_first: bool = True, - compression: Optional[float] = None, - format: Optional[str] = None, - encoding: Optional[str] = None, - bits_per_sample: Optional[int] = None, -): +def save( + filepath: str, + src: Tensor, + sample_rate: int, + channels_first: bool=True, + compression: Optional[float]=None, + format: Optional[str]=None, + encoding: Optional[str]=None, + bits_per_sample: Optional[int]=None, ): src_arr = src.numpy() if hasattr(filepath, "write"): - paddleaudio.save_audio_fileobj( - filepath, src_arr, sample_rate, channels_first, compression, format, encoding, bits_per_sample - ) + paddleaudio.save_audio_fileobj(filepath, src_arr, sample_rate, + channels_first, compression, format, + encoding, bits_per_sample) return filepath = os.fspath(filepath) - paddleaudio.sox_io_save_audio_file( - filepath, src_arr, sample_rate, channels_first, compression, format, encoding, bits_per_sample - ) + paddleaudio.sox_io_save_audio_file(filepath, src_arr, sample_rate, + channels_first, compression, format, + encoding, bits_per_sample) + @_mod_utils.requires_sox() -def info(filepath: str, format: Optional[str] = None,) -> AudioMetaData: +def info( + filepath: str, + format: Optional[str]=None, ) -> AudioMetaData: if hasattr(filepath, "read"): sinfo = paddleaudio.get_info_fileobj(filepath, format) if sinfo is not None: diff --git a/paddlespeech/audio/backends/utils.py b/paddlespeech/audio/backends/utils.py index 9ea2eaca7..8f49137e8 100644 --- a/paddlespeech/audio/backends/utils.py +++ b/paddlespeech/audio/backends/utils.py @@ -1,15 +1,15 @@ """Defines utilities for switching audio backends""" #code is from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/utils.py - import warnings from typing import List from typing import Optional import paddlespeech.audio +from . import no_backend +from . import soundfile_backend +from . import sox_io_backend from paddlespeech.audio._internal import module_utils as _mod_utils -from . import no_backend, soundfile_backend, sox_io_backend - __all__ = [ "list_audio_backends", "get_audio_backend", diff --git a/paddlespeech/audio/sox_effects/__init__.py b/paddlespeech/audio/sox_effects/__init__.py index d68158776..57ed4f51c 100644 --- a/paddlespeech/audio/sox_effects/__init__.py +++ b/paddlespeech/audio/sox_effects/__init__.py @@ -1,14 +1,10 @@ +from .sox_effects import apply_effects_file +from .sox_effects import apply_effects_tensor +from .sox_effects import effect_names +from .sox_effects import init_sox_effects +from .sox_effects import shutdown_sox_effects from paddlespeech.audio._internal import module_utils as _mod_utils -from .sox_effects import ( - apply_effects_file, - apply_effects_tensor, - effect_names, - init_sox_effects, - shutdown_sox_effects, -) - - if _mod_utils.is_sox_available(): import atexit @@ -22,4 +18,3 @@ __all__ = [ "apply_effects_tensor", "apply_effects_file", ] - diff --git a/paddlespeech/audio/sox_effects/sox_effects.py b/paddlespeech/audio/sox_effects/sox_effects.py index e9b839c1a..106ebd5e2 100644 --- a/paddlespeech/audio/sox_effects/sox_effects.py +++ b/paddlespeech/audio/sox_effects/sox_effects.py @@ -1,14 +1,17 @@ import os -from typing import List, Optional, Tuple +from typing import List +from typing import Optional +from typing import Tuple + import paddle -import numpy +from paddlespeech.audio import _paddleaudio as paddleaudio from paddlespeech.audio._internal import module_utils as _mod_utils from paddlespeech.audio.utils.sox_utils import list_effects -from paddlespeech.audio import _paddleaudio as paddleaudio #code is from: https://github.com/pytorch/audio/blob/main/torchaudio/sox_effects/sox_effects.py + @_mod_utils.requires_sox() def init_sox_effects(): """Initialize resources required to use sox effects. @@ -54,11 +57,10 @@ def effect_names() -> List[str]: @_mod_utils.requires_sox() def apply_effects_tensor( - tensor: paddle.Tensor, - sample_rate: int, - effects: List[List[str]], - channels_first: bool = True, -) -> Tuple[paddle.Tensor, int]: + tensor: paddle.Tensor, + sample_rate: int, + effects: List[List[str]], + channels_first: bool=True, ) -> Tuple[paddle.Tensor, int]: """Apply sox effects to given Tensor .. devices:: CPU @@ -120,20 +122,20 @@ def apply_effects_tensor( """ tensor_np = tensor.numpy() - ret = paddleaudio.sox_effects_apply_effects_tensor(tensor_np, sample_rate, effects, channels_first) + ret = paddleaudio.sox_effects_apply_effects_tensor(tensor_np, sample_rate, + effects, channels_first) if ret is not None: - return (paddle.to_tensor(ret[0]), ret[1]) + return (paddle.to_tensor(ret[0]), ret[1]) raise RuntimeError("Failed to apply sox effect") @_mod_utils.requires_sox() def apply_effects_file( - path: str, - effects: List[List[str]], - normalize: bool = True, - channels_first: bool = True, - format: Optional[str] = None, -) -> Tuple[paddle.Tensor, int]: + path: str, + effects: List[List[str]], + normalize: bool=True, + channels_first: bool=True, + format: Optional[str]=None, ) -> Tuple[paddle.Tensor, int]: """Apply sox effects to the audio file and load the resulting data as Tensor Note: @@ -227,12 +229,14 @@ def apply_effects_file( >>> pass """ if hasattr(path, "read"): - ret = paddleaudio.apply_effects_fileobj(path, effects, normalize, channels_first, format) + ret = paddleaudio.apply_effects_fileobj(path, effects, normalize, + channels_first, format) if ret is None: raise RuntimeError("Failed to load audio from {}".format(path)) return (paddle.to_tensor(ret[0]), ret[1]) path = os.fspath(path) - ret = paddleaudio.sox_effects_apply_effects_file(path, effects, normalize, channels_first, format) + ret = paddleaudio.sox_effects_apply_effects_file(path, effects, normalize, + channels_first, format) if ret is not None: return (paddle.to_tensor(ret[0]), ret[1]) - raise RuntimeError("Failed to load audio from {}".format(path)) \ No newline at end of file + raise RuntimeError("Failed to load audio from {}".format(path)) diff --git a/paddlespeech/audio/utils/sox_utils.py b/paddlespeech/audio/utils/sox_utils.py index 37696a5d9..bd7e9ff4f 100644 --- a/paddlespeech/audio/utils/sox_utils.py +++ b/paddlespeech/audio/utils/sox_utils.py @@ -1,7 +1,11 @@ -from typing import Dict, List +from typing import Dict +from typing import List -from paddlespeech.audio._internal import module_utils as _mod_utils from paddlespeech.audio import _paddleaudio +from paddlespeech.audio._internal import module_utils as _mod_utils + +#Taken form https://github.com/pytorch/audio/blob/main/torchaudio/utils/sox_utils.py with modification. + @_mod_utils.requires_sox() def set_seed(seed: int): diff --git a/paddlespeech/cli/kws/infer.py b/paddlespeech/cli/kws/infer.py index 17482f653..ce2f3f461 100644 --- a/paddlespeech/cli/kws/infer.py +++ b/paddlespeech/cli/kws/infer.py @@ -20,12 +20,12 @@ from typing import Union import paddle import yaml +from paddleaudio.backends import soundfile_load as load_audio +from paddleaudio.compliance.kaldi import fbank as kaldi_fbank from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper -from paddleaudio.backends import soundfile_load as load_audio -from paddleaudio.compliance.kaldi import fbank as kaldi_fbank __all__ = ['KWSExecutor'] @@ -139,7 +139,7 @@ class KWSExecutor(BaseExecutor): Input content can be a text(tts), a file(asr, cls) or a streaming(not supported yet). """ assert os.path.isfile(audio_file) - waveform, _ = load(audio_file) + waveform, _ = load_audio(audio_file) if isinstance(audio_file, (str, os.PathLike)): logger.debug("Preprocessing audio_file:" + audio_file) diff --git a/paddlespeech/cli/vector/infer.py b/paddlespeech/cli/vector/infer.py index b1335f281..57a781656 100644 --- a/paddlespeech/cli/vector/infer.py +++ b/paddlespeech/cli/vector/infer.py @@ -22,13 +22,13 @@ from typing import Union import paddle import soundfile +from paddleaudio.backends import soundfile_load as load_audio +from paddleaudio.compliance.librosa import melspectrogram from yacs.config import CfgNode from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper -from paddleaudio.backends import soundfile_load as load_audio -from paddleaudio.compliance.librosa import melspectrogram from paddlespeech.vector.io.batch import feature_normalize from paddlespeech.vector.modules.sid_model import SpeakerIdetification diff --git a/paddlespeech/cls/exps/panns/deploy/predict.py b/paddlespeech/cls/exps/panns/deploy/predict.py index b13d037f5..ae46890bd 100644 --- a/paddlespeech/cls/exps/panns/deploy/predict.py +++ b/paddlespeech/cls/exps/panns/deploy/predict.py @@ -16,11 +16,10 @@ import os import numpy as np from paddle import inference -from scipy.special import softmax - from paddleaudio.backends import soundfile_load as load_audio from paddleaudio.datasets import ESC50 from paddleaudio.features import melspectrogram +from scipy.special import softmax # yapf: disable parser = argparse.ArgumentParser() diff --git a/paddlespeech/cls/exps/panns/export_model.py b/paddlespeech/cls/exps/panns/export_model.py index c295c6a33..63b22981a 100644 --- a/paddlespeech/cls/exps/panns/export_model.py +++ b/paddlespeech/cls/exps/panns/export_model.py @@ -15,8 +15,8 @@ import argparse import os import paddle - from paddleaudio.datasets import ESC50 + from paddlespeech.cls.models import cnn14 from paddlespeech.cls.models import SoundClassifier diff --git a/paddlespeech/cls/exps/panns/predict.py b/paddlespeech/cls/exps/panns/predict.py index 8064ab0d4..feeee24e3 100644 --- a/paddlespeech/cls/exps/panns/predict.py +++ b/paddlespeech/cls/exps/panns/predict.py @@ -17,10 +17,10 @@ import os import paddle import paddle.nn.functional as F import yaml - from paddleaudio.backends import soundfile_load as load_audio from paddleaudio.features import LogMelSpectrogram from paddleaudio.utils import logger + from paddlespeech.cls.models import SoundClassifier from paddlespeech.utils.dynamic_import import dynamic_import diff --git a/paddlespeech/cls/exps/panns/train.py b/paddlespeech/cls/exps/panns/train.py index 56082bd77..5a9ca92d1 100644 --- a/paddlespeech/cls/exps/panns/train.py +++ b/paddlespeech/cls/exps/panns/train.py @@ -14,10 +14,10 @@ import os import paddle -from yacs.config import CfgNode - from paddleaudio.utils import logger from paddleaudio.utils import Timer +from yacs.config import CfgNode + from paddlespeech.kws.exps.mdtc.collate import collate_features from paddlespeech.kws.models.loss import max_pooling_loss from paddlespeech.kws.models.mdtc import KWSModel diff --git a/paddlespeech/cls/models/panns/panns.py b/paddlespeech/cls/models/panns/panns.py index feefecbe1..6f9af9b52 100644 --- a/paddlespeech/cls/models/panns/panns.py +++ b/paddlespeech/cls/models/panns/panns.py @@ -15,8 +15,8 @@ import os import paddle.nn as nn import paddle.nn.functional as F - from paddleaudio.utils.download import load_state_dict_from_url + from paddlespeech.utils.env import MODEL_HOME __all__ = ['CNN14', 'CNN10', 'CNN6', 'cnn14', 'cnn10', 'cnn6'] diff --git a/paddlespeech/kws/exps/mdtc/train.py b/paddlespeech/kws/exps/mdtc/train.py index 56082bd77..5a9ca92d1 100644 --- a/paddlespeech/kws/exps/mdtc/train.py +++ b/paddlespeech/kws/exps/mdtc/train.py @@ -14,10 +14,10 @@ import os import paddle -from yacs.config import CfgNode - from paddleaudio.utils import logger from paddleaudio.utils import Timer +from yacs.config import CfgNode + from paddlespeech.kws.exps.mdtc.collate import collate_features from paddlespeech.kws.models.loss import max_pooling_loss from paddlespeech.kws.models.mdtc import KWSModel diff --git a/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py b/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py index 12e8a2966..22329d5e0 100644 --- a/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py +++ b/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py @@ -14,11 +14,10 @@ """Contains the audio featurizer class.""" import numpy as np import paddle +import paddleaudio.compliance.kaldi as kaldi from python_speech_features import delta from python_speech_features import mfcc -import paddleaudio.compliance.kaldi as kaldi - class AudioFeaturizer(): """Audio featurizer, for extracting features from audio contents of diff --git a/paddlespeech/s2t/models/u2/u2.py b/paddlespeech/s2t/models/u2/u2.py index 5c2fa3071..80f187282 100644 --- a/paddlespeech/s2t/models/u2/u2.py +++ b/paddlespeech/s2t/models/u2/u2.py @@ -28,10 +28,10 @@ from typing import Tuple import paddle from paddle import jit from paddle import nn - from paddleaudio.utils.tensor_utils import add_sos_eos from paddleaudio.utils.tensor_utils import pad_sequence from paddleaudio.utils.tensor_utils import th_accuracy + from paddlespeech.s2t.decoders.scorers.ctc import CTCPrefixScorer from paddlespeech.s2t.frontend.utility import IGNORE_ID from paddlespeech.s2t.frontend.utility import load_cmvn diff --git a/paddlespeech/s2t/models/u2_st/u2_st.py b/paddlespeech/s2t/models/u2_st/u2_st.py index 1ba313c46..4b68c1ae1 100644 --- a/paddlespeech/s2t/models/u2_st/u2_st.py +++ b/paddlespeech/s2t/models/u2_st/u2_st.py @@ -24,9 +24,9 @@ from typing import Tuple import paddle from paddle import jit from paddle import nn - from paddleaudio.utils.tensor_utils import add_sos_eos from paddleaudio.utils.tensor_utils import th_accuracy + from paddlespeech.s2t.frontend.utility import IGNORE_ID from paddlespeech.s2t.frontend.utility import load_cmvn from paddlespeech.s2t.modules.cmvn import GlobalCMVN diff --git a/paddlespeech/server/engine/vector/python/vector_engine.py b/paddlespeech/server/engine/vector/python/vector_engine.py index ecbdbfa5a..7d86f3df7 100644 --- a/paddlespeech/server/engine/vector/python/vector_engine.py +++ b/paddlespeech/server/engine/vector/python/vector_engine.py @@ -16,9 +16,9 @@ from collections import OrderedDict import numpy as np import paddle - from paddleaudio.backends import soundfile_load as load_audio from paddleaudio.compliance.librosa import melspectrogram + from paddlespeech.cli.log import logger from paddlespeech.cli.vector.infer import VectorExecutor from paddlespeech.server.engine.base_engine import BaseEngine diff --git a/paddlespeech/server/util.py b/paddlespeech/server/util.py index ac92cf666..6aa6fd589 100644 --- a/paddlespeech/server/util.py +++ b/paddlespeech/server/util.py @@ -24,11 +24,11 @@ from typing import Any from typing import Dict import paddle +import paddleaudio import requests import yaml from paddle.framework import load -import paddleaudio from .entry import client_commands from .entry import server_commands from paddlespeech.cli import download diff --git a/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py b/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py index 790a4eb67..821b1deed 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py @@ -16,10 +16,10 @@ import os import time import paddle -from yacs.config import CfgNode - from paddleaudio.backends import soundfile_load as load_audio from paddleaudio.compliance.librosa import melspectrogram +from yacs.config import CfgNode + from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.batch import feature_normalize from paddlespeech.vector.models.ecapa_tdnn import EcapaTdnn diff --git a/paddlespeech/vector/exps/ecapa_tdnn/test.py b/paddlespeech/vector/exps/ecapa_tdnn/test.py index 1b38075d6..f15dbf9b7 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/test.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/test.py @@ -18,10 +18,10 @@ import numpy as np import paddle from paddle.io import BatchSampler from paddle.io import DataLoader +from paddleaudio.metric import compute_eer from tqdm import tqdm from yacs.config import CfgNode -from paddleaudio.metric import compute_eer from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.batch import batch_feature_normalize from paddlespeech.vector.io.dataset import CSVDataset diff --git a/paddlespeech/vector/exps/ecapa_tdnn/train.py b/paddlespeech/vector/exps/ecapa_tdnn/train.py index 73da16dc7..bf014045d 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/train.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/train.py @@ -20,9 +20,9 @@ import paddle from paddle.io import BatchSampler from paddle.io import DataLoader from paddle.io import DistributedBatchSampler +from paddleaudio.compliance.librosa import melspectrogram from yacs.config import CfgNode -from paddleaudio.compliance.librosa import melspectrogram from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.augment import build_augment_pipeline from paddlespeech.vector.io.augment import waveform_augment diff --git a/paddlespeech/vector/io/dataset.py b/paddlespeech/vector/io/dataset.py index 1fa8b6b99..dff8ad9fd 100644 --- a/paddlespeech/vector/io/dataset.py +++ b/paddlespeech/vector/io/dataset.py @@ -15,10 +15,9 @@ from dataclasses import dataclass from dataclasses import fields from paddle.io import Dataset - from paddleaudio.backends import soundfile_load as load_audio - from paddleaudio.compliance.librosa import melspectrogram + from paddlespeech.s2t.utils.log import Log logger = Log(__name__).getlog() diff --git a/paddlespeech/vector/io/dataset_from_json.py b/paddlespeech/vector/io/dataset_from_json.py index 39b92af66..852f39a94 100644 --- a/paddlespeech/vector/io/dataset_from_json.py +++ b/paddlespeech/vector/io/dataset_from_json.py @@ -16,7 +16,6 @@ from dataclasses import dataclass from dataclasses import fields from paddle.io import Dataset - from paddleaudio.backends import soundfile_load as load_audio from paddleaudio.compliance.librosa import melspectrogram from paddleaudio.compliance.librosa import mfcc diff --git a/tests/unit/audio/backends/sox_io/load_test.py b/tests/unit/audio/backends/sox_io/load_test.py index 8e141750b..33090a92e 100644 --- a/tests/unit/audio/backends/sox_io/load_test.py +++ b/tests/unit/audio/backends/sox_io/load_test.py @@ -1,28 +1,29 @@ -import unittest import itertools +import unittest -from parameterized import parameterized import numpy as np -from paddlespeech.audio._internal import module_utils as _mod_utils -from paddlespeech.audio.backends import sox_io_backend +from parameterized import parameterized -from tests.unit.common_utils import ( - get_wav_data, - load_wav, - save_wav, -) +from paddlespeech.audio.backends import sox_io_backend +from tests.unit.common_utils import get_wav_data +from tests.unit.common_utils import load_wav +from tests.unit.common_utils import save_wav #code is from:https://github.com/pytorch/audio/blob/main/torchaudio/test/torchaudio_unittest/backend/sox_io/load_test.py -class TestLoad(unittest.TestCase): +class TestLoad(unittest.TestCase): def assert_wav(self, dtype, sample_rate, num_channels, normalize, duration): """`sox_io_backend.load` can load wav format correctly. Wav data loaded with sox_io backend should match those with scipy """ path = 'testdata/reference.wav' - data = get_wav_data(dtype, num_channels, normalize=normalize, num_frames=duration * sample_rate) + data = get_wav_data( + dtype, + num_channels, + normalize=normalize, + num_frames=duration * sample_rate) save_wav(path, data, sample_rate) expected = load_wav(path, normalize=normalize)[0] data, sr = sox_io_backend.load(path, normalize=normalize) @@ -32,16 +33,18 @@ class TestLoad(unittest.TestCase): @parameterized.expand( list( itertools.product( - ["float64", "float32", "int32",], + [ + "float64", + "float32", + "int32", + ], [8000, 16000], [1, 2], - [False, True], - ) - ), - ) + [False, True], )), ) def test_wav(self, dtype, sample_rate, num_channels, normalize): """`sox_io_backend.load` can load wav format correctly.""" self.assert_wav(dtype, sample_rate, num_channels, normalize, duration=1) - + + if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() diff --git a/tests/unit/audio/backends/sox_io/save_test.py b/tests/unit/audio/backends/sox_io/save_test.py index 7942f018d..2d8e982c7 100644 --- a/tests/unit/audio/backends/sox_io/save_test.py +++ b/tests/unit/audio/backends/sox_io/save_test.py @@ -1,23 +1,19 @@ import io -import os import unittest import numpy as np -import paddle -from parameterized import parameterized -from paddlespeech.audio.backends import sox_io_backend -from tests.unit.common_utils import ( - get_wav_data, - load_wav, - save_wav, - nested_params, - TempDirMixin, - sox_utils -) +from paddlespeech.audio.backends import sox_io_backend +from tests.unit.common_utils import get_wav_data +from tests.unit.common_utils import load_wav +from tests.unit.common_utils import nested_params +from tests.unit.common_utils import save_wav +from tests.unit.common_utils import sox_utils +from tests.unit.common_utils import TempDirMixin #code is from:https://github.com/pytorch/audio/blob/main/torchaudio/test/torchaudio_unittest/backend/sox_io/save_test.py + def _get_sox_encoding(encoding): encodings = { "PCM_F": "floating-point", @@ -28,20 +24,20 @@ def _get_sox_encoding(encoding): } return encodings.get(encoding) + class TestSaveBase(TempDirMixin): def assert_save_consistency( - self, - format: str, - *, - compression: float = None, - encoding: str = None, - bits_per_sample: int = None, - sample_rate: float = 8000, - num_channels: int = 2, - num_frames: float = 3 * 8000, - src_dtype: str = "int32", - test_mode: str = "path", - ): + self, + format: str, + *, + compression: float=None, + encoding: str=None, + bits_per_sample: int=None, + sample_rate: float=8000, + num_channels: int=2, + num_frames: float=3 * 8000, + src_dtype: str="int32", + test_mode: str="path", ): """`save` function produces file that is comparable with `sox` command To compare that the file produced by `save` function agains the file produced by @@ -89,15 +85,20 @@ class TestSaveBase(TempDirMixin): ref_path = self.get_temp_path("3.2.ref.wav") # 1. Generate original wav - data = get_wav_data(src_dtype, num_channels, normalize=False, num_frames=num_frames) + data = get_wav_data( + src_dtype, num_channels, normalize=False, num_frames=num_frames) save_wav(src_path, data, sample_rate) # 2.1. Convert the original wav to target format with paddleaudio data = load_wav(src_path, normalize=False)[0] if test_mode == "path": sox_io_backend.save( - tgt_path, data, sample_rate, compression=compression, encoding=encoding, bits_per_sample=bits_per_sample - ) + tgt_path, + data, + sample_rate, + compression=compression, + encoding=encoding, + bits_per_sample=bits_per_sample) elif test_mode == "fileobj": with open(tgt_path, "bw") as file_: sox_io_backend.save( @@ -107,8 +108,7 @@ class TestSaveBase(TempDirMixin): format=format, compression=compression, encoding=encoding, - bits_per_sample=bits_per_sample, - ) + bits_per_sample=bits_per_sample, ) elif test_mode == "bytesio": file_ = io.BytesIO() sox_io_backend.save( @@ -118,33 +118,40 @@ class TestSaveBase(TempDirMixin): format=format, compression=compression, encoding=encoding, - bits_per_sample=bits_per_sample, - ) + bits_per_sample=bits_per_sample, ) file_.seek(0) with open(tgt_path, "bw") as f: f.write(file_.read()) else: raise ValueError(f"Unexpected test mode: {test_mode}") # 2.2. Convert the target format to wav with sox - sox_utils.convert_audio_file(tgt_path, tst_path, encoding=cmp_encoding, bit_depth=cmp_bit_depth) + sox_utils.convert_audio_file( + tgt_path, tst_path, encoding=cmp_encoding, bit_depth=cmp_bit_depth) # 2.3. Load with SciPy found = load_wav(tst_path, normalize=False)[0] # 3.1. Convert the original wav to target format with sox sox_encoding = _get_sox_encoding(encoding) sox_utils.convert_audio_file( - src_path, sox_path, compression=compression, encoding=sox_encoding, bit_depth=bits_per_sample - ) + src_path, + sox_path, + compression=compression, + encoding=sox_encoding, + bit_depth=bits_per_sample) # 3.2. Convert the target format to wav with sox - sox_utils.convert_audio_file(sox_path, ref_path, encoding=cmp_encoding, bit_depth=cmp_bit_depth) + sox_utils.convert_audio_file( + sox_path, ref_path, encoding=cmp_encoding, bit_depth=cmp_bit_depth) # 3.3. Load with SciPy expected = load_wav(ref_path, normalize=False)[0] np.testing.assert_array_almost_equal(found, expected) + class TestSave(TestSaveBase, unittest.TestCase): @nested_params( - ["path",], + [ + "path", + ], [ ("PCM_U", 8), ("PCM_S", 16), @@ -153,23 +160,28 @@ class TestSave(TestSaveBase, unittest.TestCase): ("PCM_F", 64), ("ULAW", 8), ("ALAW", 8), - ], - ) + ], ) def test_save_wav(self, test_mode, enc_params): encoding, bits_per_sample = enc_params - self.assert_save_consistency("wav", encoding=encoding, bits_per_sample=bits_per_sample, test_mode=test_mode) + self.assert_save_consistency( + "wav", + encoding=encoding, + bits_per_sample=bits_per_sample, + test_mode=test_mode) @nested_params( - ["path", ], [ - ("float32",), - ("int32",), + "path", ], - ) + [ + ("float32", ), + ("int32", ), + ], ) def test_save_wav_dtype(self, test_mode, params): - (dtype,) = params - self.assert_save_consistency("wav", src_dtype=dtype, test_mode=test_mode) + (dtype, ) = params + self.assert_save_consistency( + "wav", src_dtype=dtype, test_mode=test_mode) if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() diff --git a/tests/unit/audio/backends/sox_io/smoke_test.py b/tests/unit/audio/backends/sox_io/smoke_test.py index 1f191bc51..3051df4a3 100644 --- a/tests/unit/audio/backends/sox_io/smoke_test.py +++ b/tests/unit/audio/backends/sox_io/smoke_test.py @@ -3,12 +3,12 @@ import itertools import unittest from parameterized import parameterized + from paddlespeech.audio.backends import sox_io_backend -from tests.unit.common_utils import ( - get_wav_data, - TempDirMixin, - name_func -) +from tests.unit.common_utils import get_wav_data +from tests.unit.common_utils import name_func +from tests.unit.common_utils import TempDirMixin + class SmokeTest(TempDirMixin, unittest.TestCase): """Run smoke test on various audio format @@ -20,15 +20,23 @@ class SmokeTest(TempDirMixin, unittest.TestCase): however without such tools, the correctness of each function cannot be verified. """ - def run_smoke_test(self, ext, sample_rate, num_channels, *, compression=None, dtype="float32"): + def run_smoke_test(self, + ext, + sample_rate, + num_channels, + *, + compression=None, + dtype="float32"): duration = 1 num_frames = sample_rate * duration #path = self.get_temp_path(f"test.{ext}") path = self.get_temp_path(f"test.{ext}") - original = get_wav_data(dtype, num_channels, normalize=False, num_frames=num_frames) + original = get_wav_data( + dtype, num_channels, normalize=False, num_frames=num_frames) # 1. run save - sox_io_backend.save(path, original, sample_rate, compression=compression) + sox_io_backend.save( + path, original, sample_rate, compression=compression) # 2. run info info = sox_io_backend.info(path) assert info.sample_rate == sample_rate @@ -41,57 +49,51 @@ class SmokeTest(TempDirMixin, unittest.TestCase): @parameterized.expand( list( itertools.product( - ["float32", "int32" ], + ["float32", "int32"], #["float32", "int32", "int16", "uint8"], [8000, 16000], - [1, 2], - ) - ), - name_func=name_func, - ) + [1, 2], )), + name_func=name_func, ) def test_wav(self, dtype, sample_rate, num_channels): """Run smoke test on wav format""" self.run_smoke_test("wav", sample_rate, num_channels, dtype=dtype) #@parameterized.expand( - #list( - #itertools.product( - #[8000, 16000], - #[1, 2], - #[-4.2, -0.2, 0, 0.2, 96, 128, 160, 192, 224, 256, 320], - #) - #) + #list( + #itertools.product( + #[8000, 16000], + #[1, 2], + #[-4.2, -0.2, 0, 0.2, 96, 128, 160, 192, 224, 256, 320], + #) + #) #) #def test_mp3(self, sample_rate, num_channels, bit_rate): - #"""Run smoke test on mp3 format""" - #self.run_smoke_test("mp3", sample_rate, num_channels, compression=bit_rate) + #"""Run smoke test on mp3 format""" + #self.run_smoke_test("mp3", sample_rate, num_channels, compression=bit_rate) #@parameterized.expand( - #list( - #itertools.product( - #[8000, 16000], - #[1, 2], - #[-1, 0, 1, 2, 3, 3.6, 5, 10], - #) - #) + #list( + #itertools.product( + #[8000, 16000], + #[1, 2], + #[-1, 0, 1, 2, 3, 3.6, 5, 10], + #) + #) #) #def test_vorbis(self, sample_rate, num_channels, quality_level): - #"""Run smoke test on vorbis format""" - #self.run_smoke_test("vorbis", sample_rate, num_channels, compression=quality_level) + #"""Run smoke test on vorbis format""" + #self.run_smoke_test("vorbis", sample_rate, num_channels, compression=quality_level) @parameterized.expand( - list( - itertools.product( - [8000, 16000], - [1, 2], - list(range(9)), - ) - ), - name_func=name_func, - ) + list(itertools.product( + [8000, 16000], + [1, 2], + list(range(9)), )), + name_func=name_func, ) def test_flac(self, sample_rate, num_channels, compression_level): """Run smoke test on flac format""" - self.run_smoke_test("flac", sample_rate, num_channels, compression=compression_level) + self.run_smoke_test( + "flac", sample_rate, num_channels, compression=compression_level) class SmokeTestFileObj(unittest.TestCase): @@ -104,14 +106,22 @@ class SmokeTestFileObj(unittest.TestCase): however without such tools, the correctness of each function cannot be verified. """ - def run_smoke_test(self, ext, sample_rate, num_channels, *, compression=None, dtype="float32"): + def run_smoke_test(self, + ext, + sample_rate, + num_channels, + *, + compression=None, + dtype="float32"): duration = 1 num_frames = sample_rate * duration - original = get_wav_data(dtype, num_channels, normalize=False, num_frames=num_frames) + original = get_wav_data( + dtype, num_channels, normalize=False, num_frames=num_frames) fileobj = io.BytesIO() # 1. run save - sox_io_backend.save(fileobj, original, sample_rate, compression=compression, format=ext) + sox_io_backend.save( + fileobj, original, sample_rate, compression=compression, format=ext) # 2. run info fileobj.seek(0) info = sox_io_backend.info(fileobj, format=ext) @@ -124,59 +134,53 @@ class SmokeTestFileObj(unittest.TestCase): assert loaded.shape[0] == num_channels @parameterized.expand( - list( - itertools.product( - ["float32", "int32"], - [8000, 16000], - [1, 2], - ) - ), - name_func=name_func, - ) + list(itertools.product( + ["float32", "int32"], + [8000, 16000], + [1, 2], )), + name_func=name_func, ) def test_wav(self, dtype, sample_rate, num_channels): """Run smoke test on wav format""" self.run_smoke_test("wav", sample_rate, num_channels, dtype=dtype) # not support yet #@parameterized.expand( - #list( - #itertools.product( - #[8000, 16000], - #[1, 2], - #[-4.2, -0.2, 0, 0.2, 96, 128, 160, 192, 224, 256, 320], - #) - #) + #list( + #itertools.product( + #[8000, 16000], + #[1, 2], + #[-4.2, -0.2, 0, 0.2, 96, 128, 160, 192, 224, 256, 320], + #) + #) #) #def test_mp3(self, sample_rate, num_channels, bit_rate): - #"""Run smoke test on mp3 format""" - #self.run_smoke_test("mp3", sample_rate, num_channels, compression=bit_rate) + #"""Run smoke test on mp3 format""" + #self.run_smoke_test("mp3", sample_rate, num_channels, compression=bit_rate) #@parameterized.expand( - #list( - #itertools.product( - #[8000, 16000], - #[1, 2], - #[-1, 0, 1, 2, 3, 3.6, 5, 10], - #) - #) + #list( + #itertools.product( + #[8000, 16000], + #[1, 2], + #[-1, 0, 1, 2, 3, 3.6, 5, 10], + #) + #) #) #def test_vorbis(self, sample_rate, num_channels, quality_level): - #"""Run smoke test on vorbis format""" - #self.run_smoke_test("vorbis", sample_rate, num_channels, compression=quality_level) + #"""Run smoke test on vorbis format""" + #self.run_smoke_test("vorbis", sample_rate, num_channels, compression=quality_level) @parameterized.expand( - list( - itertools.product( - [8000, 16000], - [1, 2], - list(range(9)), - ) - ), - name_func=name_func, - ) + list(itertools.product( + [8000, 16000], + [1, 2], + list(range(9)), )), + name_func=name_func, ) def test_flac(self, sample_rate, num_channels, compression_level): #"""Run smoke test on flac format""" - self.run_smoke_test("flac", sample_rate, num_channels, compression=compression_level) + self.run_smoke_test( + "flac", sample_rate, num_channels, compression=compression_level) + if __name__ == '__main__': #test_func() diff --git a/tests/unit/audio/backends/sox_io/sox_effect_test.py b/tests/unit/audio/backends/sox_io/sox_effect_test.py index d9c70bc5e..4600353cb 100644 --- a/tests/unit/audio/backends/sox_io/sox_effect_test.py +++ b/tests/unit/audio/backends/sox_io/sox_effect_test.py @@ -4,24 +4,18 @@ import itertools import tarfile import unittest from pathlib import Path -import numpy as np +import numpy as np from parameterized import parameterized + from paddlespeech.audio import sox_effects -from paddlespeech.audio._internal import module_utils as _mod_utils -from tests.unit.common_utils import ( - get_sinusoid, - get_wav_data, - load_wav, - save_wav, - sox_utils, - TempDirMixin, - name_func, - load_effects_params -) - -if _mod_utils.is_module_available("requests"): - import requests +from tests.unit.common_utils import get_sinusoid +from tests.unit.common_utils import get_wav_data +from tests.unit.common_utils import load_effects_params +from tests.unit.common_utils import load_wav +from tests.unit.common_utils import save_wav +from tests.unit.common_utils import sox_utils +from tests.unit.common_utils import TempDirMixin class TestSoxEffects(unittest.TestCase): @@ -35,20 +29,24 @@ class TestSoxEffectsTensor(TempDirMixin, unittest.TestCase): """Test suite for `apply_effects_tensor` function""" @parameterized.expand( - list(itertools.product(["float32", "int32"], [8000, 16000], [1, 2, 4, 8], [True, False])), - ) - def test_apply_no_effect(self, dtype, sample_rate, num_channels, channels_first): + list( + itertools.product(["float32", "int32"], [8000, 16000], [1, 2, 4, 8], + [True, False])), ) + def test_apply_no_effect(self, dtype, sample_rate, num_channels, + channels_first): """`apply_effects_tensor` without effects should return identical data as input""" - original = get_wav_data(dtype, num_channels, channels_first=channels_first) + original = get_wav_data( + dtype, num_channels, channels_first=channels_first) expected = original.clone() - - found, output_sample_rate = sox_effects.apply_effects_tensor(expected, sample_rate, [], channels_first) + + found, output_sample_rate = sox_effects.apply_effects_tensor( + expected, sample_rate, [], channels_first) assert (output_sample_rate == sample_rate) # SoxEffect should not alter the input Tensor object #self.assertEqual(original, expected) np.testing.assert_array_almost_equal(original.numpy(), expected.numpy()) - + # SoxEffect should not return the same Tensor object assert expected is not found # Returned Tensor should equal to the input Tensor @@ -69,12 +67,18 @@ class TestSoxEffectsTensor(TempDirMixin, unittest.TestCase): input_path = self.get_temp_path("input.wav") reference_path = self.get_temp_path("reference.wav") - original = get_sinusoid(frequency=800, sample_rate=input_sr, n_channels=num_channels, dtype="float32") + original = get_sinusoid( + frequency=800, + sample_rate=input_sr, + n_channels=num_channels, + dtype="float32") save_wav(input_path, original, input_sr) - sox_utils.run_sox_effect(input_path, reference_path, effects, output_sample_rate=output_sr) + sox_utils.run_sox_effect( + input_path, reference_path, effects, output_sample_rate=output_sr) expected, expected_sr = load_wav(reference_path) - found, sr = sox_effects.apply_effects_tensor(original, input_sr, effects) + found, sr = sox_effects.apply_effects_tensor(original, input_sr, + effects) assert sr == expected_sr #self.assertEqual(expected, found) @@ -90,20 +94,19 @@ class TestSoxEffectsFile(TempDirMixin, unittest.TestCase): ["float32", "int32"], [8000, 16000], [1, 2, 4, 8], - [False, True], - ) - ), + [False, True], )), #name_func=name_func, ) - def test_apply_no_effect(self, dtype, sample_rate, num_channels, channels_first): + def test_apply_no_effect(self, dtype, sample_rate, num_channels, + channels_first): """`apply_effects_file` without effects should return identical data as input""" path = self.get_temp_path("input.wav") - expected = get_wav_data(dtype, num_channels, channels_first=channels_first) + expected = get_wav_data( + dtype, num_channels, channels_first=channels_first) save_wav(path, expected, sample_rate, channels_first=channels_first) found, output_sample_rate = sox_effects.apply_effects_file( - path, [], normalize=False, channels_first=channels_first - ) + path, [], normalize=False, channels_first=channels_first) assert output_sample_rate == sample_rate #self.assertEqual(expected, found) @@ -126,16 +129,17 @@ class TestSoxEffectsFile(TempDirMixin, unittest.TestCase): reference_path = self.get_temp_path("reference.wav") data = get_wav_data(dtype, num_channels, channels_first=channels_first) save_wav(input_path, data, input_sr, channels_first=channels_first) - sox_utils.run_sox_effect(input_path, reference_path, effects, output_sample_rate=output_sr) + sox_utils.run_sox_effect( + input_path, reference_path, effects, output_sample_rate=output_sr) expected, expected_sr = load_wav(reference_path) - found, sr = sox_effects.apply_effects_file(input_path, effects, normalize=False, channels_first=channels_first) + found, sr = sox_effects.apply_effects_file( + input_path, effects, normalize=False, channels_first=channels_first) assert sr == expected_sr #self.assertEqual(found, expected) np.testing.assert_array_almost_equal(expected.numpy(), found.numpy()) - def test_apply_effects_path(self): """`apply_effects_file` should return identical data as sox command when file path is given as a Path Object""" dtype = "int32" @@ -149,12 +153,15 @@ class TestSoxEffectsFile(TempDirMixin, unittest.TestCase): reference_path = self.get_temp_path("reference.wav") data = get_wav_data(dtype, num_channels, channels_first=channels_first) save_wav(input_path, data, input_sr, channels_first=channels_first) - sox_utils.run_sox_effect(input_path, reference_path, effects, output_sample_rate=output_sr) + sox_utils.run_sox_effect( + input_path, reference_path, effects, output_sample_rate=output_sr) expected, expected_sr = load_wav(reference_path) found, sr = sox_effects.apply_effects_file( - Path(input_path), effects, normalize=False, channels_first=channels_first - ) + Path(input_path), + effects, + normalize=False, + channels_first=channels_first) assert sr == expected_sr #self.assertEqual(found, expected) @@ -165,13 +172,10 @@ class TestFileFormats(TempDirMixin, unittest.TestCase): """`apply_effects_file` gives the same result as sox on various file formats""" @parameterized.expand( - list( - itertools.product( - ["float32", "int32"], - [8000, 16000], - [1, 2], - ) - ), + list(itertools.product( + ["float32", "int32"], + [8000, 16000], + [1, 2], )), #name_func=lambda f, _, p: f'{f.__name__}_{"_".join(str(arg) for arg in p.args)}', ) def test_wav(self, dtype, sample_rate, num_channels): @@ -186,7 +190,8 @@ class TestFileFormats(TempDirMixin, unittest.TestCase): sox_utils.run_sox_effect(input_path, reference_path, effects) expected, expected_sr = load_wav(reference_path) - found, sr = sox_effects.apply_effects_file(input_path, effects, normalize=False, channels_first=channels_first) + found, sr = sox_effects.apply_effects_file( + input_path, effects, normalize=False, channels_first=channels_first) assert sr == expected_sr #self.assertEqual(found, expected) @@ -194,68 +199,66 @@ class TestFileFormats(TempDirMixin, unittest.TestCase): #not support now #@parameterized.expand( - #list( - #itertools.product( - #[8000, 16000], - #[1, 2], - #) - #), - ##name_func=lambda f, _, p: f'{f.__name__}_{"_".join(str(arg) for arg in p.args)}', + #list( + #itertools.product( + #[8000, 16000], + #[1, 2], + #) + #), + ##name_func=lambda f, _, p: f'{f.__name__}_{"_".join(str(arg) for arg in p.args)}', #) #def test_flac(self, sample_rate, num_channels): - #"""`apply_effects_file` works on various flac format""" - #channels_first = True - #effects = [["band", "300", "10"]] + #"""`apply_effects_file` works on various flac format""" + #channels_first = True + #effects = [["band", "300", "10"]] - #input_path = self.get_temp_path("input.flac") - #reference_path = self.get_temp_path("reference.wav") - #sox_utils.gen_audio_file(input_path, sample_rate, num_channels) - #sox_utils.run_sox_effect(input_path, reference_path, effects, output_bitdepth=32) + #input_path = self.get_temp_path("input.flac") + #reference_path = self.get_temp_path("reference.wav") + #sox_utils.gen_audio_file(input_path, sample_rate, num_channels) + #sox_utils.run_sox_effect(input_path, reference_path, effects, output_bitdepth=32) - #expected, expected_sr = load_wav(reference_path) - #found, sr = sox_effects.apply_effects_file(input_path, effects, channels_first=channels_first) - #save_wav(self.get_temp_path("result.wav"), found, sr, channels_first=channels_first) + #expected, expected_sr = load_wav(reference_path) + #found, sr = sox_effects.apply_effects_file(input_path, effects, channels_first=channels_first) + #save_wav(self.get_temp_path("result.wav"), found, sr, channels_first=channels_first) - #assert sr == expected_sr - ##self.assertEqual(found, expected) - #np.testing.assert_array_almost_equal(found.numpy(), expected.numpy()) + #assert sr == expected_sr + ##self.assertEqual(found, expected) + #np.testing.assert_array_almost_equal(found.numpy(), expected.numpy()) #@parameterized.expand( - #list( - #itertools.product( - #[8000, 16000], - #[1, 2], - #) - #), - ##name_func=lambda f, _, p: f'{f.__name__}_{"_".join(str(arg) for arg in p.args)}', + #list( + #itertools.product( + #[8000, 16000], + #[1, 2], + #) + #), + ##name_func=lambda f, _, p: f'{f.__name__}_{"_".join(str(arg) for arg in p.args)}', #) #def test_vorbis(self, sample_rate, num_channels): - #"""`apply_effects_file` works on various vorbis format""" - #channels_first = True - #effects = [["band", "300", "10"]] + #"""`apply_effects_file` works on various vorbis format""" + #channels_first = True + #effects = [["band", "300", "10"]] - #input_path = self.get_temp_path("input.vorbis") - #reference_path = self.get_temp_path("reference.wav") - #sox_utils.gen_audio_file(input_path, sample_rate, num_channels) - #sox_utils.run_sox_effect(input_path, reference_path, effects, output_bitdepth=32) + #input_path = self.get_temp_path("input.vorbis") + #reference_path = self.get_temp_path("reference.wav") + #sox_utils.gen_audio_file(input_path, sample_rate, num_channels) + #sox_utils.run_sox_effect(input_path, reference_path, effects, output_bitdepth=32) - #expected, expected_sr = load_wav(reference_path) - #found, sr = sox_effects.apply_effects_file(input_path, effects, channels_first=channels_first) - #save_wav(self.get_temp_path("result.wav"), found, sr, channels_first=channels_first) + #expected, expected_sr = load_wav(reference_path) + #found, sr = sox_effects.apply_effects_file(input_path, effects, channels_first=channels_first) + #save_wav(self.get_temp_path("result.wav"), found, sr, channels_first=channels_first) - #assert sr == expected_sr - ##self.assertEqual(found, expected) - #np.testing.assert_array_almost_equal(found.numpy(), expected.numpy()) + #assert sr == expected_sr + ##self.assertEqual(found, expected) + #np.testing.assert_array_almost_equal(found.numpy(), expected.numpy()) -#@skipIfNoExec("sox") -#@skipIfNoSox + #@skipIfNoExec("sox") + #@skipIfNoSox class TestFileObject(TempDirMixin, unittest.TestCase): - @parameterized.expand( - [ - ("wav", None), - ] - ) + @parameterized.expand([ + ("wav", None), + ]) def test_fileobj(self, ext, compression): """Applying effects via file object works""" sample_rate = 16000 @@ -268,21 +271,25 @@ class TestFileObject(TempDirMixin, unittest.TestCase): data = get_wav_data("int32", 2, channels_first=channels_first) save_wav(input_path, data, sample_rate, channels_first=channels_first) - sox_utils.run_sox_effect(input_path, reference_path, effects, output_bitdepth=32) + sox_utils.run_sox_effect( + input_path, reference_path, effects, output_bitdepth=32) expected, expected_sr = load_wav(reference_path) with open(input_path, "rb") as fileobj: - found, sr = sox_effects.apply_effects_file(fileobj, effects, channels_first=channels_first) - save_wav(self.get_temp_path("result.wav"), found, sr, channels_first=channels_first) + found, sr = sox_effects.apply_effects_file( + fileobj, effects, channels_first=channels_first) + save_wav( + self.get_temp_path("result.wav"), + found, + sr, + channels_first=channels_first) assert sr == expected_sr #self.assertEqual(found, expected) np.testing.assert_array_almost_equal(found.numpy(), expected.numpy()) - @parameterized.expand( - [ - ("wav", None), - ] - ) + @parameterized.expand([ + ("wav", None), + ]) def test_bytesio(self, ext, compression): """Applying effects via BytesIO object works""" sample_rate = 16000 @@ -294,13 +301,19 @@ class TestFileObject(TempDirMixin, unittest.TestCase): #sox_utils.gen_audio_file(input_path, sample_rate, num_channels=2, compression=compression) data = get_wav_data("int32", 2, channels_first=channels_first) save_wav(input_path, data, sample_rate, channels_first=channels_first) - sox_utils.run_sox_effect(input_path, reference_path, effects, output_bitdepth=32) + sox_utils.run_sox_effect( + input_path, reference_path, effects, output_bitdepth=32) expected, expected_sr = load_wav(reference_path) with open(input_path, "rb") as file_: fileobj = io.BytesIO(file_.read()) - found, sr = sox_effects.apply_effects_file(fileobj, effects, channels_first=channels_first) - save_wav(self.get_temp_path("result.wav"), found, sr, channels_first=channels_first) + found, sr = sox_effects.apply_effects_file( + fileobj, effects, channels_first=channels_first) + save_wav( + self.get_temp_path("result.wav"), + found, + sr, + channels_first=channels_first) assert sr == expected_sr #self.assertEqual(found, expected) print("found") @@ -309,11 +322,9 @@ class TestFileObject(TempDirMixin, unittest.TestCase): print(expected) np.testing.assert_array_almost_equal(found.numpy(), expected.numpy()) - @parameterized.expand( - [ - ("wav", None), - ] - ) + @parameterized.expand([ + ("wav", None), + ]) def test_tarfile(self, ext, compression): """Applying effects to compressed audio via file-like file works""" sample_rate = 16000 @@ -326,22 +337,28 @@ class TestFileObject(TempDirMixin, unittest.TestCase): archive_path = self.get_temp_path("archive.tar.gz") data = get_wav_data("int32", 2, channels_first=channels_first) save_wav(input_path, data, sample_rate, channels_first=channels_first) - - # sox_utils.gen_audio_file(input_path, sample_rate, num_channels=2, compression=compression) - sox_utils.run_sox_effect(input_path, reference_path, effects, output_bitdepth=32) - + + # sox_utils.gen_audio_file(input_path, sample_rate, num_channels=2, compression=compression) + sox_utils.run_sox_effect( + input_path, reference_path, effects, output_bitdepth=32) + expected, expected_sr = load_wav(reference_path) with tarfile.TarFile(archive_path, "w") as tarobj: tarobj.add(input_path, arcname=audio_file) with tarfile.TarFile(archive_path, "r") as tarobj: fileobj = tarobj.extractfile(audio_file) - found, sr = sox_effects.apply_effects_file(fileobj, effects, channels_first=channels_first) - save_wav(self.get_temp_path("result.wav"), found, sr, channels_first=channels_first) + found, sr = sox_effects.apply_effects_file( + fileobj, effects, channels_first=channels_first) + save_wav( + self.get_temp_path("result.wav"), + found, + sr, + channels_first=channels_first) assert sr == expected_sr #self.assertEqual(found, expected) np.testing.assert_array_almost_equal(found.numpy(), expected.numpy()) if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main() diff --git a/tests/unit/audio/features/base.py b/tests/unit/audio/features/base.py index 614fce28c..d183b72ad 100644 --- a/tests/unit/audio/features/base.py +++ b/tests/unit/audio/features/base.py @@ -17,7 +17,6 @@ import urllib.request import numpy as np import paddle - from paddleaudio.backends import soundfile_load as load wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav' diff --git a/tests/unit/audio/features/test_istft.py b/tests/unit/audio/features/test_istft.py index 23371200b..9cf8cdd65 100644 --- a/tests/unit/audio/features/test_istft.py +++ b/tests/unit/audio/features/test_istft.py @@ -15,9 +15,9 @@ import unittest import numpy as np import paddle +from paddleaudio.functional.window import get_window from .base import FeatTest -from paddleaudio.functional.window import get_window from paddlespeech.s2t.transform.spectrogram import IStft from paddlespeech.s2t.transform.spectrogram import Stft diff --git a/tests/unit/audio/features/test_kaldi_feat.py b/tests/unit/audio/features/test_kaldi_feat.py index e0ca1fa1d..38fba450b 100644 --- a/tests/unit/audio/features/test_kaldi_feat.py +++ b/tests/unit/audio/features/test_kaldi_feat.py @@ -14,18 +14,17 @@ import unittest import numpy as np -import paddle +from kaldiio import ReadHelper from paddlespeech.audio.kaldi import fbank as fbank from paddlespeech.audio.kaldi import pitch as pitch -from kaldiio import ReadHelper # the groundtruth feats computed in kaldi command below. #compute-fbank-feats --dither=0 scp:$wav_scp ark,t:fbank_feat.ark #compute-kaldi-pitch-feats --sample-frequency=16000 scp:$wav_scp ark,t:pitch_feat.ark -class TestKaldiFbank(unittest.TestCase): +class TestKaldiFbank(unittest.TestCase): def test_fbank(self): fbank_groundtruth = {} with ReadHelper('ark:testdata/fbank_feat.ark') as reader: @@ -42,8 +41,8 @@ class TestKaldiFbank(unittest.TestCase): def test_pitch(self): pitch_groundtruth = {} with ReadHelper('ark:testdata/pitch_feat.ark') as reader: - for key, feat in reader: - pitch_groundtruth[key] = feat + for key, feat in reader: + pitch_groundtruth[key] = feat with ReadHelper('ark:testdata/wav.ark') as reader: for key, wav in reader: @@ -53,6 +52,5 @@ class TestKaldiFbank(unittest.TestCase): pitch_feat, pitch_check, decimal=4) - if __name__ == '__main__': unittest.main() diff --git a/tests/unit/audio/features/test_log_melspectrogram.py b/tests/unit/audio/features/test_log_melspectrogram.py index 0c38de22c..7d5680387 100644 --- a/tests/unit/audio/features/test_log_melspectrogram.py +++ b/tests/unit/audio/features/test_log_melspectrogram.py @@ -15,8 +15,8 @@ import unittest import numpy as np import paddle - import paddleaudio + from .base import FeatTest from paddlespeech.s2t.transform.spectrogram import LogMelSpectrogram diff --git a/tests/unit/audio/features/test_spectrogram.py b/tests/unit/audio/features/test_spectrogram.py index 50b21403b..1774fe619 100644 --- a/tests/unit/audio/features/test_spectrogram.py +++ b/tests/unit/audio/features/test_spectrogram.py @@ -15,8 +15,8 @@ import unittest import numpy as np import paddle - import paddleaudio + from .base import FeatTest from paddlespeech.s2t.transform.spectrogram import Spectrogram diff --git a/tests/unit/audio/features/test_stft.py b/tests/unit/audio/features/test_stft.py index c64b5ebe6..58792ffe2 100644 --- a/tests/unit/audio/features/test_stft.py +++ b/tests/unit/audio/features/test_stft.py @@ -15,9 +15,9 @@ import unittest import numpy as np import paddle +from paddleaudio.functional.window import get_window from .base import FeatTest -from paddleaudio.functional.window import get_window from paddlespeech.s2t.transform.spectrogram import Stft diff --git a/tests/unit/common_utils/__init__.py b/tests/unit/common_utils/__init__.py index 7bc718f38..70e533153 100644 --- a/tests/unit/common_utils/__init__.py +++ b/tests/unit/common_utils/__init__.py @@ -1,19 +1,15 @@ -from .wav_utils import get_wav_data, load_wav, save_wav, normalize_wav -from .parameterized_utils import nested_params -from .data_utils import get_sinusoid, load_params, load_effects_params -from .case_utils import ( - TempDirMixin, - name_func -) +from .case_utils import name_func +from .case_utils import TempDirMixin +from .data_utils import get_sinusoid +from .data_utils import load_effects_params +from .data_utils import load_params +from .parameterized_utils import nested_params +from .wav_utils import get_wav_data +from .wav_utils import load_wav +from .wav_utils import normalize_wav +from .wav_utils import save_wav __all__ = [ - "get_wav_data", - "load_wav", - "save_wav", - "normalize_wav", - "load_params", - "nested_params", - "get_sinusoid", - "name_func", - "load_effects_params" + "get_wav_data", "load_wav", "save_wav", "normalize_wav", "load_params", + "nested_params", "get_sinusoid", "name_func", "load_effects_params" ] diff --git a/tests/unit/common_utils/case_utils.py b/tests/unit/common_utils/case_utils.py index 406d293b6..65a78c5df 100644 --- a/tests/unit/common_utils/case_utils.py +++ b/tests/unit/common_utils/case_utils.py @@ -1,24 +1,13 @@ -import functools import os.path -import shutil -import subprocess -import sys import tempfile -import time -import unittest #code is from:https://github.com/pytorch/audio/blob/main/test/torchaudio_unittest/common_utils/case_utils.py -import paddle -from paddlespeech.audio._internal.module_utils import ( - is_kaldi_available, - is_module_available, - is_sox_available, -) def name_func(func, _, params): return f'{func.__name__}_{"_".join(str(arg) for arg in params.args)}' + class TempDirMixin: """Mixin to provide easy access to temp dir""" diff --git a/tests/unit/common_utils/wav_utils.py b/tests/unit/common_utils/wav_utils.py index 25d0b1971..5cae6d8e6 100644 --- a/tests/unit/common_utils/wav_utils.py +++ b/tests/unit/common_utils/wav_utils.py @@ -1,8 +1,8 @@ from typing import Optional -import scipy.io.wavfile import paddle -import numpy as np +import scipy.io.wavfile + def normalize_wav(tensor: paddle.Tensor) -> paddle.Tensor: if tensor.dtype == paddle.float32: @@ -23,13 +23,12 @@ def normalize_wav(tensor: paddle.Tensor) -> paddle.Tensor: def get_wav_data( - dtype: str, - num_channels: int, - *, - num_frames: Optional[int] = None, - normalize: bool = True, - channels_first: bool = True, -): + dtype: str, + num_channels: int, + *, + num_frames: Optional[int]=None, + normalize: bool=True, + channels_first: bool=True, ): """Generate linear signal of the given dtype and num_channels Data range is @@ -53,25 +52,26 @@ def get_wav_data( # paddle linspace not support uint8, int8, int16 #if dtype == "uint8": # base = paddle.linspace(0, 255, num_frames, dtype=dtype_) - #dtype_np = getattr(np, dtype) - #base_np = np.linspace(0, 255, num_frames, dtype_np) - #base = paddle.to_tensor(base_np, dtype=dtype_) + #dtype_np = getattr(np, dtype) + #base_np = np.linspace(0, 255, num_frames, dtype_np) + #base = paddle.to_tensor(base_np, dtype=dtype_) #elif dtype == "int8": # base = paddle.linspace(-128, 127, num_frames, dtype=dtype_) - #dtype_np = getattr(np, dtype) - #base_np = np.linspace(-128, 127, num_frames, dtype_np) - #base = paddle.to_tensor(base_np, dtype=dtype_) + #dtype_np = getattr(np, dtype) + #base_np = np.linspace(-128, 127, num_frames, dtype_np) + #base = paddle.to_tensor(base_np, dtype=dtype_) if dtype == "float32": base = paddle.linspace(-1.0, 1.0, num_frames, dtype=dtype_) elif dtype == "float64": base = paddle.linspace(-1.0, 1.0, num_frames, dtype=dtype_) elif dtype == "int32": - base = paddle.linspace(-2147483648, 2147483647, num_frames, dtype=dtype_) + base = paddle.linspace( + -2147483648, 2147483647, num_frames, dtype=dtype_) #elif dtype == "int16": # base = paddle.linspace(-32768, 32767, num_frames, dtype=dtype_) - #dtype_np = getattr(np, dtype) - #base_np = np.linspace(-32768, 32767, num_frames, dtype_np) - #base = paddle.to_tensor(base_np, dtype=dtype_) + #dtype_np = getattr(np, dtype) + #base_np = np.linspace(-32768, 32767, num_frames, dtype_np) + #base = paddle.to_tensor(base_np, dtype=dtype_) else: raise NotImplementedError(f"Unsupported dtype {dtype}") data = base.tile([num_channels, 1])