From bf056c013d04fe43abf12f6f7b6372f083772fa9 Mon Sep 17 00:00:00 2001 From: KP <109694228@qq.com> Date: Thu, 2 Jun 2022 16:20:50 +0800 Subject: [PATCH 1/2] Refactor paddleaudio to paddlespeech.audio --- .mergify.yml | 4 +- audio/.gitignore | 2 - audio/CHANGELOG.md | 9 -- audio/README.md | 7 -- audio/docs/Makefile | 19 ---- audio/docs/README.md | 24 ----- audio/docs/images/paddle.png | Bin 5043 -> 0 bytes audio/docs/make.bat | 35 ------- audio/paddleaudio/utils/env.py | 60 ----------- audio/setup.py | 99 ------------------ audio/tests/.gitkeep | 0 demos/audio_searching/README.md | 2 +- demos/audio_searching/README_cn.md | 2 +- .../source/audio}/_static/custom.css | 0 .../source/audio}/_templates/module.rst_t | 0 .../source/audio}/_templates/package.rst_t | 0 .../source/audio}/_templates/toc.rst_t | 0 .../docs/source => docs/source/audio}/conf.py | 0 .../source => docs/source/audio}/index.rst | 0 docs/source/cls/custom_dataset.md | 8 +- examples/esc50/cls0/conf/panns.yaml | 2 +- examples/hey_snips/kws0/conf/mdtc.yaml | 2 +- examples/voxceleb/sv0/local/data_prepare.py | 2 +- .../make_rirs_noise_csv_dataset_from_json.py | 2 +- .../local/make_vox_csv_dataset_from_json.py | 2 +- .../audio}/__init__.py | 0 .../audio}/backends/__init__.py | 0 .../audio}/backends/soundfile_backend.py | 0 .../audio}/backends/sox_backend.py | 0 .../audio}/compliance/__init__.py | 0 .../audio}/compliance/kaldi.py | 0 .../audio}/compliance/librosa.py | 0 .../audio}/datasets/__init__.py | 0 .../audio}/datasets/dataset.py | 0 .../audio}/datasets/esc50.py | 0 .../audio}/datasets/gtzan.py | 0 .../audio}/datasets/hey_snips.py | 0 .../audio}/datasets/rirs_noises.py | 0 .../audio}/datasets/tess.py | 0 .../audio}/datasets/urban_sound.py | 0 .../audio}/datasets/voxceleb.py | 0 .../audio}/features/__init__.py | 0 .../audio}/features/layers.py | 0 .../audio}/functional/__init__.py | 0 .../audio}/functional/functional.py | 0 .../audio}/functional/window.py | 0 .../audio}/io/__init__.py | 0 
.../audio}/metric/__init__.py | 0 .../audio}/metric/eer.py | 0 .../audio}/sox_effects/__init__.py | 0 .../audio}/utils/__init__.py | 6 +- .../audio}/utils/download.py | 0 .../audio}/utils/error.py | 0 .../audio}/utils/log.py | 0 .../audio}/utils/numeric.py | 0 .../audio}/utils/time.py | 0 paddlespeech/cli/cls/infer.py | 2 + paddlespeech/cli/utils.py | 5 +- paddlespeech/cli/vector/infer.py | 2 + paddlespeech/cls/exps/panns/deploy/predict.py | 7 +- paddlespeech/cls/exps/panns/export_model.py | 2 +- paddlespeech/cls/exps/panns/predict.py | 6 +- paddlespeech/cls/exps/panns/train.py | 6 +- paddlespeech/cls/models/panns/panns.py | 5 +- paddlespeech/kws/exps/mdtc/train.py | 4 +- .../frontend/featurizer/audio_featurizer.py | 3 +- paddlespeech/s2t/transform/spectrogram.py | 3 +- .../engine/vector/python/vector_engine.py | 4 +- paddlespeech/server/util.py | 4 +- .../vector/exps/ecapa_tdnn/extract_emb.py | 4 +- paddlespeech/vector/exps/ecapa_tdnn/test.py | 2 +- paddlespeech/vector/exps/ecapa_tdnn/train.py | 2 +- paddlespeech/vector/io/dataset.py | 4 +- paddlespeech/vector/io/dataset_from_json.py | 7 +- setup.py | 60 ++++------- .../benchmark/audio}/README.md | 1 - .../benchmark/audio}/log_melspectrogram.py | 20 ++-- .../benchmark/audio}/melspectrogram.py | 20 ++-- .../benchmark/audio}/mfcc.py | 20 ++-- .../unit/audio}/backends/__init__.py | 0 .../unit/audio}/backends/base.py | 0 .../audio}/backends/soundfile/__init__.py | 0 .../unit/audio}/backends/soundfile/test_io.py | 10 +- .../unit/audio}/features/__init__.py | 0 .../unit/audio}/features/base.py | 3 +- .../unit/audio}/features/test_istft.py | 2 +- .../unit/audio}/features/test_kaldi.py | 12 +-- .../unit/audio}/features/test_librosa.py | 32 +++--- .../features/test_log_melspectrogram.py | 5 +- .../unit/audio}/features/test_spectrogram.py | 4 +- .../unit/audio}/features/test_stft.py | 2 +- 91 files changed, 147 insertions(+), 403 deletions(-) delete mode 100644 audio/.gitignore delete mode 100644 audio/CHANGELOG.md delete 
mode 100644 audio/README.md delete mode 100644 audio/docs/Makefile delete mode 100644 audio/docs/README.md delete mode 100644 audio/docs/images/paddle.png delete mode 100644 audio/docs/make.bat delete mode 100644 audio/paddleaudio/utils/env.py delete mode 100644 audio/setup.py delete mode 100644 audio/tests/.gitkeep rename {audio/docs/source => docs/source/audio}/_static/custom.css (100%) rename {audio/docs/source => docs/source/audio}/_templates/module.rst_t (100%) rename {audio/docs/source => docs/source/audio}/_templates/package.rst_t (100%) rename {audio/docs/source => docs/source/audio}/_templates/toc.rst_t (100%) rename {audio/docs/source => docs/source/audio}/conf.py (100%) rename {audio/docs/source => docs/source/audio}/index.rst (100%) rename {audio/paddleaudio => paddlespeech/audio}/__init__.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/backends/__init__.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/backends/soundfile_backend.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/backends/sox_backend.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/compliance/__init__.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/compliance/kaldi.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/compliance/librosa.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/datasets/__init__.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/datasets/dataset.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/datasets/esc50.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/datasets/gtzan.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/datasets/hey_snips.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/datasets/rirs_noises.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/datasets/tess.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/datasets/urban_sound.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/datasets/voxceleb.py (100%) rename 
{audio/paddleaudio => paddlespeech/audio}/features/__init__.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/features/layers.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/functional/__init__.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/functional/functional.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/functional/window.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/io/__init__.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/metric/__init__.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/metric/eer.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/sox_effects/__init__.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/utils/__init__.py (88%) rename {audio/paddleaudio => paddlespeech/audio}/utils/download.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/utils/error.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/utils/log.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/utils/numeric.py (100%) rename {audio/paddleaudio => paddlespeech/audio}/utils/time.py (100%) rename {audio/tests/benchmark => tests/benchmark/audio}/README.md (97%) rename {audio/tests/benchmark => tests/benchmark/audio}/log_melspectrogram.py (87%) rename {audio/tests/benchmark => tests/benchmark/audio}/melspectrogram.py (85%) rename {audio/tests/benchmark => tests/benchmark/audio}/mfcc.py (87%) rename {audio/tests => tests/unit/audio}/backends/__init__.py (100%) rename {audio/tests => tests/unit/audio}/backends/base.py (100%) rename {audio/tests => tests/unit/audio}/backends/soundfile/__init__.py (100%) rename {audio/tests => tests/unit/audio}/backends/soundfile/test_io.py (90%) rename {audio/tests => tests/unit/audio}/features/__init__.py (100%) rename {audio/tests => tests/unit/audio}/features/base.py (97%) rename {audio/tests => tests/unit/audio}/features/test_istft.py (96%) rename {audio/tests => tests/unit/audio}/features/test_kaldi.py (87%) rename {audio/tests => 
tests/unit/audio}/features/test_librosa.py (89%) rename {audio/tests => tests/unit/audio}/features/test_log_melspectrogram.py (90%) rename {audio/tests => tests/unit/audio}/features/test_spectrogram.py (93%) rename {audio/tests => tests/unit/audio}/features/test_stft.py (95%) diff --git a/.mergify.yml b/.mergify.yml index 68b24810..5cb1f486 100644 --- a/.mergify.yml +++ b/.mergify.yml @@ -52,7 +52,7 @@ pull_request_rules: add: ["T2S"] - name: "auto add label=Audio" conditions: - - files~=^paddleaudio/ + - files~=^paddlespeech/audio/ actions: label: add: ["Audio"] @@ -100,7 +100,7 @@ pull_request_rules: add: ["README"] - name: "auto add label=Documentation" conditions: - - files~=^(docs/|CHANGELOG.md|paddleaudio/CHANGELOG.md) + - files~=^(docs/|CHANGELOG.md) actions: label: add: ["Documentation"] diff --git a/audio/.gitignore b/audio/.gitignore deleted file mode 100644 index 1c930053..00000000 --- a/audio/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -.eggs -*.wav diff --git a/audio/CHANGELOG.md b/audio/CHANGELOG.md deleted file mode 100644 index 925d7769..00000000 --- a/audio/CHANGELOG.md +++ /dev/null @@ -1,9 +0,0 @@ -# Changelog - -Date: 2022-3-15, Author: Xiaojie Chen. - - kaldi and librosa mfcc, fbank, spectrogram. - - unit test and benchmark. - -Date: 2022-2-25, Author: Hui Zhang. - - Refactor architecture. - - dtw distance and mcd style dtw. diff --git a/audio/README.md b/audio/README.md deleted file mode 100644 index 697c0173..00000000 --- a/audio/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# PaddleAudio - -PaddleAudio is an audio library for PaddlePaddle. - -## Install - -`pip install .` diff --git a/audio/docs/Makefile b/audio/docs/Makefile deleted file mode 100644 index 69fe55ec..00000000 --- a/audio/docs/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line. 
-SPHINXOPTS = -SPHINXBUILD = sphinx-build -SOURCEDIR = source -BUILDDIR = build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file diff --git a/audio/docs/README.md b/audio/docs/README.md deleted file mode 100644 index 20626f52..00000000 --- a/audio/docs/README.md +++ /dev/null @@ -1,24 +0,0 @@ -# Build docs for PaddleAudio - -Execute the following steps in **current directory**. - -## 1. Install - -`pip install Sphinx sphinx_rtd_theme` - - -## 2. Generate API docs - -Generate API docs from doc string. - -`sphinx-apidoc -fMeT -o source ../paddleaudio ../paddleaudio/utils --templatedir source/_templates` - - -## 3. Build - -`sphinx-build source _html` - - -## 4. Preview - -Open `_html/index.html` for page preview. 
diff --git a/audio/docs/images/paddle.png b/audio/docs/images/paddle.png deleted file mode 100644 index bc1135abfab7aa48f29392da4bca614f688314af..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5043 zcmV;k6HM%hP)Px|ZAnByRCodHoe8iVMY+cbn;1d107fAM5|&a3LJ*P7Ld`R*M!^N#5=2ErP!zR< zA_!7S^daytPsEDif&wLr7zGqC2&iDt(hzpCh!7y4kbpu=uD}1`&Y@4AUS`hpOwPT| zSM|^Ie0%py&-8Tnxf!v>Fr=Zw{S0Rd&xAY^R)hjanbHz;NnVFVESY<8$iOrM7j>K$_5K zXo$4HaOzl3MCEPJ8B=PM^~U?QW)Un|Tr_|2ZIQ zL`@4+*8+U_enJoG)_deg@tDHFZ@+|05huw)o#i_Y{lQIz@k73U>l~eN&sZ4{+J!;pwTdT8IiM z_F949&r`7hpB($0k!@nh|Hb)Yuq0$_+2lSYrfvj1J?$GJ^P#7lcgi38m!h~wraX95 z9km000k+RrvQ($KX(pv|X8BQlPq28gQ|Y_f@CUXbZSq&zZ8rRaE!k=CFG2p0#ovtc zF&0nXtUj87K<#7GZ}D%(v~|pLUB~(4lXqRxv`tec;myE5L5TDb?<9;~0sa**eiqqm z@F9?QM}{+&$;8*GAViNMblos5H^_7|gh-o&&jtIKam$wRcT6Z~N!f)(@*~!0@Vq1D zAAEk$-3_>Yv2Zo~jjf?FyXbtF`c~36mbYUe(_2dGg8v3p&Y_pWPjw063=MP602Ah_$2-)13St~P*iE3jt?KMrp%|Y2;+*zAzZriCg*OmmVaA9Zi8}?I zD*}4D;kU=*ymfy(23(!tCh~b>-XQqe2wh!!O)i-9 z3@2OT>j{(TE8|D^M%Qehp|8o5*4xYAeFy(X4c2Fs!Ox0!QJ9yVvPzrRiR>sfe`6cy0tGVvMC|s0y4e1LNk&bDWDtsh3};u2G$AoH-Z$Tw{>kq!l55w z9#|w1e0|T;0ADNON4og=qQ{qyJ!{ZTLqU`Dji_OyW@`%(HkxR%Db(Q$`7fkQ7Mj8g z(ZwN$7>)vaSa?0>`+>v3*T91y4}CHeE0f`9&m7KzlML9u2H1IP=rRd>1nBQt#AZoR zX?>fz-X2-HNsq+4NkJX-7kIfxJLrDJ;X^FV?2hgwY3z;%uGvM{U1WDGbtp|<>}tm5 zd@R_&P97t(V`)}$Thk}!F9Fkm z9&YMz-nj>4Yl3;eCf!dOBGyX;)l)e{*Qd;gKzh6Rp14^a9}LArNz<~fo${Hzj)#PN z6qVsm!>`cWOEYNpH$>4eQGH@u3S6`pragJk_J>T%3cCW$WZfRDAn zQQ-H$N3D)!yFgh(!y|#6=MUl8-%l-b_{Rbl)$+jWr~_GS4`NT^$sI-sPKdb`&|AP{03F! z@Ef{G|KW=ZUyZJ{sSq7qMFSG>Wk8GMKGGC?Wkk^b^8|hperveH@oJAwjI9T3uAyX+ z3gUA-xX%;Z*|Fpw{eyw;p?@o~?FRKU?DX9aUoDbr5?zwn9E658hzF)73FdHu?}-W! 
zpz#hfHTbCk8;O>H;G(9>Z(bh0zpo%3nw{5AQ9a^q0+xH?o-TjLw8f|wT87bkisls4 zmrtNh?;~%=8mv#~TI=4`weW4By8~I}L-VdSAZX}~fq39=RVIe-4HP6m6YZqY)pvrv zWkTfLzGVCB<($AL2ZG<+i_bhZfw&q{x7MIIUJ=O6Cz^v`$2>XY)xn=0+8VOODA=(S z4+k@1P8@Q*!3u5Z%3HUf9zjQfrH+J5mUsJ7(0KtN!8GLak*i~s+P+jEHLohkPW2@W zzmkrHU(Jp^;)(kWC5vnV@C}`8)2Crv4)*p|v^Er!cZ+&#L?9Ad{4X|r;b;|A2}TjLZgl>Z4KYA%}$JC>y5>^ z>UaXs=gEcOI7p?gq%O_BVQ>Jt30NVb}y^lo2DkqrJd6jej`=ngv$ilw$M+5UPt zCx=+u1=rttjCqi+k>Dp;xbGz|*%1G{KyI06Hia#K>)X`qSebs&qo(n-*xfM&@2gXl zL&1}LK?nJl@+w^v(J3qbIu5oEIQg|HqtP_XiG2?g-rHuzh?U7f!;#SL{sH83Vt#s% zf5v0;E3Q-P0Yq-cq4O4p-v6U>t%F|&OT*7oi9b`p$T4-y=WzI4FoPHmrgmP)AC=n& z#Hg~TAc4n#Ya$6~NY}B;p7UC^FZmji_(y=pL5RxklJ5#nu-yXz+Z_p=$sWB0@LTI! zUCt9Q@a=kIX;SP9pkwz2=c(RUx8duPV2fU=(0*g#U<3}{2DH7uEKQ3zxFLHAN{`qY zvrdTE#Kg*96D8UZI|alp^yTF60D3JVhw&6-7ozst3Vls-915cQ4MPr*X)SCFI1uFd z$w3ucXMArf{g0BrBCi%v*28KpCld!**T#j zT2!|=KtGB38^`iQgS`vXMU%6P38*!r*iYV5=k*j0?YGs;NRW>0OXyCE=vN`WIBU05 zN0J;1^!9eqyX`nA?dMV)0nzgGU zr>=c=1eqQQv;=v3`x4~?Bl@YuTD09MYqwNKeCkG9=1KNgUmWx|28%s*Vr%xy*(rTK z>)?~q0ZPt6N2HFg?-IQtFX!;~)Xp9L9)`CK<5&HBT5c3$yOWBK}!X)jOTOyW;1qHh94V>YCqT}hWv@LZm{9T04*J1k;u$9Aa9@bRiUgT25_fO#;;*ure=|N^b z^m}#tXX>DbbsD1Lv8nnWriql!3pwW*itdpvV*hxe?$-kseXmTGI2;AO30AnAM|j@F zW$a!GHc5c@dvZ9kOuZ|yCDDC7$(rolDhXb~DaozY?Mo$6FJ(%-cQr4JQPw0xk|}-s z>;UvTtRm#?Sfwg#7?aFoPeRG3+npcvMlN}ZvDNmD?MrzaR;@$hjwi;HNSyZerHI~8 z6Z@{$FBrzmOHmp8+=HKY(9Sz@)MtXQ3#gQcH<|!?>#l1B(l^|}7O(3R`zv7D6E|9> zIgUEr*=0}_gGBUX3Qe~n9%yf0sw$77e#vR35(`kr_NAi!`&FO%jsd=5+8e9Po4DJ7 zW5L-#v!3(7vmm3=Zw(yjrz0+!rMe*qzOj&h(Pa#8E?B3^_UZ6VFs-wMC^jqC;jfMp zlGS&Bc|l$IA6-W-b;qEhGhCI0r&+q%iMZN%qwgo7)I=Kg`Z1)7^v7VvsB#=*9ZP>7 zcjmp(Oa0zmTE4sm}^sg`FXzwl+a$67ho=92hCwCbR0|u3w zkX6ueJZPv^k-B^s_;%;tj!bK;EutLlSW-k!LscJ5f5BS?{}OQ|SnYp(sc6;6IktUC zinRPfCJCGvNurLwz7$EJGV!Ydo~P43W0Btq7K2rwZqnlF z5?@gA(>K^SpoM5H+P@9>zRR@9`yKdO$gwbC_ayPOApKjgp~Jx>c$3h%k)sy1ZwGf{ z*L%C+Y%TBr2+`RD>MJ7IDWQasX*jV>UnQL|8R|AIQvcO0KFL>I5<3(4{>HkUw>JL1 z)#VF*t~XX@i@r8o$Np3TDlO7p*jbsap5jI6?HFtuFw(Xddw*R(y*82e!VPAipXx8D 
zEt~oS{i*(t$s%e)@8Bt6!*#qihi+5_KXXyq59GNo)<;io(-!s8v0^u<{`!)J z6MaKGN!~y*dqooZYD7My#at6jsoyfBD-llqYQ#HH&!lz4-(C3K1$-wQzEfJLV{wD- zi+ODSTtDtu@a;CpT0?(4MC0V)PWm=^aF|u{4(g-tsY8MOifwYj?=o-{j^7M^ohLw{ zj)lY6+v<7}(37!mh=)O*?MuV@ZIxXNuFb=gcLvjep2S0ywEO;BK*PR?;EFmbLL6<+ z+n-n({hSN#^TqYes&wgdgYE!R7b9eI0D-URGG2`g@}5cfj|EcDZ>wHS-U9)hVJV_* zHlt#r!N#E18RArJqo1`u>T*V&+UKWP=!d{U^d4~O#d!GH2c0V&dLP8+YzIHe!si&2 zwgp>)mH7El)W}r8L9>0yY>=khv_RPwpi^xDn(a$v8?v!)TEJ`pI#siM$!w6O+_XU1 z7NApU`_dC2MD2}^W#gr?C)r~{2_qY}+m}+cVdJO57T^XvE^>oDjeNrjQz-13eDv3s z?v3PqUSY?LLDK>$T7Vm~*}jybAsZh}3z#iH$9iYvsjpw7YPzwnh@5VT{=kUcA*{7q51te%-b*Sr&gguX`O9pAn#SkP6G`!El^nt{0HuA9HmP~`H}zt002ov JPDHLkV1n`1NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% - -:end -popd diff --git a/audio/paddleaudio/utils/env.py b/audio/paddleaudio/utils/env.py deleted file mode 100644 index a2d14b89..00000000 --- a/audio/paddleaudio/utils/env.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -''' -This module is used to store environmental variables in PaddleAudio. -PPAUDIO_HOME --> the root directory for storing PaddleAudio related data. Default to ~/.paddleaudio. Users can change the -├ default value through the PPAUDIO_HOME environment variable. -├─ MODEL_HOME --> Store model files. -└─ DATA_HOME --> Store automatically downloaded datasets. -''' -import os - -__all__ = [ - 'USER_HOME', - 'PPAUDIO_HOME', - 'MODEL_HOME', - 'DATA_HOME', -] - - -def _get_user_home(): - return os.path.expanduser('~') - - -def _get_ppaudio_home(): - if 'PPAUDIO_HOME' in os.environ: - home_path = os.environ['PPAUDIO_HOME'] - if os.path.exists(home_path): - if os.path.isdir(home_path): - return home_path - else: - raise RuntimeError( - 'The environment variable PPAUDIO_HOME {} is not a directory.'. - format(home_path)) - else: - return home_path - return os.path.join(_get_user_home(), '.paddleaudio') - - -def _get_sub_home(directory): - home = os.path.join(_get_ppaudio_home(), directory) - if not os.path.exists(home): - os.makedirs(home) - return home - - -USER_HOME = _get_user_home() -PPAUDIO_HOME = _get_ppaudio_home() -MODEL_HOME = _get_sub_home('models') -DATA_HOME = _get_sub_home('datasets') diff --git a/audio/setup.py b/audio/setup.py deleted file mode 100644 index 80fe07b7..00000000 --- a/audio/setup.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -import glob -import os - -import setuptools -from setuptools.command.install import install -from setuptools.command.test import test - -# set the version here -VERSION = '0.0.0' - - -# Inspired by the example at https://pytest.org/latest/goodpractises.html -class TestCommand(test): - def finalize_options(self): - test.finalize_options(self) - self.test_args = [] - self.test_suite = True - - def run(self): - self.run_benchmark() - super(TestCommand, self).run() - - def run_tests(self): - # Run nose ensuring that argv simulates running nosetests directly - import nose - nose.run_exit(argv=['nosetests', '-w', 'tests']) - - def run_benchmark(self): - for benchmark_item in glob.glob('tests/benchmark/*py'): - os.system(f'pytest {benchmark_item}') - - -class InstallCommand(install): - def run(self): - install.run(self) - - -def write_version_py(filename='paddleaudio/__init__.py'): - with open(filename, "a") as f: - f.write(f"__version__ = '{VERSION}'") - - -def remove_version_py(filename='paddleaudio/__init__.py'): - with open(filename, "r") as f: - lines = f.readlines() - with open(filename, "w") as f: - for line in lines: - if "__version__" not in line: - f.write(line) - - -remove_version_py() -write_version_py() - -setuptools.setup( - name="paddleaudio", - version=VERSION, - author="", - author_email="", - description="PaddleAudio, in development", - long_description="", - long_description_content_type="text/markdown", - url="", - packages=setuptools.find_packages(include=['paddleaudio*']), - classifiers=[ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - ], - python_requires='>=3.6', - install_requires=[ - 'numpy >= 1.15.0', 'scipy >= 1.0.0', 'resampy >= 0.2.2', - 'soundfile >= 0.9.0', 'colorlog', 'pathos == 0.2.8' - ], - extras_require={ - 'test': [ - 'nose', 'librosa==0.8.1', 
'soundfile==0.10.3.post1', - 'torchaudio==0.10.2', 'pytest-benchmark' - ], - }, - cmdclass={ - 'install': InstallCommand, - 'test': TestCommand, - }, ) - -remove_version_py() diff --git a/audio/tests/.gitkeep b/audio/tests/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/demos/audio_searching/README.md b/demos/audio_searching/README.md index e829d991..db38d14e 100644 --- a/demos/audio_searching/README.md +++ b/demos/audio_searching/README.md @@ -89,7 +89,7 @@ Then to start the system server, and it provides HTTP backend services. Then start the server with Fastapi. ```bash - export PYTHONPATH=$PYTHONPATH:./src:../../paddleaudio + export PYTHONPATH=$PYTHONPATH:./src python src/audio_search.py ``` diff --git a/demos/audio_searching/README_cn.md b/demos/audio_searching/README_cn.md index c13742af..6d38b91f 100644 --- a/demos/audio_searching/README_cn.md +++ b/demos/audio_searching/README_cn.md @@ -91,7 +91,7 @@ ffce340b3790 minio/minio:RELEASE.2020-12-03T00-03-10Z "/usr/bin/docker-ent…" 启动用 Fastapi 构建的服务 ```bash - export PYTHONPATH=$PYTHONPATH:./src:../../paddleaudio + export PYTHONPATH=$PYTHONPATH:./src python src/audio_search.py ``` diff --git a/audio/docs/source/_static/custom.css b/docs/source/audio/_static/custom.css similarity index 100% rename from audio/docs/source/_static/custom.css rename to docs/source/audio/_static/custom.css diff --git a/audio/docs/source/_templates/module.rst_t b/docs/source/audio/_templates/module.rst_t similarity index 100% rename from audio/docs/source/_templates/module.rst_t rename to docs/source/audio/_templates/module.rst_t diff --git a/audio/docs/source/_templates/package.rst_t b/docs/source/audio/_templates/package.rst_t similarity index 100% rename from audio/docs/source/_templates/package.rst_t rename to docs/source/audio/_templates/package.rst_t diff --git a/audio/docs/source/_templates/toc.rst_t b/docs/source/audio/_templates/toc.rst_t similarity index 100% rename from 
audio/docs/source/_templates/toc.rst_t rename to docs/source/audio/_templates/toc.rst_t diff --git a/audio/docs/source/conf.py b/docs/source/audio/conf.py similarity index 100% rename from audio/docs/source/conf.py rename to docs/source/audio/conf.py diff --git a/audio/docs/source/index.rst b/docs/source/audio/index.rst similarity index 100% rename from audio/docs/source/index.rst rename to docs/source/audio/index.rst diff --git a/docs/source/cls/custom_dataset.md b/docs/source/cls/custom_dataset.md index aaf5943c..e39dcf12 100644 --- a/docs/source/cls/custom_dataset.md +++ b/docs/source/cls/custom_dataset.md @@ -1,8 +1,8 @@ # Customize Dataset for Audio Classification -Following this tutorial you can customize your dataset for audio classification task by using `paddlespeech` and `paddleaudio`. +Following this tutorial you can customize your dataset for audio classification task by using `paddlespeech`. -A base class of classification dataset is `paddleaudio.dataset.AudioClassificationDataset`. To customize your dataset you should write a dataset class derived from `AudioClassificationDataset`. +A base class of classification dataset is `paddlespeech.audio.datasets.dataset.AudioClassificationDataset`. To customize your dataset you should write a dataset class derived from `AudioClassificationDataset`. Assuming you have some wave files that stored in your own directory. You should prepare a meta file with the information of filepaths and labels. For example the absolute path of it is `/PATH/TO/META_FILE.txt`: ``` @@ -14,7 +14,7 @@ Assuming you have some wave files that stored in your own directory. 
You should Here is an example to build your custom dataset in `custom_dataset.py`: ```python -from paddleaudio.datasets.dataset import AudioClassificationDataset +from paddlespeech.audio.datasets.dataset import AudioClassificationDataset class CustomDataset(AudioClassificationDataset): meta_file = '/PATH/TO/META_FILE.txt' @@ -48,7 +48,7 @@ class CustomDataset(AudioClassificationDataset): Then you can build dataset and data loader from `CustomDataset`: ```python import paddle -from paddleaudio.features import LogMelSpectrogram +from paddlespeech.audio.features import LogMelSpectrogram from custom_dataset import CustomDataset diff --git a/examples/esc50/cls0/conf/panns.yaml b/examples/esc50/cls0/conf/panns.yaml index 3a9d42aa..1f0323f0 100644 --- a/examples/esc50/cls0/conf/panns.yaml +++ b/examples/esc50/cls0/conf/panns.yaml @@ -1,5 +1,5 @@ data: - dataset: 'paddleaudio.datasets:ESC50' + dataset: 'paddlespeech.audio.datasets:ESC50' num_classes: 50 train: mode: 'train' diff --git a/examples/hey_snips/kws0/conf/mdtc.yaml b/examples/hey_snips/kws0/conf/mdtc.yaml index 4bd0708c..76e47bc7 100644 --- a/examples/hey_snips/kws0/conf/mdtc.yaml +++ b/examples/hey_snips/kws0/conf/mdtc.yaml @@ -2,7 +2,7 @@ ########################################### # Data # ########################################### -dataset: 'paddleaudio.datasets:HeySnips' +dataset: 'paddlespeech.audio.datasets:HeySnips' data_dir: '/PATH/TO/DATA/hey_snips_research_6k_en_train_eval_clean_ter' ############################################ diff --git a/examples/voxceleb/sv0/local/data_prepare.py b/examples/voxceleb/sv0/local/data_prepare.py index b4486b6f..e5a5dff7 100644 --- a/examples/voxceleb/sv0/local/data_prepare.py +++ b/examples/voxceleb/sv0/local/data_prepare.py @@ -14,9 +14,9 @@ import argparse import paddle -from paddleaudio.datasets.voxceleb import VoxCeleb from yacs.config import CfgNode +from paddlespeech.audio.datasets.voxceleb import VoxCeleb from paddlespeech.s2t.utils.log import Log from 
paddlespeech.vector.io.augment import build_augment_pipeline from paddlespeech.vector.training.seeding import seed_everything diff --git a/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py b/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py index 0d0163f1..7ad9bd6e 100644 --- a/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py +++ b/examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py @@ -21,9 +21,9 @@ import os from typing import List import tqdm -from paddleaudio import load as load_audio from yacs.config import CfgNode +from paddlespeech.audio import load as load_audio from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.utils.vector_utils import get_chunks diff --git a/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py b/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py index ffd0d212..40adf53d 100644 --- a/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py +++ b/examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py @@ -22,9 +22,9 @@ import os import random import tqdm -from paddleaudio import load as load_audio from yacs.config import CfgNode +from paddlespeech.audio import load as load_audio from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.utils.vector_utils import get_chunks diff --git a/audio/paddleaudio/__init__.py b/paddlespeech/audio/__init__.py similarity index 100% rename from audio/paddleaudio/__init__.py rename to paddlespeech/audio/__init__.py diff --git a/audio/paddleaudio/backends/__init__.py b/paddlespeech/audio/backends/__init__.py similarity index 100% rename from audio/paddleaudio/backends/__init__.py rename to paddlespeech/audio/backends/__init__.py diff --git a/audio/paddleaudio/backends/soundfile_backend.py b/paddlespeech/audio/backends/soundfile_backend.py similarity index 100% rename from audio/paddleaudio/backends/soundfile_backend.py rename to 
paddlespeech/audio/backends/soundfile_backend.py diff --git a/audio/paddleaudio/backends/sox_backend.py b/paddlespeech/audio/backends/sox_backend.py similarity index 100% rename from audio/paddleaudio/backends/sox_backend.py rename to paddlespeech/audio/backends/sox_backend.py diff --git a/audio/paddleaudio/compliance/__init__.py b/paddlespeech/audio/compliance/__init__.py similarity index 100% rename from audio/paddleaudio/compliance/__init__.py rename to paddlespeech/audio/compliance/__init__.py diff --git a/audio/paddleaudio/compliance/kaldi.py b/paddlespeech/audio/compliance/kaldi.py similarity index 100% rename from audio/paddleaudio/compliance/kaldi.py rename to paddlespeech/audio/compliance/kaldi.py diff --git a/audio/paddleaudio/compliance/librosa.py b/paddlespeech/audio/compliance/librosa.py similarity index 100% rename from audio/paddleaudio/compliance/librosa.py rename to paddlespeech/audio/compliance/librosa.py diff --git a/audio/paddleaudio/datasets/__init__.py b/paddlespeech/audio/datasets/__init__.py similarity index 100% rename from audio/paddleaudio/datasets/__init__.py rename to paddlespeech/audio/datasets/__init__.py diff --git a/audio/paddleaudio/datasets/dataset.py b/paddlespeech/audio/datasets/dataset.py similarity index 100% rename from audio/paddleaudio/datasets/dataset.py rename to paddlespeech/audio/datasets/dataset.py diff --git a/audio/paddleaudio/datasets/esc50.py b/paddlespeech/audio/datasets/esc50.py similarity index 100% rename from audio/paddleaudio/datasets/esc50.py rename to paddlespeech/audio/datasets/esc50.py diff --git a/audio/paddleaudio/datasets/gtzan.py b/paddlespeech/audio/datasets/gtzan.py similarity index 100% rename from audio/paddleaudio/datasets/gtzan.py rename to paddlespeech/audio/datasets/gtzan.py diff --git a/audio/paddleaudio/datasets/hey_snips.py b/paddlespeech/audio/datasets/hey_snips.py similarity index 100% rename from audio/paddleaudio/datasets/hey_snips.py rename to paddlespeech/audio/datasets/hey_snips.py 
diff --git a/audio/paddleaudio/datasets/rirs_noises.py b/paddlespeech/audio/datasets/rirs_noises.py similarity index 100% rename from audio/paddleaudio/datasets/rirs_noises.py rename to paddlespeech/audio/datasets/rirs_noises.py diff --git a/audio/paddleaudio/datasets/tess.py b/paddlespeech/audio/datasets/tess.py similarity index 100% rename from audio/paddleaudio/datasets/tess.py rename to paddlespeech/audio/datasets/tess.py diff --git a/audio/paddleaudio/datasets/urban_sound.py b/paddlespeech/audio/datasets/urban_sound.py similarity index 100% rename from audio/paddleaudio/datasets/urban_sound.py rename to paddlespeech/audio/datasets/urban_sound.py diff --git a/audio/paddleaudio/datasets/voxceleb.py b/paddlespeech/audio/datasets/voxceleb.py similarity index 100% rename from audio/paddleaudio/datasets/voxceleb.py rename to paddlespeech/audio/datasets/voxceleb.py diff --git a/audio/paddleaudio/features/__init__.py b/paddlespeech/audio/features/__init__.py similarity index 100% rename from audio/paddleaudio/features/__init__.py rename to paddlespeech/audio/features/__init__.py diff --git a/audio/paddleaudio/features/layers.py b/paddlespeech/audio/features/layers.py similarity index 100% rename from audio/paddleaudio/features/layers.py rename to paddlespeech/audio/features/layers.py diff --git a/audio/paddleaudio/functional/__init__.py b/paddlespeech/audio/functional/__init__.py similarity index 100% rename from audio/paddleaudio/functional/__init__.py rename to paddlespeech/audio/functional/__init__.py diff --git a/audio/paddleaudio/functional/functional.py b/paddlespeech/audio/functional/functional.py similarity index 100% rename from audio/paddleaudio/functional/functional.py rename to paddlespeech/audio/functional/functional.py diff --git a/audio/paddleaudio/functional/window.py b/paddlespeech/audio/functional/window.py similarity index 100% rename from audio/paddleaudio/functional/window.py rename to paddlespeech/audio/functional/window.py diff --git 
a/audio/paddleaudio/io/__init__.py b/paddlespeech/audio/io/__init__.py similarity index 100% rename from audio/paddleaudio/io/__init__.py rename to paddlespeech/audio/io/__init__.py diff --git a/audio/paddleaudio/metric/__init__.py b/paddlespeech/audio/metric/__init__.py similarity index 100% rename from audio/paddleaudio/metric/__init__.py rename to paddlespeech/audio/metric/__init__.py diff --git a/audio/paddleaudio/metric/eer.py b/paddlespeech/audio/metric/eer.py similarity index 100% rename from audio/paddleaudio/metric/eer.py rename to paddlespeech/audio/metric/eer.py diff --git a/audio/paddleaudio/sox_effects/__init__.py b/paddlespeech/audio/sox_effects/__init__.py similarity index 100% rename from audio/paddleaudio/sox_effects/__init__.py rename to paddlespeech/audio/sox_effects/__init__.py diff --git a/audio/paddleaudio/utils/__init__.py b/paddlespeech/audio/utils/__init__.py similarity index 88% rename from audio/paddleaudio/utils/__init__.py rename to paddlespeech/audio/utils/__init__.py index afb9cedd..742f9f8e 100644 --- a/audio/paddleaudio/utils/__init__.py +++ b/paddlespeech/audio/utils/__init__.py @@ -11,13 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from ...cli.utils import DATA_HOME +from ...cli.utils import MODEL_HOME from .download import decompress from .download import download_and_decompress from .download import load_state_dict_from_url -from .env import DATA_HOME -from .env import MODEL_HOME -from .env import PPAUDIO_HOME -from .env import USER_HOME from .error import ParameterError from .log import Logger from .log import logger diff --git a/audio/paddleaudio/utils/download.py b/paddlespeech/audio/utils/download.py similarity index 100% rename from audio/paddleaudio/utils/download.py rename to paddlespeech/audio/utils/download.py diff --git a/audio/paddleaudio/utils/error.py b/paddlespeech/audio/utils/error.py similarity index 100% rename from audio/paddleaudio/utils/error.py rename to paddlespeech/audio/utils/error.py diff --git a/audio/paddleaudio/utils/log.py b/paddlespeech/audio/utils/log.py similarity index 100% rename from audio/paddleaudio/utils/log.py rename to paddlespeech/audio/utils/log.py diff --git a/audio/paddleaudio/utils/numeric.py b/paddlespeech/audio/utils/numeric.py similarity index 100% rename from audio/paddleaudio/utils/numeric.py rename to paddlespeech/audio/utils/numeric.py diff --git a/audio/paddleaudio/utils/time.py b/paddlespeech/audio/utils/time.py similarity index 100% rename from audio/paddleaudio/utils/time.py rename to paddlespeech/audio/utils/time.py diff --git a/paddlespeech/cli/cls/infer.py b/paddlespeech/cli/cls/infer.py index d31379b8..f4e8baea 100644 --- a/paddlespeech/cli/cls/infer.py +++ b/paddlespeech/cli/cls/infer.py @@ -27,6 +27,8 @@ from paddleaudio.features import LogMelSpectrogram from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper +from paddlespeech.audio import load +from paddlespeech.audio.features import LogMelSpectrogram __all__ = ['CLSExecutor'] diff --git a/paddlespeech/cli/utils.py b/paddlespeech/cli/utils.py index 128767e6..21c887e9 100644 --- a/paddlespeech/cli/utils.py +++ b/paddlespeech/cli/utils.py 
@@ -24,11 +24,11 @@ from typing import Any from typing import Dict import paddle -import paddleaudio import requests import yaml from paddle.framework import load +import paddlespeech.audio from . import download from .entry import commands try: @@ -190,6 +190,7 @@ def _get_sub_home(directory): PPSPEECH_HOME = _get_paddlespcceh_home() MODEL_HOME = _get_sub_home('models') CONF_HOME = _get_sub_home('conf') +DATA_HOME = _get_sub_home('datasets') def _md5(text: str): @@ -281,7 +282,7 @@ def _note_one_stat(cls_name, params={}): if 'audio_file' in params: try: - _, sr = paddleaudio.load(params['audio_file']) + _, sr = paddlespeech.audio.load(params['audio_file']) except Exception: sr = -1 diff --git a/paddlespeech/cli/vector/infer.py b/paddlespeech/cli/vector/infer.py index d049ba7d..c736a53e 100644 --- a/paddlespeech/cli/vector/infer.py +++ b/paddlespeech/cli/vector/infer.py @@ -29,6 +29,8 @@ from yacs.config import CfgNode from ..executor import BaseExecutor from ..log import logger from ..utils import stats_wrapper +from paddlespeech.audio.backends import load as load_audio +from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.vector.io.batch import feature_normalize from paddlespeech.vector.modules.sid_model import SpeakerIdetification diff --git a/paddlespeech/cls/exps/panns/deploy/predict.py b/paddlespeech/cls/exps/panns/deploy/predict.py index ee566ed4..fe1c93fa 100644 --- a/paddlespeech/cls/exps/panns/deploy/predict.py +++ b/paddlespeech/cls/exps/panns/deploy/predict.py @@ -16,11 +16,12 @@ import os import numpy as np from paddle import inference -from paddleaudio.backends import load as load_audio -from paddleaudio.datasets import ESC50 -from paddleaudio.features import melspectrogram from scipy.special import softmax +from paddlespeech.audio.backends import load as load_audio +from paddlespeech.audio.datasets import ESC50 +from paddlespeech.audio.features import melspectrogram + # yapf: disable parser = argparse.ArgumentParser() 
parser.add_argument("--model_dir", type=str, required=True, default="./export", help="The directory to static model.") diff --git a/paddlespeech/cls/exps/panns/export_model.py b/paddlespeech/cls/exps/panns/export_model.py index 63b22981..e62d58f0 100644 --- a/paddlespeech/cls/exps/panns/export_model.py +++ b/paddlespeech/cls/exps/panns/export_model.py @@ -15,8 +15,8 @@ import argparse import os import paddle -from paddleaudio.datasets import ESC50 +from paddlespeech.audio.datasets import ESC50 from paddlespeech.cls.models import cnn14 from paddlespeech.cls.models import SoundClassifier diff --git a/paddlespeech/cls/exps/panns/predict.py b/paddlespeech/cls/exps/panns/predict.py index d0b96354..97759a89 100644 --- a/paddlespeech/cls/exps/panns/predict.py +++ b/paddlespeech/cls/exps/panns/predict.py @@ -17,10 +17,10 @@ import os import paddle import paddle.nn.functional as F import yaml -from paddleaudio.backends import load as load_audio -from paddleaudio.features import LogMelSpectrogram -from paddleaudio.utils import logger +from paddlespeech.audio.backends import load as load_audio +from paddlespeech.audio.features import LogMelSpectrogram +from paddlespeech.audio.utils import logger from paddlespeech.cls.models import SoundClassifier from paddlespeech.utils.dynamic_import import dynamic_import diff --git a/paddlespeech/cls/exps/panns/train.py b/paddlespeech/cls/exps/panns/train.py index 8e06273d..fba38a01 100644 --- a/paddlespeech/cls/exps/panns/train.py +++ b/paddlespeech/cls/exps/panns/train.py @@ -16,10 +16,10 @@ import os import paddle import yaml -from paddleaudio.features import LogMelSpectrogram -from paddleaudio.utils import logger -from paddleaudio.utils import Timer +from paddlespeech.audio.features import LogMelSpectrogram +from paddlespeech.audio.utils import logger +from paddlespeech.audio.utils import Timer from paddlespeech.cls.models import SoundClassifier from paddlespeech.utils.dynamic_import import dynamic_import diff --git 
a/paddlespeech/cls/models/panns/panns.py b/paddlespeech/cls/models/panns/panns.py index b442b2fd..f2a1b9ae 100644 --- a/paddlespeech/cls/models/panns/panns.py +++ b/paddlespeech/cls/models/panns/panns.py @@ -15,8 +15,9 @@ import os import paddle.nn as nn import paddle.nn.functional as F -from paddleaudio.utils.download import load_state_dict_from_url -from paddleaudio.utils.env import MODEL_HOME + +from paddlespeech.audio.utils.download import load_state_dict_from_url +from paddlespeech.audio.utils.env import MODEL_HOME __all__ = ['CNN14', 'CNN10', 'CNN6', 'cnn14', 'cnn10', 'cnn6'] diff --git a/paddlespeech/kws/exps/mdtc/train.py b/paddlespeech/kws/exps/mdtc/train.py index 5a9ca92d..94e45d59 100644 --- a/paddlespeech/kws/exps/mdtc/train.py +++ b/paddlespeech/kws/exps/mdtc/train.py @@ -14,10 +14,10 @@ import os import paddle -from paddleaudio.utils import logger -from paddleaudio.utils import Timer from yacs.config import CfgNode +from paddlespeech.audio.utils import logger +from paddlespeech.audio.utils import Timer from paddlespeech.kws.exps.mdtc.collate import collate_features from paddlespeech.kws.models.loss import max_pooling_loss from paddlespeech.kws.models.mdtc import KWSModel diff --git a/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py b/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py index 22329d5e..ac5720fd 100644 --- a/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py +++ b/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py @@ -14,10 +14,11 @@ """Contains the audio featurizer class.""" import numpy as np import paddle -import paddleaudio.compliance.kaldi as kaldi from python_speech_features import delta from python_speech_features import mfcc +import paddlespeech.audio.compliance.kaldi as kaldi + class AudioFeaturizer(): """Audio featurizer, for extracting features from audio contents of diff --git a/paddlespeech/s2t/transform/spectrogram.py b/paddlespeech/s2t/transform/spectrogram.py index 2a93bedc..19f0237b 100644 --- 
a/paddlespeech/s2t/transform/spectrogram.py +++ b/paddlespeech/s2t/transform/spectrogram.py @@ -15,9 +15,10 @@ import librosa import numpy as np import paddle -import paddleaudio.compliance.kaldi as kaldi from python_speech_features import logfbank +import paddlespeech.audio.compliance.kaldi as kaldi + def stft(x, n_fft, diff --git a/paddlespeech/server/engine/vector/python/vector_engine.py b/paddlespeech/server/engine/vector/python/vector_engine.py index 85430370..3c72f55d 100644 --- a/paddlespeech/server/engine/vector/python/vector_engine.py +++ b/paddlespeech/server/engine/vector/python/vector_engine.py @@ -16,9 +16,9 @@ from collections import OrderedDict import numpy as np import paddle -from paddleaudio.backends import load as load_audio -from paddleaudio.compliance.librosa import melspectrogram +from paddlespeech.audio.backends import load as load_audio +from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.cli.log import logger from paddlespeech.cli.vector.infer import VectorExecutor from paddlespeech.server.engine.base_engine import BaseEngine diff --git a/paddlespeech/server/util.py b/paddlespeech/server/util.py index 13f2ddf6..32546a33 100644 --- a/paddlespeech/server/util.py +++ b/paddlespeech/server/util.py @@ -24,11 +24,11 @@ from typing import Any from typing import Dict import paddle -import paddleaudio import requests import yaml from paddle.framework import load +import paddlespeech.audio from .entry import client_commands from .entry import server_commands from paddlespeech.cli import download @@ -289,7 +289,7 @@ def _note_one_stat(cls_name, params={}): if 'audio_file' in params: try: - _, sr = paddleaudio.load(params['audio_file']) + _, sr = paddlespeech.audio.load(params['audio_file']) except Exception: sr = -1 diff --git a/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py b/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py index e8d91bf3..cd4538bb 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py 
+++ b/paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py @@ -16,10 +16,10 @@ import os import time import paddle -from paddleaudio.backends import load as load_audio -from paddleaudio.compliance.librosa import melspectrogram from yacs.config import CfgNode +from paddlespeech.audio.backends import load as load_audio +from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.batch import feature_normalize from paddlespeech.vector.models.ecapa_tdnn import EcapaTdnn diff --git a/paddlespeech/vector/exps/ecapa_tdnn/test.py b/paddlespeech/vector/exps/ecapa_tdnn/test.py index f15dbf9b..6c87dbe7 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/test.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/test.py @@ -18,10 +18,10 @@ import numpy as np import paddle from paddle.io import BatchSampler from paddle.io import DataLoader -from paddleaudio.metric import compute_eer from tqdm import tqdm from yacs.config import CfgNode +from paddlespeech.audio.metric import compute_eer from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.batch import batch_feature_normalize from paddlespeech.vector.io.dataset import CSVDataset diff --git a/paddlespeech/vector/exps/ecapa_tdnn/train.py b/paddlespeech/vector/exps/ecapa_tdnn/train.py index bf014045..961b75e2 100644 --- a/paddlespeech/vector/exps/ecapa_tdnn/train.py +++ b/paddlespeech/vector/exps/ecapa_tdnn/train.py @@ -20,9 +20,9 @@ import paddle from paddle.io import BatchSampler from paddle.io import DataLoader from paddle.io import DistributedBatchSampler -from paddleaudio.compliance.librosa import melspectrogram from yacs.config import CfgNode +from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.s2t.utils.log import Log from paddlespeech.vector.io.augment import build_augment_pipeline from paddlespeech.vector.io.augment import waveform_augment diff --git a/paddlespeech/vector/io/dataset.py 
b/paddlespeech/vector/io/dataset.py index 1b514f3d..245b2959 100644 --- a/paddlespeech/vector/io/dataset.py +++ b/paddlespeech/vector/io/dataset.py @@ -15,9 +15,9 @@ from dataclasses import dataclass from dataclasses import fields from paddle.io import Dataset -from paddleaudio import load as load_audio -from paddleaudio.compliance.librosa import melspectrogram +from paddlespeech.audio import load as load_audio +from paddlespeech.audio.compliance.librosa import melspectrogram from paddlespeech.s2t.utils.log import Log logger = Log(__name__).getlog() diff --git a/paddlespeech/vector/io/dataset_from_json.py b/paddlespeech/vector/io/dataset_from_json.py index bf04e113..12e84577 100644 --- a/paddlespeech/vector/io/dataset_from_json.py +++ b/paddlespeech/vector/io/dataset_from_json.py @@ -16,9 +16,10 @@ from dataclasses import dataclass from dataclasses import fields from paddle.io import Dataset -from paddleaudio import load as load_audio -from paddleaudio.compliance.librosa import melspectrogram -from paddleaudio.compliance.librosa import mfcc + +from paddlespeech.audio import load as load_audio +from paddlespeech.audio.compliance.librosa import melspectrogram +from paddlespeech.audio.compliance.librosa import mfcc @dataclass diff --git a/setup.py b/setup.py index 657de6c5..679549b4 100644 --- a/setup.py +++ b/setup.py @@ -24,6 +24,7 @@ from setuptools import find_packages from setuptools import setup from setuptools.command.develop import develop from setuptools.command.install import install +from setuptools.command.test import test HERE = Path(os.path.abspath(os.path.dirname(__file__))) @@ -31,42 +32,13 @@ VERSION = '0.0.0' COMMITID = 'none' base = [ - "editdistance", - "g2p_en", - "g2pM", - "h5py", - "inflect", - "jieba", - "jsonlines", - "kaldiio", - "librosa==0.8.1", - "loguru", - "matplotlib", - "nara_wpe", - "onnxruntime", - "pandas", - "paddleaudio", - "paddlenlp", - "paddlespeech_feat", - "praatio==5.0.0", - "pypinyin", - "pypinyin-dict", - 
"python-dateutil", - "pyworld", - "resampy==0.2.2", - "sacrebleu", - "scipy", - "sentencepiece~=0.1.96", - "soundfile~=0.10", - "textgrid", - "timer", - "tqdm", - "typeguard", - "visualdl", - "webrtcvad", - "yacs~=0.1.8", - "prettytable", - "zhon", + "editdistance", "g2p_en", "g2pM", "h5py", "inflect", "jieba", "jsonlines", + "kaldiio", "librosa==0.8.1", "loguru", "matplotlib", "nara_wpe", + "onnxruntime", "pandas", "paddlenlp", "paddlespeech_feat", "praatio==5.0.0", + "pypinyin", "pypinyin-dict", "python-dateutil", "pyworld", "resampy==0.2.2", + "sacrebleu", "scipy", "sentencepiece~=0.1.96", "soundfile~=0.10", + "textgrid", "timer", "tqdm", "typeguard", "visualdl", "webrtcvad", + "yacs~=0.1.8", "prettytable", "zhon", 'colorlog', 'pathos == 0.2.8' ] server = [ @@ -177,7 +149,19 @@ class InstallCommand(install): install.run(self) - # cmd: python setup.py upload +class TestCommand(test): + def finalize_options(self): + test.finalize_options(self) + self.test_args = [] + self.test_suite = True + + def run_tests(self): + # Run nose ensuring that argv simulates running nosetests directly + import nose + nose.run_exit(argv=['nosetests', '-w', 'tests']) + + +# cmd: python setup.py upload class UploadCommand(Command): description = "Build and publish the package." 
user_options = [] @@ -279,11 +263,13 @@ setup_info = dict( "sphinx", "sphinx-rtd-theme", "numpydoc", "myst_parser", "recommonmark>=0.5.0", "sphinx-markdown-tables", "sphinx-autobuild" ], + 'test': ['nose', 'torchaudio==0.10.2'], }, cmdclass={ 'develop': DevelopCommand, 'install': InstallCommand, 'upload': UploadCommand, + 'test': TestCommand, }, # Package info diff --git a/audio/tests/benchmark/README.md b/tests/benchmark/audio/README.md similarity index 97% rename from audio/tests/benchmark/README.md rename to tests/benchmark/audio/README.md index b9034100..9cade74e 100644 --- a/audio/tests/benchmark/README.md +++ b/tests/benchmark/audio/README.md @@ -15,7 +15,6 @@ Result: ========================================================================== test session starts ========================================================================== platform linux -- Python 3.7.7, pytest-7.0.1, pluggy-1.0.0 benchmark: 3.4.1 (defaults: timer=time.perf_counter disable_gc=False min_rounds=5 min_time=0.000005 max_time=1.0 calibration_precision=10 warmup=False warmup_iterations=100000) -rootdir: /ssd3/chenxiaojie06/PaddleSpeech/DeepSpeech/paddleaudio plugins: typeguard-2.12.1, benchmark-3.4.1, anyio-3.5.0 collected 4 items diff --git a/audio/tests/benchmark/log_melspectrogram.py b/tests/benchmark/audio/log_melspectrogram.py similarity index 87% rename from audio/tests/benchmark/log_melspectrogram.py rename to tests/benchmark/audio/log_melspectrogram.py index 9832aed4..c85fcecf 100644 --- a/audio/tests/benchmark/log_melspectrogram.py +++ b/tests/benchmark/audio/log_melspectrogram.py @@ -17,15 +17,17 @@ import urllib.request import librosa import numpy as np import paddle -import paddleaudio import torch import torchaudio +import paddlespeech.audio + wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav' if not os.path.isfile(os.path.basename(wav_url)): urllib.request.urlretrieve(wav_url, os.path.basename(wav_url)) -waveform, sr = 
paddleaudio.load(os.path.abspath(os.path.basename(wav_url))) +waveform, sr = paddlespeech.audio.load( + os.path.abspath(os.path.basename(wav_url))) waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0) waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0) @@ -55,7 +57,7 @@ def enable_gpu_device(): paddle.set_device('gpu') -log_mel_extractor = paddleaudio.features.LogMelSpectrogram( +log_mel_extractor = paddlespeech.audio.features.LogMelSpectrogram( **mel_conf, f_min=0.0, top_db=80.0, dtype=waveform_tensor.dtype) @@ -65,20 +67,20 @@ def log_melspectrogram(): def test_log_melspect_cpu(benchmark): enable_cpu_device() - feature_paddleaudio = benchmark(log_melspectrogram) + feature_audio = benchmark(log_melspectrogram) feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf) feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0) np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=3) + feature_librosa, feature_audio, decimal=3) def test_log_melspect_gpu(benchmark): enable_gpu_device() - feature_paddleaudio = benchmark(log_melspectrogram) + feature_audio = benchmark(log_melspectrogram) feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf) feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0) np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=2) + feature_librosa, feature_audio, decimal=2) mel_extractor_torchaudio = torchaudio.transforms.MelSpectrogram( @@ -102,11 +104,11 @@ def test_log_melspect_cpu_torchaudio(benchmark): waveform_tensor_torch = waveform_tensor_torch.to('cpu') amplitude_to_DB = amplitude_to_DB.to('cpu') - feature_paddleaudio = benchmark(log_melspectrogram_torchaudio) + feature_audio = benchmark(log_melspectrogram_torchaudio) feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf) feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0) np.testing.assert_array_almost_equal( - feature_librosa, 
feature_paddleaudio, decimal=3) + feature_librosa, feature_audio, decimal=3) def test_log_melspect_gpu_torchaudio(benchmark): diff --git a/audio/tests/benchmark/melspectrogram.py b/tests/benchmark/audio/melspectrogram.py similarity index 85% rename from audio/tests/benchmark/melspectrogram.py rename to tests/benchmark/audio/melspectrogram.py index 5fe3f248..49815894 100644 --- a/audio/tests/benchmark/melspectrogram.py +++ b/tests/benchmark/audio/melspectrogram.py @@ -17,15 +17,17 @@ import urllib.request import librosa import numpy as np import paddle -import paddleaudio import torch import torchaudio +import paddlespeech.audio + wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav' if not os.path.isfile(os.path.basename(wav_url)): urllib.request.urlretrieve(wav_url, os.path.basename(wav_url)) -waveform, sr = paddleaudio.load(os.path.abspath(os.path.basename(wav_url))) +waveform, sr = paddlespeech.audio.load( + os.path.abspath(os.path.basename(wav_url))) waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0) waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0) @@ -55,7 +57,7 @@ def enable_gpu_device(): paddle.set_device('gpu') -mel_extractor = paddleaudio.features.MelSpectrogram( +mel_extractor = paddlespeech.audio.features.MelSpectrogram( **mel_conf, f_min=0.0, dtype=waveform_tensor.dtype) @@ -65,18 +67,18 @@ def melspectrogram(): def test_melspect_cpu(benchmark): enable_cpu_device() - feature_paddleaudio = benchmark(melspectrogram) + feature_audio = benchmark(melspectrogram) feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf) np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=3) + feature_librosa, feature_audio, decimal=3) def test_melspect_gpu(benchmark): enable_gpu_device() - feature_paddleaudio = benchmark(melspectrogram) + feature_audio = benchmark(melspectrogram) feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf) np.testing.assert_array_almost_equal( - 
feature_librosa, feature_paddleaudio, decimal=3) + feature_librosa, feature_audio, decimal=3) mel_extractor_torchaudio = torchaudio.transforms.MelSpectrogram( @@ -91,10 +93,10 @@ def test_melspect_cpu_torchaudio(benchmark): global waveform_tensor_torch, mel_extractor_torchaudio mel_extractor_torchaudio = mel_extractor_torchaudio.to('cpu') waveform_tensor_torch = waveform_tensor_torch.to('cpu') - feature_paddleaudio = benchmark(melspectrogram_torchaudio) + feature_audio = benchmark(melspectrogram_torchaudio) feature_librosa = librosa.feature.melspectrogram(waveform, **mel_conf) np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=3) + feature_librosa, feature_audio, decimal=3) def test_melspect_gpu_torchaudio(benchmark): diff --git a/audio/tests/benchmark/mfcc.py b/tests/benchmark/audio/mfcc.py similarity index 87% rename from audio/tests/benchmark/mfcc.py rename to tests/benchmark/audio/mfcc.py index c6a8c85f..4e286de9 100644 --- a/audio/tests/benchmark/mfcc.py +++ b/tests/benchmark/audio/mfcc.py @@ -17,15 +17,17 @@ import urllib.request import librosa import numpy as np import paddle -import paddleaudio import torch import torchaudio +import paddlespeech.audio + wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav' if not os.path.isfile(os.path.basename(wav_url)): urllib.request.urlretrieve(wav_url, os.path.basename(wav_url)) -waveform, sr = paddleaudio.load(os.path.abspath(os.path.basename(wav_url))) +waveform, sr = paddlespeech.audio.load( + os.path.abspath(os.path.basename(wav_url))) waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0) waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0) @@ -64,7 +66,7 @@ def enable_gpu_device(): paddle.set_device('gpu') -mfcc_extractor = paddleaudio.features.MFCC( +mfcc_extractor = paddlespeech.audio.features.MFCC( **mfcc_conf, f_min=0.0, dtype=waveform_tensor.dtype) @@ -74,18 +76,18 @@ def mfcc(): def test_mfcc_cpu(benchmark): enable_cpu_device() - 
feature_paddleaudio = benchmark(mfcc) + feature_audio = benchmark(mfcc) feature_librosa = librosa.feature.mfcc(waveform, **mel_conf) np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=3) + feature_librosa, feature_audio, decimal=3) def test_mfcc_gpu(benchmark): enable_gpu_device() - feature_paddleaudio = benchmark(mfcc) + feature_audio = benchmark(mfcc) feature_librosa = librosa.feature.mfcc(waveform, **mel_conf) np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=3) + feature_librosa, feature_audio, decimal=3) del mel_conf_torchaudio['sample_rate'] @@ -103,10 +105,10 @@ def test_mfcc_cpu_torchaudio(benchmark): mel_extractor_torchaudio = mfcc_extractor_torchaudio.to('cpu') waveform_tensor_torch = waveform_tensor_torch.to('cpu') - feature_paddleaudio = benchmark(mfcc_torchaudio) + feature_audio = benchmark(mfcc_torchaudio) feature_librosa = librosa.feature.mfcc(waveform, **mel_conf) np.testing.assert_array_almost_equal( - feature_librosa, feature_paddleaudio, decimal=3) + feature_librosa, feature_audio, decimal=3) def test_mfcc_gpu_torchaudio(benchmark): diff --git a/audio/tests/backends/__init__.py b/tests/unit/audio/backends/__init__.py similarity index 100% rename from audio/tests/backends/__init__.py rename to tests/unit/audio/backends/__init__.py diff --git a/audio/tests/backends/base.py b/tests/unit/audio/backends/base.py similarity index 100% rename from audio/tests/backends/base.py rename to tests/unit/audio/backends/base.py diff --git a/audio/tests/backends/soundfile/__init__.py b/tests/unit/audio/backends/soundfile/__init__.py similarity index 100% rename from audio/tests/backends/soundfile/__init__.py rename to tests/unit/audio/backends/soundfile/__init__.py diff --git a/audio/tests/backends/soundfile/test_io.py b/tests/unit/audio/backends/soundfile/test_io.py similarity index 90% rename from audio/tests/backends/soundfile/test_io.py rename to 
tests/unit/audio/backends/soundfile/test_io.py index 9d092902..26276751 100644 --- a/audio/tests/backends/soundfile/test_io.py +++ b/tests/unit/audio/backends/soundfile/test_io.py @@ -16,16 +16,16 @@ import os import unittest import numpy as np -import paddleaudio import soundfile as sf +import paddlespeech.audio from ..base import BackendTest class TestIO(BackendTest): def test_load_mono_channel(self): sf_data, sf_sr = sf.read(self.files[0]) - pa_data, pa_sr = paddleaudio.load( + pa_data, pa_sr = paddlespeech.audio.load( self.files[0], normal=False, dtype='float64') self.assertEqual(sf_data.dtype, pa_data.dtype) @@ -35,7 +35,7 @@ class TestIO(BackendTest): def test_load_multi_channels(self): sf_data, sf_sr = sf.read(self.files[1]) sf_data = sf_data.T # Channel dim first - pa_data, pa_sr = paddleaudio.load( + pa_data, pa_sr = paddlespeech.audio.load( self.files[1], mono=False, normal=False, dtype='float64') self.assertEqual(sf_data.dtype, pa_data.dtype) @@ -49,7 +49,7 @@ class TestIO(BackendTest): pa_tmp_file = 'pa_tmp.wav' sf.write(sf_tmp_file, waveform, sr) - paddleaudio.save(waveform, sr, pa_tmp_file) + paddlespeech.audio.save(waveform, sr, pa_tmp_file) self.assertTrue(filecmp.cmp(sf_tmp_file, pa_tmp_file)) for file in [sf_tmp_file, pa_tmp_file]: @@ -62,7 +62,7 @@ class TestIO(BackendTest): pa_tmp_file = 'pa_tmp.wav' sf.write(sf_tmp_file, waveform.T, sr) - paddleaudio.save(waveform.T, sr, pa_tmp_file) + paddlespeech.audio.save(waveform.T, sr, pa_tmp_file) self.assertTrue(filecmp.cmp(sf_tmp_file, pa_tmp_file)) for file in [sf_tmp_file, pa_tmp_file]: diff --git a/audio/tests/features/__init__.py b/tests/unit/audio/features/__init__.py similarity index 100% rename from audio/tests/features/__init__.py rename to tests/unit/audio/features/__init__.py diff --git a/audio/tests/features/base.py b/tests/unit/audio/features/base.py similarity index 97% rename from audio/tests/features/base.py rename to tests/unit/audio/features/base.py index 476f6b8e..6d59f72b 100644 --- 
a/audio/tests/features/base.py +++ b/tests/unit/audio/features/base.py @@ -17,7 +17,8 @@ import urllib.request import numpy as np import paddle -from paddleaudio import load + +from paddlespeech.audio import load wav_url = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav' diff --git a/audio/tests/features/test_istft.py b/tests/unit/audio/features/test_istft.py similarity index 96% rename from audio/tests/features/test_istft.py rename to tests/unit/audio/features/test_istft.py index 9cf8cdd6..f1e6e4e3 100644 --- a/audio/tests/features/test_istft.py +++ b/tests/unit/audio/features/test_istft.py @@ -15,9 +15,9 @@ import unittest import numpy as np import paddle -from paddleaudio.functional.window import get_window from .base import FeatTest +from paddlespeech.audio.functional.window import get_window from paddlespeech.s2t.transform.spectrogram import IStft from paddlespeech.s2t.transform.spectrogram import Stft diff --git a/audio/tests/features/test_kaldi.py b/tests/unit/audio/features/test_kaldi.py similarity index 87% rename from audio/tests/features/test_kaldi.py rename to tests/unit/audio/features/test_kaldi.py index 00a576f6..2b0ece89 100644 --- a/audio/tests/features/test_kaldi.py +++ b/tests/unit/audio/features/test_kaldi.py @@ -15,10 +15,10 @@ import unittest import numpy as np import paddle -import paddleaudio import torch import torchaudio +import paddlespeech.audio from .base import FeatTest @@ -40,17 +40,17 @@ class TestKaldi(FeatTest): self.window_size, periodic=False, dtype=eval(f'torch.{self.dtype}')).pow(0.85) - p_hann_window = paddleaudio.functional.window.get_window( + p_hann_window = paddlespeech.audio.functional.window.get_window( 'hann', self.window_size, fftbins=False, dtype=eval(f'paddle.{self.dtype}')) - p_hamm_window = paddleaudio.functional.window.get_window( + p_hamm_window = paddlespeech.audio.functional.window.get_window( 'hamming', self.window_size, fftbins=False, dtype=eval(f'paddle.{self.dtype}')) - p_povey_window = 
paddleaudio.functional.window.get_window( + p_povey_window = paddlespeech.audio.functional.window.get_window( 'hann', self.window_size, fftbins=False, @@ -63,7 +63,7 @@ class TestKaldi(FeatTest): def test_fbank(self): ta_features = torchaudio.compliance.kaldi.fbank( torch.from_numpy(self.waveform.astype(self.dtype))) - pa_features = paddleaudio.compliance.kaldi.fbank( + pa_features = paddlespeech.audio.compliance.kaldi.fbank( paddle.to_tensor(self.waveform.astype(self.dtype))) np.testing.assert_array_almost_equal( ta_features, pa_features, decimal=4) @@ -71,7 +71,7 @@ class TestKaldi(FeatTest): def test_mfcc(self): ta_features = torchaudio.compliance.kaldi.mfcc( torch.from_numpy(self.waveform.astype(self.dtype))) - pa_features = paddleaudio.compliance.kaldi.mfcc( + pa_features = paddlespeech.audio.compliance.kaldi.mfcc( paddle.to_tensor(self.waveform.astype(self.dtype))) np.testing.assert_array_almost_equal( ta_features, pa_features, decimal=4) diff --git a/audio/tests/features/test_librosa.py b/tests/unit/audio/features/test_librosa.py similarity index 89% rename from audio/tests/features/test_librosa.py rename to tests/unit/audio/features/test_librosa.py index a1d3e840..ffdec3e7 100644 --- a/audio/tests/features/test_librosa.py +++ b/tests/unit/audio/features/test_librosa.py @@ -16,10 +16,10 @@ import unittest import librosa import numpy as np import paddle -import paddleaudio -from paddleaudio.functional.window import get_window +import paddlespeech.audio from .base import FeatTest +from paddlespeech.audio.functional.window import get_window class TestLibrosa(FeatTest): @@ -117,7 +117,7 @@ class TestLibrosa(FeatTest): htk=False, norm='slaney', dtype=self.waveform.dtype, ) - feature_compliance = paddleaudio.compliance.librosa.compute_fbank_matrix( + feature_compliance = paddlespeech.audio.compliance.librosa.compute_fbank_matrix( sr=self.sr, n_fft=self.n_fft, n_mels=self.n_mels, @@ -127,7 +127,7 @@ class TestLibrosa(FeatTest): norm='slaney', 
dtype=self.waveform.dtype, ) x = paddle.to_tensor(self.waveform) - feature_functional = paddleaudio.functional.compute_fbank_matrix( + feature_functional = paddlespeech.audio.functional.compute_fbank_matrix( sr=self.sr, n_fft=self.n_fft, n_mels=self.n_mels, @@ -156,8 +156,8 @@ class TestLibrosa(FeatTest): n_mels=self.n_mels, fmin=self.fmin) - # paddleaudio.compliance.librosa: - feature_compliance = paddleaudio.compliance.librosa.melspectrogram( + # paddlespeech.audio.compliance.librosa: + feature_compliance = paddlespeech.audio.compliance.librosa.melspectrogram( x=self.waveform, sr=self.sr, window_size=self.n_fft, @@ -166,10 +166,10 @@ class TestLibrosa(FeatTest): fmin=self.fmin, to_db=False) - # paddleaudio.features.layer + # paddlespeech.audio.features.layer x = paddle.to_tensor( self.waveform, dtype=paddle.float64).unsqueeze(0) # Add batch dim. - feature_extractor = paddleaudio.features.MelSpectrogram( + feature_extractor = paddlespeech.audio.features.MelSpectrogram( sr=self.sr, n_fft=self.n_fft, hop_length=self.hop_length, @@ -198,8 +198,8 @@ class TestLibrosa(FeatTest): fmin=self.fmin) feature_librosa = librosa.power_to_db(feature_librosa, top_db=None) - # paddleaudio.compliance.librosa: - feature_compliance = paddleaudio.compliance.librosa.melspectrogram( + # paddlespeech.audio.compliance.librosa: + feature_compliance = paddlespeech.audio.compliance.librosa.melspectrogram( x=self.waveform, sr=self.sr, window_size=self.n_fft, @@ -207,10 +207,10 @@ class TestLibrosa(FeatTest): n_mels=self.n_mels, fmin=self.fmin) - # paddleaudio.features.layer + # paddlespeech.audio.features.layer x = paddle.to_tensor( self.waveform, dtype=paddle.float64).unsqueeze(0) # Add batch dim. 
- feature_extractor = paddleaudio.features.LogMelSpectrogram( + feature_extractor = paddlespeech.audio.features.LogMelSpectrogram( sr=self.sr, n_fft=self.n_fft, hop_length=self.hop_length, @@ -243,8 +243,8 @@ class TestLibrosa(FeatTest): n_mels=self.n_mels, fmin=self.fmin) - # paddleaudio.compliance.librosa: - feature_compliance = paddleaudio.compliance.librosa.mfcc( + # paddlespeech.audio.compliance.librosa: + feature_compliance = paddlespeech.audio.compliance.librosa.mfcc( x=self.waveform, sr=self.sr, n_mfcc=self.n_mfcc, @@ -257,10 +257,10 @@ class TestLibrosa(FeatTest): fmin=self.fmin, top_db=self.top_db) - # paddleaudio.features.layer + # paddlespeech.audio.features.layer x = paddle.to_tensor( self.waveform, dtype=paddle.float64).unsqueeze(0) # Add batch dim. - feature_extractor = paddleaudio.features.MFCC( + feature_extractor = paddlespeech.audio.features.MFCC( sr=self.sr, n_mfcc=self.n_mfcc, n_fft=self.n_fft, diff --git a/audio/tests/features/test_log_melspectrogram.py b/tests/unit/audio/features/test_log_melspectrogram.py similarity index 90% rename from audio/tests/features/test_log_melspectrogram.py rename to tests/unit/audio/features/test_log_melspectrogram.py index 0383c2b8..59eb73e8 100644 --- a/audio/tests/features/test_log_melspectrogram.py +++ b/tests/unit/audio/features/test_log_melspectrogram.py @@ -15,8 +15,8 @@ import unittest import numpy as np import paddle -import paddleaudio +import paddlespeech.audio from .base import FeatTest from paddlespeech.s2t.transform.spectrogram import LogMelSpectrogram @@ -33,8 +33,7 @@ class TestLogMelSpectrogram(FeatTest): ps_res = ps_melspect(self.waveform.T).squeeze(1).T x = paddle.to_tensor(self.waveform) - # paddlespeech.s2t的特征存在幅度谱和功率谱滥用的情况 - ps_melspect = paddleaudio.features.LogMelSpectrogram( + ps_melspect = paddlespeech.audio.features.LogMelSpectrogram( self.sr, self.n_fft, self.hop_length, diff --git a/audio/tests/features/test_spectrogram.py b/tests/unit/audio/features/test_spectrogram.py similarity 
index 93% rename from audio/tests/features/test_spectrogram.py rename to tests/unit/audio/features/test_spectrogram.py index 1774fe61..7d908a7e 100644 --- a/audio/tests/features/test_spectrogram.py +++ b/tests/unit/audio/features/test_spectrogram.py @@ -15,8 +15,8 @@ import unittest import numpy as np import paddle -import paddleaudio +import paddlespeech.audio from .base import FeatTest from paddlespeech.s2t.transform.spectrogram import Spectrogram @@ -31,7 +31,7 @@ class TestSpectrogram(FeatTest): ps_res = ps_spect(self.waveform.T).squeeze(1).T # Magnitude x = paddle.to_tensor(self.waveform) - pa_spect = paddleaudio.features.Spectrogram( + pa_spect = paddlespeech.audio.features.Spectrogram( self.n_fft, self.hop_length, power=1.0) pa_res = pa_spect(x).squeeze(0).numpy() diff --git a/audio/tests/features/test_stft.py b/tests/unit/audio/features/test_stft.py similarity index 95% rename from audio/tests/features/test_stft.py rename to tests/unit/audio/features/test_stft.py index 58792ffe..03448ca8 100644 --- a/audio/tests/features/test_stft.py +++ b/tests/unit/audio/features/test_stft.py @@ -15,9 +15,9 @@ import unittest import numpy as np import paddle -from paddleaudio.functional.window import get_window from .base import FeatTest +from paddlespeech.audio.functional.window import get_window from paddlespeech.s2t.transform.spectrogram import Stft From 4aaa8effe8339501546661c60bfeccdfe1a2bc9d Mon Sep 17 00:00:00 2001 From: KP <109694228@qq.com> Date: Tue, 14 Jun 2022 17:32:55 +0800 Subject: [PATCH 2/2] Refactor paddleaudio to paddlespeech.audio --- paddlespeech/audio/datasets/esc50.py | 2 +- paddlespeech/audio/datasets/gtzan.py | 2 +- paddlespeech/audio/datasets/tess.py | 2 +- paddlespeech/audio/datasets/urban_sound.py | 2 +- paddlespeech/cli/cls/infer.py | 2 -- paddlespeech/cli/vector/infer.py | 2 -- paddlespeech/cls/models/panns/panns.py | 2 +- 7 files changed, 5 insertions(+), 9 deletions(-) diff --git a/paddlespeech/audio/datasets/esc50.py 
b/paddlespeech/audio/datasets/esc50.py index e7477d40..f5c7050f 100644 --- a/paddlespeech/audio/datasets/esc50.py +++ b/paddlespeech/audio/datasets/esc50.py @@ -16,8 +16,8 @@ import os from typing import List from typing import Tuple +from ..utils import DATA_HOME from ..utils.download import download_and_decompress -from ..utils.env import DATA_HOME from .dataset import AudioClassificationDataset __all__ = ['ESC50'] diff --git a/paddlespeech/audio/datasets/gtzan.py b/paddlespeech/audio/datasets/gtzan.py index cfea6f37..1f6835a5 100644 --- a/paddlespeech/audio/datasets/gtzan.py +++ b/paddlespeech/audio/datasets/gtzan.py @@ -17,8 +17,8 @@ import random from typing import List from typing import Tuple +from ..utils import DATA_HOME from ..utils.download import download_and_decompress -from ..utils.env import DATA_HOME from .dataset import AudioClassificationDataset __all__ = ['GTZAN'] diff --git a/paddlespeech/audio/datasets/tess.py b/paddlespeech/audio/datasets/tess.py index 8faab9c3..1469fa5e 100644 --- a/paddlespeech/audio/datasets/tess.py +++ b/paddlespeech/audio/datasets/tess.py @@ -17,8 +17,8 @@ import random from typing import List from typing import Tuple +from ..utils import DATA_HOME from ..utils.download import download_and_decompress -from ..utils.env import DATA_HOME from .dataset import AudioClassificationDataset __all__ = ['TESS'] diff --git a/paddlespeech/audio/datasets/urban_sound.py b/paddlespeech/audio/datasets/urban_sound.py index d97c4d1d..0389cd5f 100644 --- a/paddlespeech/audio/datasets/urban_sound.py +++ b/paddlespeech/audio/datasets/urban_sound.py @@ -16,8 +16,8 @@ import os from typing import List from typing import Tuple +from ..utils import DATA_HOME from ..utils.download import download_and_decompress -from ..utils.env import DATA_HOME from .dataset import AudioClassificationDataset __all__ = ['UrbanSound8K'] diff --git a/paddlespeech/cli/cls/infer.py b/paddlespeech/cli/cls/infer.py index f4e8baea..942dc3b9 100644 --- 
a/paddlespeech/cli/cls/infer.py +++ b/paddlespeech/cli/cls/infer.py @@ -21,8 +21,6 @@ from typing import Union import numpy as np import paddle import yaml -from paddleaudio import load -from paddleaudio.features import LogMelSpectrogram from ..executor import BaseExecutor from ..log import logger diff --git a/paddlespeech/cli/vector/infer.py b/paddlespeech/cli/vector/infer.py index c736a53e..4bc8e135 100644 --- a/paddlespeech/cli/vector/infer.py +++ b/paddlespeech/cli/vector/infer.py @@ -22,8 +22,6 @@ from typing import Union import paddle import soundfile -from paddleaudio.backends import load as load_audio -from paddleaudio.compliance.librosa import melspectrogram from yacs.config import CfgNode from ..executor import BaseExecutor diff --git a/paddlespeech/cls/models/panns/panns.py b/paddlespeech/cls/models/panns/panns.py index f2a1b9ae..4befe7aa 100644 --- a/paddlespeech/cls/models/panns/panns.py +++ b/paddlespeech/cls/models/panns/panns.py @@ -16,8 +16,8 @@ import os import paddle.nn as nn import paddle.nn.functional as F +from paddlespeech.audio.utils import MODEL_HOME from paddlespeech.audio.utils.download import load_state_dict_from_url -from paddlespeech.audio.utils.env import MODEL_HOME __all__ = ['CNN14', 'CNN10', 'CNN6', 'cnn14', 'cnn10', 'cnn6']