From f29ae92a88451d8f2b6d486870db66cc2635dffe Mon Sep 17 00:00:00 2001
From: huangyuxin
Date: Fri, 25 Feb 2022 06:14:31 +0000
Subject: [PATCH 01/45] add unit test for deepspeech2online inference

---
 .../unit/asr/deepspeech2_online_model_test.py | 85 +++++++++++++++++-
 .../test_data/static_ds2online_inputs.pickle  | Bin 0 -> 45895 bytes
 2 files changed, 84 insertions(+), 1 deletion(-)
 create mode 100644 tests/unit/asr/test_data/static_ds2online_inputs.pickle

diff --git a/tests/unit/asr/deepspeech2_online_model_test.py b/tests/unit/asr/deepspeech2_online_model_test.py
index f623c5ac..3d634945 100644
--- a/tests/unit/asr/deepspeech2_online_model_test.py
+++ b/tests/unit/asr/deepspeech2_online_model_test.py
@@ -15,9 +15,12 @@
 import unittest
 
 import numpy as np
 import paddle
+import pickle
+import os
+from paddle import inference
 
 from paddlespeech.s2t.models.ds2_online import DeepSpeech2ModelOnline
-
+from paddlespeech.s2t.models.ds2_online import DeepSpeech2InferModelOnline
 class TestDeepSpeech2ModelOnline(unittest.TestCase):
     def setUp(self):
@@ -182,5 +185,85 @@ class TestDeepSpeech2ModelOnline(unittest.TestCase):
                 paddle.allclose(final_state_c_box, final_state_c_box_chk),
                 True)
 
+
+
+class TestDeepSpeech2StaticModelOnline(unittest.TestCase):
+
+    def setUp(self):
+        export_prefix = "exp/deepspeech2_online/checkpoints/test_export"
+        os.makedirs(os.path.dirname(export_prefix), mode=0o755, exist_ok=True)
+        infer_model = DeepSpeech2InferModelOnline(
+            feat_size=161,
+            dict_size=4233,
+            num_conv_layers=2,
+            num_rnn_layers=5,
+            rnn_size=1024,
+            num_fc_layers=0,
+            fc_layers_size_list=[-1],
+            use_gru=False)
+        static_model = infer_model.export()
+        paddle.jit.save(static_model, export_prefix)
+
+        with open("test_data/static_ds2online_inputs.pickle", "rb") as f:
+            self.data_dict = pickle.load(f)
+
+        self.setup_model(export_prefix)
+
+
+    def setup_model(self, export_prefix):
+        deepspeech_config = inference.Config(
+            export_prefix + ".pdmodel",
+            export_prefix + ".pdiparams")
+        if ('CUDA_VISIBLE_DEVICES' in os.environ and os.environ['CUDA_VISIBLE_DEVICES'].strip() != ''):
+            deepspeech_config.enable_use_gpu(100, 0)
+            deepspeech_config.enable_memory_optim()
+        deepspeech_predictor = inference.create_predictor(deepspeech_config)
+        self.predictor = deepspeech_predictor
+
+    def test_unit(self):
+        input_names = self.predictor.get_input_names()
+        audio_handle = self.predictor.get_input_handle(input_names[0])
+        audio_len_handle = self.predictor.get_input_handle(input_names[1])
+        h_box_handle = self.predictor.get_input_handle(input_names[2])
+        c_box_handle = self.predictor.get_input_handle(input_names[3])
+
+
+        x_chunk = self.data_dict["audio_chunk"]
+        x_chunk_lens = self.data_dict["audio_chunk_lens"]
+        chunk_state_h_box = self.data_dict["chunk_state_h_box"]
+        chunk_state_c_box = self.data_dict["chunk_state_c_bos"]
+
+        audio_handle.reshape(x_chunk.shape)
+        audio_handle.copy_from_cpu(x_chunk)
+
+        audio_len_handle.reshape(x_chunk_lens.shape)
+        audio_len_handle.copy_from_cpu(x_chunk_lens)
+
+        h_box_handle.reshape(chunk_state_h_box.shape)
+        h_box_handle.copy_from_cpu(chunk_state_h_box)
+
+        c_box_handle.reshape(chunk_state_c_box.shape)
+        c_box_handle.copy_from_cpu(chunk_state_c_box)
+
+
+
+        output_names = self.predictor.get_output_names()
+        output_handle = self.predictor.get_output_handle(
+            output_names[0])
+        output_lens_handle = self.predictor.get_output_handle(
+            output_names[1])
+        output_state_h_handle = self.predictor.get_output_handle(
+            output_names[2])
+        output_state_c_handle = self.predictor.get_output_handle(
+            output_names[3])
+        self.predictor.run()
+
+        output_chunk_probs = output_handle.copy_to_cpu()
+        output_chunk_lens = output_lens_handle.copy_to_cpu()
+        chunk_state_h_box = output_state_h_handle.copy_to_cpu()
+        chunk_state_c_box = output_state_c_handle.copy_to_cpu()
+        return True
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/unit/asr/test_data/static_ds2online_inputs.pickle b/tests/unit/asr/test_data/static_ds2online_inputs.pickle
new file mode 100644
index 0000000000000000000000000000000000000000..8aca0543ad69f8bb64b77664375bbd281a3747c6
GIT binary patch
literal 45895
[45895 bytes of base85-encoded binary data omitted; the pickle stores the
audio_chunk, audio_chunk_lens, chunk_state_h_box and chunk_state_c_bos
arrays that test_unit reads]

literal 0
HcmV?d00001

From c437a7c5c193625ecbc6bfead3c5ee216e16c808 Mon Sep 17 00:00:00 2001
From: Hui Zhang
Date: Fri, 25 Feb 2022 07:12:56 +0000
Subject: [PATCH 02/45] refactor paddleaudio, test=doc

---
 .gitignore                                    |   1 +
 paddleaudio/CHANGELOG.md                      |   3 +
 paddleaudio/features/augment.py               | 170 ------------------
 .../{backends => paddleaudio}/__init__.py     |   1 -
 .../{ => paddleaudio/backends}/__init__.py    |   2 -
 .../paddleaudio/backends/soundfile_backend.py |   0
 .../paddleaudio/backends/sox_backend.py       |   0
 .../{ => paddleaudio}/datasets/__init__.py    |   0
 .../{ => paddleaudio}/datasets/dataset.py     |   0
 .../{ => paddleaudio}/datasets/esc50.py       |   0
 .../{ => paddleaudio}/datasets/gtzan.py       |   0
 .../{ => paddleaudio}/datasets/tess.py        |   0
 .../{ => paddleaudio}/datasets/urban_sound.py |   0
 .../{ => paddleaudio}/features/__init__.py    |   7 +-
.../features/librosa.py} | 2 +- .../paddleaudio/functional/__init__.py | 0 .../functional/functional.py} | 155 +++++++++++++++- .../functional}/window.py | 44 +++-- paddleaudio/paddleaudio/io/__init__.py | 6 + .../{backends => paddleaudio/io}/audio.py | 0 paddleaudio/paddleaudio/kaldi/__init__.py | 0 .../paddleaudio/sox_effects/__init__.py | 0 .../{ => paddleaudio}/utils/__init__.py | 22 ++- .../{ => paddleaudio}/utils/download.py | 5 + paddleaudio/{ => paddleaudio}/utils/env.py | 6 + paddleaudio/{ => paddleaudio}/utils/error.py | 0 paddleaudio/{ => paddleaudio}/utils/log.py | 5 +- paddleaudio/{ => paddleaudio}/utils/time.py | 4 + setup_audio.py => paddleaudio/setup.py | 2 +- requirements.txt | 48 ----- 30 files changed, 234 insertions(+), 249 deletions(-) delete mode 100644 paddleaudio/features/augment.py rename paddleaudio/{backends => paddleaudio}/__init__.py (96%) rename paddleaudio/{ => paddleaudio/backends}/__init__.py (92%) create mode 100644 paddleaudio/paddleaudio/backends/soundfile_backend.py create mode 100644 paddleaudio/paddleaudio/backends/sox_backend.py rename paddleaudio/{ => paddleaudio}/datasets/__init__.py (100%) rename paddleaudio/{ => paddleaudio}/datasets/dataset.py (100%) rename paddleaudio/{ => paddleaudio}/datasets/esc50.py (100%) rename paddleaudio/{ => paddleaudio}/datasets/gtzan.py (100%) rename paddleaudio/{ => paddleaudio}/datasets/tess.py (100%) rename paddleaudio/{ => paddleaudio}/datasets/urban_sound.py (100%) rename paddleaudio/{ => paddleaudio}/features/__init__.py (84%) rename paddleaudio/{features/spectrum.py => paddleaudio/features/librosa.py} (99%) create mode 100644 paddleaudio/paddleaudio/functional/__init__.py rename paddleaudio/{features/core.py => paddleaudio/functional/functional.py} (79%) rename paddleaudio/{features => paddleaudio/functional}/window.py (98%) create mode 100644 paddleaudio/paddleaudio/io/__init__.py rename paddleaudio/{backends => paddleaudio/io}/audio.py (100%) create mode 100644 paddleaudio/paddleaudio/kaldi/__init__.py create mode 100644 paddleaudio/paddleaudio/sox_effects/__init__.py rename paddleaudio/{ => paddleaudio}/utils/__init__.py (61%) rename paddleaudio/{ => paddleaudio}/utils/download.py (94%) rename paddleaudio/{ => paddleaudio}/utils/env.py (95%) rename paddleaudio/{ => paddleaudio}/utils/error.py (100%) rename paddleaudio/{ => paddleaudio}/utils/log.py (98%) rename paddleaudio/{ => paddleaudio}/utils/time.py (97%) rename setup_audio.py => paddleaudio/setup.py (99%) delete mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore index cc8fff87..374276b4 100644 --- a/.gitignore +++ b/.gitignore @@ -30,5 +30,6 @@ tools/OpenBLAS/ tools/Miniconda3-latest-Linux-x86_64.sh tools/activate_python.sh tools/miniconda.sh +tools/CRF++-0.58/ *output/ diff --git a/paddleaudio/CHANGELOG.md b/paddleaudio/CHANGELOG.md index 825c32f0..e6889567 100644 --- a/paddleaudio/CHANGELOG.md +++ b/paddleaudio/CHANGELOG.md @@ -1 +1,4 @@ # Changelog + +Date: 2022-2-25, Author: Hui Zhang. + - Refactor architecture. \ No newline at end of file diff --git a/paddleaudio/features/augment.py b/paddleaudio/features/augment.py deleted file mode 100644 index 6f903bdb..00000000 --- a/paddleaudio/features/augment.py +++ /dev/null @@ -1,170 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from typing import List - -import numpy as np -from numpy import ndarray as array - -from ..backends import depth_convert -from ..utils import ParameterError - -__all__ = [ - 'depth_augment', - 'spect_augment', - 'random_crop1d', - 'random_crop2d', - 'adaptive_spect_augment', -] - - -def randint(high: int) -> int: - """Generate one random integer in range [0 high) - - This is a helper function for random data augmentaiton - """ - return int(np.random.randint(0, high=high)) - - -def rand() -> float: - """Generate one floating-point number in range [0 1) - - This is a helper function for random data augmentaiton - """ - return float(np.random.rand(1)) - - -def depth_augment(y: array, - choices: List=['int8', 'int16'], - probs: List[float]=[0.5, 0.5]) -> array: - """ Audio depth augmentation - - Do audio depth augmentation to simulate the distortion brought by quantization. - """ - assert len(probs) == len( - choices - ), 'number of choices {} must be equal to size of probs {}'.format( - len(choices), len(probs)) - depth = np.random.choice(choices, p=probs) - src_depth = y.dtype - y1 = depth_convert(y, depth) - y2 = depth_convert(y1, src_depth) - - return y2 - - -def adaptive_spect_augment(spect: array, tempo_axis: int=0, - level: float=0.1) -> array: - """Do adpative spectrogram augmentation - - The level of the augmentation is gowern by the paramter level, - ranging from 0 to 1, with 0 represents no augmentation。 - - """ - assert spect.ndim == 2., 'only supports 2d tensor or numpy array' - if tempo_axis == 0: - nt, nf = spect.shape - else: - nf, nt = spect.shape - - time_mask_width = int(nt * level * 0.5) - freq_mask_width = int(nf * level * 0.5) - - num_time_mask = int(10 * level) - num_freq_mask = int(10 * level) - - if tempo_axis == 0: - for _ in range(num_time_mask): - start = randint(nt - time_mask_width) - spect[start:start + time_mask_width, :] = 0 - for _ in range(num_freq_mask): - start = randint(nf - freq_mask_width) - spect[:, start:start + freq_mask_width] = 0 - else: - for _ in range(num_time_mask): - start = randint(nt - time_mask_width) - spect[:, start:start + time_mask_width] = 0 - for _ in range(num_freq_mask): - start = randint(nf - freq_mask_width) - spect[start:start + freq_mask_width, :] = 0 - - return spect - - -def spect_augment(spect: array, - tempo_axis: int=0, - max_time_mask: int=3, - max_freq_mask: int=3, - max_time_mask_width: int=30, - max_freq_mask_width: int=20) -> array: - """Do spectrogram augmentation in both time and freq axis - - Reference: - - """ - assert spect.ndim == 2., 'only supports 2d tensor or numpy array' - if tempo_axis == 0: - nt, nf = spect.shape - else: - nf, nt = spect.shape - - num_time_mask = randint(max_time_mask) - num_freq_mask = randint(max_freq_mask) - - time_mask_width = randint(max_time_mask_width) - freq_mask_width = randint(max_freq_mask_width) - - if tempo_axis == 0: - for _ in range(num_time_mask): - start = randint(nt - time_mask_width) - spect[start:start + time_mask_width, :] = 0 - for _ in range(num_freq_mask): - start = randint(nf - freq_mask_width) - spect[:, start:start + freq_mask_width] = 0 - else: - for _ 
in range(num_time_mask): - start = randint(nt - time_mask_width) - spect[:, start:start + time_mask_width] = 0 - for _ in range(num_freq_mask): - start = randint(nf - freq_mask_width) - spect[start:start + freq_mask_width, :] = 0 - - return spect - - -def random_crop1d(y: array, crop_len: int) -> array: - """ Do random cropping on 1d input signal - - The input is a 1d signal, typically a sound waveform - """ - if y.ndim != 1: - 'only accept 1d tensor or numpy array' - n = len(y) - idx = randint(n - crop_len) - return y[idx:idx + crop_len] - - -def random_crop2d(s: array, crop_len: int, tempo_axis: int=0) -> array: - """ Do random cropping for 2D array, typically a spectrogram. - - The cropping is done in temporal direction on the time-freq input signal. - """ - if tempo_axis >= s.ndim: - raise ParameterError('axis out of range') - - n = s.shape[tempo_axis] - idx = randint(high=n - crop_len) - sli = [slice(None) for i in range(s.ndim)] - sli[tempo_axis] = slice(idx, idx + crop_len) - out = s[tuple(sli)] - return out diff --git a/paddleaudio/backends/__init__.py b/paddleaudio/paddleaudio/__init__.py similarity index 96% rename from paddleaudio/backends/__init__.py rename to paddleaudio/paddleaudio/__init__.py index f2f77ffe..185a92b8 100644 --- a/paddleaudio/backends/__init__.py +++ b/paddleaudio/paddleaudio/__init__.py @@ -11,4 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from .audio import * diff --git a/paddleaudio/__init__.py b/paddleaudio/paddleaudio/backends/__init__.py similarity index 92% rename from paddleaudio/__init__.py rename to paddleaudio/paddleaudio/backends/__init__.py index 2685cf57..185a92b8 100644 --- a/paddleaudio/__init__.py +++ b/paddleaudio/paddleaudio/backends/__init__.py @@ -11,5 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from .backends import * -from .features import * diff --git a/paddleaudio/paddleaudio/backends/soundfile_backend.py b/paddleaudio/paddleaudio/backends/soundfile_backend.py new file mode 100644 index 00000000..e69de29b diff --git a/paddleaudio/paddleaudio/backends/sox_backend.py b/paddleaudio/paddleaudio/backends/sox_backend.py new file mode 100644 index 00000000..e69de29b diff --git a/paddleaudio/datasets/__init__.py b/paddleaudio/paddleaudio/datasets/__init__.py similarity index 100% rename from paddleaudio/datasets/__init__.py rename to paddleaudio/paddleaudio/datasets/__init__.py diff --git a/paddleaudio/datasets/dataset.py b/paddleaudio/paddleaudio/datasets/dataset.py similarity index 100% rename from paddleaudio/datasets/dataset.py rename to paddleaudio/paddleaudio/datasets/dataset.py diff --git a/paddleaudio/datasets/esc50.py b/paddleaudio/paddleaudio/datasets/esc50.py similarity index 100% rename from paddleaudio/datasets/esc50.py rename to paddleaudio/paddleaudio/datasets/esc50.py diff --git a/paddleaudio/datasets/gtzan.py b/paddleaudio/paddleaudio/datasets/gtzan.py similarity index 100% rename from paddleaudio/datasets/gtzan.py rename to paddleaudio/paddleaudio/datasets/gtzan.py diff --git a/paddleaudio/datasets/tess.py b/paddleaudio/paddleaudio/datasets/tess.py similarity index 100% rename from paddleaudio/datasets/tess.py rename to paddleaudio/paddleaudio/datasets/tess.py diff --git a/paddleaudio/datasets/urban_sound.py b/paddleaudio/paddleaudio/datasets/urban_sound.py similarity index 100% rename from paddleaudio/datasets/urban_sound.py rename to paddleaudio/paddleaudio/datasets/urban_sound.py diff --git a/paddleaudio/features/__init__.py b/paddleaudio/paddleaudio/features/__init__.py similarity index 84% rename from paddleaudio/features/__init__.py rename to paddleaudio/paddleaudio/features/__init__.py index d8ac7c4b..1688cc5c 100644 --- a/paddleaudio/features/__init__.py +++ b/paddleaudio/paddleaudio/features/__init__.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from .augment import *
-from .core import *
-from .spectrum import *
+
+from .librosa import Spectrogram
+from .librosa import MelSpectrogram
+from .librosa import LogMelSpectrogram
\ No newline at end of file
diff --git a/paddleaudio/features/spectrum.py b/paddleaudio/paddleaudio/features/librosa.py
similarity index 99%
rename from paddleaudio/features/spectrum.py
rename to paddleaudio/paddleaudio/features/librosa.py
index 154b6484..1cbd2d1a 100644
--- a/paddleaudio/features/spectrum.py
+++ b/paddleaudio/paddleaudio/features/librosa.py
@@ -19,7 +19,7 @@ from typing import Union
 import paddle
 import paddle.nn as nn
 
-from .window import get_window
+from ..functional.window import get_window
 
 __all__ = [
     'Spectrogram',
diff --git a/paddleaudio/paddleaudio/functional/__init__.py b/paddleaudio/paddleaudio/functional/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/paddleaudio/features/core.py b/paddleaudio/paddleaudio/functional/functional.py
similarity index 79%
rename from paddleaudio/features/core.py
rename to paddleaudio/paddleaudio/functional/functional.py
index 01925ec6..ce49cdc4 100644
--- a/paddleaudio/features/core.py
+++ b/paddleaudio/paddleaudio/functional/functional.py
@@ -21,11 +21,14 @@ import numpy as np
 import scipy
 from numpy import ndarray as array
 from numpy.lib.stride_tricks import as_strided
-from scipy.signal import get_window
+from scipy import signal
 
 from ..utils import ParameterError
+from ..backends import depth_convert
+
 
 __all__ = [
+    # dsp
     'stft',
     'mfcc',
     'hz_to_mel',
@@ -38,6 +41,12 @@ __all__ = [
     'spectrogram',
     'mu_encode',
     'mu_decode',
+    # augmentation
+    'depth_augment',
+    'spect_augment',
+    'random_crop1d',
+    'random_crop2d',
+    'adaptive_spect_augment',
 ]
 
 
@@ -303,7 +312,7 @@ def stft(x: array,
     if hop_length is None:
         hop_length = int(win_length // 4)
 
-    fft_window = get_window(window, win_length, fftbins=True)
+    fft_window = signal.get_window(window, win_length, fftbins=True)
 
     # Pad the window out to n_fft size
     fft_window = pad_center(fft_window, n_fft)
@@ -576,3 +585,145 @@ def mu_decode(y: array, mu: int=255, quantized: bool=True) -> array:
         y = y * 2 / mu - 1
     x = np.sign(y) / mu * ((1 + mu)**np.abs(y) - 1)
     return x
+
+
+def randint(high: int) -> int:
+    """Generate one random integer in range [0 high)
+
+    This is a helper function for random data augmentation
+    """
+    return int(np.random.randint(0, high=high))
+
+
+def rand() -> float:
+    """Generate one floating-point number in range [0 1)
+
+    This is a helper function for random data augmentation
+    """
+    return float(np.random.rand(1))
+
+
+def depth_augment(y: array,
+                  choices: List=['int8', 'int16'],
+                  probs: List[float]=[0.5, 0.5]) -> array:
+    """ Audio depth augmentation
+
+    Do audio depth augmentation to simulate the distortion brought by quantization.
+    """
+    assert len(probs) == len(
+        choices
+    ), 'number of choices {} must be equal to size of probs {}'.format(
+        len(choices), len(probs))
+    depth = np.random.choice(choices, p=probs)
+    src_depth = y.dtype
+    y1 = depth_convert(y, depth)
+    y2 = depth_convert(y1, src_depth)
+
+    return y2
+
+
+def adaptive_spect_augment(spect: array, tempo_axis: int=0,
+                           level: float=0.1) -> array:
+    """Do adaptive spectrogram augmentation
+
+    The level of the augmentation is governed by the parameter level,
+    ranging from 0 to 1, with 0 representing no augmentation.
+
+    """
+    assert spect.ndim == 2, 'only supports 2d tensor or numpy array'
+    if tempo_axis == 0:
+        nt, nf = spect.shape
+    else:
+        nf, nt = spect.shape
+
+    time_mask_width = int(nt * level * 0.5)
+    freq_mask_width = int(nf * level * 0.5)
+
+    num_time_mask = int(10 * level)
+    num_freq_mask = int(10 * level)
+
+    if tempo_axis == 0:
+        for _ in range(num_time_mask):
+            start = randint(nt - time_mask_width)
+            spect[start:start + time_mask_width, :] = 0
+        for _ in range(num_freq_mask):
+            start = randint(nf - freq_mask_width)
+            spect[:, start:start + freq_mask_width] = 0
+    else:
+        for _ in range(num_time_mask):
+            start = randint(nt - time_mask_width)
+            spect[:, start:start + time_mask_width] = 0
+        for _ in range(num_freq_mask):
+            start = randint(nf - freq_mask_width)
+            spect[start:start + freq_mask_width, :] = 0
+
+    return spect
+
+
+def spect_augment(spect: array,
+                  tempo_axis: int=0,
+                  max_time_mask: int=3,
+                  max_freq_mask: int=3,
+                  max_time_mask_width: int=30,
+                  max_freq_mask_width: int=20) -> array:
+    """Do spectrogram augmentation in both time and freq axis
+
+    Reference:
+
+    """
+    assert spect.ndim == 2, 'only supports 2d tensor or numpy array'
+    if tempo_axis == 0:
+        nt, nf = spect.shape
+    else:
+        nf, nt = spect.shape
+
+    num_time_mask = randint(max_time_mask)
+    num_freq_mask = randint(max_freq_mask)
+
+    time_mask_width = randint(max_time_mask_width)
+    freq_mask_width = randint(max_freq_mask_width)
+
+    if tempo_axis == 0:
+        for _ in range(num_time_mask):
+            start = randint(nt - time_mask_width)
+            spect[start:start + time_mask_width, :] = 0
+        for _ in range(num_freq_mask):
+            start = randint(nf - freq_mask_width)
+            spect[:, start:start + freq_mask_width] = 0
+    else:
+        for _ in range(num_time_mask):
+            start = randint(nt - time_mask_width)
+            spect[:, start:start + time_mask_width] = 0
+        for _ in range(num_freq_mask):
+            start = randint(nf - freq_mask_width)
+            spect[start:start + freq_mask_width, :] = 0
+
+    return spect
+
+
+def random_crop1d(y: array, crop_len: int) -> array:
+    """ Do random cropping on 1d input signal
+
+    The input is a 1d signal, typically a sound waveform
+    """
+    if y.ndim != 1:
+        raise ParameterError('only accept 1d tensor or numpy array')
+    n = len(y)
+    idx = randint(n - crop_len)
+    return y[idx:idx + crop_len]
+
+
+def random_crop2d(s: array, crop_len: int, tempo_axis: int=0) -> array:
+    """ Do random cropping for 2D array, typically a spectrogram.
+
+    The cropping is done in temporal direction on the time-freq input signal.
+ """ + if tempo_axis >= s.ndim: + raise ParameterError('axis out of range') + + n = s.shape[tempo_axis] + idx = randint(high=n - crop_len) + sli = [slice(None) for i in range(s.ndim)] + sli[tempo_axis] = slice(idx, idx + crop_len) + out = s[tuple(sli)] + return out \ No newline at end of file diff --git a/paddleaudio/features/window.py b/paddleaudio/paddleaudio/functional/window.py similarity index 98% rename from paddleaudio/features/window.py rename to paddleaudio/paddleaudio/functional/window.py index 629989fc..e34862b4 100644 --- a/paddleaudio/features/window.py +++ b/paddleaudio/paddleaudio/functional/window.py @@ -20,6 +20,19 @@ from paddle import Tensor __all__ = [ 'get_window', + + # windows + 'taylor', + 'hamming', + 'hann', + 'tukey', + 'kaiser', + 'gaussian', + 'exponential', + 'triang', + 'bohman', + 'blackman', + 'cosine', ] @@ -73,6 +86,21 @@ def general_gaussian(M: int, p, sig, sym: bool=True, return _truncate(w, needs_trunc) +def general_cosine(M: int, a: float, sym: bool=True, + dtype: str='float64') -> Tensor: + """Compute a generic weighted sum of cosine terms window. + This function is consistent with scipy.signal.windows.general_cosine(). + """ + if _len_guards(M): + return paddle.ones((M, ), dtype=dtype) + M, needs_trunc = _extend(M, sym) + fac = paddle.linspace(-math.pi, math.pi, M, dtype=dtype) + w = paddle.zeros((M, ), dtype=dtype) + for k in range(len(a)): + w += a[k] * paddle.cos(k * fac) + return _truncate(w, needs_trunc) + + def general_hamming(M: int, alpha: float, sym: bool=True, dtype: str='float64') -> Tensor: """Compute a generalized Hamming window. @@ -143,21 +171,6 @@ def taylor(M: int, return _truncate(w, needs_trunc) -def general_cosine(M: int, a: float, sym: bool=True, - dtype: str='float64') -> Tensor: - """Compute a generic weighted sum of cosine terms window. - This function is consistent with scipy.signal.windows.general_cosine(). - """ - if _len_guards(M): - return paddle.ones((M, ), dtype=dtype) - M, needs_trunc = _extend(M, sym) - fac = paddle.linspace(-math.pi, math.pi, M, dtype=dtype) - w = paddle.zeros((M, ), dtype=dtype) - for k in range(len(a)): - w += a[k] * paddle.cos(k * fac) - return _truncate(w, needs_trunc) - - def hamming(M: int, sym: bool=True, dtype: str='float64') -> Tensor: """Compute a Hamming window. 
The Hamming window is a taper formed by using a raised cosine with @@ -375,6 +388,7 @@ def cosine(M: int, sym: bool=True, dtype: str='float64') -> Tensor: return _truncate(w, needs_trunc) +## factory function def get_window(window: Union[str, Tuple[str, float]], win_length: int, fftbins: bool=True, diff --git a/paddleaudio/paddleaudio/io/__init__.py b/paddleaudio/paddleaudio/io/__init__.py new file mode 100644 index 00000000..3a9a01e8 --- /dev/null +++ b/paddleaudio/paddleaudio/io/__init__.py @@ -0,0 +1,6 @@ +from .audio import save_wav +from .audio import load +from .audio import normalize +from .audio import to_mono +from .audio import resample +from .audio import depth_convert \ No newline at end of file diff --git a/paddleaudio/backends/audio.py b/paddleaudio/paddleaudio/io/audio.py similarity index 100% rename from paddleaudio/backends/audio.py rename to paddleaudio/paddleaudio/io/audio.py diff --git a/paddleaudio/paddleaudio/kaldi/__init__.py b/paddleaudio/paddleaudio/kaldi/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/paddleaudio/paddleaudio/sox_effects/__init__.py b/paddleaudio/paddleaudio/sox_effects/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/paddleaudio/utils/__init__.py b/paddleaudio/paddleaudio/utils/__init__.py similarity index 61% rename from paddleaudio/utils/__init__.py rename to paddleaudio/paddleaudio/utils/__init__.py index 1c1b4a90..5fe0980b 100644 --- a/paddleaudio/utils/__init__.py +++ b/paddleaudio/paddleaudio/utils/__init__.py @@ -11,8 +11,20 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from .download import * -from .env import * -from .error import * -from .log import * -from .time import * + +from .env import USER_HOME +from .env import PPAUDIO_HOME +from .env import MODEL_HOME +from .env import DATA_HOME + +from .download import decompress +from .download import download_and_decompress +from .download import load_state_dict_from_url + +from .error import ParameterError + +from .log import logger +from .log import Logger + +from .time import Timer +from .time import seconds_to_hms diff --git a/paddleaudio/utils/download.py b/paddleaudio/paddleaudio/utils/download.py similarity index 94% rename from paddleaudio/utils/download.py rename to paddleaudio/paddleaudio/utils/download.py index 45a8e57b..fd4785cd 100644 --- a/paddleaudio/utils/download.py +++ b/paddleaudio/paddleaudio/utils/download.py @@ -22,6 +22,11 @@ from .log import logger download.logger = logger +__all__ = [ + 'decompress', + 'download_and_decompress', + 'load_state_dict_from_url', +] def decompress(file: str): """ diff --git a/paddleaudio/utils/env.py b/paddleaudio/paddleaudio/utils/env.py similarity index 95% rename from paddleaudio/utils/env.py rename to paddleaudio/paddleaudio/utils/env.py index 59c6b621..e202c380 100644 --- a/paddleaudio/utils/env.py +++ b/paddleaudio/paddleaudio/utils/env.py @@ -20,6 +20,12 @@ PPAUDIO_HOME --> the root directory for storing PaddleAudio related data. 
D ''' import os +__all__ = [ + 'USER_HOME', + 'PPAUDIO_HOME', +'MODEL_HOME' , +'DATA_HOME' , +] def _get_user_home(): return os.path.expanduser('~') diff --git a/paddleaudio/utils/error.py b/paddleaudio/paddleaudio/utils/error.py similarity index 100% rename from paddleaudio/utils/error.py rename to paddleaudio/paddleaudio/utils/error.py diff --git a/paddleaudio/utils/log.py b/paddleaudio/paddleaudio/utils/log.py similarity index 98% rename from paddleaudio/utils/log.py rename to paddleaudio/paddleaudio/utils/log.py index 5e7db68a..5656b286 100644 --- a/paddleaudio/utils/log.py +++ b/paddleaudio/paddleaudio/utils/log.py @@ -19,7 +19,10 @@ import time import colorlog -loggers = {} +__all__ = [ + 'Logger', + 'logger', +] log_config = { 'DEBUG': { diff --git a/paddleaudio/utils/time.py b/paddleaudio/paddleaudio/utils/time.py similarity index 97% rename from paddleaudio/utils/time.py rename to paddleaudio/paddleaudio/utils/time.py index 6f0c7585..23af62fc 100644 --- a/paddleaudio/utils/time.py +++ b/paddleaudio/paddleaudio/utils/time.py @@ -14,6 +14,10 @@ import math import time +__all__ = [ + 'Timer', + 'seconds_to_hms', +] class Timer(object): '''Calculate runing speed and estimated time of arrival(ETA)''' diff --git a/setup_audio.py b/paddleaudio/setup.py similarity index 99% rename from setup_audio.py rename to paddleaudio/setup.py index 21204998..98bf8a6f 100644 --- a/setup_audio.py +++ b/paddleaudio/setup.py @@ -14,7 +14,7 @@ import setuptools # set the version here -VERSION = '0.1.0' +VERSION = '0.2.0' def write_version_py(filename='paddleaudio/__init__.py'): diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 76082166..00000000 --- a/requirements.txt +++ /dev/null @@ -1,48 +0,0 @@ -ConfigArgParse -coverage -editdistance -g2p_en -g2pM -gpustat -h5py -inflect -jieba -jsonlines -kaldiio -librosa -loguru -matplotlib -nara_wpe -nltk -paddleaudio -paddlenlp -paddlespeech_ctcdecoders -paddlespeech_feat -pandas -phkit -Pillow -praatio==5.0.0 -pre-commit -pybind11 -pypi-kenlm -pypinyin -python-dateutil -pyworld -resampy==0.2.2 -sacrebleu -scipy -sentencepiece~=0.1.96 -snakeviz -soundfile~=0.10 -sox -soxbindings -textgrid -timer -tqdm -typeguard -unidecode -visualdl -webrtcvad -yacs~=0.1.8 -yq -zhon From 54f06041d48d32ea6bc81a461ec6ee645b993897 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Fri, 25 Feb 2022 07:29:26 +0000 Subject: [PATCH 03/45] add tests, metric dir and format, test=doc --- paddleaudio/CHANGELOG.md | 2 +- .../paddleaudio/backends/soundfile_backend.py | 13 +++++++++++++ .../paddleaudio/backends/sox_backend.py | 13 +++++++++++++ paddleaudio/paddleaudio/features/__init__.py | 5 ++--- .../paddleaudio/functional/__init__.py | 13 +++++++++++++ .../paddleaudio/functional/functional.py | 5 ++--- paddleaudio/paddleaudio/functional/window.py | 2 +- paddleaudio/paddleaudio/io/__init__.py | 19 ++++++++++++++++--- paddleaudio/paddleaudio/kaldi/__init__.py | 13 +++++++++++++ paddleaudio/paddleaudio/metric/__init__.py | 13 +++++++++++++ .../paddleaudio/sox_effects/__init__.py | 13 +++++++++++++ paddleaudio/paddleaudio/utils/__init__.py | 17 ++++++----------- paddleaudio/paddleaudio/utils/download.py | 1 + paddleaudio/paddleaudio/utils/env.py | 5 +++-- paddleaudio/paddleaudio/utils/time.py | 1 + paddleaudio/tests/.gitkeep | 0 16 files changed, 111 insertions(+), 24 deletions(-) create mode 100644 paddleaudio/paddleaudio/metric/__init__.py create mode 100644 paddleaudio/tests/.gitkeep diff --git a/paddleaudio/CHANGELOG.md b/paddleaudio/CHANGELOG.md index 
e6889567..52d44dd3 100644 --- a/paddleaudio/CHANGELOG.md +++ b/paddleaudio/CHANGELOG.md @@ -1,4 +1,4 @@ # Changelog Date: 2022-2-25, Author: Hui Zhang. - - Refactor architecture. \ No newline at end of file + - Refactor architecture. diff --git a/paddleaudio/paddleaudio/backends/soundfile_backend.py b/paddleaudio/paddleaudio/backends/soundfile_backend.py index e69de29b..97043fd7 100644 --- a/paddleaudio/paddleaudio/backends/soundfile_backend.py +++ b/paddleaudio/paddleaudio/backends/soundfile_backend.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/paddleaudio/paddleaudio/backends/sox_backend.py b/paddleaudio/paddleaudio/backends/sox_backend.py index e69de29b..97043fd7 100644 --- a/paddleaudio/paddleaudio/backends/sox_backend.py +++ b/paddleaudio/paddleaudio/backends/sox_backend.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/paddleaudio/paddleaudio/features/__init__.py b/paddleaudio/paddleaudio/features/__init__.py index 1688cc5c..469b4c9b 100644 --- a/paddleaudio/paddleaudio/features/__init__.py +++ b/paddleaudio/paddleaudio/features/__init__.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -from .librosa import Spectrogram +from .librosa import LogMelSpectrogram from .librosa import MelSpectrogram -from .librosa import LogMelSpectrogram \ No newline at end of file +from .librosa import Spectrogram diff --git a/paddleaudio/paddleaudio/functional/__init__.py b/paddleaudio/paddleaudio/functional/__init__.py index e69de29b..97043fd7 100644 --- a/paddleaudio/paddleaudio/functional/__init__.py +++ b/paddleaudio/paddleaudio/functional/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/paddleaudio/paddleaudio/functional/functional.py b/paddleaudio/paddleaudio/functional/functional.py index ce49cdc4..167795c3 100644 --- a/paddleaudio/paddleaudio/functional/functional.py +++ b/paddleaudio/paddleaudio/functional/functional.py @@ -23,9 +23,8 @@ from numpy import ndarray as array from numpy.lib.stride_tricks import as_strided from scipy import signal -from ..utils import ParameterError from ..backends import depth_convert - +from ..utils import ParameterError __all__ = [ # dsp @@ -726,4 +725,4 @@ def random_crop2d(s: array, crop_len: int, tempo_axis: int=0) -> array: sli = [slice(None) for i in range(s.ndim)] sli[tempo_axis] = slice(idx, idx + crop_len) out = s[tuple(sli)] - return out \ No newline at end of file + return out diff --git a/paddleaudio/paddleaudio/functional/window.py b/paddleaudio/paddleaudio/functional/window.py index e34862b4..f321b38e 100644 --- a/paddleaudio/paddleaudio/functional/window.py +++ b/paddleaudio/paddleaudio/functional/window.py @@ -20,7 +20,7 @@ from paddle import Tensor __all__ = [ 'get_window', - + # windows 'taylor', 'hamming', diff --git a/paddleaudio/paddleaudio/io/__init__.py b/paddleaudio/paddleaudio/io/__init__.py index 3a9a01e8..cc2538f7 100644 --- a/paddleaudio/paddleaudio/io/__init__.py +++ b/paddleaudio/paddleaudio/io/__init__.py @@ -1,6 +1,19 @@ -from .audio import save_wav +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .audio import depth_convert from .audio import load from .audio import normalize -from .audio import to_mono from .audio import resample -from .audio import depth_convert \ No newline at end of file +from .audio import save_wav +from .audio import to_mono diff --git a/paddleaudio/paddleaudio/kaldi/__init__.py b/paddleaudio/paddleaudio/kaldi/__init__.py index e69de29b..97043fd7 100644 --- a/paddleaudio/paddleaudio/kaldi/__init__.py +++ b/paddleaudio/paddleaudio/kaldi/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/paddleaudio/paddleaudio/metric/__init__.py b/paddleaudio/paddleaudio/metric/__init__.py new file mode 100644 index 00000000..97043fd7 --- /dev/null +++ b/paddleaudio/paddleaudio/metric/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/paddleaudio/paddleaudio/sox_effects/__init__.py b/paddleaudio/paddleaudio/sox_effects/__init__.py index e69de29b..97043fd7 100644 --- a/paddleaudio/paddleaudio/sox_effects/__init__.py +++ b/paddleaudio/paddleaudio/sox_effects/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/paddleaudio/paddleaudio/utils/__init__.py b/paddleaudio/paddleaudio/utils/__init__.py index 5fe0980b..afb9cedd 100644 --- a/paddleaudio/paddleaudio/utils/__init__.py +++ b/paddleaudio/paddleaudio/utils/__init__.py @@ -11,20 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -from .env import USER_HOME -from .env import PPAUDIO_HOME -from .env import MODEL_HOME -from .env import DATA_HOME - from .download import decompress from .download import download_and_decompress from .download import load_state_dict_from_url - +from .env import DATA_HOME +from .env import MODEL_HOME +from .env import PPAUDIO_HOME +from .env import USER_HOME from .error import ParameterError - -from .log import logger from .log import Logger - -from .time import Timer +from .log import logger from .time import seconds_to_hms +from .time import Timer diff --git a/paddleaudio/paddleaudio/utils/download.py b/paddleaudio/paddleaudio/utils/download.py index fd4785cd..4658352f 100644 --- a/paddleaudio/paddleaudio/utils/download.py +++ b/paddleaudio/paddleaudio/utils/download.py @@ -28,6 +28,7 @@ __all__ = [ 'load_state_dict_from_url', ] + def decompress(file: str): """ Extracts all files from a compressed file. 
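As a quick orientation for readers following this refactor, here is a minimal sketch of how the explicitly re-exported utilities above are consumed once the package is installed. It assumes the installed import path paddleaudio.utils maps onto the new paddleaudio/paddleaudio/utils directory, and the h:m:s return format of seconds_to_hms is an assumption for illustration; the names themselves come from the __init__.py shown above.

import numpy as np

# Names taken from the re-export list in paddleaudio/paddleaudio/utils/__init__.py.
from paddleaudio.utils import ParameterError, logger, seconds_to_hms

def check_tempo_axis(x: np.ndarray, tempo_axis: int) -> None:
    # ParameterError is the exception type the functional ops above raise for bad arguments.
    if tempo_axis >= x.ndim:
        raise ParameterError('tempo_axis out of range')

check_tempo_axis(np.zeros((100, 80)), tempo_axis=0)
logger.info('3661 seconds -> %s' % seconds_to_hms(3661))  # assumed to format as h:m:s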
diff --git a/paddleaudio/paddleaudio/utils/env.py b/paddleaudio/paddleaudio/utils/env.py index e202c380..a2d14b89 100644 --- a/paddleaudio/paddleaudio/utils/env.py +++ b/paddleaudio/paddleaudio/utils/env.py @@ -23,10 +23,11 @@ import os __all__ = [ 'USER_HOME', 'PPAUDIO_HOME', -'MODEL_HOME' , -'DATA_HOME' , + 'MODEL_HOME', + 'DATA_HOME', ] + def _get_user_home(): return os.path.expanduser('~') diff --git a/paddleaudio/paddleaudio/utils/time.py b/paddleaudio/paddleaudio/utils/time.py index 23af62fc..105208f9 100644 --- a/paddleaudio/paddleaudio/utils/time.py +++ b/paddleaudio/paddleaudio/utils/time.py @@ -19,6 +19,7 @@ __all__ = [ 'seconds_to_hms', ] + class Timer(object): '''Calculate runing speed and estimated time of arrival(ETA)''' diff --git a/paddleaudio/tests/.gitkeep b/paddleaudio/tests/.gitkeep new file mode 100644 index 00000000..e69de29b From 7249d0ba59b8ea8601f80c37d0e2c548c78079f0 Mon Sep 17 00:00:00 2001 From: huangyuxin Date: Fri, 25 Feb 2022 07:41:05 +0000 Subject: [PATCH 04/45] fix benchmark --- tests/test_tipc/configs/conformer/train_benchmark.txt | 2 +- tests/test_tipc/configs/pwgan/train_benchmark.txt | 2 +- tests/test_tipc/prepare.sh | 10 +++++++--- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/test_tipc/configs/conformer/train_benchmark.txt b/tests/test_tipc/configs/conformer/train_benchmark.txt index 3833f144..33b1debd 100644 --- a/tests/test_tipc/configs/conformer/train_benchmark.txt +++ b/tests/test_tipc/configs/conformer/train_benchmark.txt @@ -54,4 +54,4 @@ batch_size:16|30 fp_items:fp32 iteration:50 --profiler-options:"batch_range=[10,35];state=GPU;tracer_option=Default;profile_path=model.profile" -flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 +flags:null diff --git a/tests/test_tipc/configs/pwgan/train_benchmark.txt b/tests/test_tipc/configs/pwgan/train_benchmark.txt index e936da3c..c64984dc 100644 --- a/tests/test_tipc/configs/pwgan/train_benchmark.txt +++ b/tests/test_tipc/configs/pwgan/train_benchmark.txt @@ -54,4 +54,4 @@ batch_size:6|16 fp_items:fp32 iteration:50 --profiler_options:"batch_range=[10,35];state=GPU;tracer_option=Default;profile_path=model.profile" -flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 +flags:null diff --git a/tests/test_tipc/prepare.sh b/tests/test_tipc/prepare.sh index 0280e5d4..f10cb06c 100644 --- a/tests/test_tipc/prepare.sh +++ b/tests/test_tipc/prepare.sh @@ -26,7 +26,9 @@ if [ ${MODE} = "benchmark_train" ];then curPath=$(readlink -f "$(dirname "$0")") echo "curPath:"${curPath} cd ${curPath}/../.. - pip install . + apt-get install libsndfile1 + pip install pytest-runner kaldiio setuptools_scm -i https://pypi.tuna.tsinghua.edu.cn/simple + pip install . 
-i https://pypi.tuna.tsinghua.edu.cn/simple cd - if [ ${model_name} == "conformer" ]; then # set the URL for aishell_tiny dataset @@ -35,6 +37,8 @@ if [ ${MODE} = "benchmark_train" ];then if [ ${URL} == 'None' ];then echo "please contact author to get the URL.\n" exit + else + wget -P ${curPath}/../../dataset/aishell/ ${URL} fi sed -i "s#^URL_ROOT_TAG#URL_ROOT = '${URL}'#g" ${curPath}/conformer/scripts/aishell_tiny.py cp ${curPath}/conformer/scripts/aishell_tiny.py ${curPath}/../../dataset/aishell/ @@ -42,6 +46,7 @@ if [ ${MODE} = "benchmark_train" ];then source path.sh # download audio data sed -i "s#aishell.py#aishell_tiny.py#g" ./local/data.sh + sed -i "s#python3#python#g" ./local/data.sh bash ./local/data.sh || exit -1 if [ $? -ne 0 ]; then exit 1 @@ -56,7 +61,6 @@ if [ ${MODE} = "benchmark_train" ];then sed -i "s#conf/#test_tipc/conformer/benchmark_train/conf/#g" ${curPath}/conformer/benchmark_train/conf/conformer.yaml sed -i "s#data/#test_tipc/conformer/benchmark_train/data/#g" ${curPath}/conformer/benchmark_train/conf/tuning/decode.yaml sed -i "s#data/#test_tipc/conformer/benchmark_train/data/#g" ${curPath}/conformer/benchmark_train/conf/preprocess.yaml - fi if [ ${model_name} == "pwgan" ]; then @@ -73,4 +77,4 @@ if [ ${MODE} = "benchmark_train" ];then python ../paddlespeech/t2s/exps/gan_vocoder/normalize.py --metadata=dump/test/raw/metadata.jsonl --dumpdir=dump/test/norm --stats=dump/train/feats_stats.npy fi -fi \ No newline at end of file +fi From cea5728dd7c0be96578a13eb8ea3ddbe7665fc25 Mon Sep 17 00:00:00 2001 From: huangyuxin Date: Fri, 25 Feb 2022 08:01:15 +0000 Subject: [PATCH 05/45] fix unit test --- tests/unit/asr/deepspeech2_online_model_test.py | 3 ++- tests/unit/asr/deepspeech2_online_model_test.sh | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 tests/unit/asr/deepspeech2_online_model_test.sh diff --git a/tests/unit/asr/deepspeech2_online_model_test.py b/tests/unit/asr/deepspeech2_online_model_test.py index 3d634945..d26e5b15 100644 --- a/tests/unit/asr/deepspeech2_online_model_test.py +++ b/tests/unit/asr/deepspeech2_online_model_test.py @@ -191,7 +191,8 @@ class TestDeepSpeech2StaticModelOnline(unittest.TestCase): def setUp(self): export_prefix = "exp/deepspeech2_online/checkpoints/test_export" - os.makedirs( os.path.dirname(export_prefix), mode=0o755) + if not os.path.exists(os.path.dirname(export_prefix)): + os.makedirs(os.path.dirname(export_prefix), mode=0o755) infer_model = DeepSpeech2InferModelOnline( feat_size=161, dict_size=4233, diff --git a/tests/unit/asr/deepspeech2_online_model_test.sh b/tests/unit/asr/deepspeech2_online_model_test.sh new file mode 100644 index 00000000..cd5a2d3a --- /dev/null +++ b/tests/unit/asr/deepspeech2_online_model_test.sh @@ -0,0 +1,3 @@ +mkdir -p test_data +wget -P test_data https://paddlespeech.bj.bcebos.com/datasets/unit_test/asr/static_ds2online_inputs.pickle +python deepspeech2_online_model_test.py From f2eb5db0bdc7fdc350993478797c5bfc3862b5e1 Mon Sep 17 00:00:00 2001 From: huangyuxin Date: Fri, 25 Feb 2022 08:49:34 +0000 Subject: [PATCH 06/45] rename config files --- .../conformer/{train_benchmark.txt => train_infer_python.txt} | 0 .../pwgan/{train_benchmark.txt => train_infer_python.txt} | 0 tests/test_tipc/prepare.sh | 2 +- 3 files changed, 1 insertion(+), 1 deletion(-) rename tests/test_tipc/configs/conformer/{train_benchmark.txt => train_infer_python.txt} (100%) rename tests/test_tipc/configs/pwgan/{train_benchmark.txt => train_infer_python.txt} (100%) diff --git 
a/tests/test_tipc/configs/conformer/train_benchmark.txt b/tests/test_tipc/configs/conformer/train_infer_python.txt similarity index 100% rename from tests/test_tipc/configs/conformer/train_benchmark.txt rename to tests/test_tipc/configs/conformer/train_infer_python.txt diff --git a/tests/test_tipc/configs/pwgan/train_benchmark.txt b/tests/test_tipc/configs/pwgan/train_infer_python.txt similarity index 100% rename from tests/test_tipc/configs/pwgan/train_benchmark.txt rename to tests/test_tipc/configs/pwgan/train_infer_python.txt diff --git a/tests/test_tipc/prepare.sh b/tests/test_tipc/prepare.sh index f10cb06c..b46b2032 100644 --- a/tests/test_tipc/prepare.sh +++ b/tests/test_tipc/prepare.sh @@ -32,7 +32,7 @@ if [ ${MODE} = "benchmark_train" ];then cd - if [ ${model_name} == "conformer" ]; then # set the URL for aishell_tiny dataset - URL='None' + URL=${conformer_data_URL:-"None"} echo "URL:"${URL} if [ ${URL} == 'None' ];then echo "please contact author to get the URL.\n" From 852d0ab92b41b4a8b85a2d134ddffc8dfd8b608a Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Fri, 25 Feb 2022 09:48:23 +0000 Subject: [PATCH 07/45] dtw metric for tts, test=doc --- paddleaudio/CHANGELOG.md | 1 + paddleaudio/paddleaudio/metric/__init__.py | 2 + paddleaudio/paddleaudio/metric/dtw.py | 42 +++++++++++++++++++ paddleaudio/paddleaudio/metric/mcd.py | 47 ++++++++++++++++++++++ paddleaudio/setup.py | 2 + 5 files changed, 94 insertions(+) create mode 100644 paddleaudio/paddleaudio/metric/dtw.py create mode 100644 paddleaudio/paddleaudio/metric/mcd.py diff --git a/paddleaudio/CHANGELOG.md b/paddleaudio/CHANGELOG.md index 52d44dd3..91b0fef0 100644 --- a/paddleaudio/CHANGELOG.md +++ b/paddleaudio/CHANGELOG.md @@ -2,3 +2,4 @@ Date: 2022-2-25, Author: Hui Zhang. - Refactor architecture. + - dtw distance and mcd style dtw diff --git a/paddleaudio/paddleaudio/metric/__init__.py b/paddleaudio/paddleaudio/metric/__init__.py index 97043fd7..a96530ff 100644 --- a/paddleaudio/paddleaudio/metric/__init__.py +++ b/paddleaudio/paddleaudio/metric/__init__.py @@ -11,3 +11,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from .dtw import dtw_distance +from .mcd import mcd_distance diff --git a/paddleaudio/paddleaudio/metric/dtw.py b/paddleaudio/paddleaudio/metric/dtw.py new file mode 100644 index 00000000..d27f56e2 --- /dev/null +++ b/paddleaudio/paddleaudio/metric/dtw.py @@ -0,0 +1,42 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +from dtaidistance import dtw_ndim + +__all__ = [ + 'dtw_distance', +] + + +def dtw_distance(xs: np.ndarray, ys: np.ndarray) -> float: + """dtw distance + + Dynamic Time Warping. + This function keeps a compact matrix, not the full warping paths matrix. 
+ Uses dynamic programming to compute: + + wps[i, j] = (s1[i]-s2[j])**2 + min( + wps[i-1, j ] + penalty, // vertical / insertion / expansion + wps[i , j-1] + penalty, // horizontal / deletion / compression + wps[i-1, j-1]) // diagonal / match + dtw = sqrt(wps[-1, -1]) + + Args: + xs (np.ndarray): ref sequence, [T,D] + ys (np.ndarray): hyp sequence, [T,D] + + Returns: + float: dtw distance + """ + return dtw_ndim.distance(xs, ys) diff --git a/paddleaudio/paddleaudio/metric/mcd.py b/paddleaudio/paddleaudio/metric/mcd.py new file mode 100644 index 00000000..281e5765 --- /dev/null +++ b/paddleaudio/paddleaudio/metric/mcd.py @@ -0,0 +1,48 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import mcd.metrics_fast as mt +import numpy as np +from mcd import dtw + +__all__ = [ + 'mcd_distance', +] + + +def mcd_distance(xs: np.ndarray, ys: np.ndarray, cost_fn=mt.logSpecDbDist): + """Mel cepstral distortion (MCD), dtw distance. + + Dynamic Time Warping. + Uses dynamic programming to compute: + wps[i, j] = cost_fn(xs[i], ys[j]) + min( + wps[i-1, j ], // vertical / insertion / expansion + wps[i , j-1], // horizontal / deletion / compression + wps[i-1, j-1]) // diagonal / match + dtw = sqrt(wps[-1, -1]) + + Cost Function: + logSpecDbConst = 10.0 / math.log(10.0) * math.sqrt(2.0) + def logSpecDbDist(x, y): + diff = x - y + return logSpecDbConst * math.sqrt(np.inner(diff, diff)) + + Args: + xs (np.ndarray): ref sequence, [T,D] + ys (np.ndarray): hyp sequence, [T,D] + + Returns: + float: dtw distance + """ + min_cost, path = dtw.dtw(xs, ys, cost_fn) + return min_cost diff --git a/paddleaudio/setup.py b/paddleaudio/setup.py index 98bf8a6f..7623443a 100644 --- a/paddleaudio/setup.py +++ b/paddleaudio/setup.py @@ -59,6 +59,8 @@ setuptools.setup( 'resampy >= 0.2.2', 'soundfile >= 0.9.0', 'colorlog', + 'dtaidistance >= 2.3.6', + 'mcd >= 0.4', ], ) remove_version_py() From f8375764b998af28a31983c76db11e2434ca6aae Mon Sep 17 00:00:00 2001 From: lym0302 Date: Fri, 25 Feb 2022 19:21:18 +0800 Subject: [PATCH 08/45] add paddlespeech stats, test=doc --- paddlespeech/cli/__init__.py | 1 + paddlespeech/cli/stats/__init__.py | 14 +++ paddlespeech/cli/stats/infer.py | 145 +++++++++++++++++++++++++++++ setup.py | 1 + 4 files changed, 161 insertions(+) create mode 100644 paddlespeech/cli/stats/__init__.py create mode 100644 paddlespeech/cli/stats/infer.py diff --git a/paddlespeech/cli/__init__.py b/paddlespeech/cli/__init__.py index cecf76fe..12ff9919 100644 --- a/paddlespeech/cli/__init__.py +++ b/paddlespeech/cli/__init__.py @@ -20,5 +20,6 @@ from .cls import CLSExecutor from .st import STExecutor from .text import TextExecutor from .tts import TTSExecutor +from .stats import StatsExecutor _locale._getdefaultlocale = (lambda *args: ['en_US', 'utf8']) diff --git a/paddlespeech/cli/stats/__init__.py b/paddlespeech/cli/stats/__init__.py new file mode 100644 index 00000000..9fe6c4ab --- /dev/null +++ b/paddlespeech/cli/stats/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2021
PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .infer import StatsExecutor diff --git a/paddlespeech/cli/stats/infer.py b/paddlespeech/cli/stats/infer.py new file mode 100644 index 00000000..c50fc4f9 --- /dev/null +++ b/paddlespeech/cli/stats/infer.py @@ -0,0 +1,145 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +from typing import List + +from prettytable import PrettyTable + +from ..log import logger +from ..utils import cli_register +from ..utils import stats_wrapper + +__all__ = ['StatsExecutor'] + +model_name_format = { + 'asr': 'Model-Language-Sample Rate', + 'cls': 'Model-Sample Rate', + 'st': 'Model-Source language-Target language', + 'text': 'Model-Task-Sample Rate', + 'tts': 'Model-Language' +} + + +@cli_register(name='paddlespeech.stats', description='Text infer command.') +class StatsExecutor(): + def __init__(self): + super(StatsExecutor, self).__init__() + + self.parser = argparse.ArgumentParser( + prog='paddlespeech.stats', add_help=True) + self.parser.add_argument( + '--task', + type=str, + default='asr', + choices=['asr', 'cls', 'st', 'text', 'tts'], + help='Choose speech task.', + required=True) + self.task_choices = ['asr', 'cls', 'st', 'text', 'tts'] + + def show_support_models(self, pretrained_models: dict): + fields = model_name_format[self.task].split("-") + table = PrettyTable(fields) + for key in pretrained_models: + table.add_row(key.split("-")) + print(table) + + def execute(self, argv: List[str]) -> bool: + """ + Command line entry. 
+ """ + parser_args = self.parser.parse_args(argv) + self.task = parser_args.task + if self.task not in self.task_choices: + logger.error( + "Please input correct speech task, choices = ['asr', 'cls', 'st', 'text', 'tts']" + ) + return False + + if self.task == 'asr': + try: + from ..asr.infer import pretrained_models + logger.info( + "Here is the list of ASR pretrained models released by PaddleSpeech that can be used by command line and python API" + ) + self.show_support_models(pretrained_models) + # TODO show pretrained static model + return True + except BaseException: + logger.error("Failed to get the list of ASR pretrained models.") + return False + + elif self.task == 'cls': + try: + from ..cls.infer import pretrained_models + logger.info( + "Here is the list of CLS pretrained models released by PaddleSpeech that can be used by command line and python API" + ) + self.show_support_models(pretrained_models) + return True + except BaseException: + logger.error("Failed to get the list of CLS pretrained models.") + return False + + elif self.task == 'st': + try: + from ..st.infer import pretrained_models + logger.info( + "Here is the list of ST pretrained models released by PaddleSpeech that can be used by command line and python API" + ) + self.show_support_models(pretrained_models) + return True + except BaseException: + logger.error("Failed to get the list of ST pretrained models.") + return False + + elif self.task == 'text': + try: + from ..text.infer import pretrained_models + logger.info( + "Here is the list of TEXT pretrained models released by PaddleSpeech that can be used by command line and python API" + ) + self.show_support_models(pretrained_models) + return True + except BaseException: + logger.error( + "Failed to get the list of TEXT pretrained models.") + return False + + elif self.task == 'tts': + try: + from ..tts.infer import pretrained_models + logger.info( + "Here is the list of TTS pretrained models released by PaddleSpeech that can be used by command line and python API" + ) + self.show_support_models(pretrained_models) + # TODO show pretrained static model + return True + except BaseException: + logger.error("Failed to get the list of TTS pretrained models.") + return False + + @stats_wrapper + def __call__( + self, + task: str=None, ): + """ + Python API to call an executor. 
+ """ + if task not in ['asr', 'cls', 'st', 'text', 'tts']: + print( + "Please input correct speech task, choices = ['asr', 'cls', 'st', 'text', 'tts']" + ) + res = "" + + return res diff --git a/setup.py b/setup.py index 9bb11d0d..ca19a575 100644 --- a/setup.py +++ b/setup.py @@ -66,6 +66,7 @@ requirements = { # fastapi server "fastapi", "uvicorn", + "prettytable" ], "develop": [ "ConfigArgParse", From 50bcb581410760f6f107ece977eca76a1f62f350 Mon Sep 17 00:00:00 2001 From: huangyuxin Date: Fri, 25 Feb 2022 11:40:10 +0000 Subject: [PATCH 09/45] add ctc_loss speed compare topic, test=doc --- docs/topic/ctc/ctc_loss_speed_compare.ipynb | 356 ++++++++++++++++++++ 1 file changed, 356 insertions(+) create mode 100644 docs/topic/ctc/ctc_loss_speed_compare.ipynb diff --git a/docs/topic/ctc/ctc_loss_speed_compare.ipynb b/docs/topic/ctc/ctc_loss_speed_compare.ipynb new file mode 100644 index 00000000..0682247f --- /dev/null +++ b/docs/topic/ctc/ctc_loss_speed_compare.ipynb @@ -0,0 +1,356 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a1e738e0", + "metadata": {}, + "source": [ + "## 获取测试的 logit 数据" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "29d3368b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "hlens.npy\n", + "logits.npy\n", + "ys_lens.npy\n", + "ys_pad.npy\n" + ] + } + ], + "source": [ + "!mkdir -p ./test_data\n", + "!test -f ./test_data/ctc_loss_compare_data.tgz || wget -P ./test_data https://paddlespeech.bj.bcebos.com/datasets/unit_test/asr/ctc_loss_compare_data.tgz\n", + "!tar xzvf test_data/ctc_loss_compare_data.tgz -C ./test_data\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "240caf1d", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import numpy as np\n", + "import time\n", + "\n", + "data_dir=\"./test_data\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "91bad949", + "metadata": {}, + "outputs": [], + "source": [ + "logits_np = np.load(os.path.join(data_dir, \"logits.npy\"))\n", + "ys_pad_np = np.load(os.path.join(data_dir, \"ys_pad.npy\"))\n", + "hlens_np = np.load(os.path.join(data_dir, \"hlens.npy\"))\n", + "ys_lens_np = np.load(os.path.join(data_dir, \"ys_lens.npy\"))" + ] + }, + { + "cell_type": "markdown", + "id": "4cef2f15", + "metadata": {}, + "source": [ + "## 使用 torch 的 ctc loss" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "90612004", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'1.10.1+cu102'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import torch\n", + "torch.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "00799f97", + "metadata": {}, + "outputs": [], + "source": [ + "def torch_ctc_loss(use_cpu):\n", + " if use_cpu:\n", + " device = torch.device(\"cpu\")\n", + " else:\n", + " device = torch.device(\"cuda\")\n", + "\n", + " reduction_type = \"sum\" \n", + "\n", + " ctc_loss = torch.nn.CTCLoss(reduction=reduction_type)\n", + "\n", + " ys_hat = torch.tensor(logits_np, device = device)\n", + " ys_pad = torch.tensor(ys_pad_np, device = device)\n", + " hlens = torch.tensor(hlens_np, device = device)\n", + " ys_lens = torch.tensor(ys_lens_np, device = device)\n", + "\n", + " ys_hat = ys_hat.transpose(0, 1)\n", + " \n", + " # 开始计算时间\n", + " start_time = time.time()\n", + " ys_hat = ys_hat.log_softmax(2)\n", + " loss = ctc_loss(ys_hat, ys_pad, hlens, ys_lens)\n", + " end_time = 
time.time()\n", + " \n", + " loss = loss / ys_hat.size(1)\n", + " return end_time - start_time, loss.item()" + ] + }, + { + "cell_type": "markdown", + "id": "ba47b5a4", + "metadata": {}, + "source": [ + "## 使用 paddle 的 ctc loss" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "6882a06e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'2.2.2'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import paddle\n", + "paddle.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "3cfa3b7c", + "metadata": {}, + "outputs": [], + "source": [ + "def paddle_ctc_loss(use_cpu): \n", + " import paddle.nn as pn\n", + " if use_cpu:\n", + " device = \"cpu\"\n", + " else:\n", + " device = \"gpu\"\n", + "\n", + " paddle.set_device(device)\n", + "\n", + " logits = paddle.to_tensor(logits_np)\n", + " ys_pad = paddle.to_tensor(ys_pad_np,dtype='int32')\n", + " hlens = paddle.to_tensor(hlens_np, dtype='int64')\n", + " ys_lens = paddle.to_tensor(ys_lens_np, dtype='int64')\n", + "\n", + " logits = logits.transpose([1,0,2])\n", + "\n", + " ctc_loss = pn.CTCLoss(reduction='sum')\n", + " # 开始计算时间\n", + " start_time = time.time()\n", + " pn_loss = ctc_loss(logits, ys_pad, hlens, ys_lens)\n", + " end_time = time.time()\n", + " \n", + " pn_loss = pn_loss / logits.shape[1]\n", + " return end_time - start_time, pn_loss.item()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "40413ef9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU, iteration 10\n", + "torch_ctc_loss 159.17137145996094\n", + "paddle_ctc_loss 159.16574096679688\n", + "paddle average time 1.718252992630005\n", + "torch average time 0.17536230087280275\n", + "paddle time / torch time (cpu) 9.798303193320452\n", + "\n", + "GPU, iteration 10\n", + "torch_ctc_loss 159.172119140625\n", + "paddle_ctc_loss 159.17205810546875\n", + "paddle average time 0.018606925010681154\n", + "torch average time 0.0026710033416748047\n", + "paddle time / torch time (gpu) 6.966267963938231\n" + ] + } + ], + "source": [ + "# 使用 CPU\n", + "\n", + "iteration = 10\n", + "use_cpu = True\n", + "torch_total_time = 0\n", + "paddle_total_time = 0\n", + "for _ in range(iteration):\n", + " cost_time, torch_loss = torch_ctc_loss(use_cpu)\n", + " torch_total_time += cost_time\n", + "for _ in range(iteration):\n", + " cost_time, paddle_loss = paddle_ctc_loss(use_cpu)\n", + " paddle_total_time += cost_time\n", + "print (\"CPU, iteration\", iteration)\n", + "print (\"torch_ctc_loss\", torch_loss)\n", + "print (\"paddle_ctc_loss\", paddle_loss)\n", + "print (\"paddle average time\", paddle_total_time / iteration)\n", + "print (\"torch average time\", torch_total_time / iteration)\n", + "print (\"paddle time / torch time (cpu)\" , paddle_total_time/ torch_total_time)\n", + "\n", + "print (\"\")\n", + "\n", + "# 使用 GPU\n", + "\n", + "use_cpu = False\n", + "torch_total_time = 0\n", + "paddle_total_time = 0\n", + "for _ in range(iteration):\n", + " cost_time, torch_loss = torch_ctc_loss(use_cpu)\n", + " torch_total_time += cost_time\n", + "for _ in range(iteration):\n", + " cost_time, paddle_loss = paddle_ctc_loss(use_cpu)\n", + " paddle_total_time += cost_time\n", + "print (\"GPU, iteration\", iteration)\n", + "print (\"torch_ctc_loss\", torch_loss)\n", + "print (\"paddle_ctc_loss\", paddle_loss)\n", + "print (\"paddle average time\", paddle_total_time / iteration)\n", + "print (\"torch average 
time\", torch_total_time / iteration)\n", + "print (\"paddle time / torch time (gpu)\" , paddle_total_time/ torch_total_time)" + ] + }, + { + "cell_type": "markdown", + "id": "7cdf8697", + "metadata": {}, + "source": [ + "## 其他: 使用 PaddleSpeech 中的 ctcloss 查一下loss值" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "73fad81d", + "metadata": {}, + "outputs": [], + "source": [ + "logits_np = np.load(os.path.join(data_dir, \"logits.npy\"))\n", + "ys_pad_np = np.load(os.path.join(data_dir, \"ys_pad.npy\"))\n", + "hlens_np = np.load(os.path.join(data_dir, \"hlens.npy\"))\n", + "ys_lens_np = np.load(os.path.join(data_dir, \"ys_lens.npy\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "2b41e45d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2022-02-25 11:34:34.143 | INFO | paddlespeech.s2t.modules.loss:__init__:41 - CTCLoss Loss reduction: sum, div-bs: True\n", + "2022-02-25 11:34:34.143 | INFO | paddlespeech.s2t.modules.loss:__init__:42 - CTCLoss Grad Norm Type: instance\n", + "2022-02-25 11:34:34.144 | INFO | paddlespeech.s2t.modules.loss:__init__:73 - CTCLoss() kwargs:{'norm_by_times': True}, not support: {'norm_by_batchsize': False, 'norm_by_total_logits_len': False}\n", + "loss 159.17205810546875\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/root/miniconda3/lib/python3.7/site-packages/paddle/fluid/dygraph/math_op_patch.py:253: UserWarning: The dtype of left and right variables are not the same, left dtype is paddle.float32, but right dtype is paddle.int32, the right dtype will convert to paddle.float32\n", + " format(lhs_dtype, rhs_dtype, lhs_dtype))\n" + ] + } + ], + "source": [ + "use_cpu = False\n", + "\n", + "from paddlespeech.s2t.modules.loss import CTCLoss\n", + "\n", + "if use_cpu:\n", + " device = \"cpu\"\n", + "else:\n", + " device = \"gpu\"\n", + "\n", + "paddle.set_device(device)\n", + "\n", + "blank_id=0\n", + "reduction_type='sum'\n", + "batch_average= True\n", + "grad_norm_type='instance'\n", + "\n", + "criterion = CTCLoss(\n", + " blank=blank_id,\n", + " reduction=reduction_type,\n", + " batch_average=batch_average,\n", + " grad_norm_type=grad_norm_type)\n", + "\n", + "logits = paddle.to_tensor(logits_np)\n", + "ys_pad = paddle.to_tensor(ys_pad_np,dtype='int32')\n", + "hlens = paddle.to_tensor(hlens_np, dtype='int64')\n", + "ys_lens = paddle.to_tensor(ys_lens_np, dtype='int64')\n", + "\n", + "pn_ctc_loss = criterion(logits, ys_pad, hlens, ys_lens)\n", + "print(\"loss\", pn_ctc_loss.item())\n", + " " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 35357e775e74ca94bbcb0aefc6ffa15a33875c05 Mon Sep 17 00:00:00 2001 From: lym0302 Date: Fri, 25 Feb 2022 19:46:44 +0800 Subject: [PATCH 10/45] update, test=doc --- paddlespeech/cli/stats/infer.py | 22 +++++++++++++++++++--- setup.py | 2 +- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/paddlespeech/cli/stats/infer.py b/paddlespeech/cli/stats/infer.py index c50fc4f9..7e6df3d2 100644 --- a/paddlespeech/cli/stats/infer.py +++ b/paddlespeech/cli/stats/infer.py @@ -31,7 +31,9 @@ model_name_format = { } 
-@cli_register(name='paddlespeech.stats', description='Text infer command.') +@cli_register( + name='paddlespeech.stats', + description='Get speech tasks support models list.') class StatsExecutor(): def __init__(self): super(StatsExecutor, self).__init__() @@ -73,7 +75,14 @@ class StatsExecutor(): "Here is the list of ASR pretrained models released by PaddleSpeech that can be used by command line and python API" ) self.show_support_models(pretrained_models) - # TODO show pretrained static model + + # show ASR static pretrained model + from paddlespeech.server.engine.asr.paddleinference.asr_engine import pretrained_models + logger.info( + "Here is the list of ASR static pretrained models released by PaddleSpeech that can be used by command line and python API" + ) + self.show_support_models(pretrained_models) + return True except BaseException: logger.error("Failed to get the list of ASR pretrained models.") @@ -123,7 +132,14 @@ class StatsExecutor(): "Here is the list of TTS pretrained models released by PaddleSpeech that can be used by command line and python API" ) self.show_support_models(pretrained_models) - # TODO show pretrained static model + + # show TTS static pretrained model + from paddlespeech.server.engine.tts.paddleinference.tts_engine import pretrained_models + logger.info( + "Here is the list of TTS static pretrained models released by PaddleSpeech that can be used by command line and python API" + ) + self.show_support_models(pretrained_models) + return True except BaseException: logger.error("Failed to get the list of TTS pretrained models.") diff --git a/setup.py b/setup.py index 0823cc38..d7bd9682 100644 --- a/setup.py +++ b/setup.py @@ -62,13 +62,13 @@ base = [ "visualdl", "webrtcvad", "yacs~=0.1.8", + "prettytable", ] server = [ "fastapi", "uvicorn", "pattern_singleton", - "prettytable", ] requirements = { From d60813a9e79af3f9e1bc474a56f3a8a08159921a Mon Sep 17 00:00:00 2001 From: huangyuxin Date: Fri, 25 Feb 2022 11:47:15 +0000 Subject: [PATCH 11/45] remove pickle data --- tests/unit/asr/deepspeech2_online_model_test.sh | 4 ++-- .../test_data/static_ds2online_inputs.pickle | Bin 45895 -> 0 bytes 2 files changed, 2 insertions(+), 2 deletions(-) delete mode 100644 tests/unit/asr/test_data/static_ds2online_inputs.pickle diff --git a/tests/unit/asr/deepspeech2_online_model_test.sh b/tests/unit/asr/deepspeech2_online_model_test.sh index cd5a2d3a..629238fd 100644 --- a/tests/unit/asr/deepspeech2_online_model_test.sh +++ b/tests/unit/asr/deepspeech2_online_model_test.sh @@ -1,3 +1,3 @@ -mkdir -p test_data -wget -P test_data https://paddlespeech.bj.bcebos.com/datasets/unit_test/asr/static_ds2online_inputs.pickle +mkdir -p ./test_data +wget -P ./test_data https://paddlespeech.bj.bcebos.com/datasets/unit_test/asr/static_ds2online_inputs.pickle python deepspeech2_online_model_test.py diff --git a/tests/unit/asr/test_data/static_ds2online_inputs.pickle b/tests/unit/asr/test_data/static_ds2online_inputs.pickle deleted file mode 100644 index 8aca0543ad69f8bb64b77664375bbd281a3747c6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 [45,895 bytes of base85-encoded binary patch data for the deleted pickle omitted]
zJNvfrAK$qQ_v|re_IVZaAKF|e+6Rou$cI;8+^x;j)e92n<_S5o~e@b#2-{_=b} zG*bz!9+yOZI-jY{>44s)B#AulX7jgJC{s2gdRS3bgMNS58J{_S3wHEef?3(#M8MQH zoQ%AEMB2T6O22w_#9GTK@q-ep0uJ6n>u+4+@@Z&*w6E`Q*w`a2V8 zu5C=g+~e@myKEd{c^MeUm|jSBPD02*;F zyd4XE2;Ju%g`4*&pg3s@Zqf-!G+6fl8`jNYoMXEoXLc{|a`}0odEF~mw8>A@E^&yS z%ULa|TvmfsPbTn#ZddVb%+$!w50~Nj-H&0j$2fd<$bH#?`G*i?ue2n^R zhD7^^r_rw2>7uZ<9y(834&8{IPj}1f(ns4{=pQ_D=4eV95u^|;YO%Ev#Wbl48=0%% z)j=P6#)>?eR@O#Vj~^0h$;$+tRinHOZIEqVp$wE>T%kfv8uH64{{sKq-{i`tBG|8* zEyPjM+&5Ex!Vl;qW&Yn7Z==gLT6NrwcW37))0aF9w?$4vJrYTL|0`$d^mmJ}0jox- zS>{kLmTJ&mvyCNexs?d=8SfL}JsuX)r<$Q?QecLj2(@(apsL~j*b z+i?*nG)ocBB^D7M60P{Uoxx1Z4t?~gxQsXDd=6~kO@iAGI^gXk@|NgtB(%CYXw5%GCUChm^_za4CtKWzj8;9Y^V^$RC zE#sXJ6GEA3EBQ%)r7b^{KwWD~cIj7LYG$Z21pQa|2O9np{gke{g+s67xJF5d^rF2R1Pd1*A(n=VGmUgIysLPRo27PLw4$YBicCT0l6#hCmLL(ug9GX!UP$C&+ZhTyFKVF<=3ALQ<>+y%eM^}zbR3$Rmh zCTL#z2U~uLVydEa*sD1__);VV6;9Q|8A%V|yP?~3e)b0@E%hHDbN9fAr|qC%Vjpw( z$Qo!GBZT8Y71;h~4s)$~6*JFn3|K{dVSYa*n99?N@Fh{ktUk8@#->{1l#A}j<%St! zn41Tkr=Eh($Fp#5Q!GwRI?ZTh{-PCsOTvKab9mZ^Ka6z%moBiIgLR^3!8LR$c6^u( zXI9eGEu(yz&-gG!j)VMa*q@F~%RA8oiA7eYypFP!wR}-t?6Q}_V zG6HbUG$Sn0c?-n+GiP+}0d{oD17_brMYg0i7VdKIz`bA;|7iIOcHRntC)ZwMdiiG9 zYeXW09DT{T8HswPWqyzXRF2dgPe~I~YAS2)(xDK&&$lF`o^v zf$1dn(e3%rDLxN&zj;SO?22`cWpj=jm5bhRZPiOHFj`36x*~E zV%x6@Bz$E_-(GYKR7~<_A5>L9)mlv)Q=bE>&S}EK$OR9)*2CYmmXMv<&Nyn;M;u1Q zqWag>IQG|hSh9T~(sz-iu)%+^%=GKw?9vSd6Z9g{FJP6~vrXcB?_n5Nl z{;&iovB&wz46&s`qb4h$LH@*N4-k%7Cl$H0D;LAzb%j z7xeoj#7l!tFh6fRCaYiCvW<;>%>0#47^39>{r0F00=MF@bBI0!8=TNUZ5})#tH~y9 zkB6>FJ8@B59{roX0oOOo#nsLo&{caDHosR)mtBjc4<7prG|khP9aGc6jqaIj*oF*X zyx9~Lxfr1}x2#!?voxyTUkOjbschTdRM1215QB~)3nkxjij!5kP!V;YZq zp^mZ$8|n4XyfzC;=qZwN>F?IKW1afVw`?V z6<<{6LIo29Kdd+iGh2OO?^+`$m@r|Nd1#=FoMLElKaY;Mv=Lvrv4xrUCk8FKBFj!S z-wxY|)ws!z!7gi7U~elU9KJRH?l3DLL;Pa*@%JC^`{ zM9f9vbPv33wGzKKo(JOm+#vH*i&|-C#&+nNL!T05_RaAUX!h>{-e^UW_ZDA;mtR;z zh51jJr^+RGahr1nZy7WZ(X$ zF2;uG;u9;jF(w;rQDi{?RCd>98osGOE4_zgO#C|Z>+>!0?#8p&rvDNn)3q9VS3H2% z6PGgUWJjT;Kn5)^Tfn{voxxV$abzM|L!p@;7v$EDfv0j}pH)~RO#a;tZ;UL*HkOm| z!h4!9>`@smTxmdToaD{u&B-TtYqy}=F5&RP^P{kBKob63KLgjK&u4xLUiLc~)G?Wa+2iEB>}PAWH`{ukOTT&_a;0-vDyj3bBD_6O_9kU{qYr z(uzZ+%zJMM^suxOxNWvTgw#SD_uUlgO?O2ZB{JyiUy`<69RjtL%b1rt+nGbsJE3;< zLU?TLOS~yTmA!n{hc28W?xP^e=&q}qSd#-1r11f5@WE^Z`Yh7Kt8bIY$*GswQ89pB z@`mU@+fU4db}mzUqXABCh=eI|4D|mo8>`jHLa@S@{C=#G|0~FcHIA`|vwJLASE$C$ zzNrjP*KLA2dyC;~-FKkqy#W-6mN9$FGjPeFXF%K3jaJw%V8+gBpgi9?Ona@thqD&4 zuj`AzRo{P@|VOu@Xx+V;ImTh8FJC`!8HHy&BcrQG=odC9Z zuITgqWN=<<3QDmp!};$lak=>ic>bRh3KUL3)_yy%{$6>cFk*)t9j%!OZw#|n+Tskp z_zrdTM-|718U9Wl^QCka9grxbVYwG7b~yu|zD&hOMqbiSqhrwK>EU2l(Sq4*bQ5e$ z@MVn;D#L;uv(T1^sc5t_7+HT3b0|j;aGjcri>xNWcingJ=xbwk%G&F6(X%LawABX0 z>?{Q@>!TU#uEW4FY7E+Dt--6e+~@l|%z{eHDX98$A3V?h3#Q$#WClmB(MfkG=*+%m z9u-Z&5fKqgn|uyk5Gw;+I$Q98#lOi%xl2)wxEp2ugKGMDcL}xXW|?><>0laT)1m8! 
zcwn+r3JuX$A^yYCPaKVvT1@^(7@^}rBsvG!zr zXfxPhY7D)HbdYG83B#4Y4u;RwF^wx+(A1J)ST?Pd3Hn`xABwe_z1^Sj%GWbt;|CS= z&@BuG&Dn+B@=U;%hCp~jbrbw^yo$DQ3q;GBn?YoYBAQ|83A}BV;|i@4Oq0%YIx-;> zW@pVn`d6fY?wS2C+C~a|UMz(x*kUL<_z7fHr@-(_`Y`U?XQ+Q96Et1EjZ-S~@yL=O z9JT%s*p;Y{`=KffIkgy{XXZo)Yk%@`*8+9gT7Ebx^%54v#Ujq2N*< zV<;JerJt!m|GppezBkR#An-nv@+yOF;|a{RO(mcwSpb!$cfqZ*nqkPdM5q>Y8YYf4 zgVT>C*|%vtu*%pOFi9B>jNoj9OfK#}`KyrQ;;=M`x(D9xTc*fJMKR z*#DgG2!hg_!Fr3`cqnQn%v71gNJ^Z+`F@Xaf0F{-ca6(Vu8SmZ)k^?sojQ`0F2RrF zRzoAJ5}c#EoK5@p7Cl01FPujD=TaglzI%U32WZS zlZ$5d&~m0?U*psv!7^Xa0dUH_r#Y7wYy zdIn@{#=y}p_duj|A4XRWFrHV|qs_P2KQ$J;xD>@asY`^W-QxEU?>UoavWhvm{XHzT{mf7NxdJra%E8-@sNnh9 z5+wc48sA+c<{l*npp9t^P<`cye!dQeVD%Evy44BuzV> zB>U<1E8I2579YHHL3}m)!?_8A%v`5fq$P$O6s2FFKWMwcdt0tU+s_U7qg@SHz_(|w z-*Ewo3!<3kU-O_pkie4z(qKq!G<(@)IjZNR;131*=!;qpoHscaiY6@4{fMpT2dbs} z?{5ZAH$}jWl0ncYtdjiZ7KRkojpIpMJlHsmMi_W(K5G(I4fl)hlD*QWnKKjj;ha_5 zLF&K+d|JO7USIVMsOFzx#=ILroWlq>R3Xj~skbt-xE`z=bDO-M7lk#_+Husd4V3n6 z!(M--fRX7Cy>s>%W_RH@?DE+|rWoCTN<%jI)A@GX=mp?5GYKrUBa-zgm4jD}M2uTl z2u{iFg@c}fOlx>P=m~uW1WumJz*TW)*y-y`o7GVW4L_2gwg#M9-vPuuKq$(j5c)aA zFy*x;8Cj3R%pQsbYYLXak{_n9bZsy5;9v^*-l-q2GRwtUn-+th9rIz)pG+W|d;~V| zRM~%%PD2Z~3A{DzB$L_dh1W!XpS>nA07ZWz;8Rm ze3b3dV1w=uJd>{h-^klzzpZktr@uFBYGfI|-+3_f_-sb?{u`LOP9Gf!TtJV)05;&P zIuq>~0hb-=fd};inWSZxn0J#LK*<@1AH~l^n`<7x>VTj8yIxXE+j%XZ=COnwj+qH$ zOxmEsi3D_P)d{$MCLhJc&mgSuMYu_G6*6j50n%IqTu+A9ZhN_7(9&+&R z<_co?t_LgM-A5n2pNNcWgV2(2b$I)o*vtRTjy=_Jg$damOWxXkpJ=}~S?v9FWwSr^ z;I~%};d_I3;L*b_?7+I+z;wWs9bCJDa%$A4sZTqo;l0~f=YMBl?SWpnejp0v8O=bi zqSmuEQHj{(c|QJ@t%4>#y<)~A@4~vTf#|7ZDb&C3hw|^bu$R0faN4LO`{;Ng`(ST1 z{JL-eua?}(ZW{Ru*L6%`zpXt0p)J6>mrrA_zwCpN{ma;QML9&l5gFukJP~=1lI+_3 z#khB99}s=~3+H61pkCi4DD3n%X6H3|)0%oKFBa5JnI)lgDZ^2p+Jwdhye1F-z61Y^2u9#$OwpsJ&$+ zdv4Bk@%Ji#T|5o6KO_`Qerd?+*)Cn0;H zh}D~!urDWG#+|2R!e_&tm`=Fl>3@vXs3|ZI1*6xl0b;gPBD?yJ6Z4@=pAGc*g!gqE zW$HMIEUjgUf~%LXiB6U%?c^XlBJ%|He;WmVT@0|Kt}5HSW*sB7E08S_NPy~X1iXGI zihb;Z@CSP*@czg=jMMw@Rl^2sa&bGm##suFSiHt|ry6igggj2-+wgY$sG?Q*lHl5H zc^KZQ2lsV3vtD9Ww9`2qVj8an^CSP^)I*`@z2;x?mE1Ra-M0kRN3|5hIr~82+)G$w z-b*GuU5VCwoJCJvcMt0}Fu0P{rl9+@qt zhrJgA6wr55oJ~(4hqnC#(K@BLvg9G@)YOX2o+@HrCJZvOm!h5XZZeB4t$>P4I(=wZ zpZ!e$z{R#uA&?BcjieZ@QV4{2GfOkP08>FOW1uyp<`d{;)Sd+`CgX8J0y{8SH|tyYXl@)5K3oD!<_ zbq4dZ1M#mlJXS)g2A@)sVzV^0>A~xYu=&5UjGki{HXd8gpv4dIzX}m_w<{x}|i#ZCp776MXYO^jsBaq1_T_peP4s+v-3efh{qT36_dirS>e8)1E zG00tsEG4C(qxlP5JAZ`S@^3asDez@2Y=-b6zsoQpJp}rBFfjXW4Scw3J-nu{0-d|n z&-k7VhPl|3bssT9W#LDdb&1oM(hupdcK$TfeN*gZA@_)60y!K!`ORC6xDR&To62|=_UN{Suvbq91GSZSTb@2ThJvR9jLuB20YW719ET#%v$&uGzpJm z$@fiALNo#sVppQ3;usK9X^hXM#52`0F*s<_F4Q8k2u#+F!!JhL>BW_U*z~`T_yzRD zLS{ds<+&E6)XT6h)t|uBr+G~O$S|&}_X24XuK{mZhb${MWzCC!z}D-3vG-Jf@4cP^ zDwk-p$stpKls~{GUzZ_`GcK^>@N-C<-v_^`%|@x;#fJFd<&VtLb`?;vH;8UM8^H#AQbWde%ItD%!R%@C zWXvX+u?h~4@V9?kSfiu_#Jin~2fucMlSRk z_a(eliZf9qpw>(+)+ZPkiV!r{7xF5#KOSWTaokH<^ zc^fi@i*c+;I5hs*20qm0!$a=&Y@0?8Ep(Sdj^74AbhR!$IeZBk4va(rp0^=)`C_JQ z#yCtZi)F{T$>^EpCGfH`44uo`37jgAgO#P0xRMt{yv&-6vd3j%_`{1ZFzp49%tvsZ zq9QsW*nr2P}(8UEUIzGs^)XBdht?m-^FfdsprS!ep!b4 z3{Nw|t996)Z`ILb<<%%;);##!_77}dvVon|?!ngd^I^?OBQ)K1BV0fDl}=yp4!7DK z#4cj?-7&#;X3N7Bs9em=Sn=Wob51!0gvv|GVO&F3iPCrU%#|>{5z*h;$Xic&La*#eK^pe~{YH1na&*{g3uj_64 zo$MuGbf*~>wroe8%mT)2MY zYIva!$%WA<|LrZxcw#3@dB$S=MG5tG?8W2dX7Cnk3ofmzXB^V)aDG_}*4B?^Rla+( zD=%LHfCzNj- z#zxEcV~g4q>`cOu|C}dot=wP;fMUl4n4 z2Mc@a=v$W_L85aCYg(<(+6db~Ypw(vv}!%$ahPP~2F~L*Ymb1gFWO}2zw>w#_dDEs z-jkhYbPj}E%VbQeuETRX;!q)H0_3P{WVd`!BcsiSz{!HOY)JKd^4;t+qQ?s?*$4k- z2yc6+!jYOqjAhqBX!z?dzE);R43&-(Y-=d{CVDHIvM`fb_kKQ77?MXLf!k0|w+c9O z1G1#|8$zJ+RqPQzg1gNcp-kO&_P)5n>Ag66I`O6l3}noO9Su#m)UOFQ*0wMO 
zPTJt)sMs_6%a)yY=>x9%;|6q#71?#;Kj~!+tJxN>B-Y%16mjT!yJj6t0XX1T+B)TiHfL8tFNN!+?uubA7 zHX^o#$!+#P(?-(Z!BI~%Z^<@Ry-o`Ul^S7R-UQ6MCFaTz2XVu$t5|bxpSTNb4K00H zOPrD1$Qbm!ryB02utK>&cALjqY;Z^!?YFoH9w+Q#=iGK?72PMHRo>#x>_kf@?jslH zx9t~qvsaPHBP4p?kVP7=P(X0)26oqoE5wKe%RaiH{drY*sZ9_%I_(008^rx7pEt6J z=Q~9WL$xCL+DYsc1#QxyV*|=~coisynLy&rRxCl}0yP6=Y_y+%6RrvDiOn%ATxHHi zp%C`Fjsx0rOx!D*gu@-|56AsyU-cp-&iXyM^%9vsd`d4Gr{S zuNI@TZa#?q{R|rXDW#VUe<7~TPhelBE5Y@<^-<5eEi7_92tRhkvpSMXpk+c0SZ1LF z&;E@^#(DQ)-&F;yc)MDJ^KEf*Fc*ZSiT8xt5h}VsG54p|!IR#JY^TQ^^1{_3_(X3xm&>0o|JOHF5X0apZ zH?ywV8T70{8Ibqao*i=A!#wXlMrVr;$#msTpyn#acD$R#uHa~~IYxVcWo`|aqZNdT zWDDu7k8F|T`Wfu{b!%~4_Bd^zOMoqk+nGH+Vb~+EnK|tf#nyfsB>CBkQNyzR;J>$9 zSx221!XQ19In3&U_?qc#PRReO=sW{*`u+gkP?AKY5)BQNh@$m8_nv#&N*YQkLVNGM z3rSg7CCMz3klA?dJ?9uvW@cn%6p9Fu`jP+rzkFUiFP`VzbI?T5iMu>&&ok5haA{oHw70QZbU~y)rpoOhl(^_;K4j+5*#5x%8kpg z(_;!Ac{P>nUzh+ZzA|J*xfOXG@d`1C1x&*GrEp}h3ZHQ^AQHOUm|X3z?3RER%xWVI zM(^<(EVs1@3_j^a%HzxM4yn2Lm&^+2k}wJ;T`0s~`+tGBu`JZQ+kwPX~dzP^?`8h$RgbNHw{a7 zvWTgCL>=YF$|XLi($m*Gzyk(}q_Js)EB2nns!~zVtT_%IbgJcostz+neP0>ZrO9N1 zmppbaiUQ)3rSOAyQ6%Nk6Xucrbs!buKxUrwCkMA~!O9)sw6m-{xDxr8fltR1Lk*gF zRi?Lv!QV_c=22dth^KqH)^_**33;@-?0XKX0Pl>M5M)%nY=rAsb2utD=a4Koawi zW4+{N&{t&RVb-`zxIQ}>WnU^rpQrXQVtZGT%Tzvu6!0I~C7u>jbgN z2|%v5l=&jJ8K)o5g;Js>q!-lAc+F+8zM2a1e&fRgD0U$23>#odpF|ThGsrT@TkO>= zLuT<_H}2UYH#jpnm;T}0h{Z-;0=26)WZGM4WIETEmJ7&&{X8R(lA8?Ej%$EbTbGgz zGiBfvu#Cy%Op%!gakBR5&=@)VXtIFqzW)pNB0o&kC1X=fFSu;79n9=jLoQPT;bE~7Ec@977=3D> zKR@jSTDp$#=GxoBp8gERTuT|=E{-9QZqcx=%!XVZQpX1il&Hr%i}cs+RdAZl6Gl9L z7I|awzD4!oQs}=fl32IfbCvt}ntKfmq-&<&nOXj%a>f~8Ep;A?{bo?bk#j)riwG!8 z%p^ta_qZD`GvHs-4D#fC45^C{;YXA*zPF;EuAdY@5+91wYV)~kpLEIHs1O*?)4+s%xy&@`+OZRr_{-t8LZ(;t z6Iz%a#3jC71)radVx^NsqH5=KXnVzskv0ZKg}!-^8Ezv*E&qRbboPOHA#k1(=vblY&h`(5BOj9QG>{ zwWv^VpZ~m)f4*zWZGgMcc+#}ghgQp~qYj1&AT6iO*e(b|n|ZILR;?$F{~o|5UEPGa zK|Z0l7=h8yc=E~P4jNInpd2zH$!Ro=%rR;O^1kWt>%bfwHu*jZ4ZI1wY~oRb?s24i zAd9(S=fEXsRx!J#*D*f~L%0M}KYTXKllV@W1C^dlrrYOPz}KseagEm!VETapx_!lN z6l1Rndz59k?LI8)qvX!K+I$I57e(XI&WTPRV>0HO+3aDVMC(*o5noFq9Uw28ieO!!`}Y5g_a1zzfq*LH*% zmV%;luFBBOk7sAzr>jmK6@M;`9+JKcO&SUm$ZpX9g+Jn2bSnxjf9-efymD(KYjpElAq9%)0 zFmPTUcH1w+Cv+0Y#oj2mZ$&QLv|urFfB$Ay+U_6lqjCu{F#~hTf*{zZNrq3E!%y%V zv(I3FK4kV2ZTTS1Jip(ETX>(HS#UNobWA3C%8Q`O{%h!;T7m_*~ zMGBiOIKj&X#2miEwJv-?(~Xr3wJ(AA!D3FnD4U3_$|83rLHyTr5v=+vfnBub z5Xljm`F*1i4ca{fPYvdfHW*5>4dj_fmLkiO5(!!0M%156;ceFAM8*dO@xN8GS+Nc2 zq~7l$E>~FwzdrDWw};I@=@fo8FaHh|7gS0oDI|lkr>7YrUrtx*CKJK5m8AGWGLe=K zhpVc8Ak)7!AWmZe_!f}~?c;Y*i+``C&)yo(oIX>7?w1mv816t8#|lB2j6ac^nZRVP z&m#t#Q(9EkUBTZfZZnGgfiTOq6qm~x606!EayO=rdBlX^P`7$+p36Ga!1ge5tp+?p zCzR}q*F}Ci7LkeH#BluGXsA-Z3+P+jMql-Fm_oNJ;E(H0COapB=*{&2q0A}ZQ58sb z-j{``+hmaZF=tS%q=li$0{Ae@2PkC%>=Wus{MI%AJwX~Qyp)7)2p<9GN1;$+|l@$^`iLE7G}sK68VZ-f-#{k?mu6{nBTpP4PIxGdV|C4U)AO0 z%%-*6tK-MG=uO^a)qDr&F;|D=W-cW1>MGb&*&jKH&H+<3N75+22HTlx(PC+`q-I4d zT=#q%mKt>@E1aXqo(CuK>MJy(Jy{p8J{JYY%LK#z!pF?<(KRSJ?PqS zc(hPajY#}9ZaF->7~8F=VU!>G!RK3+kgvO@FiU1_MD=z0pekPz-uOCV>4u2h9%k4ih z1za+lf$txlig)X5ruME)hpK<|s6x4Xa#?l~yt@AcV|L^a*v-eGbEZF{zw6cGjzv0f zwdPAud{Pby#?0Uge{;raTPa@nI}Sv6Igy4ydw8rZ2P)23h0iG&lQC8ry<143Jt&6x z9rJ)W=B18w7uqwM-C|JU>I2w3Z4VOP@Rrqx!1G-`nAjlp}s}Y7D3)F6Q2^F@rrS zMqK(c4|J>40oi*jCkpj$q~nJx>}sgT(W@ZGOa}1ASTG7!gN;8XD*Gp!|V(ltZ|929raA&41e0-(R=31!J+4PC|niNo1cJNWhQuO zAx&_=JL*b*8WjG`MqT;hY|cB)!?b);#LET*@Vv}oE^6Fqs^siFaCt={O!>v%SyxAro{ig>BvzK3 zc1>z&<8$(lS`2u{AVsEhOER+uBx#Am{CAaRGl>)LBijiISZA{~53Y2=zrXtvlTt&% zt@(_uegQNtI8T4R!;p6-YoO1|z3j;5$uPxeh*3>zz)25Q!Gh2JFpyt2S?xr!p6|&T 
z-VzAceAU1eO|gWzIUfBuZ41rLRnu}q<&?aeBVCjAjtbq+dr!`EGpV|vu4V)&m^3w{bpM>S_zCD%OCIkwMy{Zw8D|_lL)n6PV!3Uht7=0(0@^Kd|_O z1n8UR3TtzZqV|;&!F@ho>$c=J=bRKz;IrGf=u{r*l1L-g!_p+b4KZ#FMNEgz9fsbb zfs$9v!KV(-fFW-T;omjtaPiz-wC1_(!Z=AOzJF*k`_i-%Y47@ro6JtIk|wgq9DSl* zPYEKy(I1(!?_S_(P85;;oj}}HrNOoG{!nvXFigAIPftl{ru~n2lcibi@Cv(|^)Ip| znMz+cL*;9H4BwjHSXH3S3@La&_MvXZQTvh2- zVb7uyT;;}SlJ&=eybBn@f(%#Tq0}O({kA(P_!vei=Kn-47VFu8@86jw_48oKcQ+Ah zbAy|ZH=cE~e=J&4!H{Eay9MoBz92J6hBUt_7QH?dOn+|6CcSljw873SBK>Cy_09Gw z<5zu6C>#D2SomJVZ@Yt_%GMm({qRN*@k_Cz!I&+PXlXg$;RElz zSWKrWqeOi=3eq(vTX0QL~-W|z|gjyFjQ@>ZZ$8e!Y|MLX)_HAoobC`yA zOKjk?N5=Saemkl;lLaU84zIo~2L*Q@NkA*xKE|@^tZ;W~A}p_PA(`D_Fmz`))^nOl z7o9$YQ>F6B(pLn9M7CJHzoW`UsD}$ACYu3}7(rQ_^d>S@nNYRsEVn*(CAm9eBKI#U z4Z76!Gb58s;qoafsp^~AFwrgxezz_Z?2eLQ{tH?O$Ffp|kuiToO=TIdJhn?{^{f`F zjTRx3lx5`Y@GYGDW-?n8bOGcUC6oGx*HP}Lm)L(AUtLH_gKAUT7^Zp#S9~Fo_~y+K zD%y$(1Lw{F#*W&}S63CG(i7QmzmWp_`rJ?IPrfxg=RbyCSbpaw#pIHo1-Ymse~`X) zyabS#Y{EXyB`MR-(vg%k_rRb?;N*S<=crHSB9)AxrT#_`ohD1aJ<&_sx_dH3k@=+L zxIX#&yp5CG??{%}#$e~+L>REyf|bh+gAdJZAsXjS^gB{W_tP)jKG#rUyiy6M)T-bG z8q?dtPYAowP@7ie6+r1FpU!PB%mmL$z_o<+-p8jyiHyUm_auz#oDMSCltWk%7F(MP& zz_av5>h@S!bI5F80VByZ|9XE0=mf;TzWH;YcGw}h=Y9eu79I)@jn9B78v@~$ogc84 z%qUKtpGvAG2jB|DSfcyr1QYJ&ND4}vX}v9esBpa$t(-mF*yKK+$el43g4ZH;hdbZ* zw$q~dXXiq0=zS>bgr*3#Z}^Ix%{O!JExScx+bUXAhVIfbHB*u5cFZQ#W}}hsnxw#! zg14^j$LH@Eky+(BwCBssK-tujGu4eJHq&y5dVnL$rL1nD5==|ABI7J~fNcxUv1`vLQngu0MA3B>=~Ia#_5G`bUu(5F-}W@3 zW1S8gq63JhO#-PnHJdK5CtRjf99a}0LRN3Y(Zif3?u0@r`CT`{JlX$=zU&^|7K!F-OUjmEKi3SQJB8rp+($-;$VfjuzBSjJ#=|$4fl-qS;QH))4tI~!q_XL z^j|X%Sh&9fNytqDvXvJo7cE(CYj-yg3rihLI%~AbhE^hD3}dz}Uq(@Y?ruxPEjh=U`+_dxeC-b+XrSK)4P3Qd&kU zW=BEos(Kpkj3s>+pR@7&`|0fE6u#cN-RjQ3YO-k27T!JgfsM=DLl!<29kyg2KCwv$ zG{1=;x)BGFS@8r=cqoO;A_#@1ZKi8iDAA31lJKDR81??dQIQSj##F>B(@8JvU|EAk zbA{_-y2sHT3L>_mhx4OgWY|Tnbl@W+LubI}iI-UC1CeloWG8igem<0#T0teWPU58Z zJhVbmO~6t|NfhyHE<3(=zSZNrRH*3Yz}{=@q<_a`LKCNJEn&g=#HH*X+VU#~%7tu0 z$M`*RcSE^Atbae3x(1M`wLU0y3dQdJ`$9PWN*>hTC5v>vUZAF*?dEzzmXIBjwo#s3 z5kCw!Bs!uh%kPExf(2BEF!rG@eD<`Ao7n0G^A(dx>%1S$-IrrY{WBL!*2sm`?`FsZ zWp8|AVhKH~cP_m8Xf}STxf@AO^d*~&-w02HYYFx*p+xhYtLcO#G}&@vlq!!9V=Bv1 z;L`;*q)XEV`L92TJt9hw!oUvnsHj4uyXAXJz^-maj@^uzukj@Cl9?!Ka0Q2yZi@co zU!=xW_*33_c~GY^i&U*#3A+p~;q&)XiGJl&(wkQ)40j(#Tg#;qttI-5j=MCw`0qCG z^H7z@_dj23&(}vqt8;PeX+)Dl0`zUHgoLCz8xaU?SCAUY*N78#CG7WN)I9o(E?%6yW`DR>9jxUBPut zBSEH}50uu{U>{hQQn}TiQ2(|jrXp8?vbmfo*tcbV^D?b;IAb<|SL(!tzfvl>Q_po- zyA2yq+UFctKXQh3GqWPlY>4^mvySOY*QO#xwwx)O3NJWi!h6b-h}R8Ya#JN6zS^EZ z*voBHT0{uldqxg!-Iod*YHPXa#WU!cpL#`FjjQ26!g6AE&=ZcBrjoYZKU*ez$cO)> zRskS-slO;;a#0YfL6K+!iKC z333ysKVU~KbKwi=G@_wa!nB^Fxr-MNo3@sBK`h-RJYX(IPn#YEOFx=J_p+`=uPp#i z-{DTS&-Q{-XCx7eX=Ugp&$zbG&mrLH1Q_0F2j_m7_kM&0I>>JE9?r9~gYUsdABa+E+vpHPjuUKgL5Yq?yjkp${7u-IREyBnZ@6ky2 zeDv*HKH<}1?D~sqtxOKY@%(;&;@lNWYCH|WilnsCTu7R4*^~jNuLsZ1VJf1W84m_tX3p`_Y zvEx!dqO;HvZm2oN`pdl#J=w60tC*GzPcylKH`NN{pj06F_VOcwGrO1%b(+*3=Q+@r zGiv^<&`Zy&%VvU7tcAN`W!Pcs5O~%lkRD42g-PyCOwPdpu5)GvXe*2(3UvZ%o`w=R zV3kd-49b#W6?3chchTf5y_<4W%M*$+!=ReoC){^W8{gd}1G>eo3E+h!5_h#-=)5%- zx~5D+`{WOEkMk5sRZk`~MHa9K=+I9#+p>Q5ZNaIZRlJ9-&+2xSY)hju$IehYEt2!# z_xh_cM6A6;OZTfHb*Wue_L0VTdy6uRb`E2H6;@dOb#xcnih$5ybTxH;)?9M??L_je z*Mb_hv|=i$Q6id{P#- zv~nr@C1DF&Tsx`2yDJG--AkYA?!`{mt)PeP8hEYNlM={m;>>4kWa45qVd|4yGCOb{ zeQ?x_INa4G2X17-S-Swa!8=p8pWQy-9V!@jQ4OZ~WSQ$iR!Niv4`-kS`|{V39018F%O~1F)R22Sri82>QNL%$h}+F$=Ka=ZU(RzDxm50AcK3zT(P7Ww6hS^4wD9>s)60Rr><4zQSms(Xo zM>hmMiq9k7u9e)6v(`kf;U6|yZb_P6s*~Z{8g$a&5l$vxI`&m4r3=`9R``JkFDQ0| zF8pOaSvY#(br$~|dB>2F8*>D{Mmczkw32Ay;zJ^pvsdT}oz>7m3~sf;8ZMkVd>T?y3#xYZ~Kr#8ng4s$8){HviqCB 
From 5f1728f8552909ec3507c65ef7157cf1c6210ee1 Mon Sep 17 00:00:00 2001
From: lym0302
Date: Mon, 28 Feb 2022 14:11:17 +0800
Subject: [PATCH 12/45] rm server related, test=doc

---
 paddlespeech/cli/stats/infer.py | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/paddlespeech/cli/stats/infer.py b/paddlespeech/cli/stats/infer.py
index 7e6df3d2..676f5f73 100644
--- a/paddlespeech/cli/stats/infer.py
+++ b/paddlespeech/cli/stats/infer.py
@@ -75,14 +75,6 @@ class StatsExecutor():
                 "Here is the list of ASR pretrained models released by PaddleSpeech that can be used by command line and python API"
             )
             self.show_support_models(pretrained_models)
-
-            # show ASR static pretrained model
-            from paddlespeech.server.engine.asr.paddleinference.asr_engine import pretrained_models
-            logger.info(
-                "Here is the list of ASR
static pretrained models released by PaddleSpeech that can be used by command line and python API" - ) - self.show_support_models(pretrained_models) - return True except BaseException: logger.error("Failed to get the list of ASR pretrained models.") @@ -132,14 +124,6 @@ class StatsExecutor(): "Here is the list of TTS pretrained models released by PaddleSpeech that can be used by command line and python API" ) self.show_support_models(pretrained_models) - - # show TTS static pretrained model - from paddlespeech.server.engine.tts.paddleinference.tts_engine import pretrained_models - logger.info( - "Here is the list of TTS static pretrained models released by PaddleSpeech that can be used by command line and python API" - ) - self.show_support_models(pretrained_models) - return True except BaseException: logger.error("Failed to get the list of TTS pretrained models.") From 02056b3b02bfc7adfd8c5a30cca61dd59ede6ccb Mon Sep 17 00:00:00 2001 From: huangyuxin Date: Mon, 28 Feb 2022 07:44:44 +0000 Subject: [PATCH 13/45] add conclusion --- docs/topic/ctc/ctc_loss_speed_compare.ipynb | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/topic/ctc/ctc_loss_speed_compare.ipynb b/docs/topic/ctc/ctc_loss_speed_compare.ipynb index 0682247f..eb7a030c 100644 --- a/docs/topic/ctc/ctc_loss_speed_compare.ipynb +++ b/docs/topic/ctc/ctc_loss_speed_compare.ipynb @@ -330,6 +330,19 @@ "print(\"loss\", pn_ctc_loss.item())\n", " " ] + }, + { + "cell_type": "markdown", + "id": "de525d38", + "metadata": {}, + "source": [ + "## 结论\n", + "在 CPU 环境下: torch 的 CTC loss 的计算速度是 paddle 的 9.8 倍 \n", + "在 GPU 环境下: torch 的 CTC loss 的计算速度是 paddle 的 6.87 倍\n", + "\n", + "## 其他结论\n", + "torch 的 ctc loss 在 CPU 和 GPU 下 都没有完全对齐。其中CPU的前向对齐精度大约为 1e-2。 GPU 的前向对齐精度大约为 1e-4 。" + ] } ], "metadata": { From 96abb33b5b71be351b30c402b6e74de16673d1a8 Mon Sep 17 00:00:00 2001 From: lym0302 Date: Mon, 28 Feb 2022 15:55:09 +0800 Subject: [PATCH 14/45] add __call__, test=doc --- paddlespeech/cli/stats/infer.py | 55 +++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 3 deletions(-) diff --git a/paddlespeech/cli/stats/infer.py b/paddlespeech/cli/stats/infer.py index 676f5f73..76b2f47b 100644 --- a/paddlespeech/cli/stats/infer.py +++ b/paddlespeech/cli/stats/infer.py @@ -136,10 +136,59 @@ class StatsExecutor(): """ Python API to call an executor. 
""" - if task not in ['asr', 'cls', 'st', 'text', 'tts']: + self.task = task + if self.task not in self.task_choices: print( "Please input correct speech task, choices = ['asr', 'cls', 'st', 'text', 'tts']" ) - res = "" - return res + elif self.task == 'asr': + try: + from ..asr.infer import pretrained_models + print( + "Here is the list of ASR pretrained models released by PaddleSpeech that can be used by command line and python API" + ) + self.show_support_models(pretrained_models) + except BaseException: + print("Failed to get the list of ASR pretrained models.") + + elif self.task == 'cls': + try: + from ..cls.infer import pretrained_models + print( + "Here is the list of CLS pretrained models released by PaddleSpeech that can be used by command line and python API" + ) + self.show_support_models(pretrained_models) + except BaseException: + print("Failed to get the list of CLS pretrained models.") + + elif self.task == 'st': + try: + from ..st.infer import pretrained_models + print( + "Here is the list of ST pretrained models released by PaddleSpeech that can be used by command line and python API" + ) + self.show_support_models(pretrained_models) + except BaseException: + print("Failed to get the list of ST pretrained models.") + + elif self.task == 'text': + try: + from ..text.infer import pretrained_models + print( + "Here is the list of TEXT pretrained models released by PaddleSpeech that can be used by command line and python API" + ) + self.show_support_models(pretrained_models) + except BaseException: + print( + "Failed to get the list of TEXT pretrained models.") + + elif self.task == 'tts': + try: + from ..tts.infer import pretrained_models + print( + "Here is the list of TTS pretrained models released by PaddleSpeech that can be used by command line and python API" + ) + self.show_support_models(pretrained_models) + except BaseException: + print("Failed to get the list of TTS pretrained models.") \ No newline at end of file From 66a8beb27f7ee8b537b635513c8ac63606ae6e48 Mon Sep 17 00:00:00 2001 From: TianYuan Date: Mon, 28 Feb 2022 09:47:06 +0000 Subject: [PATCH 15/45] update text frontend, test=tts --- README.md | 1 + README_cn.md | 1 + examples/aishell3/tts3/README.md | 4 +- examples/aishell3/tts3/conf/conformer.yaml | 110 ++++++++++++++++++ examples/other/g2p/README.md | 2 +- paddlespeech/t2s/frontend/tone_sandhi.py | 6 +- paddlespeech/t2s/frontend/zh_frontend.py | 22 ++++ .../frontend/zh_normalization/chronology.py | 10 +- .../t2s/frontend/zh_normalization/num.py | 7 +- .../zh_normalization/text_normlization.py | 9 ++ setup.py | 1 + 11 files changed, 165 insertions(+), 8 deletions(-) create mode 100644 examples/aishell3/tts3/conf/conformer.yaml diff --git a/README.md b/README.md index 46730797..e96d0710 100644 --- a/README.md +++ b/README.md @@ -561,6 +561,7 @@ You are warmly welcome to submit questions in [discussions](https://github.com/P - Many thanks to [JiehangXie](https://github.com/JiehangXie)/[PaddleBoBo](https://github.com/JiehangXie/PaddleBoBo) for developing Virtual Uploader(VUP)/Virtual YouTuber(VTuber) with PaddleSpeech TTS function. - Many thanks to [745165806](https://github.com/745165806)/[PaddleSpeechTask](https://github.com/745165806/PaddleSpeechTask) for contributing Punctuation Restoration model. - Many thanks to [kslz](https://github.com/745165806) for supplementary Chinese documents. +- Many thanks to [awmmmm](https://github.com/awmmmm) for contributing fastspeech2 aishell3 conformer pretrained model. 
 Besides, PaddleSpeech depends on a lot of open source repositories. See [references](./docs/source/reference.md) for more information.
diff --git a/README_cn.md b/README_cn.md
index 9782240a..32d5c518 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -556,6 +556,7 @@ year={2021}
 - 非常感谢 [JiehangXie](https://github.com/JiehangXie)/[PaddleBoBo](https://github.com/JiehangXie/PaddleBoBo) 采用 PaddleSpeech 语音合成功能实现 Virtual Uploader(VUP)/Virtual YouTuber(VTuber) 虚拟主播。
 - 非常感谢 [745165806](https://github.com/745165806)/[PaddleSpeechTask](https://github.com/745165806/PaddleSpeechTask) 贡献标点重建相关模型。
 - 非常感谢 [kslz](https://github.com/kslz) 补充中文文档。
+- 非常感谢 [awmmmm](https://github.com/awmmmm) 提供 fastspeech2 aishell3 conformer 预训练模型。
 
 此外,PaddleSpeech 依赖于许多开源存储库。有关更多信息,请参阅 [references](./docs/source/reference.md)。
diff --git a/examples/aishell3/tts3/README.md b/examples/aishell3/tts3/README.md
index 281ad836..d02ad1b6 100644
--- a/examples/aishell3/tts3/README.md
+++ b/examples/aishell3/tts3/README.md
@@ -225,7 +225,9 @@ optional arguments:
 9. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.
 
 ## Pretrained Model
-Pretrained FastSpeech2 model with no silence in the edge of audios. [fastspeech2_nosil_aishell3_ckpt_0.4.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_aishell3_ckpt_0.4.zip)
+Pretrained FastSpeech2 models with no silence at the edge of audios:
+- [fastspeech2_nosil_aishell3_ckpt_0.4.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_aishell3_ckpt_0.4.zip)
+- [fastspeech2_conformer_aishell3_ckpt_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_conformer_aishell3_ckpt_0.2.0.zip) (Thanks to [@awmmmm](https://github.com/awmmmm)'s contribution)
 
 FastSpeech2 checkpoint contains files listed below.
diff --git a/examples/aishell3/tts3/conf/conformer.yaml b/examples/aishell3/tts3/conf/conformer.yaml
new file mode 100644
index 00000000..ea73593d
--- /dev/null
+++ b/examples/aishell3/tts3/conf/conformer.yaml
@@ -0,0 +1,110 @@
+###########################################################
+#                FEATURE EXTRACTION SETTING               #
+###########################################################
+
+fs: 24000                # sr
+n_fft: 2048              # FFT size (samples).
+n_shift: 300             # Hop size (samples). 12.5ms
+win_length: 1200         # Window length (samples). 50ms
+                         # If set to null, it will be the same as fft_size.
+window: "hann"           # Window function.
+
+# Only used for feats_type != raw
+
+fmin: 80                 # Minimum frequency of Mel basis.
+fmax: 7600               # Maximum frequency of Mel basis.
+n_mels: 80               # The number of mel basis.
+
+# Only used for the model using pitch features (e.g. FastSpeech2)
+f0min: 80                # Minimum f0 for pitch extraction.
+f0max: 400               # Maximum f0 for pitch extraction.
+
+
+###########################################################
+#                       DATA SETTING                      #
+###########################################################
+batch_size: 32
+num_workers: 4
+
+
+###########################################################
+#                       MODEL SETTING                     #
+###########################################################
+model:
+    adim: 384         # attention dimension
+    aheads: 2         # number of attention heads
+    elayers: 4        # number of encoder layers
+    eunits: 1536      # number of encoder ff units
+    dlayers: 4        # number of decoder layers
+    dunits: 1536      # number of decoder ff units
+    positionwise_layer_type: conv1d   # type of position-wise layer
+    positionwise_conv_kernel_size: 3  # kernel size of position wise conv layer
+    duration_predictor_layers: 2      # number of layers of duration predictor
+    duration_predictor_chans: 256     # number of channels of duration predictor
+    duration_predictor_kernel_size: 3 # filter size of duration predictor
+    postnet_layers: 5                 # number of layers of postnet
+    postnet_filts: 5                  # filter size of conv layers in postnet
+    postnet_chans: 256                # number of channels of conv layers in postnet
+    encoder_normalize_before: True    # whether to perform layer normalization before the input
+    decoder_normalize_before: True    # whether to perform layer normalization before the input
+    reduction_factor: 1               # reduction factor
+    encoder_type: conformer           # encoder type
+    decoder_type: conformer           # decoder type
+    conformer_pos_enc_layer_type: rel_pos        # conformer positional encoding type
+    conformer_self_attn_layer_type: rel_selfattn # conformer self-attention type
+    conformer_activation_type: swish  # conformer activation type
+    use_macaron_style_in_conformer: true  # whether to use macaron style in conformer
+    use_cnn_in_conformer: true        # whether to use CNN in conformer
+    conformer_enc_kernel_size: 7      # kernel size in CNN module of conformer-based encoder
+    conformer_dec_kernel_size: 31     # kernel size in CNN module of conformer-based decoder
+    init_type: xavier_uniform         # initialization type
+    transformer_enc_dropout_rate: 0.2            # dropout rate for transformer encoder layer
+    transformer_enc_positional_dropout_rate: 0.2 # dropout rate for transformer encoder positional encoding
+    transformer_enc_attn_dropout_rate: 0.2       # dropout rate for transformer encoder attention layer
+    transformer_dec_dropout_rate: 0.2            # dropout rate for transformer decoder layer
+    transformer_dec_positional_dropout_rate: 0.2 # dropout rate for transformer decoder positional encoding
+    transformer_dec_attn_dropout_rate: 0.2       # dropout rate for transformer decoder attention layer
+    pitch_predictor_layers: 5                  # number of conv layers in pitch predictor
+    pitch_predictor_chans: 256                 # number of channels of conv layers in pitch predictor
+    pitch_predictor_kernel_size: 5             # kernel size of conv layers in pitch predictor
+    pitch_predictor_dropout: 0.5               # dropout rate in pitch predictor
+    pitch_embed_kernel_size: 1                 # kernel size of conv embedding layer for pitch
+    pitch_embed_dropout: 0.0                   # dropout rate after conv embedding layer for pitch
+    stop_gradient_from_pitch_predictor: true   # whether to stop the gradient from pitch predictor to encoder
+    energy_predictor_layers: 2                 # number of conv layers in energy predictor
+    energy_predictor_chans: 256                # number of channels of conv layers in energy predictor
+    energy_predictor_kernel_size: 3            # kernel size of conv layers in energy predictor
+    energy_predictor_dropout: 0.5              # dropout rate in energy predictor
+    energy_embed_kernel_size: 1                # kernel size of conv embedding layer for energy
+    energy_embed_dropout: 0.0                  #
dropout rate after conv embedding layer for energy + stop_gradient_from_energy_predictor: false # whether to stop the gradient from energy predictor to encoder + spk_embed_dim: 256 # speaker embedding dimension + spk_embed_integration_type: concat # speaker embedding integration type + + +########################################################### +# UPDATER SETTING # +########################################################### +updater: + use_masking: True # whether to apply masking for padded part in loss calculation + + + +########################################################### +# OPTIMIZER SETTING # +########################################################### +optimizer: + optim: adam # optimizer type + learning_rate: 0.001 # learning rate + +########################################################### +# TRAINING SETTING # +########################################################### +max_epoch: 1000 +num_snapshots: 5 + + +########################################################### +# OTHER SETTING # +########################################################### +seed: 10086 diff --git a/examples/other/g2p/README.md b/examples/other/g2p/README.md index c0f55bd4..141f7f74 100644 --- a/examples/other/g2p/README.md +++ b/examples/other/g2p/README.md @@ -10,7 +10,7 @@ Run the command below to get the results of the test. ```bash ./run.sh ``` -The `avg WER` of g2p is: 0.027124048652822204 +The `avg WER` of g2p is: 0.026014352515701198 ```text ,--------------------------------------------------------------------. | | # Snt # Wrd | Corr Sub Del Ins Err S.Err | diff --git a/paddlespeech/t2s/frontend/tone_sandhi.py b/paddlespeech/t2s/frontend/tone_sandhi.py index 5264e068..07f7fa2b 100644 --- a/paddlespeech/t2s/frontend/tone_sandhi.py +++ b/paddlespeech/t2s/frontend/tone_sandhi.py @@ -63,7 +63,7 @@ class ToneSandhi(): '扫把', '惦记' } self.must_not_neural_tone_words = { - "男子", "女子", "分子", "原子", "量子", "莲子", "石子", "瓜子", "电子" + "男子", "女子", "分子", "原子", "量子", "莲子", "石子", "瓜子", "电子", "人人", "虎虎" } self.punc = ":,;。?!“”‘’':,;.?!" @@ -77,7 +77,9 @@ class ToneSandhi(): # reduplication words for n. and v. e.g. 
奶奶, 试试, 旺旺
         for j, item in enumerate(word):
-            if j - 1 >= 0 and item == word[j - 1] and pos[0] in {"n", "v", "a"}:
+            if j - 1 >= 0 and item == word[j - 1] and pos[0] in {
+                    "n", "v", "a"
+            } and word not in self.must_not_neural_tone_words:
                 finals[j] = finals[j][:-1] + "5"
         ge_idx = word.find("个")
         if len(word) >= 1 and word[-1] in "吧呢哈啊呐噻嘛吖嗨呐哦哒额滴哩哟喽啰耶喔诶":
diff --git a/paddlespeech/t2s/frontend/zh_frontend.py b/paddlespeech/t2s/frontend/zh_frontend.py
index a905c412..bb8ed5b4 100644
--- a/paddlespeech/t2s/frontend/zh_frontend.py
+++ b/paddlespeech/t2s/frontend/zh_frontend.py
@@ -20,7 +20,10 @@ import numpy as np
 import paddle
 from g2pM import G2pM
 from pypinyin import lazy_pinyin
+from pypinyin import load_phrases_dict
+from pypinyin import load_single_dict
 from pypinyin import Style
+from pypinyin_dict.phrase_pinyin_data import large_pinyin
 
 from paddlespeech.t2s.frontend.generate_lexicon import generate_lexicon
 from paddlespeech.t2s.frontend.tone_sandhi import ToneSandhi
@@ -41,6 +44,8 @@ class Frontend():
             self.g2pM_model = G2pM()
             self.pinyin2phone = generate_lexicon(
                 with_tone=True, with_erhua=False)
+        else:
+            self.__init__pypinyin()
         self.must_erhua = {"小院儿", "胡同儿", "范儿", "老汉儿", "撒欢儿", "寻老礼儿", "妥妥儿"}
         self.not_erhua = {
             "虐儿", "为儿", "护儿", "瞒儿", "救儿", "替儿", "有儿", "一儿", "我儿", "俺儿", "妻儿",
@@ -62,6 +67,23 @@ class Frontend():
         for tone, id in tone_id:
             self.vocab_tones[tone] = int(id)
 
+    def __init__pypinyin(self):
+        large_pinyin.load()
+
+        load_phrases_dict({u'开户行': [[u'ka1i'], [u'hu4'], [u'hang2']]})
+        load_phrases_dict({u'发卡行': [[u'fa4'], [u'ka3'], [u'hang2']]})
+        load_phrases_dict({u'放款行': [[u'fa4ng'], [u'kua3n'], [u'hang2']]})
+        load_phrases_dict({u'茧行': [[u'jia3n'], [u'hang2']]})
+        load_phrases_dict({u'行号': [[u'hang2'], [u'ha4o']]})
+        load_phrases_dict({u'各地': [[u'ge4'], [u'di4']]})
+        load_phrases_dict({u'借还款': [[u'jie4'], [u'hua2n'], [u'kua3n']]})
+        load_phrases_dict({u'时间为': [[u'shi2'], [u'jia1n'], [u'we2i']]})
+        load_phrases_dict({u'为准': [[u'we2i'], [u'zhu3n']]})
+        load_phrases_dict({u'色差': [[u'se4'], [u'cha1']]})
+
+        # 调整字的拼音顺序
+        load_single_dict({ord(u'地'): u'de,di4'})
+
     def _get_initials_finals(self, word: str) -> List[List[str]]:
         initials = []
         finals = []
diff --git a/paddlespeech/t2s/frontend/zh_normalization/chronology.py b/paddlespeech/t2s/frontend/zh_normalization/chronology.py
index bfa7d2b1..ea518913 100644
--- a/paddlespeech/t2s/frontend/zh_normalization/chronology.py
+++ b/paddlespeech/t2s/frontend/zh_normalization/chronology.py
@@ -63,7 +63,10 @@ def replace_time(match) -> str:
 
     result = f"{num2str(hour)}点"
     if minute.lstrip('0'):
-        result += f"{_time_num2str(minute)}分"
+        if int(minute) == 30:
+            result += "半"
+        else:
+            result += f"{_time_num2str(minute)}分"
 
     if second and second.lstrip('0'):
         result += f"{_time_num2str(second)}秒"
@@ -71,7 +74,10 @@ def replace_time(match) -> str:
         result += "至"
         result += f"{num2str(hour_2)}点"
         if minute_2.lstrip('0'):
-            result += f"{_time_num2str(minute_2)}分"
+            if int(minute_2) == 30:
+                result += "半"
+            else:
+                result += f"{_time_num2str(minute_2)}分"
         if second_2 and second_2.lstrip('0'):
             result += f"{_time_num2str(second_2)}秒"
 
diff --git a/paddlespeech/t2s/frontend/zh_normalization/num.py b/paddlespeech/t2s/frontend/zh_normalization/num.py
index 27a2f846..a83b42a4 100644
--- a/paddlespeech/t2s/frontend/zh_normalization/num.py
+++ b/paddlespeech/t2s/frontend/zh_normalization/num.py
@@ -28,7 +28,7 @@ UNITS = OrderedDict({
     8: '亿',
 })
 
-COM_QUANTIFIERS =
'(朵|匹|张|座|回|场|尾|条|个|首|阙|阵|网|炮|顶|丘|棵|只|支|袭|辆|挑|担|颗|壳|窠|曲|墙|群|腔|砣|座|客|贯|扎|捆|刀|令|打|手|罗|坡|山|岭|江|溪|钟|队|单|双|对|出|口|头|脚|板|跳|枝|件|贴|针|线|管|名|位|身|堂|课|本|页|家|户|层|丝|毫|厘|分|钱|两|斤|担|铢|石|钧|锱|忽|(千|毫|微)克|毫|厘|(公)分|分|寸|尺|丈|里|寻|常|铺|程|(千|分|厘|毫|微)米|米|撮|勺|合|升|斗|石|盘|碗|碟|叠|桶|笼|盆|盒|杯|钟|斛|锅|簋|篮|盘|桶|罐|瓶|壶|卮|盏|箩|箱|煲|啖|袋|钵|年|月|日|季|刻|时|周|天|秒|分|旬|纪|岁|世|更|夜|春|夏|秋|冬|代|伏|辈|丸|泡|粒|颗|幢|堆|条|根|支|道|面|片|张|颗|块|元|(亿|千万|百万|万|千|百)|(亿|千万|百万|万|千|百|美|)元|(亿|千万|百万|万|千|百|)块|角|毛|分)' +COM_QUANTIFIERS = '(所|朵|匹|张|座|回|场|尾|条|个|首|阙|阵|网|炮|顶|丘|棵|只|支|袭|辆|挑|担|颗|壳|窠|曲|墙|群|腔|砣|座|客|贯|扎|捆|刀|令|打|手|罗|坡|山|岭|江|溪|钟|队|单|双|对|出|口|头|脚|板|跳|枝|件|贴|针|线|管|名|位|身|堂|课|本|页|家|户|层|丝|毫|厘|分|钱|两|斤|担|铢|石|钧|锱|忽|(千|毫|微)克|毫|厘|(公)分|分|寸|尺|丈|里|寻|常|铺|程|(千|分|厘|毫|微)米|米|撮|勺|合|升|斗|石|盘|碗|碟|叠|桶|笼|盆|盒|杯|钟|斛|锅|簋|篮|盘|桶|罐|瓶|壶|卮|盏|箩|箱|煲|啖|袋|钵|年|月|日|季|刻|时|周|天|秒|分|小时|旬|纪|岁|世|更|夜|春|夏|秋|冬|代|伏|辈|丸|泡|粒|颗|幢|堆|条|根|支|道|面|片|张|颗|块|元|(亿|千万|百万|万|千|百)|(亿|千万|百万|万|千|百|美|)元|(亿|千万|百万|万|千|百|)块|角|毛|分)' # 分数表达式 RE_FRAC = re.compile(r'(-?)(\d+)/(\d+)') @@ -110,7 +110,7 @@ def replace_default_num(match): # 纯小数 RE_DECIMAL_NUM = re.compile(r'(-?)((\d+)(\.\d+))' r'|(\.(\d+))') # 正整数 + 量词 -RE_POSITIVE_QUANTIFIERS = re.compile(r"(\d+)([多余几])?" + COM_QUANTIFIERS) +RE_POSITIVE_QUANTIFIERS = re.compile(r"(\d+)([多余几\+])?" + COM_QUANTIFIERS) RE_NUMBER = re.compile(r'(-?)((\d+)(\.\d+)?)' r'|(\.(\d+))') @@ -123,6 +123,8 @@ def replace_positive_quantifier(match) -> str: """ number = match.group(1) match_2 = match.group(2) + if match_2 == "+": + match_2 = "多" match_2: str = match_2 if match_2 else "" quantifiers: str = match.group(3) number: str = num2str(number) @@ -151,6 +153,7 @@ def replace_number(match) -> str: # 范围表达式 # match.group(1) and match.group(8) are copy from RE_NUMBER + RE_RANGE = re.compile( r'((-?)((\d+)(\.\d+)?)|(\.(\d+)))[-~]((-?)((\d+)(\.\d+)?)|(\.(\d+)))') diff --git a/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py b/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py index f9d1b8cb..bc663c70 100644 --- a/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py +++ b/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py @@ -63,11 +63,19 @@ class TextNormalizer(): # Only for pure Chinese here if lang == "zh": text = text.replace(" ", "") + # 过滤掉特殊字符 + text = re.sub(r'[《》【】<=>{}()()#&@“”^_|…\\]', '', text) text = self.SENTENCE_SPLITOR.sub(r'\1\n', text) text = text.strip() sentences = [sentence.strip() for sentence in re.split(r'\n+', text)] return sentences + def _post_replace(self, sentence: str) -> str: + sentence = sentence.replace('/', '每') + sentence = sentence.replace('~', '至') + + return sentence + def normalize_sentence(self, sentence: str) -> str: # basic character conversions sentence = tranditional_to_simplified(sentence) @@ -97,6 +105,7 @@ class TextNormalizer(): sentence) sentence = RE_DEFAULT_NUM.sub(replace_default_num, sentence) sentence = RE_NUMBER.sub(replace_number, sentence) + sentence = self._post_replace(sentence) return sentence diff --git a/setup.py b/setup.py index 3f3632b3..c1c29437 100644 --- a/setup.py +++ b/setup.py @@ -48,6 +48,7 @@ base = [ "paddlespeech_feat", "praatio==5.0.0", "pypinyin", + "pypinyin-dict", "python-dateutil", "pyworld", "resampy==0.2.2", From 54341c88a6e5d7595d20bfbb3a21cd84ecdaebfc Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Mon, 28 Feb 2022 10:39:19 +0000 Subject: [PATCH 16/45] cli batch and shell pipe, test=doc --- README.md | 15 +++++++++++++-- README_cn.md | 11 +++++++++++ demos/speech_recognition/.gitignore | 1 + demos/speech_recognition/README.md | 2 ++ demos/speech_recognition/README_cn.md | 2 ++ 
 demos/speech_recognition/run.sh       |  6 ++++++
 demos/text_to_speech/README.md        |  5 ++++-
 demos/text_to_speech/README_cn.md     |  4 ++++
 demos/text_to_speech/run.sh           |  4 ++++
 9 files changed, 47 insertions(+), 3 deletions(-)
 create mode 100644 demos/speech_recognition/.gitignore

diff --git a/README.md b/README.md
index 46730797..a142cb5e 100644
--- a/README.md
+++ b/README.md
@@ -196,16 +196,18 @@ Developers can have a try of our models with [PaddleSpeech Command Line](./paddl
 ```shell
 paddlespeech cls --input input.wav
 ```
+
 **Automatic Speech Recognition**
 ```shell
 paddlespeech asr --lang zh --input input_16k.wav
 ```
-**Speech Translation** (English to Chinese)
+**Speech Translation** (English to Chinese) (not supported on Mac and Windows yet)
 ```shell
 paddlespeech st --input input_16k.wav
 ```
+
 **Text-to-Speech**
 ```shell
 paddlespeech tts --input "你好,欢迎使用飞桨深度学习框架!" --output output.wav
 ```
@@ -218,7 +220,16 @@
 paddlespeech text --task punc --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭
 ```
 
-
+**Batch Process**
+```
+echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts
+```
+
+**Shell Pipeline**
+ASR + Punc:
+```
+paddlespeech asr --input ./zh.wav | paddlespeech text --task punc
+```
 For more command lines, please see: [demos](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos)
 
diff --git a/README_cn.md b/README_cn.md
index 9782240a..366d9a02 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -216,6 +216,17 @@ paddlespeech tts --input "你好,欢迎使用百度飞桨深度学习框架!
 paddlespeech text --task punc --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭
 ```
 
+**批处理**
+```
+echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts
+```
+
+**Shell管道**
+ASR + Punc:
+```
+paddlespeech asr --input ./zh.wav | paddlespeech text --task punc
+```
+
 更多命令行命令请参考 [demos](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos)
 
 > Note: 如果需要训练或者微调,请查看[语音识别](./docs/source/asr/quick_start.md), [语音合成](./docs/source/tts/quick_start.md)。
diff --git a/demos/speech_recognition/.gitignore b/demos/speech_recognition/.gitignore
new file mode 100644
index 00000000..d8dd7532
--- /dev/null
+++ b/demos/speech_recognition/.gitignore
@@ -0,0 +1 @@
+*.wav
diff --git a/demos/speech_recognition/README.md b/demos/speech_recognition/README.md
index c49afa35..5d964fce 100644
--- a/demos/speech_recognition/README.md
+++ b/demos/speech_recognition/README.md
@@ -27,6 +27,8 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee
   paddlespeech asr --input ./zh.wav
   # English
   paddlespeech asr --model transformer_librispeech --lang en --input ./en.wav
+  # Chinese ASR + Punctuation Restoration
+  paddlespeech asr --input ./zh.wav | paddlespeech text --task punc
   ```
   (It doesn't matter if package `paddlespeech-ctcdecoders` is not found, this package is optional.)
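The shell pipeline added above simply chains the `asr` and `text` commands through stdout/stdin. The same flow can be driven from the Python API. Below is a minimal sketch using the `ASRExecutor` and `TextExecutor` classes exported by `paddlespeech.cli`; the exact keyword arguments are assumed from this release's CLI flags and may differ in other versions.

```python
# Rough Python-API equivalent of the shell pipeline:
#   paddlespeech asr --input ./zh.wav | paddlespeech text --task punc
# (keyword names are assumptions based on this release's CLI flags)
from paddlespeech.cli.asr import ASRExecutor
from paddlespeech.cli.text import TextExecutor

asr = ASRExecutor()
punc = TextExecutor()

transcript = asr(audio_file="./zh.wav", lang="zh")  # speech -> raw transcript
restored = punc(text=transcript, task="punc")       # restore punctuation
print(restored)
```

Unlike the shell version, this keeps both models loaded in a single process, which avoids re-initializing them when transcribing many files in a row.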
diff --git a/demos/speech_recognition/README_cn.md b/demos/speech_recognition/README_cn.md index c2e38c91..ba1f1d65 100644 --- a/demos/speech_recognition/README_cn.md +++ b/demos/speech_recognition/README_cn.md @@ -25,6 +25,8 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee paddlespeech asr --input ./zh.wav # 英文 paddlespeech asr --model transformer_librispeech --lang en --input ./en.wav + # 中文 + 标点恢复 + paddlespeech asr --input ./zh.wav | paddlespeech text --task punc ``` (如果显示 `paddlespeech-ctcdecoders` 这个 python 包没有找到的 Error,没有关系,这个包是非必须的。) diff --git a/demos/speech_recognition/run.sh b/demos/speech_recognition/run.sh index 5efc8b81..06466928 100755 --- a/demos/speech_recognition/run.sh +++ b/demos/speech_recognition/run.sh @@ -1,4 +1,10 @@ #!/bin/bash wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav + +# asr paddlespeech asr --input ./zh.wav + + +# asr + punc +paddlespeech asr --input ./zh.wav | paddlespeech text --task punc \ No newline at end of file diff --git a/demos/text_to_speech/README.md b/demos/text_to_speech/README.md index 9d3c4ac5..2df72a82 100644 --- a/demos/text_to_speech/README.md +++ b/demos/text_to_speech/README.md @@ -17,11 +17,14 @@ The input of this demo should be a text of the specific language that can be pas ### 3. Usage - Command Line (Recommended) - Chinese - The default acoustic model is `Fastspeech2`, and the default vocoder is `Parallel WaveGAN`. ```bash paddlespeech tts --input "你好,欢迎使用百度飞桨深度学习框架!" ``` + - Batch Process + ```bash + echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts + ``` - Chinese, use `SpeedySpeech` as the acoustic model ```bash paddlespeech tts --am speedyspeech_csmsc --input "你好,欢迎使用百度飞桨深度学习框架!" diff --git a/demos/text_to_speech/README_cn.md b/demos/text_to_speech/README_cn.md index f075efda..7e02b962 100644 --- a/demos/text_to_speech/README_cn.md +++ b/demos/text_to_speech/README_cn.md @@ -24,6 +24,10 @@ ```bash paddlespeech tts --input "你好,欢迎使用百度飞桨深度学习框架!" ``` + - 批处理 + ```bash + echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts + ``` - 中文,使用 `SpeedySpeech` 作为声学模型 ```bash paddlespeech tts --am speedyspeech_csmsc --input "你好,欢迎使用百度飞桨深度学习框架!" 
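The batch-processing entries above read numbered sentences from stdin and synthesize one wav per line. A comparable loop over the Python API is sketched below; `TTSExecutor` is exported by `paddlespeech.cli.tts`, and the `output` keyword is assumed to mirror the CLI's `--output` flag in this release.

```python
# Sketch of batch synthesis mirroring:
#   echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts
# (defaults are assumed to follow the CLI: fastspeech2_csmsc + pwgan_csmsc)
from paddlespeech.cli.tts import TTSExecutor

tts = TTSExecutor()
for idx, sentence in enumerate(["欢迎光临。", "谢谢惠顾。"], start=1):
    tts(text=sentence, output=f"{idx}.wav")  # one wav per input line
```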
diff --git a/demos/text_to_speech/run.sh b/demos/text_to_speech/run.sh index c2487aee..b1340241 100755 --- a/demos/text_to_speech/run.sh +++ b/demos/text_to_speech/run.sh @@ -1,3 +1,7 @@ #!/bin/bash +# single process paddlespeech tts --input 今天的天气不错啊 + +# Batch process +echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts \ No newline at end of file From 75098698d8eae48d1d0343cd683c7b315ea4a02d Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Mon, 28 Feb 2022 10:45:39 +0000 Subject: [PATCH 17/45] format,test=doc --- paddlespeech/s2t/io/sampler.py | 2 +- paddlespeech/s2t/models/u2_st/u2_st.py | 4 +-- .../t2s/modules/transformer/repeat.py | 2 +- .../unit/asr/deepspeech2_online_model_test.py | 36 ++++++++----------- 4 files changed, 17 insertions(+), 27 deletions(-) diff --git a/paddlespeech/s2t/io/sampler.py b/paddlespeech/s2t/io/sampler.py index 89752bb9..ac55af12 100644 --- a/paddlespeech/s2t/io/sampler.py +++ b/paddlespeech/s2t/io/sampler.py @@ -51,7 +51,7 @@ def _batch_shuffle(indices, batch_size, epoch, clipped=False): """ rng = np.random.RandomState(epoch) shift_len = rng.randint(0, batch_size - 1) - batch_indices = list(zip(*[iter(indices[shift_len:])] * batch_size)) + batch_indices = list(zip(* [iter(indices[shift_len:])] * batch_size)) rng.shuffle(batch_indices) batch_indices = [item for batch in batch_indices for item in batch] assert clipped is False diff --git a/paddlespeech/s2t/models/u2_st/u2_st.py b/paddlespeech/s2t/models/u2_st/u2_st.py index f7b05714..999723e5 100644 --- a/paddlespeech/s2t/models/u2_st/u2_st.py +++ b/paddlespeech/s2t/models/u2_st/u2_st.py @@ -33,8 +33,6 @@ from paddlespeech.s2t.modules.decoder import TransformerDecoder from paddlespeech.s2t.modules.encoder import ConformerEncoder from paddlespeech.s2t.modules.encoder import TransformerEncoder from paddlespeech.s2t.modules.loss import LabelSmoothingLoss -from paddlespeech.s2t.modules.mask import mask_finished_preds -from paddlespeech.s2t.modules.mask import mask_finished_scores from paddlespeech.s2t.modules.mask import subsequent_mask from paddlespeech.s2t.utils import checkpoint from paddlespeech.s2t.utils import layer_tools @@ -291,7 +289,7 @@ class U2STBaseModel(nn.Layer): device = speech.place # Let's assume B = batch_size and N = beam_size - # 1. Encoder and init hypothesis + # 1. Encoder and init hypothesis encoder_out, encoder_mask = self._forward_encoder( speech, speech_lengths, decoding_chunk_size, num_decoding_left_chunks, diff --git a/paddlespeech/t2s/modules/transformer/repeat.py b/paddlespeech/t2s/modules/transformer/repeat.py index 2073a78b..1e946adf 100644 --- a/paddlespeech/t2s/modules/transformer/repeat.py +++ b/paddlespeech/t2s/modules/transformer/repeat.py @@ -36,4 +36,4 @@ def repeat(N, fn): Returns: MultiSequential: Repeated model instance. """ - return MultiSequential(*[fn(n) for n in range(N)]) + return MultiSequential(* [fn(n) for n in range(N)]) diff --git a/tests/unit/asr/deepspeech2_online_model_test.py b/tests/unit/asr/deepspeech2_online_model_test.py index d26e5b15..f23c4926 100644 --- a/tests/unit/asr/deepspeech2_online_model_test.py +++ b/tests/unit/asr/deepspeech2_online_model_test.py @@ -11,16 +11,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import os +import pickle import unittest import numpy as np import paddle -import pickle -import os from paddle import inference -from paddlespeech.s2t.models.ds2_online import DeepSpeech2ModelOnline from paddlespeech.s2t.models.ds2_online import DeepSpeech2InferModelOnline +from paddlespeech.s2t.models.ds2_online import DeepSpeech2ModelOnline + class TestDeepSpeech2ModelOnline(unittest.TestCase): def setUp(self): @@ -185,15 +186,12 @@ class TestDeepSpeech2ModelOnline(unittest.TestCase): paddle.allclose(final_state_c_box, final_state_c_box_chk), True) - - class TestDeepSpeech2StaticModelOnline(unittest.TestCase): - def setUp(self): export_prefix = "exp/deepspeech2_online/checkpoints/test_export" if not os.path.exists(os.path.dirname(export_prefix)): os.makedirs(os.path.dirname(export_prefix), mode=0o755) - infer_model = DeepSpeech2InferModelOnline( + infer_model = DeepSpeech2InferModelOnline( feat_size=161, dict_size=4233, num_conv_layers=2, @@ -207,27 +205,25 @@ class TestDeepSpeech2StaticModelOnline(unittest.TestCase): with open("test_data/static_ds2online_inputs.pickle", "rb") as f: self.data_dict = pickle.load(f) - + self.setup_model(export_prefix) - def setup_model(self, export_prefix): - deepspeech_config = inference.Config( - export_prefix + ".pdmodel", - export_prefix + ".pdiparams") - if ('CUDA_VISIBLE_DEVICES' in os.environ.keys() and os.environ['CUDA_VISIBLE_DEVICES'].strip() != ''): + deepspeech_config = inference.Config(export_prefix + ".pdmodel", + export_prefix + ".pdiparams") + if ('CUDA_VISIBLE_DEVICES' in os.environ.keys() and + os.environ['CUDA_VISIBLE_DEVICES'].strip() != ''): deepspeech_config.enable_use_gpu(100, 0) deepspeech_config.enable_memory_optim() deepspeech_predictor = inference.create_predictor(deepspeech_config) self.predictor = deepspeech_predictor - + def test_unit(self): input_names = self.predictor.get_input_names() audio_handle = self.predictor.get_input_handle(input_names[0]) audio_len_handle = self.predictor.get_input_handle(input_names[1]) h_box_handle = self.predictor.get_input_handle(input_names[2]) c_box_handle = self.predictor.get_input_handle(input_names[3]) - x_chunk = self.data_dict["audio_chunk"] x_chunk_lens = self.data_dict["audio_chunk_lens"] @@ -246,13 +242,9 @@ class TestDeepSpeech2StaticModelOnline(unittest.TestCase): c_box_handle.reshape(chunk_state_c_box.shape) c_box_handle.copy_from_cpu(chunk_state_c_box) - - output_names = self.predictor.get_output_names() - output_handle = self.predictor.get_output_handle( - output_names[0]) - output_lens_handle = self.predictor.get_output_handle( - output_names[1]) + output_handle = self.predictor.get_output_handle(output_names[0]) + output_lens_handle = self.predictor.get_output_handle(output_names[1]) output_state_h_handle = self.predictor.get_output_handle( output_names[2]) output_state_c_handle = self.predictor.get_output_handle( @@ -264,7 +256,7 @@ class TestDeepSpeech2StaticModelOnline(unittest.TestCase): chunk_state_h_box = output_state_h_handle.copy_to_cpu() chunk_state_c_box = output_state_c_handle.copy_to_cpu() return True - + if __name__ == '__main__': unittest.main() From 335638ba1877a72d94b39f964e999acd6e18f26a Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Mon, 28 Feb 2022 11:01:50 +0000 Subject: [PATCH 18/45] update gitignore, test=doct --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index cc8fff87..778824f5 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ *.pyc .vscode *log +*.wav *.pdmodel *.pdiparams* *.zip @@ -30,5 +31,8 @@ 
tools/OpenBLAS/ tools/Miniconda3-latest-Linux-x86_64.sh tools/activate_python.sh tools/miniconda.sh +tools/CRF++-0.58/ + +speechx/fc_patch/ *output/ From 395c923dee8e2df8a9440242d9f06a4e5adae9f9 Mon Sep 17 00:00:00 2001 From: lym0302 Date: Mon, 28 Feb 2022 20:10:08 +0800 Subject: [PATCH 19/45] modified text sr to lang, test=doc --- paddlespeech/cli/stats/infer.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/paddlespeech/cli/stats/infer.py b/paddlespeech/cli/stats/infer.py index 76b2f47b..d60a6691 100644 --- a/paddlespeech/cli/stats/infer.py +++ b/paddlespeech/cli/stats/infer.py @@ -26,7 +26,7 @@ model_name_format = { 'asr': 'Model-Language-Sample Rate', 'cls': 'Model-Sample Rate', 'st': 'Model-Source language-Target language', - 'text': 'Model-Task-Sample Rate', + 'text': 'Model-Task-Language', 'tts': 'Model-Language' } @@ -180,8 +180,7 @@ class StatsExecutor(): ) self.show_support_models(pretrained_models) except BaseException: - print( - "Failed to get the list of TEXT pretrained models.") + print("Failed to get the list of TEXT pretrained models.") elif self.task == 'tts': try: @@ -191,4 +190,4 @@ class StatsExecutor(): ) self.show_support_models(pretrained_models) except BaseException: - print("Failed to get the list of TTS pretrained models.") \ No newline at end of file + print("Failed to get the list of TTS pretrained models.") From c64282e7a7741457a5ff35e4edccda8914d7243f Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Tue, 1 Mar 2022 10:13:13 +0800 Subject: [PATCH 20/45] fix shell pipe example --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d6d97d8b..aa17c71e 100644 --- a/README.md +++ b/README.md @@ -225,8 +225,8 @@ paddlespeech tts --input "你好,欢迎使用飞桨深度学习框架!" 
--ou echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts ``` -**Shell Pipeline** -ASR + Punc: +**Shell Pipeline** +- ASR + Punctuation Restoration ``` paddlespeech asr --input ./zh.wav | paddlespeech text --task punc ``` From 72c0cda30cdb184c091a65a518969be98ed8e10f Mon Sep 17 00:00:00 2001 From: lym0302 Date: Tue, 1 Mar 2022 10:18:04 +0800 Subject: [PATCH 21/45] add paddlespeech_server stats, test=doc --- paddlespeech/cli/stats/infer.py | 2 +- paddlespeech/server/bin/__init__.py | 1 + .../server/bin/paddlespeech_server.py | 140 +++++++++++++++++- 3 files changed, 141 insertions(+), 2 deletions(-) diff --git a/paddlespeech/cli/stats/infer.py b/paddlespeech/cli/stats/infer.py index d60a6691..4ef50449 100644 --- a/paddlespeech/cli/stats/infer.py +++ b/paddlespeech/cli/stats/infer.py @@ -68,7 +68,7 @@ class StatsExecutor(): ) return False - if self.task == 'asr': + elif self.task == 'asr': try: from ..asr.infer import pretrained_models logger.info( diff --git a/paddlespeech/server/bin/__init__.py b/paddlespeech/server/bin/__init__.py index bd75747f..025aab09 100644 --- a/paddlespeech/server/bin/__init__.py +++ b/paddlespeech/server/bin/__init__.py @@ -14,3 +14,4 @@ from .paddlespeech_client import ASRClientExecutor from .paddlespeech_client import TTSClientExecutor from .paddlespeech_server import ServerExecutor +from .paddlespeech_server import ServerStatsExecutor diff --git a/paddlespeech/server/bin/paddlespeech_server.py b/paddlespeech/server/bin/paddlespeech_server.py index aff77d54..21fc5c65 100644 --- a/paddlespeech/server/bin/paddlespeech_server.py +++ b/paddlespeech/server/bin/paddlespeech_server.py @@ -16,15 +16,17 @@ from typing import List import uvicorn from fastapi import FastAPI +from prettytable import PrettyTable from ..executor import BaseExecutor from ..util import cli_server_register from ..util import stats_wrapper +from paddlespeech.cli.log import logger from paddlespeech.server.engine.engine_pool import init_engine_pool from paddlespeech.server.restful.api import setup_router from paddlespeech.server.utils.config import get_config -__all__ = ['ServerExecutor'] +__all__ = ['ServerExecutor', 'ServerStatsExecutor'] app = FastAPI( title="PaddleSpeech Serving API", description="Api", version="0.0.1") @@ -86,3 +88,139 @@ class ServerExecutor(BaseExecutor): config = get_config(config_file) if self.init(config): uvicorn.run(app, host=config.host, port=config.port, debug=True) + + +@cli_server_register( + name='paddlespeech_server.stats', + description='Get the models supported by each speech task in the service.') +class ServerStatsExecutor(): + def __init__(self): + super(ServerStatsExecutor, self).__init__() + + self.parser = argparse.ArgumentParser( + prog='paddlespeech_server.stats', add_help=True) + self.parser.add_argument( + '--task', + type=str, + default=None, + choices=['asr', 'tts'], + help='Choose speech task.', + required=True) + self.task_choices = ['asr', 'tts'] + self.model_name_format = { + 'asr': 'Model-Language-Sample Rate', + 'tts': 'Model-Language' + } + + def show_support_models(self, pretrained_models: dict): + fields = self.model_name_format[self.task].split("-") + table = PrettyTable(fields) + for key in pretrained_models: + table.add_row(key.split("-")) + print(table) + + def execute(self, argv: List[str]) -> bool: + """ + Command line entry. 
+ """ + parser_args = self.parser.parse_args(argv) + self.task = parser_args.task + if self.task not in self.task_choices: + logger.error( + "Please input correct speech task, choices = ['asr', 'tts']") + return False + + elif self.task == 'asr': + try: + from paddlespeech.cli.asr.infer import pretrained_models + logger.info( + "Here is the table of ASR pretrained models supported in the service." + ) + self.show_support_models(pretrained_models) + + # show ASR static pretrained model + from paddlespeech.server.engine.asr.paddleinference.asr_engine import pretrained_models + logger.info( + "Here is the table of ASR static pretrained models supported in the service." + ) + self.show_support_models(pretrained_models) + + return True + except BaseException: + logger.error( + "Failed to get the table of ASR pretrained models supported in the service." + ) + return False + + elif self.task == 'tts': + try: + from paddlespeech.cli.tts.infer import pretrained_models + logger.info( + "Here is the table of TTS pretrained models supported in the service." + ) + self.show_support_models(pretrained_models) + + # show TTS static pretrained model + from paddlespeech.server.engine.tts.paddleinference.tts_engine import pretrained_models + logger.info( + "Here is the table of TTS static pretrained models supported in the service." + ) + self.show_support_models(pretrained_models) + + return True + except BaseException: + logger.error( + "Failed to get the table of TTS pretrained models supported in the service." + ) + return False + + @stats_wrapper + def __call__( + self, + task: str=None, ): + """ + Python API to call an executor. + """ + self.task = task + if self.task not in self.task_choices: + print("Please input correct speech task, choices = ['asr', 'tts']") + + elif self.task == 'asr': + try: + from paddlespeech.cli.asr.infer import pretrained_models + print( + "Here is the table of ASR pretrained models supported in the service." + ) + self.show_support_models(pretrained_models) + + # show ASR static pretrained model + from paddlespeech.server.engine.asr.paddleinference.asr_engine import pretrained_models + print( + "Here is the table of ASR static pretrained models supported in the service." + ) + self.show_support_models(pretrained_models) + + except BaseException: + print( + "Failed to get the table of ASR pretrained models supported in the service." + ) + + elif self.task == 'tts': + try: + from paddlespeech.cli.tts.infer import pretrained_models + print( + "Here is the table of TTS pretrained models supported in the service." + ) + self.show_support_models(pretrained_models) + + # show TTS static pretrained model + from paddlespeech.server.engine.tts.paddleinference.tts_engine import pretrained_models + print( + "Here is the table of TTS static pretrained models supported in the service." + ) + self.show_support_models(pretrained_models) + + except BaseException: + print( + "Failed to get the table of TTS pretrained models supported in the service." 
+ ) From cb07bd2a94c8a39331eec5ae649bfe01331244aa Mon Sep 17 00:00:00 2001 From: TianYuan Date: Tue, 1 Mar 2022 03:41:24 +0000 Subject: [PATCH 22/45] add rtf for synthesize, add more vocoder for synthesize_e2e.sh, test=tts --- examples/csmsc/tts0/local/synthesize.sh | 106 +++++++++++++++--- examples/csmsc/tts0/local/synthesize_e2e.sh | 16 +-- examples/csmsc/tts2/local/synthesize.sh | 113 +++++++++++++++++--- examples/csmsc/tts2/local/synthesize_e2e.sh | 12 +-- examples/csmsc/tts3/local/synthesize.sh | 106 +++++++++++++++--- examples/csmsc/tts3/local/synthesize_e2e.sh | 12 +-- paddlespeech/t2s/exps/synthesize.py | 94 ++++++++++------ paddlespeech/t2s/exps/synthesize_e2e.py | 106 +++++++++--------- paddlespeech/t2s/exps/wavernn/synthesize.py | 2 +- paddlespeech/t2s/models/melgan/melgan.py | 2 +- paddlespeech/t2s/models/wavernn/wavernn.py | 14 ++- 11 files changed, 434 insertions(+), 149 deletions(-) diff --git a/examples/csmsc/tts0/local/synthesize.sh b/examples/csmsc/tts0/local/synthesize.sh index 4be06dd8..bfb4844b 100755 --- a/examples/csmsc/tts0/local/synthesize.sh +++ b/examples/csmsc/tts0/local/synthesize.sh @@ -3,18 +3,96 @@ config_path=$1 train_output_path=$2 ckpt_name=$3 +stage=0 +stop_stage=0 -FLAGS_allocator_strategy=naive_best_fit \ -FLAGS_fraction_of_gpu_memory_to_use=0.01 \ -python3 ${BIN_DIR}/../synthesize.py \ - --am=tacotron2_csmsc \ - --am_config=${config_path} \ - --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ - --am_stat=dump/train/speech_stats.npy \ - --voc=pwgan_csmsc \ - --voc_config=pwg_baker_ckpt_0.4/pwg_default.yaml \ - --voc_ckpt=pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \ - --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \ - --test_metadata=dump/test/norm/metadata.jsonl \ - --output_dir=${train_output_path}/test \ - --phones_dict=dump/phone_id_map.txt +if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then + FLAGS_allocator_strategy=naive_best_fit \ + FLAGS_fraction_of_gpu_memory_to_use=0.01 \ + python3 ${BIN_DIR}/../synthesize.py \ + --am=tacotron2_csmsc \ + --am_config=${config_path} \ + --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ + --am_stat=dump/train/speech_stats.npy \ + --voc=pwgan_csmsc \ + --voc_config=pwg_baker_ckpt_0.4/pwg_default.yaml \ + --voc_ckpt=pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \ + --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \ + --test_metadata=dump/test/norm/metadata.jsonl \ + --output_dir=${train_output_path}/test \ + --phones_dict=dump/phone_id_map.txt +fi + +# for more GAN Vocoders +# multi band melgan +if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then + FLAGS_allocator_strategy=naive_best_fit \ + FLAGS_fraction_of_gpu_memory_to_use=0.01 \ + python3 ${BIN_DIR}/../synthesize.py \ + --am=tacotron2_csmsc \ + --am_config=${config_path} \ + --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ + --am_stat=dump/train/speech_stats.npy \ + --voc=mb_melgan_csmsc \ + --voc_config=mb_melgan_csmsc_ckpt_0.1.1/default.yaml \ + --voc_ckpt=mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz\ + --voc_stat=mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \ + --test_metadata=dump/test/norm/metadata.jsonl \ + --output_dir=${train_output_path}/test \ + --phones_dict=dump/phone_id_map.txt +fi + +if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then + FLAGS_allocator_strategy=naive_best_fit \ + FLAGS_fraction_of_gpu_memory_to_use=0.01 \ + python3 ${BIN_DIR}/../synthesize.py \ + --am=tacotron2_csmsc \ + --am_config=${config_path} \ + --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ + 
--am_stat=dump/train/speech_stats.npy \ + --voc=style_melgan_csmsc \ + --voc_config=style_melgan_csmsc_ckpt_0.1.1/default.yaml \ + --voc_ckpt=style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \ + --voc_stat=style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \ + --test_metadata=dump/test/norm/metadata.jsonl \ + --output_dir=${train_output_path}/test \ + --phones_dict=dump/phone_id_map.txt +fi + +# hifigan +if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then + echo "in hifigan syn" + FLAGS_allocator_strategy=naive_best_fit \ + FLAGS_fraction_of_gpu_memory_to_use=0.01 \ + python3 ${BIN_DIR}/../synthesize.py \ + --am=tacotron2_csmsc \ + --am_config=${config_path} \ + --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ + --am_stat=dump/train/speech_stats.npy \ + --voc=hifigan_csmsc \ + --voc_config=hifigan_csmsc_ckpt_0.1.1/default.yaml \ + --voc_ckpt=hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \ + --voc_stat=hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \ + --test_metadata=dump/test/norm/metadata.jsonl \ + --output_dir=${train_output_path}/test \ + --phones_dict=dump/phone_id_map.txt +fi + +# wavernn +if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then + echo "in wavernn syn" + FLAGS_allocator_strategy=naive_best_fit \ + FLAGS_fraction_of_gpu_memory_to_use=0.01 \ + python3 ${BIN_DIR}/../synthesize.py \ + --am=tacotron2_csmsc \ + --am_config=${config_path} \ + --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ + --am_stat=dump/train/speech_stats.npy \ + --voc=wavernn_csmsc \ + --voc_config=wavernn_csmsc_ckpt_0.2.0/default.yaml \ + --voc_ckpt=wavernn_csmsc_ckpt_0.2.0/snapshot_iter_400000.pdz \ + --voc_stat=wavernn_csmsc_ckpt_0.2.0/feats_stats.npy \ + --test_metadata=dump/test/norm/metadata.jsonl \ + --output_dir=${train_output_path}/test \ + --phones_dict=dump/phone_id_map.txt +fi diff --git a/examples/csmsc/tts0/local/synthesize_e2e.sh b/examples/csmsc/tts0/local/synthesize_e2e.sh index 79bb9f83..4c73a18d 100755 --- a/examples/csmsc/tts0/local/synthesize_e2e.sh +++ b/examples/csmsc/tts0/local/synthesize_e2e.sh @@ -39,14 +39,14 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ --am_stat=dump/train/speech_stats.npy \ --voc=mb_melgan_csmsc \ - --voc_config=mb_melgan_baker_finetune_ckpt_0.5/finetune.yaml \ - --voc_ckpt=mb_melgan_baker_finetune_ckpt_0.5/snapshot_iter_2000000.pdz\ - --voc_stat=mb_melgan_baker_finetune_ckpt_0.5/feats_stats.npy \ + --voc_config=mb_melgan_csmsc_ckpt_0.1.1/default.yaml \ + --voc_ckpt=mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz\ + --voc_stat=mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \ --lang=zh \ --text=${BIN_DIR}/../sentences.txt \ --output_dir=${train_output_path}/test_e2e \ - --inference_dir=${train_output_path}/inference \ - --phones_dict=dump/phone_id_map.txt + --phones_dict=dump/phone_id_map.txt \ + --inference_dir=${train_output_path}/inference fi # the pretrained models haven't release now @@ -88,8 +88,8 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then --lang=zh \ --text=${BIN_DIR}/../sentences.txt \ --output_dir=${train_output_path}/test_e2e \ - --inference_dir=${train_output_path}/inference \ - --phones_dict=dump/phone_id_map.txt + --phones_dict=dump/phone_id_map.txt \ + --inference_dir=${train_output_path}/inference fi # wavernn @@ -111,4 +111,4 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --inference_dir=${train_output_path}/inference -fi \ No newline at end of 
file +fi diff --git a/examples/csmsc/tts2/local/synthesize.sh b/examples/csmsc/tts2/local/synthesize.sh index cedc9717..07cf156e 100755 --- a/examples/csmsc/tts2/local/synthesize.sh +++ b/examples/csmsc/tts2/local/synthesize.sh @@ -1,20 +1,103 @@ #!/bin/bash + config_path=$1 train_output_path=$2 ckpt_name=$3 +stage=0 +stop_stage=0 + +if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then + FLAGS_allocator_strategy=naive_best_fit \ + FLAGS_fraction_of_gpu_memory_to_use=0.01 \ + python3 ${BIN_DIR}/../synthesize.py \ + --am=speedyspeech_csmsc \ + --am_config=${config_path} \ + --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ + --am_stat=dump/train/speech_stats.npy \ + --voc=pwgan_csmsc \ + --voc_config=pwg_baker_ckpt_0.4/pwg_default.yaml \ + --voc_ckpt=pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \ + --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \ + --test_metadata=dump/test/norm/metadata.jsonl \ + --output_dir=${train_output_path}/test \ + --phones_dict=dump/phone_id_map.txt \ + --tones_dict=dump/tone_id_map.txt +fi + +# for more GAN Vocoders +# multi band melgan +if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then + FLAGS_allocator_strategy=naive_best_fit \ + FLAGS_fraction_of_gpu_memory_to_use=0.01 \ + python3 ${BIN_DIR}/../synthesize.py \ + --am=speedyspeech_csmsc \ + --am_config=${config_path} \ + --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ + --am_stat=dump/train/speech_stats.npy \ + --voc=mb_melgan_csmsc \ + --voc_config=mb_melgan_csmsc_ckpt_0.1.1/default.yaml \ + --voc_ckpt=mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz\ + --voc_stat=mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \ + --test_metadata=dump/test/norm/metadata.jsonl \ + --output_dir=${train_output_path}/test \ + --phones_dict=dump/phone_id_map.txt \ + --tones_dict=dump/tone_id_map.txt +fi + +if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then + FLAGS_allocator_strategy=naive_best_fit \ + FLAGS_fraction_of_gpu_memory_to_use=0.01 \ + python3 ${BIN_DIR}/../synthesize.py \ + --am=speedyspeech_csmsc \ + --am_config=${config_path} \ + --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ + --am_stat=dump/train/speech_stats.npy \ + --voc=style_melgan_csmsc \ + --voc_config=style_melgan_csmsc_ckpt_0.1.1/default.yaml \ + --voc_ckpt=style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \ + --voc_stat=style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \ + --test_metadata=dump/test/norm/metadata.jsonl \ + --output_dir=${train_output_path}/test \ + --phones_dict=dump/phone_id_map.txt \ + --tones_dict=dump/tone_id_map.txt +fi + +# hifigan +if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then + echo "in hifigan syn" + FLAGS_allocator_strategy=naive_best_fit \ + FLAGS_fraction_of_gpu_memory_to_use=0.01 \ + python3 ${BIN_DIR}/../synthesize.py \ + --am=speedyspeech_csmsc \ + --am_config=${config_path} \ + --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ + --am_stat=dump/train/speech_stats.npy \ + --voc=hifigan_csmsc \ + --voc_config=hifigan_csmsc_ckpt_0.1.1/default.yaml \ + --voc_ckpt=hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \ + --voc_stat=hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \ + --test_metadata=dump/test/norm/metadata.jsonl \ + --output_dir=${train_output_path}/test \ + --phones_dict=dump/phone_id_map.txt \ + --tones_dict=dump/tone_id_map.txt +fi -FLAGS_allocator_strategy=naive_best_fit \ -FLAGS_fraction_of_gpu_memory_to_use=0.01 \ -python3 ${BIN_DIR}/../synthesize.py \ - --am=speedyspeech_csmsc \ - --am_config=${config_path} \ - 
--am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ - --am_stat=dump/train/feats_stats.npy \ - --voc=pwgan_csmsc \ - --voc_config=pwg_baker_ckpt_0.4/pwg_default.yaml \ - --voc_ckpt=pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \ - --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \ - --test_metadata=dump/test/norm/metadata.jsonl \ - --output_dir=${train_output_path}/test \ - --phones_dict=dump/phone_id_map.txt \ - --tones_dict=dump/tone_id_map.txt \ No newline at end of file +# wavernn +if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then + echo "in wavernn syn" + FLAGS_allocator_strategy=naive_best_fit \ + FLAGS_fraction_of_gpu_memory_to_use=0.01 \ + python3 ${BIN_DIR}/../synthesize.py \ + --am=speedyspeech_csmsc \ + --am_config=${config_path} \ + --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ + --am_stat=dump/train/speech_stats.npy \ + --voc=wavernn_csmsc \ + --voc_config=wavernn_csmsc_ckpt_0.2.0/default.yaml \ + --voc_ckpt=wavernn_csmsc_ckpt_0.2.0/snapshot_iter_400000.pdz \ + --voc_stat=wavernn_csmsc_ckpt_0.2.0/feats_stats.npy \ + --test_metadata=dump/test/norm/metadata.jsonl \ + --output_dir=${train_output_path}/test \ + --tones_dict=dump/tone_id_map.txt \ + --phones_dict=dump/phone_id_map.txt +fi diff --git a/examples/csmsc/tts2/local/synthesize_e2e.sh b/examples/csmsc/tts2/local/synthesize_e2e.sh index 35fcf251..d5862a61 100755 --- a/examples/csmsc/tts2/local/synthesize_e2e.sh +++ b/examples/csmsc/tts2/local/synthesize_e2e.sh @@ -22,9 +22,9 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --lang=zh \ --text=${BIN_DIR}/../sentences.txt \ --output_dir=${train_output_path}/test_e2e \ - --inference_dir=${train_output_path}/inference \ --phones_dict=dump/phone_id_map.txt \ - --tones_dict=dump/tone_id_map.txt + --tones_dict=dump/tone_id_map.txt \ + --inference_dir=${train_output_path}/inference fi # for more GAN Vocoders @@ -44,9 +44,9 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --lang=zh \ --text=${BIN_DIR}/../sentences.txt \ --output_dir=${train_output_path}/test_e2e \ - --inference_dir=${train_output_path}/inference \ --phones_dict=dump/phone_id_map.txt \ - --tones_dict=dump/tone_id_map.txt + --tones_dict=dump/tone_id_map.txt \ + --inference_dir=${train_output_path}/inference fi # the pretrained models haven't release now @@ -88,9 +88,9 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then --lang=zh \ --text=${BIN_DIR}/../sentences.txt \ --output_dir=${train_output_path}/test_e2e \ - --inference_dir=${train_output_path}/inference \ --phones_dict=dump/phone_id_map.txt \ - --tones_dict=dump/tone_id_map.txt + --tones_dict=dump/tone_id_map.txt \ + --inference_dir=${train_output_path}/inference fi diff --git a/examples/csmsc/tts3/local/synthesize.sh b/examples/csmsc/tts3/local/synthesize.sh index 19767426..273dacd5 100755 --- a/examples/csmsc/tts3/local/synthesize.sh +++ b/examples/csmsc/tts3/local/synthesize.sh @@ -3,18 +3,96 @@ config_path=$1 train_output_path=$2 ckpt_name=$3 +stage=0 +stop_stage=0 -FLAGS_allocator_strategy=naive_best_fit \ -FLAGS_fraction_of_gpu_memory_to_use=0.01 \ -python3 ${BIN_DIR}/../synthesize.py \ - --am=fastspeech2_csmsc \ - --am_config=${config_path} \ - --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ - --am_stat=dump/train/speech_stats.npy \ - --voc=pwgan_csmsc \ - --voc_config=pwg_baker_ckpt_0.4/pwg_default.yaml \ - --voc_ckpt=pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \ - --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \ - --test_metadata=dump/test/norm/metadata.jsonl \ - 
--output_dir=${train_output_path}/test \ - --phones_dict=dump/phone_id_map.txt +if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then + FLAGS_allocator_strategy=naive_best_fit \ + FLAGS_fraction_of_gpu_memory_to_use=0.01 \ + python3 ${BIN_DIR}/../synthesize.py \ + --am=fastspeech2_csmsc \ + --am_config=${config_path} \ + --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ + --am_stat=dump/train/speech_stats.npy \ + --voc=pwgan_csmsc \ + --voc_config=pwg_baker_ckpt_0.4/pwg_default.yaml \ + --voc_ckpt=pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \ + --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \ + --test_metadata=dump/test/norm/metadata.jsonl \ + --output_dir=${train_output_path}/test \ + --phones_dict=dump/phone_id_map.txt +fi + +# for more GAN Vocoders +# multi band melgan +if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then + FLAGS_allocator_strategy=naive_best_fit \ + FLAGS_fraction_of_gpu_memory_to_use=0.01 \ + python3 ${BIN_DIR}/../synthesize.py \ + --am=fastspeech2_csmsc \ + --am_config=${config_path} \ + --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ + --am_stat=dump/train/speech_stats.npy \ + --voc=mb_melgan_csmsc \ + --voc_config=mb_melgan_csmsc_ckpt_0.1.1/default.yaml \ + --voc_ckpt=mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz\ + --voc_stat=mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \ + --test_metadata=dump/test/norm/metadata.jsonl \ + --output_dir=${train_output_path}/test \ + --phones_dict=dump/phone_id_map.txt +fi + +if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then + FLAGS_allocator_strategy=naive_best_fit \ + FLAGS_fraction_of_gpu_memory_to_use=0.01 \ + python3 ${BIN_DIR}/../synthesize.py \ + --am=fastspeech2_csmsc \ + --am_config=${config_path} \ + --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ + --am_stat=dump/train/speech_stats.npy \ + --voc=style_melgan_csmsc \ + --voc_config=style_melgan_csmsc_ckpt_0.1.1/default.yaml \ + --voc_ckpt=style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \ + --voc_stat=style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \ + --test_metadata=dump/test/norm/metadata.jsonl \ + --output_dir=${train_output_path}/test \ + --phones_dict=dump/phone_id_map.txt +fi + +# hifigan +if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then + echo "in hifigan syn" + FLAGS_allocator_strategy=naive_best_fit \ + FLAGS_fraction_of_gpu_memory_to_use=0.01 \ + python3 ${BIN_DIR}/../synthesize.py \ + --am=fastspeech2_csmsc \ + --am_config=${config_path} \ + --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ + --am_stat=dump/train/speech_stats.npy \ + --voc=hifigan_csmsc \ + --voc_config=hifigan_csmsc_ckpt_0.1.1/default.yaml \ + --voc_ckpt=hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \ + --voc_stat=hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \ + --test_metadata=dump/test/norm/metadata.jsonl \ + --output_dir=${train_output_path}/test \ + --phones_dict=dump/phone_id_map.txt +fi + +# wavernn +if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then + echo "in wavernn syn" + FLAGS_allocator_strategy=naive_best_fit \ + FLAGS_fraction_of_gpu_memory_to_use=0.01 \ + python3 ${BIN_DIR}/../synthesize.py \ + --am=fastspeech2_csmsc \ + --am_config=${config_path} \ + --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ + --am_stat=dump/train/speech_stats.npy \ + --voc=wavernn_csmsc \ + --voc_config=wavernn_csmsc_ckpt_0.2.0/default.yaml \ + --voc_ckpt=wavernn_csmsc_ckpt_0.2.0/snapshot_iter_400000.pdz \ + --voc_stat=wavernn_csmsc_ckpt_0.2.0/feats_stats.npy \ + --test_metadata=dump/test/norm/metadata.jsonl \ + 
--output_dir=${train_output_path}/test \ + --phones_dict=dump/phone_id_map.txt +fi diff --git a/examples/csmsc/tts3/local/synthesize_e2e.sh b/examples/csmsc/tts3/local/synthesize_e2e.sh index 44356e4b..9e25c072 100755 --- a/examples/csmsc/tts3/local/synthesize_e2e.sh +++ b/examples/csmsc/tts3/local/synthesize_e2e.sh @@ -22,8 +22,8 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --lang=zh \ --text=${BIN_DIR}/../sentences.txt \ --output_dir=${train_output_path}/test_e2e \ - --inference_dir=${train_output_path}/inference \ - --phones_dict=dump/phone_id_map.txt + --phones_dict=dump/phone_id_map.txt \ + --inference_dir=${train_output_path}/inference fi # for more GAN Vocoders @@ -43,8 +43,8 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --lang=zh \ --text=${BIN_DIR}/../sentences.txt \ --output_dir=${train_output_path}/test_e2e \ - --inference_dir=${train_output_path}/inference \ - --phones_dict=dump/phone_id_map.txt + --phones_dict=dump/phone_id_map.txt \ + --inference_dir=${train_output_path}/inference fi # the pretrained models haven't release now @@ -86,8 +86,8 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then --lang=zh \ --text=${BIN_DIR}/../sentences.txt \ --output_dir=${train_output_path}/test_e2e \ - --inference_dir=${train_output_path}/inference \ - --phones_dict=dump/phone_id_map.txt + --phones_dict=dump/phone_id_map.txt \ + --inference_dir=${train_output_path}/inference fi diff --git a/paddlespeech/t2s/exps/synthesize.py b/paddlespeech/t2s/exps/synthesize.py index 1c42a87c..81da14f2 100644 --- a/paddlespeech/t2s/exps/synthesize.py +++ b/paddlespeech/t2s/exps/synthesize.py @@ -20,6 +20,7 @@ import numpy as np import paddle import soundfile as sf import yaml +from timer import timer from yacs.config import CfgNode from paddlespeech.s2t.utils.dynamic_import import dynamic_import @@ -50,6 +51,18 @@ model_alias = { "paddlespeech.t2s.models.melgan:MelGANGenerator", "mb_melgan_inference": "paddlespeech.t2s.models.melgan:MelGANInference", + "style_melgan": + "paddlespeech.t2s.models.melgan:StyleMelGANGenerator", + "style_melgan_inference": + "paddlespeech.t2s.models.melgan:StyleMelGANInference", + "hifigan": + "paddlespeech.t2s.models.hifigan:HiFiGANGenerator", + "hifigan_inference": + "paddlespeech.t2s.models.hifigan:HiFiGANInference", + "wavernn": + "paddlespeech.t2s.models.wavernn:WaveRNN", + "wavernn_inference": + "paddlespeech.t2s.models.wavernn:WaveRNNInference", } @@ -146,10 +159,15 @@ def evaluate(args): voc_name = args.voc[:args.voc.rindex('_')] voc_class = dynamic_import(voc_name, model_alias) voc_inference_class = dynamic_import(voc_name + '_inference', model_alias) - voc = voc_class(**voc_config["generator_params"]) - voc.set_state_dict(paddle.load(args.voc_ckpt)["generator_params"]) - voc.remove_weight_norm() - voc.eval() + if voc_name != 'wavernn': + voc = voc_class(**voc_config["generator_params"]) + voc.set_state_dict(paddle.load(args.voc_ckpt)["generator_params"]) + voc.remove_weight_norm() + voc.eval() + else: + voc = voc_class(**voc_config["model"]) + voc.set_state_dict(paddle.load(args.voc_ckpt)["main_params"]) + voc.eval() voc_mu, voc_std = np.load(args.voc_stat) voc_mu = paddle.to_tensor(voc_mu) voc_std = paddle.to_tensor(voc_std) @@ -162,38 +180,51 @@ def evaluate(args): output_dir = Path(args.output_dir) output_dir.mkdir(parents=True, exist_ok=True) + N = 0 + T = 0 + for datum in test_dataset: utt_id = datum["utt_id"] - with paddle.no_grad(): - # acoustic model - if am_name == 'fastspeech2': - phone_ids = paddle.to_tensor(datum["text"]) - 
spk_emb = None - spk_id = None - # multi speaker - if args.voice_cloning and "spk_emb" in datum: - spk_emb = paddle.to_tensor(np.load(datum["spk_emb"])) - elif "spk_id" in datum: - spk_id = paddle.to_tensor(datum["spk_id"]) - mel = am_inference(phone_ids, spk_id=spk_id, spk_emb=spk_emb) - elif am_name == 'speedyspeech': - phone_ids = paddle.to_tensor(datum["phones"]) - tone_ids = paddle.to_tensor(datum["tones"]) - mel = am_inference(phone_ids, tone_ids) - elif am_name == 'tacotron2': - phone_ids = paddle.to_tensor(datum["text"]) - spk_emb = None - # multi speaker - if args.voice_cloning and "spk_emb" in datum: - spk_emb = paddle.to_tensor(np.load(datum["spk_emb"])) - mel = am_inference(phone_ids, spk_emb=spk_emb) + with timer() as t: + with paddle.no_grad(): + # acoustic model + if am_name == 'fastspeech2': + phone_ids = paddle.to_tensor(datum["text"]) + spk_emb = None + spk_id = None + # multi speaker + if args.voice_cloning and "spk_emb" in datum: + spk_emb = paddle.to_tensor(np.load(datum["spk_emb"])) + elif "spk_id" in datum: + spk_id = paddle.to_tensor(datum["spk_id"]) + mel = am_inference( + phone_ids, spk_id=spk_id, spk_emb=spk_emb) + elif am_name == 'speedyspeech': + phone_ids = paddle.to_tensor(datum["phones"]) + tone_ids = paddle.to_tensor(datum["tones"]) + mel = am_inference(phone_ids, tone_ids) + elif am_name == 'tacotron2': + phone_ids = paddle.to_tensor(datum["text"]) + spk_emb = None + # multi speaker + if args.voice_cloning and "spk_emb" in datum: + spk_emb = paddle.to_tensor(np.load(datum["spk_emb"])) + mel = am_inference(phone_ids, spk_emb=spk_emb) # vocoder wav = voc_inference(mel) + + wav = wav.numpy() + N += wav.size + T += t.elapse + speed = wav.size / t.elapse + rtf = am_config.fs / speed + print( + f"{utt_id}, mel: {mel.shape}, wave: {wav.size}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}." 
+ ) sf.write( - str(output_dir / (utt_id + ".wav")), - wav.numpy(), - samplerate=am_config.fs) + str(output_dir / (utt_id + ".wav")), wav, samplerate=am_config.fs) print(f"{utt_id} done!") + print(f"generation speed: {N / T}Hz, RTF: {am_config.fs / (N / T) }") def main(): @@ -246,7 +277,8 @@ def main(): default='pwgan_csmsc', choices=[ 'pwgan_csmsc', 'pwgan_ljspeech', 'pwgan_aishell3', 'pwgan_vctk', - 'mb_melgan_csmsc' + 'mb_melgan_csmsc', 'wavernn_csmsc', 'hifigan_csmsc', + 'style_melgan_csmsc' ], help='Choose vocoder type of tts task.') diff --git a/paddlespeech/t2s/exps/synthesize_e2e.py b/paddlespeech/t2s/exps/synthesize_e2e.py index 75c631b8..be78b953 100644 --- a/paddlespeech/t2s/exps/synthesize_e2e.py +++ b/paddlespeech/t2s/exps/synthesize_e2e.py @@ -21,6 +21,7 @@ import soundfile as sf import yaml from paddle import jit from paddle.static import InputSpec +from timer import timer from yacs.config import CfgNode from paddlespeech.s2t.utils.dynamic_import import dynamic_import @@ -233,59 +234,68 @@ def evaluate(args): # but still not stopping in the end (NOTE by yuantian01 Feb 9 2022) if am_name == 'tacotron2': merge_sentences = True - + N = 0 + T = 0 for utt_id, sentence in sentences: - get_tone_ids = False - if am_name == 'speedyspeech': - get_tone_ids = True - if args.lang == 'zh': - input_ids = frontend.get_input_ids( - sentence, - merge_sentences=merge_sentences, - get_tone_ids=get_tone_ids) - phone_ids = input_ids["phone_ids"] - if get_tone_ids: - tone_ids = input_ids["tone_ids"] - elif args.lang == 'en': - input_ids = frontend.get_input_ids( - sentence, merge_sentences=merge_sentences) - phone_ids = input_ids["phone_ids"] - else: - print("lang should in {'zh', 'en'}!") - with paddle.no_grad(): - flags = 0 - for i in range(len(phone_ids)): - part_phone_ids = phone_ids[i] - # acoustic model - if am_name == 'fastspeech2': - # multi speaker - if am_dataset in {"aishell3", "vctk"}: - spk_id = paddle.to_tensor(args.spk_id) - mel = am_inference(part_phone_ids, spk_id) - else: + with timer() as t: + get_tone_ids = False + if am_name == 'speedyspeech': + get_tone_ids = True + if args.lang == 'zh': + input_ids = frontend.get_input_ids( + sentence, + merge_sentences=merge_sentences, + get_tone_ids=get_tone_ids) + phone_ids = input_ids["phone_ids"] + if get_tone_ids: + tone_ids = input_ids["tone_ids"] + elif args.lang == 'en': + input_ids = frontend.get_input_ids( + sentence, merge_sentences=merge_sentences) + phone_ids = input_ids["phone_ids"] + else: + print("lang should in {'zh', 'en'}!") + with paddle.no_grad(): + flags = 0 + for i in range(len(phone_ids)): + part_phone_ids = phone_ids[i] + # acoustic model + if am_name == 'fastspeech2': + # multi speaker + if am_dataset in {"aishell3", "vctk"}: + spk_id = paddle.to_tensor(args.spk_id) + mel = am_inference(part_phone_ids, spk_id) + else: + mel = am_inference(part_phone_ids) + elif am_name == 'speedyspeech': + part_tone_ids = tone_ids[i] + if am_dataset in {"aishell3", "vctk"}: + spk_id = paddle.to_tensor(args.spk_id) + mel = am_inference(part_phone_ids, part_tone_ids, + spk_id) + else: + mel = am_inference(part_phone_ids, part_tone_ids) + elif am_name == 'tacotron2': mel = am_inference(part_phone_ids) - elif am_name == 'speedyspeech': - part_tone_ids = tone_ids[i] - if am_dataset in {"aishell3", "vctk"}: - spk_id = paddle.to_tensor(args.spk_id) - mel = am_inference(part_phone_ids, part_tone_ids, - spk_id) + # vocoder + wav = voc_inference(mel) + if flags == 0: + wav_all = wav + flags = 1 else: - mel = am_inference(part_phone_ids, 
part_tone_ids) - elif am_name == 'tacotron2': - mel = am_inference(part_phone_ids) - # vocoder - wav = voc_inference(mel) - if flags == 0: - wav_all = wav - flags = 1 - else: - wav_all = paddle.concat([wav_all, wav]) + wav_all = paddle.concat([wav_all, wav]) + wav = wav_all.numpy() + N += wav.size + T += t.elapse + speed = wav.size / t.elapse + rtf = am_config.fs / speed + print( + f"{utt_id}, mel: {mel.shape}, wave: {wav.shape}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}." + ) sf.write( - str(output_dir / (utt_id + ".wav")), - wav_all.numpy(), - samplerate=am_config.fs) + str(output_dir / (utt_id + ".wav")), wav, samplerate=am_config.fs) print(f"{utt_id} done!") + print(f"generation speed: {N / T}Hz, RTF: {am_config.fs / (N / T) }") def main(): diff --git a/paddlespeech/t2s/exps/wavernn/synthesize.py b/paddlespeech/t2s/exps/wavernn/synthesize.py index 4357b282..d23e9cb7 100644 --- a/paddlespeech/t2s/exps/wavernn/synthesize.py +++ b/paddlespeech/t2s/exps/wavernn/synthesize.py @@ -91,7 +91,7 @@ def main(): target=config.inference.target, overlap=config.inference.overlap, mu_law=config.mu_law, - gen_display=True) + gen_display=False) wav = wav.numpy() N += wav.size T += t.elapse diff --git a/paddlespeech/t2s/models/melgan/melgan.py b/paddlespeech/t2s/models/melgan/melgan.py index 6a139659..22d8fd9e 100644 --- a/paddlespeech/t2s/models/melgan/melgan.py +++ b/paddlespeech/t2s/models/melgan/melgan.py @@ -66,7 +66,7 @@ class MelGANGenerator(nn.Layer): nonlinear_activation_params (Dict[str, Any], optional): Parameters passed to the linear activation in the upsample network, by default {} pad (str): Padding function module name before dilated convolution layer. - pad_params (dict): Hyperparameters for padding function. + pad_params (dict): Hyperparameters for padding function. use_final_nonlinear_activation (nn.Layer): Activation function for the final layer. use_weight_norm (bool): Whether to use weight norm. If set to true, it will be applied to all of the conv layers. 
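
The RTF figures printed by the patched `synthesize.py` come from two counters: total generated samples `N` and total wall-clock time `T`. A minimal, self-contained sketch of the same arithmetic — the 24 kHz sample rate and the 0.5 s elapsed time below are illustrative assumptions, not values from the patch:

```python
# Sketch of the RTF bookkeeping patched into synthesize.py above.
# fs=24000 and elapsed=0.5 are illustrative assumptions.
import numpy as np

def rtf_stats(wav: np.ndarray, elapsed: float, fs: int = 24000):
    speed = wav.size / elapsed  # generated samples per wall-clock second
    rtf = fs / speed            # equivalently: elapsed time / audio duration
    return speed, rtf

wav = np.zeros(3 * 24000, dtype=np.float32)  # stands in for a 3 s utterance
speed, rtf = rtf_stats(wav, elapsed=0.5)
print(f"Hz: {speed:.0f}, RTF: {rtf:.3f}")    # RTF 0.167: 6x faster than real time
```

An RTF below 1.0 means synthesis runs faster than real time; the corpus-level figure printed at the end of the patched loop is the same ratio computed from the accumulated `N` and `T`.
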
diff --git a/paddlespeech/t2s/models/wavernn/wavernn.py b/paddlespeech/t2s/models/wavernn/wavernn.py index 1320ffa3..95907043 100644 --- a/paddlespeech/t2s/models/wavernn/wavernn.py +++ b/paddlespeech/t2s/models/wavernn/wavernn.py @@ -509,16 +509,20 @@ class WaveRNN(nn.Layer): total_len = num_folds * (target + overlap) + overlap # Need some silence for the run warmup - slience_len = overlap // 2 + slience_len = 0 + linear_len = slience_len fade_len = overlap - slience_len slience = paddle.zeros([slience_len], dtype=paddle.float32) - linear = paddle.ones([fade_len], dtype=paddle.float32) + linear = paddle.ones([linear_len], dtype=paddle.float32) # Equal power crossfade # fade_in increase from 0 to 1, fade_out reduces from 1 to 0 - t = paddle.linspace(-1, 1, fade_len, dtype=paddle.float32) - fade_in = paddle.sqrt(0.5 * (1 + t)) - fade_out = paddle.sqrt(0.5 * (1 - t)) + sigmoid_scale = 2.3 + t = paddle.linspace( + -sigmoid_scale, sigmoid_scale, fade_len, dtype=paddle.float32) + # sigmoid 曲线应该更好 + fade_in = paddle.nn.functional.sigmoid(t) + fade_out = 1 - paddle.nn.functional.sigmoid(t) # Concat the silence to the fades fade_out = paddle.concat([linear, fade_out]) fade_in = paddle.concat([slience, fade_in]) From 641984ae30f52928258a33af29d8a6345134da72 Mon Sep 17 00:00:00 2001 From: TianYuan Date: Tue, 1 Mar 2022 09:51:05 +0000 Subject: [PATCH 23/45] add code annotation, test=tts --- examples/csmsc/tts0/local/synthesize.sh | 2 ++ examples/csmsc/tts0/local/synthesize_e2e.sh | 1 + examples/csmsc/tts2/local/synthesize.sh | 2 ++ examples/csmsc/tts2/local/synthesize_e2e.sh | 2 +- examples/csmsc/tts3/local/synthesize.sh | 2 ++ examples/csmsc/tts3/local/synthesize_e2e.sh | 1 + 6 files changed, 9 insertions(+), 1 deletion(-) diff --git a/examples/csmsc/tts0/local/synthesize.sh b/examples/csmsc/tts0/local/synthesize.sh index bfb4844b..5b8ed15e 100755 --- a/examples/csmsc/tts0/local/synthesize.sh +++ b/examples/csmsc/tts0/local/synthesize.sh @@ -6,6 +6,7 @@ ckpt_name=$3 stage=0 stop_stage=0 +# pwgan if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then FLAGS_allocator_strategy=naive_best_fit \ FLAGS_fraction_of_gpu_memory_to_use=0.01 \ @@ -42,6 +43,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --phones_dict=dump/phone_id_map.txt fi +# style melgan if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then FLAGS_allocator_strategy=naive_best_fit \ FLAGS_fraction_of_gpu_memory_to_use=0.01 \ diff --git a/examples/csmsc/tts0/local/synthesize_e2e.sh b/examples/csmsc/tts0/local/synthesize_e2e.sh index 4c73a18d..f7675873 100755 --- a/examples/csmsc/tts0/local/synthesize_e2e.sh +++ b/examples/csmsc/tts0/local/synthesize_e2e.sh @@ -8,6 +8,7 @@ stage=0 stop_stage=0 # TODO: tacotron2 动转静的结果没有静态图的响亮, 可能还是 decode 的时候某个函数动静不对齐 +# pwgan if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then FLAGS_allocator_strategy=naive_best_fit \ FLAGS_fraction_of_gpu_memory_to_use=0.01 \ diff --git a/examples/csmsc/tts2/local/synthesize.sh b/examples/csmsc/tts2/local/synthesize.sh index 07cf156e..37b29818 100755 --- a/examples/csmsc/tts2/local/synthesize.sh +++ b/examples/csmsc/tts2/local/synthesize.sh @@ -6,6 +6,7 @@ ckpt_name=$3 stage=0 stop_stage=0 +# pwgan if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then FLAGS_allocator_strategy=naive_best_fit \ FLAGS_fraction_of_gpu_memory_to_use=0.01 \ @@ -44,6 +45,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --tones_dict=dump/tone_id_map.txt fi +# style melgan if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then FLAGS_allocator_strategy=naive_best_fit \ 
FLAGS_fraction_of_gpu_memory_to_use=0.01 \ diff --git a/examples/csmsc/tts2/local/synthesize_e2e.sh b/examples/csmsc/tts2/local/synthesize_e2e.sh index d5862a61..553b4554 100755 --- a/examples/csmsc/tts2/local/synthesize_e2e.sh +++ b/examples/csmsc/tts2/local/synthesize_e2e.sh @@ -7,6 +7,7 @@ ckpt_name=$3 stage=0 stop_stage=0 +# pwgan if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then FLAGS_allocator_strategy=naive_best_fit \ FLAGS_fraction_of_gpu_memory_to_use=0.01 \ @@ -93,7 +94,6 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then --inference_dir=${train_output_path}/inference fi - # wavernn if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then echo "in wavernn syn_e2e" diff --git a/examples/csmsc/tts3/local/synthesize.sh b/examples/csmsc/tts3/local/synthesize.sh index 273dacd5..043bb52f 100755 --- a/examples/csmsc/tts3/local/synthesize.sh +++ b/examples/csmsc/tts3/local/synthesize.sh @@ -6,6 +6,7 @@ ckpt_name=$3 stage=0 stop_stage=0 +# pwgan if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then FLAGS_allocator_strategy=naive_best_fit \ FLAGS_fraction_of_gpu_memory_to_use=0.01 \ @@ -42,6 +43,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --phones_dict=dump/phone_id_map.txt fi +# style melgan if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then FLAGS_allocator_strategy=naive_best_fit \ FLAGS_fraction_of_gpu_memory_to_use=0.01 \ diff --git a/examples/csmsc/tts3/local/synthesize_e2e.sh b/examples/csmsc/tts3/local/synthesize_e2e.sh index 9e25c072..512e062b 100755 --- a/examples/csmsc/tts3/local/synthesize_e2e.sh +++ b/examples/csmsc/tts3/local/synthesize_e2e.sh @@ -7,6 +7,7 @@ ckpt_name=$3 stage=0 stop_stage=0 +# pwgan if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then FLAGS_allocator_strategy=naive_best_fit \ FLAGS_fraction_of_gpu_memory_to_use=0.01 \ From ab0448873815158b9bf28f9f3e200007afe70c4c Mon Sep 17 00:00:00 2001 From: lym0302 Date: Tue, 1 Mar 2022 21:12:37 +0800 Subject: [PATCH 24/45] update server cli, test=doc --- demos/speech_server/README.md | 59 +++------ demos/speech_server/README_cn.md | 62 +++------ demos/speech_server/conf/application.yaml | 18 +-- demos/speech_server/conf/asr/asr.yaml | 2 +- demos/speech_server/conf/asr/asr_pd.yaml | 5 +- demos/speech_server/conf/tts/tts.yaml | 2 +- demos/speech_server/conf/tts/tts_pd.yaml | 14 +- paddlespeech/cli/__init__.py | 2 +- paddlespeech/cli/tts/infer.py | 9 ++ .../server/bin/paddlespeech_client.py | 1 - paddlespeech/server/conf/application.yaml | 2 +- paddlespeech/server/conf/asr/asr.yaml | 2 +- paddlespeech/server/conf/asr/asr_pd.yaml | 5 +- paddlespeech/server/conf/tts/tts.yaml | 2 +- paddlespeech/server/conf/tts/tts_pd.yaml | 18 +-- .../engine/asr/paddleinference/asr_engine.py | 7 +- .../server/engine/asr/python/asr_engine.py | 24 +++- .../engine/tts/paddleinference/tts_engine.py | 121 +++++++++++++----- .../server/engine/tts/python/tts_engine.py | 87 ++++++++++--- paddlespeech/server/restful/tts_api.py | 5 + paddlespeech/server/utils/paddle_predictor.py | 26 +++- 21 files changed, 294 insertions(+), 179 deletions(-) diff --git a/demos/speech_server/README.md b/demos/speech_server/README.md index 39007f6c..ac5cc4b0 100644 --- a/demos/speech_server/README.md +++ b/demos/speech_server/README.md @@ -15,6 +15,17 @@ You can choose one way from easy, meduim and hard to install paddlespeech. ### 2. Prepare config File The configuration file contains the service-related configuration files and the model configuration related to the voice tasks contained in the service. 
 They are all under the `conf` folder.
+**Note: The configuration of `engine_backend` in `application.yaml` represents all speech tasks included in the started service.**
+If the service you want to start should include only certain speech tasks, comment out the tasks that are not needed. For example, if you only want to use the speech recognition (ASR) service, you can comment out the speech synthesis (TTS) service, as in the following example:
+```bash
+engine_backend:
+    asr: 'conf/asr/asr.yaml'
+    #tts: 'conf/tts/tts.yaml'
+```
+
+**Note: The configuration file of `engine_backend` in `application.yaml` needs to match the configuration type of `engine_type`.**
+When the configuration file of `engine_backend` is `XXX.yaml`, the configuration type of `engine_type` needs to be set to `python`; when the configuration file of `engine_backend` is `XXX_pd.yaml`, the configuration type of `engine_type` needs to be set to `inference`.
+
 The input of ASR client demo should be a WAV file (`.wav`), and the sample rate must be the same as the model.
 Here are sample files for this ASR client demo that can be downloaded:
@@ -76,6 +87,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee
 ### 4. ASR Client Usage
+**Note:** The response time will be slightly longer when using the client for the first time.
 - Command Line (Recommended)
   ```
   paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input ./zh.wav
@@ -122,6 +134,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee
   ```
 ### 5. TTS Client Usage
+**Note:** The response time will be slightly longer when using the client for the first time.
 - Command Line (Recommended)
   ```bash
   paddlespeech_client tts --server_ip 127.0.0.1 --port 8090 --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.wav
   ```
@@ -147,8 +160,6 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee
   [2022-02-23 15:20:37,875] [ INFO] - Save synthesized audio successfully on output.wav.
   [2022-02-23 15:20:37,875] [ INFO] - Audio duration: 3.612500 s.
   [2022-02-23 15:20:37,875] [ INFO] - Response time: 0.348050 s.
-  [2022-02-23 15:20:37,875] [ INFO] - RTF: 0.096346
-
   ```
@@ -174,51 +185,13 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee
   Save synthesized audio successfully on ./output.wav.
   Audio duration: 3.612500 s.
   Response time: 0.388317 s.
-  RTF: 0.107493
   ```
-## Pretrained Models
+## Models supported by the service
 ### ASR model
-Here is a list of [ASR pretrained models](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/speech_recognition/README.md#4pretrained-models) released by PaddleSpeech, both command line and python interfaces are available:
-
-| Model | Language | Sample Rate
-| :--- | :---: | :---: |
-| conformer_wenetspeech| zh| 16000
-| transformer_librispeech| en| 16000
+Get all models supported by the ASR service via `paddlespeech_server stats --task asr`, where the static models can be used for Paddle Inference.
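
Besides the CLI shown here, the same tables can be printed from Python through the `ServerStatsExecutor` exported from `paddlespeech.server.bin` earlier in this series. A minimal sketch, assuming only what that patch added:

```python
# Sketch: print the supported-model tables without going through the CLI.
# Relies on the ServerStatsExecutor introduced in patch 21 of this series.
from paddlespeech.server.bin import ServerStatsExecutor

stats = ServerStatsExecutor()
stats(task='asr')  # prints the Model-Language-Sample Rate tables for ASR
stats(task='tts')  # prints the Model-Language tables for TTS
```
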
### TTS model -Here is a list of [TTS pretrained models](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/text_to_speech/README.md#4-pretrained-models) released by PaddleSpeech, both command line and python interfaces are available: - -- Acoustic model - | Model | Language - | :--- | :---: | - | speedyspeech_csmsc| zh - | fastspeech2_csmsc| zh - | fastspeech2_aishell3| zh - | fastspeech2_ljspeech| en - | fastspeech2_vctk| en - -- Vocoder - | Model | Language - | :--- | :---: | - | pwgan_csmsc| zh - | pwgan_aishell3| zh - | pwgan_ljspeech| en - | pwgan_vctk| en - | mb_melgan_csmsc| zh - -Here is a list of **TTS pretrained static models** released by PaddleSpeech, both command line and python interfaces are available: -- Acoustic model - | Model | Language - | :--- | :---: | - | speedyspeech_csmsc| zh - | fastspeech2_csmsc| zh - -- Vocoder - | Model | Language - | :--- | :---: | - | pwgan_csmsc| zh - | mb_melgan_csmsc| zh - | hifigan_csmsc| zh +Get all models supported by the TTS service via `paddlespeech_server stats --task tts`, where static models can be used for paddle inference inference. diff --git a/demos/speech_server/README_cn.md b/demos/speech_server/README_cn.md index f5666070..f202a30c 100644 --- a/demos/speech_server/README_cn.md +++ b/demos/speech_server/README_cn.md @@ -14,6 +14,15 @@ ### 2. 准备配置文件 配置文件包含服务相关的配置文件和服务中包含的语音任务相关的模型配置。 它们都在 `conf` 文件夹下。 +**注意:`application.yaml` 中 `engine_backend` 的配置表示启动的服务中包含的所有语音任务。** +如果你想启动的服务中只包含某项语音任务,那么你需要注释掉不需要包含的语音任务。例如你只想使用语音识别(ASR)服务,那么你可以将语音合成(TTS)服务注释掉,如下示例: +```bash +engine_backend: + asr: 'conf/asr/asr.yaml' + #tts: 'conf/tts/tts.yaml' +``` +**注意:`application.yaml` 中 `engine_backend` 的配置文件需要和 `engine_type` 的配置类型匹配。** +当`engine_backend` 的配置文件为`XXX.yaml`时,需要设置`engine_type`的配置类型为`python`;当`engine_backend` 的配置文件为`XXX_pd.yaml`时,需要设置`engine_type`的配置类型为`inference`; 这个 ASR client 的输入应该是一个 WAV 文件(`.wav`),并且采样率必须与模型的采样率相同。 @@ -75,6 +84,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee ``` ### 4. ASR客户端使用方法 +**注意:**初次使用客户端时响应时间会略长 - 命令行 (推荐使用) ``` paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input ./zh.wav @@ -123,6 +133,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee ``` ### 5. TTS客户端使用方法 +**注意:**初次使用客户端时响应时间会略长 ```bash paddlespeech_client tts --server_ip 127.0.0.1 --port 8090 --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.wav ``` @@ -148,7 +159,6 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee [2022-02-23 15:20:37,875] [ INFO] - Save synthesized audio successfully on output.wav. [2022-02-23 15:20:37,875] [ INFO] - Audio duration: 3.612500 s. [2022-02-23 15:20:37,875] [ INFO] - Response time: 0.348050 s. - [2022-02-23 15:20:37,875] [ INFO] - RTF: 0.096346 ``` - Python API @@ -173,50 +183,12 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee Save synthesized audio successfully on ./output.wav. Audio duration: 3.612500 s. Response time: 0.388317 s. 
- RTF: 0.107493 ``` -## Pretrained Models -### ASR model -下面是PaddleSpeech发布的[ASR预训练模型](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/speech_recognition/README.md#4pretrained-models)列表,命令行和python接口均可用: - -| Model | Language | Sample Rate -| :--- | :---: | :---: | -| conformer_wenetspeech| zh| 16000 -| transformer_librispeech| en| 16000 - -### TTS model -下面是PaddleSpeech发布的 [TTS预训练模型](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/text_to_speech/README.md#4-pretrained-models) 列表,命令行和python接口均可用: - -- Acoustic model - | Model | Language - | :--- | :---: | - | speedyspeech_csmsc| zh - | fastspeech2_csmsc| zh - | fastspeech2_aishell3| zh - | fastspeech2_ljspeech| en - | fastspeech2_vctk| en - -- Vocoder - | Model | Language - | :--- | :---: | - | pwgan_csmsc| zh - | pwgan_aishell3| zh - | pwgan_ljspeech| en - | pwgan_vctk| en - | mb_melgan_csmsc| zh - -下面是PaddleSpeech发布的 **TTS预训练静态模型** 列表,命令行和python接口均可用: -- Acoustic model - | Model | Language - | :--- | :---: | - | speedyspeech_csmsc| zh - | fastspeech2_csmsc| zh - -- Vocoder - | Model | Language - | :--- | :---: | - | pwgan_csmsc| zh - | mb_melgan_csmsc| zh - | hifigan_csmsc| zh +## 服务支持的模型 +### ASR支持的模型 +通过 `paddlespeech_server stats --task asr` 获取ASR服务支持的所有模型,其中静态模型可用于 paddle inference 推理。 + +### TTS支持的模型 +通过 `paddlespeech_server stats --task tts` 获取TTS服务支持的所有模型,其中静态模型可用于 paddle inference 推理。 diff --git a/demos/speech_server/conf/application.yaml b/demos/speech_server/conf/application.yaml index fd4f5f37..6dcae74a 100644 --- a/demos/speech_server/conf/application.yaml +++ b/demos/speech_server/conf/application.yaml @@ -3,23 +3,25 @@ ################################################################## # SERVER SETTING # ################################################################## -host: '0.0.0.0' +host: '127.0.0.1' port: 8090 ################################################################## # CONFIG FILE # ################################################################## +# add engine backend type (Options: asr, tts) and config file here. +# Adding a speech task to engine_backend means starting the service. +engine_backend: + asr: 'conf/asr/asr.yaml' + tts: 'conf/tts/tts.yaml' + # The engine_type of speech task needs to keep the same type as the config file of speech task. # E.g: The engine_type of asr is 'python', the engine_backend of asr is 'XX/asr.yaml' # E.g: The engine_type of asr is 'inference', the engine_backend of asr is 'XX/asr_pd.yaml' # # add engine type (Options: python, inference) engine_type: - asr: 'inference' - tts: 'inference' + asr: 'python' + tts: 'python' + -# add engine backend type (Options: asr, tts) and config file here. -# Adding a speech task to engine_backend means starting the service. 
-engine_backend: - asr: 'conf/asr/asr_pd.yaml' - tts: 'conf/tts/tts_pd.yaml' diff --git a/demos/speech_server/conf/asr/asr.yaml b/demos/speech_server/conf/asr/asr.yaml index 1a805142..a6743b77 100644 --- a/demos/speech_server/conf/asr/asr.yaml +++ b/demos/speech_server/conf/asr/asr.yaml @@ -5,4 +5,4 @@ cfg_path: # [optional] ckpt_path: # [optional] decode_method: 'attention_rescoring' force_yes: True -device: 'cpu' # set 'gpu:id' or 'cpu' +device: # set 'gpu:id' or 'cpu' diff --git a/demos/speech_server/conf/asr/asr_pd.yaml b/demos/speech_server/conf/asr/asr_pd.yaml index 6cddb450..4c415ac7 100644 --- a/demos/speech_server/conf/asr/asr_pd.yaml +++ b/demos/speech_server/conf/asr/asr_pd.yaml @@ -15,9 +15,10 @@ decode_method: force_yes: True am_predictor_conf: - device: 'cpu' # set 'gpu:id' or 'cpu' - enable_mkldnn: True + device: # set 'gpu:id' or 'cpu' switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config ################################################################## diff --git a/demos/speech_server/conf/tts/tts.yaml b/demos/speech_server/conf/tts/tts.yaml index 19e8874e..19207f0b 100644 --- a/demos/speech_server/conf/tts/tts.yaml +++ b/demos/speech_server/conf/tts/tts.yaml @@ -29,4 +29,4 @@ voc_stat: # OTHERS # ################################################################## lang: 'zh' -device: 'cpu' # set 'gpu:id' or 'cpu' +device: # set 'gpu:id' or 'cpu' diff --git a/demos/speech_server/conf/tts/tts_pd.yaml b/demos/speech_server/conf/tts/tts_pd.yaml index 97df5261..e27b9665 100644 --- a/demos/speech_server/conf/tts/tts_pd.yaml +++ b/demos/speech_server/conf/tts/tts_pd.yaml @@ -15,9 +15,10 @@ speaker_dict: spk_id: 0 am_predictor_conf: - device: 'cpu' # set 'gpu:id' or 'cpu' - enable_mkldnn: False - switch_ir_optim: False + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config ################################################################## @@ -30,9 +31,10 @@ voc_params: # the pdiparams file of your vocoder static model (XX.pdipparams) voc_sample_rate: 24000 voc_predictor_conf: - device: 'cpu' # set 'gpu:id' or 'cpu' - enable_mkldnn: False - switch_ir_optim: False + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config ################################################################## # OTHERS # diff --git a/paddlespeech/cli/__init__.py b/paddlespeech/cli/__init__.py index 12ff9919..b526a384 100644 --- a/paddlespeech/cli/__init__.py +++ b/paddlespeech/cli/__init__.py @@ -18,8 +18,8 @@ from .base_commands import BaseCommand from .base_commands import HelpCommand from .cls import CLSExecutor from .st import STExecutor +from .stats import StatsExecutor from .text import TextExecutor from .tts import TTSExecutor -from .stats import StatsExecutor _locale._getdefaultlocale = (lambda *args: ['en_US', 'utf8']) diff --git a/paddlespeech/cli/tts/infer.py b/paddlespeech/cli/tts/infer.py index ba15d652..8423dfa8 100644 --- a/paddlespeech/cli/tts/infer.py +++ b/paddlespeech/cli/tts/infer.py @@ -13,6 +13,7 @@ # limitations under the License. 
import argparse import os +import time from collections import OrderedDict from typing import Any from typing import List @@ -621,6 +622,7 @@ class TTSExecutor(BaseExecutor): am_dataset = am[am.rindex('_') + 1:] get_tone_ids = False merge_sentences = False + frontend_st = time.time() if am_name == 'speedyspeech': get_tone_ids = True if lang == 'zh': @@ -637,9 +639,13 @@ class TTSExecutor(BaseExecutor): phone_ids = input_ids["phone_ids"] else: print("lang should in {'zh', 'en'}!") + self.frontend_time = time.time() - frontend_st + self.am_time = 0 + self.voc_time = 0 flags = 0 for i in range(len(phone_ids)): + am_st = time.time() part_phone_ids = phone_ids[i] # am if am_name == 'speedyspeech': @@ -653,13 +659,16 @@ class TTSExecutor(BaseExecutor): part_phone_ids, spk_id=paddle.to_tensor(spk_id)) else: mel = self.am_inference(part_phone_ids) + self.am_time += (time.time() - am_st) # voc + voc_st = time.time() wav = self.voc_inference(mel) if flags == 0: wav_all = wav flags = 1 else: wav_all = paddle.concat([wav_all, wav]) + self.voc_time += (time.time() - voc_st) self._outputs['wav'] = wav_all def postprocess(self, output: str='output.wav') -> Union[str, os.PathLike]: diff --git a/paddlespeech/server/bin/paddlespeech_client.py b/paddlespeech/server/bin/paddlespeech_client.py index 853d272f..ee6ab7ad 100644 --- a/paddlespeech/server/bin/paddlespeech_client.py +++ b/paddlespeech/server/bin/paddlespeech_client.py @@ -121,7 +121,6 @@ class TTSClientExecutor(BaseExecutor): (args.output)) logger.info("Audio duration: %f s." % (duration)) logger.info("Response time: %f s." % (time_consume)) - logger.info("RTF: %f " % (time_consume / duration)) return True except BaseException: diff --git a/paddlespeech/server/conf/application.yaml b/paddlespeech/server/conf/application.yaml index cc08665e..9900492c 100644 --- a/paddlespeech/server/conf/application.yaml +++ b/paddlespeech/server/conf/application.yaml @@ -3,7 +3,7 @@ ################################################################## # SERVER SETTING # ################################################################## -host: '0.0.0.0' +host: '127.0.0.1' port: 8090 ################################################################## diff --git a/paddlespeech/server/conf/asr/asr.yaml b/paddlespeech/server/conf/asr/asr.yaml index 1a805142..a6743b77 100644 --- a/paddlespeech/server/conf/asr/asr.yaml +++ b/paddlespeech/server/conf/asr/asr.yaml @@ -5,4 +5,4 @@ cfg_path: # [optional] ckpt_path: # [optional] decode_method: 'attention_rescoring' force_yes: True -device: 'cpu' # set 'gpu:id' or 'cpu' +device: # set 'gpu:id' or 'cpu' diff --git a/paddlespeech/server/conf/asr/asr_pd.yaml b/paddlespeech/server/conf/asr/asr_pd.yaml index 6cddb450..4c415ac7 100644 --- a/paddlespeech/server/conf/asr/asr_pd.yaml +++ b/paddlespeech/server/conf/asr/asr_pd.yaml @@ -15,9 +15,10 @@ decode_method: force_yes: True am_predictor_conf: - device: 'cpu' # set 'gpu:id' or 'cpu' - enable_mkldnn: True + device: # set 'gpu:id' or 'cpu' switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config ################################################################## diff --git a/paddlespeech/server/conf/tts/tts.yaml b/paddlespeech/server/conf/tts/tts.yaml index 19e8874e..19207f0b 100644 --- a/paddlespeech/server/conf/tts/tts.yaml +++ b/paddlespeech/server/conf/tts/tts.yaml @@ -29,4 +29,4 @@ voc_stat: # OTHERS # ################################################################## lang: 'zh' -device: 'cpu' # set 'gpu:id' or 'cpu' +device: 
# set 'gpu:id' or 'cpu' diff --git a/paddlespeech/server/conf/tts/tts_pd.yaml b/paddlespeech/server/conf/tts/tts_pd.yaml index 019c7ed6..e27b9665 100644 --- a/paddlespeech/server/conf/tts/tts_pd.yaml +++ b/paddlespeech/server/conf/tts/tts_pd.yaml @@ -8,16 +8,17 @@ am: 'fastspeech2_csmsc' am_model: # the pdmodel file of your am static model (XX.pdmodel) am_params: # the pdiparams file of your am static model (XX.pdipparams) -am_sample_rate: 24000 # must match the model +am_sample_rate: 24000 phones_dict: tones_dict: speaker_dict: spk_id: 0 am_predictor_conf: - device: 'cpu' # set 'gpu:id' or 'cpu' - enable_mkldnn: False - switch_ir_optim: False + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config ################################################################## @@ -27,12 +28,13 @@ am_predictor_conf: voc: 'pwgan_csmsc' voc_model: # the pdmodel file of your vocoder static model (XX.pdmodel) voc_params: # the pdiparams file of your vocoder static model (XX.pdipparams) -voc_sample_rate: 24000 #must match the model +voc_sample_rate: 24000 voc_predictor_conf: - device: 'cpu' # set 'gpu:id' or 'cpu' - enable_mkldnn: False - switch_ir_optim: False + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config ################################################################## # OTHERS # diff --git a/paddlespeech/server/engine/asr/paddleinference/asr_engine.py b/paddlespeech/server/engine/asr/paddleinference/asr_engine.py index 5d4c4fa6..cb973e92 100644 --- a/paddlespeech/server/engine/asr/paddleinference/asr_engine.py +++ b/paddlespeech/server/engine/asr/paddleinference/asr_engine.py @@ -13,6 +13,7 @@ # limitations under the License. import io import os +import time from typing import Optional import paddle @@ -197,7 +198,6 @@ class ASREngine(BaseEngine): self.executor = ASRServerExecutor() self.config = get_config(config_file) - paddle.set_device(paddle.get_device()) self.executor._init_from_path( model_type=self.config.model_type, am_model=self.config.am_model, @@ -223,13 +223,18 @@ class ASREngine(BaseEngine): logger.info("start running asr engine") self.executor.preprocess(self.config.model_type, io.BytesIO(audio_data)) + st = time.time() self.executor.infer(self.config.model_type) + infer_time = time.time() - st self.output = self.executor.postprocess() # Retrieve result of asr. logger.info("end inferring asr engine") else: logger.info("file check failed!") self.output = None + logger.info("inference time: {}".format(infer_time)) + logger.info("asr engine type: paddle inference") + def postprocess(self): """postprocess """ diff --git a/paddlespeech/server/engine/asr/python/asr_engine.py b/paddlespeech/server/engine/asr/python/asr_engine.py index 9fac487d..1e2c5cc2 100644 --- a/paddlespeech/server/engine/asr/python/asr_engine.py +++ b/paddlespeech/server/engine/asr/python/asr_engine.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
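The changes above thread one stopwatch pattern through both the CLI TTS executor and the server ASR engines: record time.time() before a stage, accumulate the elapsed delta after it (self.frontend_time, self.am_time, self.voc_time, infer_time), and log the totals once per request. A minimal self-contained sketch of that pattern follows; the StageTimer helper and the stage bodies are illustrative, not part of the patch:

import time

class StageTimer:
    """Accumulate wall-clock time per named stage, mirroring the
    frontend/am/voc counters added in the engines above."""

    def __init__(self):
        self.totals = {}

    def measure(self, name, fn, *args, **kwargs):
        st = time.time()
        result = fn(*args, **kwargs)
        self.totals[name] = self.totals.get(name, 0.0) + (time.time() - st)
        return result

timer = StageTimer()
for _ in range(3):  # e.g. one AM + vocoder pass per sentence chunk
    mel = timer.measure("am", lambda: sum(i * i for i in range(10000)))
    wav = timer.measure("voc", lambda: sum(i for i in range(20000)))
for name, total in timer.totals.items():
    print("{} inference time: {}".format(name, total))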
import io +import time import paddle @@ -53,16 +54,24 @@ class ASREngine(BaseEngine): self.executor = ASRServerExecutor() self.config = get_config(config_file) - if self.config.device is None: - paddle.set_device(paddle.get_device()) - else: - paddle.set_device(self.config.device) + try: + if self.config.device: + self.device = self.config.device + else: + self.device = paddle.get_device() + paddle.set_device(self.device) + except BaseException: + logger.error( + "Set device failed, please check if device is already used and the parameter 'device' in the yaml file" + ) + self.executor._init_from_path( self.config.model, self.config.lang, self.config.sample_rate, self.config.cfg_path, self.config.decode_method, self.config.ckpt_path) - logger.info("Initialize ASR server engine successfully.") + logger.info("Initialize ASR server engine successfully on device: %s." % + (self.device)) return True def run(self, audio_data): @@ -76,12 +85,17 @@ class ASREngine(BaseEngine): self.config.force_yes): logger.info("start run asr engine") self.executor.preprocess(self.config.model, io.BytesIO(audio_data)) + st = time.time() self.executor.infer(self.config.model) + infer_time = time.time() - st self.output = self.executor.postprocess() # Retrieve result of asr. else: logger.info("file check failed!") self.output = None + logger.info("inference time: {}".format(infer_time)) + logger.info("asr engine type: python") + def postprocess(self): """postprocess """ diff --git a/paddlespeech/server/engine/tts/paddleinference/tts_engine.py b/paddlespeech/server/engine/tts/paddleinference/tts_engine.py index a9dc5f4e..5955c1a2 100644 --- a/paddlespeech/server/engine/tts/paddleinference/tts_engine.py +++ b/paddlespeech/server/engine/tts/paddleinference/tts_engine.py @@ -14,6 +14,7 @@ import base64 import io import os +import time from typing import Optional import librosa @@ -179,7 +180,7 @@ class TTSServerExecutor(TTSExecutor): self.phones_dict = os.path.abspath(phones_dict) self.am_sample_rate = am_sample_rate self.am_res_path = os.path.dirname(os.path.abspath(self.am_model)) - print("self.phones_dict:", self.phones_dict) + logger.info("self.phones_dict: {}".format(self.phones_dict)) # for speedyspeech self.tones_dict = None @@ -224,21 +225,21 @@ class TTSServerExecutor(TTSExecutor): with open(self.phones_dict, "r") as f: phn_id = [line.strip().split() for line in f.readlines()] vocab_size = len(phn_id) - print("vocab_size:", vocab_size) + logger.info("vocab_size: {}".format(vocab_size)) tone_size = None if self.tones_dict: with open(self.tones_dict, "r") as f: tone_id = [line.strip().split() for line in f.readlines()] tone_size = len(tone_id) - print("tone_size:", tone_size) + logger.info("tone_size: {}".format(tone_size)) spk_num = None if self.speaker_dict: with open(self.speaker_dict, 'rt') as f: spk_id = [line.strip().split() for line in f.readlines()] spk_num = len(spk_id) - print("spk_num:", spk_num) + logger.info("spk_num: {}".format(spk_num)) # frontend if lang == 'zh': @@ -248,21 +249,29 @@ class TTSServerExecutor(TTSExecutor): elif lang == 'en': self.frontend = English(phone_vocab_path=self.phones_dict) - print("frontend done!") - - # am predictor - self.am_predictor_conf = am_predictor_conf - self.am_predictor = init_predictor( - model_file=self.am_model, - params_file=self.am_params, - predictor_conf=self.am_predictor_conf) - - # voc predictor - self.voc_predictor_conf = voc_predictor_conf - self.voc_predictor = init_predictor( - model_file=self.voc_model, - params_file=self.voc_params, - 
predictor_conf=self.voc_predictor_conf) + logger.info("frontend done!") + + try: + # am predictor + self.am_predictor_conf = am_predictor_conf + self.am_predictor = init_predictor( + model_file=self.am_model, + params_file=self.am_params, + predictor_conf=self.am_predictor_conf) + logger.info("Create AM predictor successfully.") + except BaseException: + logger.error("Failed to create AM predictor.") + + try: + # voc predictor + self.voc_predictor_conf = voc_predictor_conf + self.voc_predictor = init_predictor( + model_file=self.voc_model, + params_file=self.voc_params, + predictor_conf=self.voc_predictor_conf) + logger.info("Create Vocoder predictor successfully.") + except BaseException: + logger.error("Failed to create Vocoder predictor.") @paddle.no_grad() def infer(self, @@ -277,6 +286,7 @@ class TTSServerExecutor(TTSExecutor): am_dataset = am[am.rindex('_') + 1:] get_tone_ids = False merge_sentences = False + frontend_st = time.time() if am_name == 'speedyspeech': get_tone_ids = True if lang == 'zh': @@ -292,10 +302,14 @@ class TTSServerExecutor(TTSExecutor): text, merge_sentences=merge_sentences) phone_ids = input_ids["phone_ids"] else: - print("lang should in {'zh', 'en'}!") + logger.error("lang should in {'zh', 'en'}!") + self.frontend_time = time.time() - frontend_st + self.am_time = 0 + self.voc_time = 0 flags = 0 for i in range(len(phone_ids)): + am_st = time.time() part_phone_ids = phone_ids[i] # am if am_name == 'speedyspeech': @@ -314,7 +328,10 @@ class TTSServerExecutor(TTSExecutor): am_result = run_model(self.am_predictor, [part_phone_ids.numpy()]) mel = am_result[0] + self.am_time += (time.time() - am_st) + # voc + voc_st = time.time() voc_result = run_model(self.voc_predictor, [mel]) wav = voc_result[0] wav = paddle.to_tensor(wav) @@ -324,6 +341,7 @@ class TTSServerExecutor(TTSExecutor): flags = 1 else: wav_all = paddle.concat([wav_all, wav]) + self.voc_time += (time.time() - voc_st) self._outputs['wav'] = wav_all @@ -370,7 +388,7 @@ class TTSEngine(BaseEngine): def postprocess(self, wav, original_fs: int, - target_fs: int=16000, + target_fs: int=0, volume: float=1.0, speed: float=1.0, audio_path: str=None): @@ -395,38 +413,50 @@ class TTSEngine(BaseEngine): if target_fs == 0 or target_fs > original_fs: target_fs = original_fs wav_tar_fs = wav + logger.info( + "The sample rate of synthesized audio is the same as model, which is {}Hz". + format(original_fs)) else: wav_tar_fs = librosa.resample( np.squeeze(wav), original_fs, target_fs) - + logger.info( + "The sample rate of model is {}Hz and the target sample rate is {}Hz. Converting the sample rate of the synthesized audio successfully.". + format(original_fs, target_fs)) # transform volume wav_vol = wav_tar_fs * volume + logger.info("Transform the volume of the audio successfully.") # transform speed try: # windows not support soxbindings wav_speed = change_speed(wav_vol, speed, target_fs) + logger.info("Transform the speed of the audio successfully.") except ServerBaseException: raise ServerBaseException( ErrorCode.SERVER_INTERNAL_ERR, - "Transform speed failed. Can not install soxbindings on your system. \ + "Failed to transform speed. Can not install soxbindings on your system. 
\ You need to set speed value 1.0.") except BaseException: - logger.error("Transform speed failed.") + logger.error("Failed to transform speed.") # wav to base64 buf = io.BytesIO() wavfile.write(buf, target_fs, wav_speed) base64_bytes = base64.b64encode(buf.read()) wav_base64 = base64_bytes.decode('utf-8') + logger.info("Audio to string successfully.") # save audio - if audio_path is not None and audio_path.endswith(".wav"): - sf.write(audio_path, wav_speed, target_fs) - elif audio_path is not None and audio_path.endswith(".pcm"): - wav_norm = wav_speed * (32767 / max(0.001, - np.max(np.abs(wav_speed)))) - with open(audio_path, "wb") as f: - f.write(wav_norm.astype(np.int16)) + if audio_path is not None: + if audio_path.endswith(".wav"): + sf.write(audio_path, wav_speed, target_fs) + elif audio_path.endswith(".pcm"): + wav_norm = wav_speed * (32767 / max(0.001, + np.max(np.abs(wav_speed)))) + with open(audio_path, "wb") as f: + f.write(wav_norm.astype(np.int16)) + logger.info("Save audio to {} successfully.".format(audio_path)) + else: + logger.info("There is no need to save audio.") return target_fs, wav_base64 @@ -462,8 +492,12 @@ class TTSEngine(BaseEngine): lang = self.config.lang try: + infer_st = time.time() self.executor.infer( text=sentence, lang=lang, am=self.config.am, spk_id=spk_id) + infer_et = time.time() + infer_time = infer_et - infer_st + except ServerBaseException: raise ServerBaseException(ErrorCode.SERVER_INTERNAL_ERR, "tts infer failed.") @@ -471,6 +505,7 @@ class TTSEngine(BaseEngine): logger.error("tts infer failed.") try: + postprocess_st = time.time() target_sample_rate, wav_base64 = self.postprocess( wav=self.executor._outputs['wav'].numpy(), original_fs=self.executor.am_sample_rate, @@ -478,10 +513,34 @@ class TTSEngine(BaseEngine): volume=volume, speed=speed, audio_path=save_path) + postprocess_et = time.time() + postprocess_time = postprocess_et - postprocess_st + duration = len(self.executor._outputs['wav'] + .numpy()) / self.executor.am_sample_rate + rtf = infer_time / duration + except ServerBaseException: raise ServerBaseException(ErrorCode.SERVER_INTERNAL_ERR, "tts postprocess failed.") except BaseException: logger.error("tts postprocess failed.") + logger.info("AM model: {}".format(self.config.am)) + logger.info("Vocoder model: {}".format(self.config.voc)) + logger.info("Language: {}".format(lang)) + logger.info("tts engine type: paddle inference") + + logger.info("audio duration: {}".format(duration)) + logger.info( + "frontend inference time: {}".format(self.executor.frontend_time)) + logger.info("AM inference time: {}".format(self.executor.am_time)) + logger.info("Vocoder inference time: {}".format(self.executor.voc_time)) + logger.info("total inference time: {}".format(infer_time)) + logger.info( + "postprocess (change speed, volume, target sample rate) time: {}". + format(postprocess_time)) + logger.info("total generate audio time: {}".format(infer_time + + postprocess_time)) + logger.info("RTF: {}".format(rtf)) + return lang, target_sample_rate, wav_base64 diff --git a/paddlespeech/server/engine/tts/python/tts_engine.py b/paddlespeech/server/engine/tts/python/tts_engine.py index 20b4e0fe..7dd57669 100644 --- a/paddlespeech/server/engine/tts/python/tts_engine.py +++ b/paddlespeech/server/engine/tts/python/tts_engine.py @@ -13,6 +13,7 @@ # limitations under the License. 
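The paddle-inference TTS engine above now reports RTF, the real-time factor: total inference time divided by the duration of the audio produced, so values below 1.0 mean synthesis runs faster than playback. A small sketch of the arithmetic under the same definitions, with made-up numbers:

import numpy as np

sample_rate = 24000                  # matches am_sample_rate in the configs above
wav = np.zeros(4 * sample_rate)      # pretend 4 seconds of synthesized audio
infer_time = 1.2                     # seconds measured around executor.infer()

duration = len(wav) / sample_rate    # seconds of audio produced
rtf = infer_time / duration          # 0.3 here: about 3x faster than real time
print("audio duration: {}".format(duration))
print("RTF: {}".format(rtf))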
import base64 import io +import time import librosa import numpy as np @@ -54,11 +55,20 @@ class TTSEngine(BaseEngine): try: self.config = get_config(config_file) - if self.config.device is None: - paddle.set_device(paddle.get_device()) + if self.config.device: + self.device = self.config.device else: - paddle.set_device(self.config.device) + self.device = paddle.get_device() + paddle.set_device(self.device) + except BaseException: + logger.error( + "Set device failed, please check if device is already used and the parameter 'device' in the yaml file" + ) + logger.error("Initialize TTS server engine Failed on device: %s." % + (self.device)) + return False + try: self.executor._init_from_path( am=self.config.am, am_config=self.config.am_config, @@ -73,16 +83,19 @@ class TTSEngine(BaseEngine): voc_stat=self.config.voc_stat, lang=self.config.lang) except BaseException: - logger.error("Initialize TTS server engine Failed.") + logger.error("Failed to get model related files.") + logger.error("Initialize TTS server engine Failed on device: %s." % + (self.device)) return False - logger.info("Initialize TTS server engine successfully.") + logger.info("Initialize TTS server engine successfully on device: %s." % + (self.device)) return True def postprocess(self, wav, original_fs: int, - target_fs: int=16000, + target_fs: int=0, volume: float=1.0, speed: float=1.0, audio_path: str=None): @@ -107,38 +120,50 @@ class TTSEngine(BaseEngine): if target_fs == 0 or target_fs > original_fs: target_fs = original_fs wav_tar_fs = wav + logger.info( + "The sample rate of synthesized audio is the same as model, which is {}Hz". + format(original_fs)) else: wav_tar_fs = librosa.resample( np.squeeze(wav), original_fs, target_fs) - + logger.info( + "The sample rate of model is {}Hz and the target sample rate is {}Hz. Converting the sample rate of the synthesized audio successfully.". + format(original_fs, target_fs)) # transform volume wav_vol = wav_tar_fs * volume + logger.info("Transform the volume of the audio successfully.") # transform speed try: # windows not support soxbindings wav_speed = change_speed(wav_vol, speed, target_fs) + logger.info("Transform the speed of the audio successfully.") except ServerBaseException: raise ServerBaseException( ErrorCode.SERVER_INTERNAL_ERR, - "Transform speed failed. Can not install soxbindings on your system. \ + "Failed to transform speed. Can not install soxbindings on your system. 
\ You need to set speed value 1.0.") except BaseException: - logger.error("Transform speed failed.") + logger.error("Failed to transform speed.") # wav to base64 buf = io.BytesIO() wavfile.write(buf, target_fs, wav_speed) base64_bytes = base64.b64encode(buf.read()) wav_base64 = base64_bytes.decode('utf-8') + logger.info("Audio to string successfully.") # save audio - if audio_path is not None and audio_path.endswith(".wav"): - sf.write(audio_path, wav_speed, target_fs) - elif audio_path is not None and audio_path.endswith(".pcm"): - wav_norm = wav_speed * (32767 / max(0.001, - np.max(np.abs(wav_speed)))) - with open(audio_path, "wb") as f: - f.write(wav_norm.astype(np.int16)) + if audio_path is not None: + if audio_path.endswith(".wav"): + sf.write(audio_path, wav_speed, target_fs) + elif audio_path.endswith(".pcm"): + wav_norm = wav_speed * (32767 / max(0.001, + np.max(np.abs(wav_speed)))) + with open(audio_path, "wb") as f: + f.write(wav_norm.astype(np.int16)) + logger.info("Save audio to {} successfully.".format(audio_path)) + else: + logger.info("There is no need to save audio.") return target_fs, wav_base64 @@ -174,8 +199,15 @@ class TTSEngine(BaseEngine): lang = self.config.lang try: + infer_st = time.time() self.executor.infer( text=sentence, lang=lang, am=self.config.am, spk_id=spk_id) + infer_et = time.time() + infer_time = infer_et - infer_st + duration = len(self.executor._outputs['wav'] + .numpy()) / self.executor.am_config.fs + rtf = infer_time / duration + except ServerBaseException: raise ServerBaseException(ErrorCode.SERVER_INTERNAL_ERR, "tts infer failed.") @@ -183,6 +215,7 @@ class TTSEngine(BaseEngine): logger.error("tts infer failed.") try: + postprocess_st = time.time() target_sample_rate, wav_base64 = self.postprocess( wav=self.executor._outputs['wav'].numpy(), original_fs=self.executor.am_config.fs, @@ -190,10 +223,32 @@ class TTSEngine(BaseEngine): volume=volume, speed=speed, audio_path=save_path) + postprocess_et = time.time() + postprocess_time = postprocess_et - postprocess_st + except ServerBaseException: raise ServerBaseException(ErrorCode.SERVER_INTERNAL_ERR, "tts postprocess failed.") except BaseException: logger.error("tts postprocess failed.") + logger.info("AM model: {}".format(self.config.am)) + logger.info("Vocoder model: {}".format(self.config.voc)) + logger.info("Language: {}".format(lang)) + logger.info("tts engine type: python") + + logger.info("audio duration: {}".format(duration)) + logger.info( + "frontend inference time: {}".format(self.executor.frontend_time)) + logger.info("AM inference time: {}".format(self.executor.am_time)) + logger.info("Vocoder inference time: {}".format(self.executor.voc_time)) + logger.info("total inference time: {}".format(infer_time)) + logger.info( + "postprocess (change speed, volume, target sample rate) time: {}". 
+ format(postprocess_time)) + logger.info("total generate audio time: {}".format(infer_time + + postprocess_time)) + logger.info("RTF: {}".format(rtf)) + logger.info("device: {}".format(self.device)) + return lang, target_sample_rate, wav_base64 diff --git a/paddlespeech/server/restful/tts_api.py b/paddlespeech/server/restful/tts_api.py index c7e91300..0af0f6d0 100644 --- a/paddlespeech/server/restful/tts_api.py +++ b/paddlespeech/server/restful/tts_api.py @@ -16,6 +16,7 @@ from typing import Union from fastapi import APIRouter +from paddlespeech.cli.log import logger from paddlespeech.server.engine.engine_pool import get_engine_pool from paddlespeech.server.restful.request import TTSRequest from paddlespeech.server.restful.response import ErrorResponse @@ -60,6 +61,9 @@ def tts(request_body: TTSRequest): Returns: json: [description] """ + + logger.info("request: {}".format(request_body)) + # get params text = request_body.text spk_id = request_body.spk_id @@ -92,6 +96,7 @@ def tts(request_body: TTSRequest): # get single engine from engine pool engine_pool = get_engine_pool() tts_engine = engine_pool['tts'] + logger.info("Get tts engine successfully.") lang, target_sample_rate, wav_base64 = tts_engine.run( text, spk_id, speed, volume, sample_rate, save_path) diff --git a/paddlespeech/server/utils/paddle_predictor.py b/paddlespeech/server/utils/paddle_predictor.py index f4216d74..4035d48d 100644 --- a/paddlespeech/server/utils/paddle_predictor.py +++ b/paddlespeech/server/utils/paddle_predictor.py @@ -15,6 +15,7 @@ import os from typing import List from typing import Optional +import paddle from paddle.inference import Config from paddle.inference import create_predictor @@ -40,15 +41,30 @@ def init_predictor(model_dir: Optional[os.PathLike]=None, else: config = Config(model_file, params_file) - config.enable_memory_optim() - if "gpu" in predictor_conf["device"]: - gpu_id = predictor_conf["device"].split(":")[-1] + # set device + if predictor_conf["device"]: + device = predictor_conf["device"] + else: + device = paddle.get_device() + if "gpu" in device: + gpu_id = device.split(":")[-1] config.enable_use_gpu(1000, int(gpu_id)) - if predictor_conf["enable_mkldnn"]: - config.enable_mkldnn() + + # IR optim if predictor_conf["switch_ir_optim"]: config.switch_ir_optim() + # glog + if not predictor_conf["glog_info"]: + config.disable_glog_info() + + # config summary + if predictor_conf["summary"]: + print(config.summary()) + + # memory optim + config.enable_memory_optim() + predictor = create_predictor(config) return predictor From 7d1ed0d052aa4f2a2481dd82f9471004e82a20f6 Mon Sep 17 00:00:00 2001 From: Phecda xu <46859427+phecda-xu@users.noreply.github.com> Date: Tue, 1 Mar 2022 22:45:30 +0800 Subject: [PATCH 25/45] Update README.md add PaddleDubbing info --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 46730797..48732f59 100644 --- a/README.md +++ b/README.md @@ -561,6 +561,7 @@ You are warmly welcome to submit questions in [discussions](https://github.com/P - Many thanks to [JiehangXie](https://github.com/JiehangXie)/[PaddleBoBo](https://github.com/JiehangXie/PaddleBoBo) for developing Virtual Uploader(VUP)/Virtual YouTuber(VTuber) with PaddleSpeech TTS function. - Many thanks to [745165806](https://github.com/745165806)/[PaddleSpeechTask](https://github.com/745165806/PaddleSpeechTask) for contributing Punctuation Restoration model. - Many thanks to [kslz](https://github.com/745165806) for supplementary Chinese documents. 
+- Many thanks to [phecda-xu](https://github.com/phecda-xu)/[PaddleDubbing](https://github.com/phecda-xu/PaddleDubbing) for developing a dubbing tool with a GUI based on the PaddleSpeech TTS model.

Besides, PaddleSpeech depends on a lot of open source repositories. See [references](./docs/source/reference.md) for more information.
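The init_predictor rework in paddlespeech/server/utils/paddle_predictor.py (a few hunks above) concentrates the device fallback and the new logging switches in one place: an empty device entry in the yaml now falls back to paddle.get_device(), glog_info toggles Paddle's glog output, and summary prints the final predictor configuration. A condensed sketch of the resulting control flow; the function name and file paths are placeholders, the config keys mirror the diff:

import paddle
from paddle.inference import Config
from paddle.inference import create_predictor

def init_predictor_sketch(model_file, params_file, predictor_conf):
    config = Config(model_file, params_file)  # e.g. "am.pdmodel", "am.pdiparams"

    # empty 'device' in the yaml falls back to the current Paddle device
    device = predictor_conf["device"] or paddle.get_device()
    if "gpu" in device:
        config.enable_use_gpu(1000, int(device.split(":")[-1]))

    if predictor_conf["switch_ir_optim"]:
        config.switch_ir_optim()
    if not predictor_conf["glog_info"]:
        config.disable_glog_info()
    if predictor_conf["summary"]:
        print(config.summary())

    config.enable_memory_optim()
    return create_predictor(config)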
From 8858c7066b9959e8502df4a974bbe59f80e08ec9 Mon Sep 17 00:00:00 2001 From: Phecda xu <46859427+phecda-xu@users.noreply.github.com> Date: Tue, 1 Mar 2022 23:13:44 +0800 Subject: [PATCH 28/45] Update README_cn.md --- README_cn.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README_cn.md b/README_cn.md index 72352887..27580639 100644 --- a/README_cn.md +++ b/README_cn.md @@ -556,6 +556,7 @@ year={2021} - 非常感谢 [JiehangXie](https://github.com/JiehangXie)/[PaddleBoBo](https://github.com/JiehangXie/PaddleBoBo) 采用 PaddleSpeech 语音合成功能实现 Virtual Uploader(VUP)/Virtual YouTuber(VTuber) 虚拟主播。 - 非常感谢 [745165806](https://github.com/745165806)/[PaddleSpeechTask](https://github.com/745165806/PaddleSpeechTask) 贡献标点重建相关模型。 - 非常感谢 [kslz](https://github.com/kslz) 补充中文文档。 +- 非常感谢 [awmmmm](https://github.com/awmmmm) 提供 fastspeech2 aishell3 conformer 预训练模型。 - 非常感谢 [phecda-xu](https://github.com/phecda-xu)/[PaddleDubbing](https://github.com/phecda-xu/PaddleDubbing) 基于PaddleSpeech的TTS模型搭建带GUI操作界面的配音工具。 此外,PaddleSpeech 依赖于许多开源存储库。有关更多信息,请参阅 [references](./docs/source/reference.md)。 From 34b600c4a2035b44c29ca70f7d34e685ff5f98a3 Mon Sep 17 00:00:00 2001 From: Phecda xu <46859427+phecda-xu@users.noreply.github.com> Date: Tue, 1 Mar 2022 23:15:30 +0800 Subject: [PATCH 29/45] Update README_cn.md --- README_cn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_cn.md b/README_cn.md index 27580639..1196eec1 100644 --- a/README_cn.md +++ b/README_cn.md @@ -557,7 +557,7 @@ year={2021} - 非常感谢 [745165806](https://github.com/745165806)/[PaddleSpeechTask](https://github.com/745165806/PaddleSpeechTask) 贡献标点重建相关模型。 - 非常感谢 [kslz](https://github.com/kslz) 补充中文文档。 - 非常感谢 [awmmmm](https://github.com/awmmmm) 提供 fastspeech2 aishell3 conformer 预训练模型。 -- 非常感谢 [phecda-xu](https://github.com/phecda-xu)/[PaddleDubbing](https://github.com/phecda-xu/PaddleDubbing) 基于PaddleSpeech的TTS模型搭建带GUI操作界面的配音工具。 +- 非常感谢 [phecda-xu](https://github.com/phecda-xu)/[PaddleDubbing](https://github.com/phecda-xu/PaddleDubbing) 基于 PaddleSpeech 的 TTS 模型搭建带 GUI 操作界面的配音工具。 此外,PaddleSpeech 依赖于许多开源存储库。有关更多信息,请参阅 [references](./docs/source/reference.md)。 From c116a3a92644a6fcbf0e2346d0077bb7c3b3c50c Mon Sep 17 00:00:00 2001 From: Jerryuhoo Date: Wed, 2 Mar 2022 09:41:18 +0800 Subject: [PATCH 30/45] fix Speedyspeech multi-speaker inference, test=tts --- paddlespeech/t2s/exps/synthesize_e2e.py | 8 ++++---- paddlespeech/t2s/models/speedyspeech/speedyspeech.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/paddlespeech/t2s/exps/synthesize_e2e.py b/paddlespeech/t2s/exps/synthesize_e2e.py index 75c631b8..514d4822 100644 --- a/paddlespeech/t2s/exps/synthesize_e2e.py +++ b/paddlespeech/t2s/exps/synthesize_e2e.py @@ -194,10 +194,10 @@ def evaluate(args): am_inference = jit.to_static( am_inference, input_spec=[ - InputSpec([-1], dtype=paddle.int64), # text - InputSpec([-1], dtype=paddle.int64), # tone - None, # duration - InputSpec([-1], dtype=paddle.int64) # spk_id + InputSpec([-1], dtype=paddle.int64), # text + InputSpec([-1], dtype=paddle.int64), # tone + InputSpec([1], dtype=paddle.int64), # spk_id + None # duration ]) else: am_inference = jit.to_static( diff --git a/paddlespeech/t2s/models/speedyspeech/speedyspeech.py b/paddlespeech/t2s/models/speedyspeech/speedyspeech.py index 42e8f743..44ccfc60 100644 --- a/paddlespeech/t2s/models/speedyspeech/speedyspeech.py +++ b/paddlespeech/t2s/models/speedyspeech/speedyspeech.py @@ -247,7 +247,7 @@ class SpeedySpeechInference(nn.Layer): self.normalizer = normalizer 
self.acoustic_model = speedyspeech_model - def forward(self, phones, tones, durations=None, spk_id=None): + def forward(self, phones, tones, spk_id=None, durations=None): normalized_mel = self.acoustic_model.inference( phones, tones, durations=durations, spk_id=spk_id) logmel = self.normalizer.inverse(normalized_mel) From 85d4a31e04e238b3459e8c3f34a502fe8dd5f69e Mon Sep 17 00:00:00 2001 From: lym0302 Date: Wed, 2 Mar 2022 09:55:54 +0800 Subject: [PATCH 31/45] update application.yaml, test=doc --- paddlespeech/server/conf/application.yaml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/paddlespeech/server/conf/application.yaml b/paddlespeech/server/conf/application.yaml index 9900492c..6dcae74a 100644 --- a/paddlespeech/server/conf/application.yaml +++ b/paddlespeech/server/conf/application.yaml @@ -9,6 +9,12 @@ port: 8090 ################################################################## # CONFIG FILE # ################################################################## +# add engine backend type (Options: asr, tts) and config file here. +# Adding a speech task to engine_backend means starting the service. +engine_backend: + asr: 'conf/asr/asr.yaml' + tts: 'conf/tts/tts.yaml' + # The engine_type of speech task needs to keep the same type as the config file of speech task. # E.g: The engine_type of asr is 'python', the engine_backend of asr is 'XX/asr.yaml' # E.g: The engine_type of asr is 'inference', the engine_backend of asr is 'XX/asr_pd.yaml' @@ -18,8 +24,4 @@ engine_type: asr: 'python' tts: 'python' -# add engine backend type (Options: asr, tts) and config file here. -# Adding a speech task to engine_backend means starting the service. -engine_backend: - asr: 'conf/asr/asr.yaml' - tts: 'conf/tts/tts.yaml' + From 556ac958d4367b5d8751710cbbbac14500b4d9f8 Mon Sep 17 00:00:00 2001 From: Jerryuhoo Date: Wed, 2 Mar 2022 10:39:38 +0800 Subject: [PATCH 32/45] update readme, test=doc add examples --- README.md | 7 +++++++ README_cn.md | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/README.md b/README.md index 837d2478..7f95abac 100644 --- a/README.md +++ b/README.md @@ -148,6 +148,12 @@ For more synthesized audios, please refer to [PaddleSpeech Text-to-Speech sample - [PaddleSpeech Demo Video](https://paddlespeech.readthedocs.io/en/latest/demo_video.html) +- **[VTuberTalk](https://github.com/JiehangXie/PaddleBoBo): Use PaddleSpeech TTS and ASR to clone voice from videos.** + +
+
+
+ ### 🔥 Hot Activities - 2021.12.21~12.24 @@ -574,6 +580,7 @@ You are warmly welcome to submit questions in [discussions](https://github.com/P - Many thanks to [kslz](https://github.com/745165806) for supplementary Chinese documents. - Many thanks to [awmmmm](https://github.com/awmmmm) for contributing fastspeech2 aishell3 conformer pretrained model. - Many thanks to [phecda-xu](https://github.com/phecda-xu)/[PaddleDubbing](https://github.com/phecda-xu/PaddleDubbing) for developing a dubbing tool with GUI based on PaddleSpeech TTS model. +- Many thanks to [jerryuhoo](https://github.com/jerryuhoo)/[VTuberTalk](https://github.com/jerryuhoo/VTuberTalk) for developing a GUI tool based on PaddleSpeech TTS and code for making datasets from videos based on PaddleSpeech ASR. Besides, PaddleSpeech depends on a lot of open source repositories. See [references](./docs/source/reference.md) for more information. diff --git a/README_cn.md b/README_cn.md index 5c00637d..742ef062 100644 --- a/README_cn.md +++ b/README_cn.md @@ -150,6 +150,12 @@ from https://github.com/18F/open-source-guide/blob/18f-pages/pages/making-readme - [PaddleSpeech 示例视频](https://paddlespeech.readthedocs.io/en/latest/demo_video.html) +- **[VTuberTalk](https://github.com/JiehangXie/PaddleBoBo): 使用 PaddleSpeech 的语音合成和语音识别从视频中克隆人声。** + +
+
+
+ ### 🔥 热门活动 - 2021.12.21~12.24 @@ -569,6 +575,7 @@ year={2021} - 非常感谢 [kslz](https://github.com/kslz) 补充中文文档。 - 非常感谢 [awmmmm](https://github.com/awmmmm) 提供 fastspeech2 aishell3 conformer 预训练模型。 - 非常感谢 [phecda-xu](https://github.com/phecda-xu)/[PaddleDubbing](https://github.com/phecda-xu/PaddleDubbing) 基于 PaddleSpeech 的 TTS 模型搭建带 GUI 操作界面的配音工具。 +- 非常感谢 [jerryuhoo](https://github.com/jerryuhoo)/[VTuberTalk](https://github.com/jerryuhoo/VTuberTalk) 基于 PaddleSpeech 的 TTS GUI 界面和基于 ASR 制作数据集的相关代码。 此外,PaddleSpeech 依赖于许多开源存储库。有关更多信息,请参阅 [references](./docs/source/reference.md)。 From a848f408f0a345e09d4b553ecccae16b447fa328 Mon Sep 17 00:00:00 2001 From: Jerryuhoo Date: Wed, 2 Mar 2022 11:14:39 +0800 Subject: [PATCH 33/45] Update readme, test=doc --- README.md | 2 +- README_cn.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7f95abac..46f492e9 100644 --- a/README.md +++ b/README.md @@ -148,7 +148,7 @@ For more synthesized audios, please refer to [PaddleSpeech Text-to-Speech sample - [PaddleSpeech Demo Video](https://paddlespeech.readthedocs.io/en/latest/demo_video.html) -- **[VTuberTalk](https://github.com/JiehangXie/PaddleBoBo): Use PaddleSpeech TTS and ASR to clone voice from videos.** +- **[VTuberTalk](https://github.com/jerryuhoo/VTuberTalk): Use PaddleSpeech TTS and ASR to clone voice from videos.**
diff --git a/README_cn.md b/README_cn.md index 742ef062..e8494737 100644 --- a/README_cn.md +++ b/README_cn.md @@ -150,7 +150,7 @@ from https://github.com/18F/open-source-guide/blob/18f-pages/pages/making-readme - [PaddleSpeech 示例视频](https://paddlespeech.readthedocs.io/en/latest/demo_video.html) -- **[VTuberTalk](https://github.com/JiehangXie/PaddleBoBo): 使用 PaddleSpeech 的语音合成和语音识别从视频中克隆人声。** +- **[VTuberTalk](https://github.com/jerryuhoo/VTuberTalk): 使用 PaddleSpeech 的语音合成和语音识别从视频中克隆人声。**
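One note on the SpeedySpeech change in patch 30/45 above: paddle.jit.to_static binds the entries of input_spec to the forward arguments positionally, which is why the spec order had to be rearranged to match the new signature forward(phones, tones, spk_id=None, durations=None). A toy illustration of that rule; the layer below is hypothetical:

import paddle
from paddle.static import InputSpec

class ToyAcoustic(paddle.nn.Layer):
    def forward(self, phones, tones, spk_id=None, durations=None):
        out = phones + tones
        if spk_id is not None:
            out = out + spk_id  # spk_id must arrive as the third argument
        return out

# spec order mirrors the signature: phones, tones, spk_id; durations stays None
static_layer = paddle.jit.to_static(
    ToyAcoustic(),
    input_spec=[
        InputSpec([-1], dtype=paddle.int64),  # phones
        InputSpec([-1], dtype=paddle.int64),  # tones
        InputSpec([1], dtype=paddle.int64),   # spk_id
        None,  # durations
    ])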
From c52f0f805bc92800b61b9594d873778f79304a9a Mon Sep 17 00:00:00 2001 From: KP <109694228@qq.com> Date: Wed, 2 Mar 2022 12:09:56 +0800 Subject: [PATCH 34/45] refactor --- paddleaudio/paddleaudio/__init__.py | 2 + paddleaudio/paddleaudio/backends/__init__.py | 6 + .../paddleaudio/backends/soundfile_backend.py | 252 ++++++ .../{kaldi => compliance}/__init__.py | 0 paddleaudio/paddleaudio/compliance/kaldi.py | 688 ++++++++++++++++ paddleaudio/paddleaudio/compliance/librosa.py | 728 ++++++++++++++++ .../features/{librosa.py => layers.py} | 241 +----- .../paddleaudio/functional/__init__.py | 7 + .../paddleaudio/functional/functional.py | 776 ++++-------------- paddleaudio/paddleaudio/io/__init__.py | 8 +- paddleaudio/paddleaudio/io/audio.py | 303 ------- 11 files changed, 1870 insertions(+), 1141 deletions(-) rename paddleaudio/paddleaudio/{kaldi => compliance}/__init__.py (100%) create mode 100644 paddleaudio/paddleaudio/compliance/kaldi.py create mode 100644 paddleaudio/paddleaudio/compliance/librosa.py rename paddleaudio/paddleaudio/features/{librosa.py => layers.py} (59%) delete mode 100644 paddleaudio/paddleaudio/io/audio.py diff --git a/paddleaudio/paddleaudio/__init__.py b/paddleaudio/paddleaudio/__init__.py index 185a92b8..2dab610c 100644 --- a/paddleaudio/paddleaudio/__init__.py +++ b/paddleaudio/paddleaudio/__init__.py @@ -11,3 +11,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from .backends import load +from .backends import save diff --git a/paddleaudio/paddleaudio/backends/__init__.py b/paddleaudio/paddleaudio/backends/__init__.py index 185a92b8..8eae07e8 100644 --- a/paddleaudio/paddleaudio/backends/__init__.py +++ b/paddleaudio/paddleaudio/backends/__init__.py @@ -11,3 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from .soundfile_backend import depth_convert +from .soundfile_backend import load +from .soundfile_backend import normalize +from .soundfile_backend import resample +from .soundfile_backend import save +from .soundfile_backend import to_mono diff --git a/paddleaudio/paddleaudio/backends/soundfile_backend.py b/paddleaudio/paddleaudio/backends/soundfile_backend.py index 97043fd7..2b920284 100644 --- a/paddleaudio/paddleaudio/backends/soundfile_backend.py +++ b/paddleaudio/paddleaudio/backends/soundfile_backend.py @@ -11,3 +11,255 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import warnings +from typing import Optional +from typing import Tuple +from typing import Union + +import numpy as np +import resampy +import soundfile as sf +from numpy import ndarray as array +from scipy.io import wavfile + +from ..utils import ParameterError + +__all__ = [ + 'resample', + 'to_mono', + 'depth_convert', + 'normalize', + 'save', + 'load', +] +NORMALMIZE_TYPES = ['linear', 'gaussian'] +MERGE_TYPES = ['ch0', 'ch1', 'random', 'average'] +RESAMPLE_MODES = ['kaiser_best', 'kaiser_fast'] +EPS = 1e-8 + + +def resample(y: array, src_sr: int, target_sr: int, + mode: str='kaiser_fast') -> array: + """ Audio resampling + This function is the same as using resampy.resample(). + Notes: + The default mode is kaiser_fast. 
For better audio quality, use mode = 'kaiser_fast' + """ + + if mode == 'kaiser_best': + warnings.warn( + f'Using resampy in kaiser_best to {src_sr}=>{target_sr}. This function is pretty slow, \ + we recommend the mode kaiser_fast in large scale audio trainning') + + if not isinstance(y, np.ndarray): + raise ParameterError( + 'Only support numpy array, but received y in {type(y)}') + + if mode not in RESAMPLE_MODES: + raise ParameterError(f'resample mode must in {RESAMPLE_MODES}') + + return resampy.resample(y, src_sr, target_sr, filter=mode) + + +def to_mono(y: array, merge_type: str='average') -> array: + """ convert sterior audio to mono + """ + if merge_type not in MERGE_TYPES: + raise ParameterError( + f'Unsupported merge type {merge_type}, available types are {MERGE_TYPES}' + ) + if y.ndim > 2: + raise ParameterError( + f'Unsupported audio array, y.ndim > 2, the shape is {y.shape}') + if y.ndim == 1: # nothing to merge + return y + + if merge_type == 'ch0': + return y[0] + if merge_type == 'ch1': + return y[1] + if merge_type == 'random': + return y[np.random.randint(0, 2)] + + # need to do averaging according to dtype + + if y.dtype == 'float32': + y_out = (y[0] + y[1]) * 0.5 + elif y.dtype == 'int16': + y_out = y.astype('int32') + y_out = (y_out[0] + y_out[1]) // 2 + y_out = np.clip(y_out, np.iinfo(y.dtype).min, + np.iinfo(y.dtype).max).astype(y.dtype) + + elif y.dtype == 'int8': + y_out = y.astype('int16') + y_out = (y_out[0] + y_out[1]) // 2 + y_out = np.clip(y_out, np.iinfo(y.dtype).min, + np.iinfo(y.dtype).max).astype(y.dtype) + else: + raise ParameterError(f'Unsupported dtype: {y.dtype}') + return y_out + + +def _safe_cast(y: array, dtype: Union[type, str]) -> array: + """ data type casting in a safe way, i.e., prevent overflow or underflow + This function is used internally. + """ + return np.clip(y, np.iinfo(dtype).min, np.iinfo(dtype).max).astype(dtype) + + +def depth_convert(y: array, dtype: Union[type, str], + dithering: bool=True) -> array: + """Convert audio array to target dtype safely + This function convert audio waveform to a target dtype, with addition steps of + preventing overflow/underflow and preserving audio range. 
+ """ + + SUPPORT_DTYPE = ['int16', 'int8', 'float32', 'float64'] + if y.dtype not in SUPPORT_DTYPE: + raise ParameterError( + 'Unsupported audio dtype, ' + f'y.dtype is {y.dtype}, supported dtypes are {SUPPORT_DTYPE}') + + if dtype not in SUPPORT_DTYPE: + raise ParameterError( + 'Unsupported audio dtype, ' + f'target dtype is {dtype}, supported dtypes are {SUPPORT_DTYPE}') + + if dtype == y.dtype: + return y + + if dtype == 'float64' and y.dtype == 'float32': + return _safe_cast(y, dtype) + if dtype == 'float32' and y.dtype == 'float64': + return _safe_cast(y, dtype) + + if dtype == 'int16' or dtype == 'int8': + if y.dtype in ['float64', 'float32']: + factor = np.iinfo(dtype).max + y = np.clip(y * factor, np.iinfo(dtype).min, + np.iinfo(dtype).max).astype(dtype) + y = y.astype(dtype) + else: + if dtype == 'int16' and y.dtype == 'int8': + factor = np.iinfo('int16').max / np.iinfo('int8').max - EPS + y = y.astype('float32') * factor + y = y.astype('int16') + + else: # dtype == 'int8' and y.dtype=='int16': + y = y.astype('int32') * np.iinfo('int8').max / \ + np.iinfo('int16').max + y = y.astype('int8') + + if dtype in ['float32', 'float64']: + org_dtype = y.dtype + y = y.astype(dtype) / np.iinfo(org_dtype).max + return y + + +def sound_file_load(file: str, + offset: Optional[float]=None, + dtype: str='int16', + duration: Optional[int]=None) -> Tuple[array, int]: + """Load audio using soundfile library + This function load audio file using libsndfile. + Reference: + http://www.mega-nerd.com/libsndfile/#Features + """ + with sf.SoundFile(file) as sf_desc: + sr_native = sf_desc.samplerate + if offset: + sf_desc.seek(int(offset * sr_native)) + if duration is not None: + frame_duration = int(duration * sr_native) + else: + frame_duration = -1 + y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T + + return y, sf_desc.samplerate + + +def normalize(y: array, norm_type: str='linear', + mul_factor: float=1.0) -> array: + """ normalize an input audio with additional multiplier. + """ + + if norm_type == 'linear': + amax = np.max(np.abs(y)) + factor = 1.0 / (amax + EPS) + y = y * factor * mul_factor + elif norm_type == 'gaussian': + amean = np.mean(y) + astd = np.std(y) + astd = max(astd, EPS) + y = mul_factor * (y - amean) / astd + else: + raise NotImplementedError(f'norm_type should be in {NORMALMIZE_TYPES}') + + return y + + +def save(y: array, sr: int, file: str) -> None: + """Save audio file to disk. + This function saves audio to disk using scipy.io.wavfile, with additional step + to convert input waveform to int16 unless it already is int16 + Notes: + It only support raw wav format. + """ + if not file.endswith('.wav'): + raise ParameterError( + f'only .wav file supported, but dst file name is: {file}') + + if sr <= 0: + raise ParameterError( + f'Sample rate should be larger than 0, recieved sr = {sr}') + + if y.dtype not in ['int16', 'int8']: + warnings.warn( + f'input data type is {y.dtype}, will convert data to int16 format before saving' + ) + y_out = depth_convert(y, 'int16') + else: + y_out = y + + wavfile.write(file, sr, y_out) + + +def load( + file: str, + sr: Optional[int]=None, + mono: bool=True, + merge_type: str='average', # ch0,ch1,random,average + normal: bool=True, + norm_type: str='linear', + norm_mul_factor: float=1.0, + offset: float=0.0, + duration: Optional[int]=None, + dtype: str='float32', + resample_mode: str='kaiser_fast') -> Tuple[array, int]: + """Load audio file from disk. + This function loads audio from disk using using audio beackend. 
+ Parameters: + Notes: + """ + + y, r = sound_file_load(file, offset=offset, dtype=dtype, duration=duration) + + if not ((y.ndim == 1 and len(y) > 0) or (y.ndim == 2 and len(y[0]) > 0)): + raise ParameterError(f'audio file {file} looks empty') + + if mono: + y = to_mono(y, merge_type) + + if sr is not None and sr != r: + y = resample(y, r, sr, mode=resample_mode) + r = sr + + if normal: + y = normalize(y, norm_type, norm_mul_factor) + elif dtype in ['int8', 'int16']: + # still need to do normalization, before depth convertion + y = normalize(y, 'linear', 1.0) + + y = depth_convert(y, dtype) + return y, r diff --git a/paddleaudio/paddleaudio/kaldi/__init__.py b/paddleaudio/paddleaudio/compliance/__init__.py similarity index 100% rename from paddleaudio/paddleaudio/kaldi/__init__.py rename to paddleaudio/paddleaudio/compliance/__init__.py diff --git a/paddleaudio/paddleaudio/compliance/kaldi.py b/paddleaudio/paddleaudio/compliance/kaldi.py new file mode 100644 index 00000000..61ca4e3d --- /dev/null +++ b/paddleaudio/paddleaudio/compliance/kaldi.py @@ -0,0 +1,688 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import math +from typing import Tuple + +import paddle +from paddle import Tensor + +from ..functional.window import get_window +from .spectrum import create_dct + +__all__ = [ + 'spectrogram', + 'fbank', + 'mfcc', +] + +# window types +HANNING = 'hann' +HAMMING = 'hamming' +POVEY = 'povey' +RECTANGULAR = 'rect' +BLACKMAN = 'blackman' + + +def _get_epsilon(dtype): + return paddle.to_tensor(1e-07, dtype=dtype) + + +def _next_power_of_2(x: int) -> int: + return 1 if x == 0 else 2**(x - 1).bit_length() + + +def _get_strided(waveform: Tensor, + window_size: int, + window_shift: int, + snip_edges: bool) -> Tensor: + assert waveform.dim() == 1 + num_samples = waveform.shape[0] + + if snip_edges: + if num_samples < window_size: + return paddle.empty((0, 0), dtype=waveform.dtype) + else: + m = 1 + (num_samples - window_size) // window_shift + else: + reversed_waveform = paddle.flip(waveform, [0]) + m = (num_samples + (window_shift // 2)) // window_shift + pad = window_size // 2 - window_shift // 2 + pad_right = reversed_waveform + if pad > 0: + pad_left = reversed_waveform[-pad:] + waveform = paddle.concat((pad_left, waveform, pad_right), axis=0) + else: + waveform = paddle.concat((waveform[-pad:], pad_right), axis=0) + + return paddle.signal.frame(waveform, window_size, window_shift)[:, :m].T + + +def _feature_window_function( + window_type: str, + window_size: int, + blackman_coeff: float, + dtype: int, ) -> Tensor: + if window_type == HANNING: + return get_window('hann', window_size, fftbins=False, dtype=dtype) + elif window_type == HAMMING: + return get_window('hamming', window_size, fftbins=False, dtype=dtype) + elif window_type == POVEY: + return get_window( + 'hann', window_size, fftbins=False, dtype=dtype).pow(0.85) + elif window_type == RECTANGULAR: + return paddle.ones([window_size], dtype=dtype) + elif window_type == 
BLACKMAN: + a = 2 * math.pi / (window_size - 1) + window_function = paddle.arange(window_size, dtype=dtype) + return (blackman_coeff - 0.5 * paddle.cos(a * window_function) + + (0.5 - blackman_coeff) * paddle.cos(2 * a * window_function) + ).astype(dtype) + else: + raise Exception('Invalid window type ' + window_type) + + +def _get_log_energy(strided_input: Tensor, epsilon: Tensor, + energy_floor: float) -> Tensor: + log_energy = paddle.maximum(strided_input.pow(2).sum(1), epsilon).log() + if energy_floor == 0.0: + return log_energy + return paddle.maximum( + log_energy, + paddle.to_tensor(math.log(energy_floor), dtype=strided_input.dtype)) + + +def _get_waveform_and_window_properties( + waveform: Tensor, + channel: int, + sample_frequency: float, + frame_shift: float, + frame_length: float, + round_to_power_of_two: bool, + preemphasis_coefficient: float) -> Tuple[Tensor, int, int, int]: + channel = max(channel, 0) + assert channel < waveform.shape[0], ( + 'Invalid channel {} for size {}'.format(channel, waveform.shape[0])) + waveform = waveform[channel, :] # size (n) + window_shift = int( + sample_frequency * frame_shift * + 0.001) # pass frame_shift and frame_length in milliseconds + window_size = int(sample_frequency * frame_length * 0.001) + padded_window_size = _next_power_of_2( + window_size) if round_to_power_of_two else window_size + + assert 2 <= window_size <= len(waveform), ( + 'choose a window size {} that is [2, {}]'.format(window_size, + len(waveform))) + assert 0 < window_shift, '`window_shift` must be greater than 0' + assert padded_window_size % 2 == 0, 'the padded `window_size` must be divisible by two.' \ + ' use `round_to_power_of_two` or change `frame_length`' + assert 0. <= preemphasis_coefficient <= 1.0, '`preemphasis_coefficient` must be between [0,1]' + assert sample_frequency > 0, '`sample_frequency` must be greater than zero' + return waveform, window_shift, window_size, padded_window_size + + +def _get_window(waveform: Tensor, + padded_window_size: int, + window_size: int, + window_shift: int, + window_type: str, + blackman_coeff: float, + snip_edges: bool, + raw_energy: bool, + energy_floor: float, + dither: float, + remove_dc_offset: bool, + preemphasis_coefficient: float) -> Tuple[Tensor, Tensor]: + dtype = waveform.dtype + epsilon = _get_epsilon(dtype) + + # size (m, window_size) + strided_input = _get_strided(waveform, window_size, window_shift, + snip_edges) + + if dither != 0.0: + # Returns a random number strictly between 0 and 1 + x = paddle.maximum(epsilon, + paddle.rand(strided_input.shape, dtype=dtype)) + rand_gauss = paddle.sqrt(-2 * x.log()) * paddle.cos(2 * math.pi * x) + strided_input = strided_input + rand_gauss * dither + + if remove_dc_offset: + # Subtract each row/frame by its mean + row_means = paddle.mean( + strided_input, axis=1).unsqueeze(1) # size (m, 1) + strided_input = strided_input - row_means + + if raw_energy: + # Compute the log energy of each row/frame before applying preemphasis and + # window function + signal_log_energy = _get_log_energy(strided_input, epsilon, + energy_floor) # size (m) + + if preemphasis_coefficient != 0.0: + # strided_input[i,j] -= preemphasis_coefficient * strided_input[i, max(0, j-1)] for all i,j + offset_strided_input = paddle.nn.functional.pad( + strided_input.unsqueeze(0), (1, 0), + data_format='NCL', + mode='replicate').squeeze(0) # size (m, window_size + 1) + strided_input = strided_input - preemphasis_coefficient * offset_strided_input[:, : + -1] + + # Apply window_function to each row/frame + 
window_function = _feature_window_function( + window_type, window_size, blackman_coeff, + dtype).unsqueeze(0) # size (1, window_size) + strided_input = strided_input * window_function # size (m, window_size) + + # Pad columns with zero until we reach size (m, padded_window_size) + if padded_window_size != window_size: + padding_right = padded_window_size - window_size + strided_input = paddle.nn.functional.pad( + strided_input.unsqueeze(0), (0, padding_right), + data_format='NCL', + mode='constant', + value=0).squeeze(0) + + # Compute energy after window function (not the raw one) + if not raw_energy: + signal_log_energy = _get_log_energy(strided_input, epsilon, + energy_floor) # size (m) + + return strided_input, signal_log_energy + + +def _subtract_column_mean(tensor: Tensor, subtract_mean: bool) -> Tensor: + # subtracts the column mean of the tensor size (m, n) if subtract_mean=True + # it returns size (m, n) + if subtract_mean: + col_means = paddle.mean(tensor, axis=0).unsqueeze(0) + tensor = tensor - col_means + return tensor + + +def spectrogram(waveform: Tensor, + blackman_coeff: float=0.42, + channel: int=-1, + dither: float=0.0, + energy_floor: float=1.0, + frame_length: float=25.0, + frame_shift: float=10.0, + min_duration: float=0.0, + preemphasis_coefficient: float=0.97, + raw_energy: bool=True, + remove_dc_offset: bool=True, + round_to_power_of_two: bool=True, + sample_frequency: float=16000.0, + snip_edges: bool=True, + subtract_mean: bool=False, + window_type: str=POVEY) -> Tensor: + """[summary] + + Args: + waveform (Tensor): [description] + blackman_coeff (float, optional): [description]. Defaults to 0.42. + channel (int, optional): [description]. Defaults to -1. + dither (float, optional): [description]. Defaults to 0.0. + energy_floor (float, optional): [description]. Defaults to 1.0. + frame_length (float, optional): [description]. Defaults to 25.0. + frame_shift (float, optional): [description]. Defaults to 10.0. + min_duration (float, optional): [description]. Defaults to 0.0. + preemphasis_coefficient (float, optional): [description]. Defaults to 0.97. + raw_energy (bool, optional): [description]. Defaults to True. + remove_dc_offset (bool, optional): [description]. Defaults to True. + round_to_power_of_two (bool, optional): [description]. Defaults to True. + sample_frequency (float, optional): [description]. Defaults to 16000.0. + snip_edges (bool, optional): [description]. Defaults to True. + subtract_mean (bool, optional): [description]. Defaults to False. + window_type (str, optional): [description]. Defaults to POVEY. 
+ + Returns: + Tensor: [description] + """ + dtype = waveform.dtype + epsilon = _get_epsilon(dtype) + + waveform, window_shift, window_size, padded_window_size = _get_waveform_and_window_properties( + waveform, channel, sample_frequency, frame_shift, frame_length, + round_to_power_of_two, preemphasis_coefficient) + + if len(waveform) < min_duration * sample_frequency: + # signal is too short + return paddle.empty([0]) + + strided_input, signal_log_energy = _get_window( + waveform, padded_window_size, window_size, window_shift, window_type, + blackman_coeff, snip_edges, raw_energy, energy_floor, dither, + remove_dc_offset, preemphasis_coefficient) + + # size (m, padded_window_size // 2 + 1, 2) + fft = paddle.fft.rfft(strided_input) + + # Convert the FFT into a power spectrum + power_spectrum = paddle.maximum( + fft.abs().pow(2.), + epsilon).log() # size (m, padded_window_size // 2 + 1) + power_spectrum[:, 0] = signal_log_energy + + power_spectrum = _subtract_column_mean(power_spectrum, subtract_mean) + return power_spectrum + + +def _inverse_mel_scale_scalar(mel_freq: float) -> float: + return 700.0 * (math.exp(mel_freq / 1127.0) - 1.0) + + +def _inverse_mel_scale(mel_freq: Tensor) -> Tensor: + return 700.0 * ((mel_freq / 1127.0).exp() - 1.0) + + +def _mel_scale_scalar(freq: float) -> float: + return 1127.0 * math.log(1.0 + freq / 700.0) + + +def _mel_scale(freq: Tensor) -> Tensor: + return 1127.0 * (1.0 + freq / 700.0).log() + + +def _vtln_warp_freq(vtln_low_cutoff: float, + vtln_high_cutoff: float, + low_freq: float, + high_freq: float, + vtln_warp_factor: float, + freq: Tensor) -> Tensor: + assert vtln_low_cutoff > low_freq, 'be sure to set the vtln_low option higher than low_freq' + assert vtln_high_cutoff < high_freq, 'be sure to set the vtln_high option lower than high_freq [or negative]' + l = vtln_low_cutoff * max(1.0, vtln_warp_factor) + h = vtln_high_cutoff * min(1.0, vtln_warp_factor) + scale = 1.0 / vtln_warp_factor + Fl = scale * l # F(l) + Fh = scale * h # F(h) + assert l > low_freq and h < high_freq + # slope of left part of the 3-piece linear function + scale_left = (Fl - low_freq) / (l - low_freq) + # [slope of center part is just "scale"] + + # slope of right part of the 3-piece linear function + scale_right = (high_freq - Fh) / (high_freq - h) + + res = paddle.empty_like(freq) + + outside_low_high_freq = paddle.less_than(freq, paddle.to_tensor(low_freq)) \ + | paddle.greater_than(freq, paddle.to_tensor(high_freq)) # freq < low_freq || freq > high_freq + before_l = paddle.less_than(freq, paddle.to_tensor(l)) # freq < l + before_h = paddle.less_than(freq, paddle.to_tensor(h)) # freq < h + after_h = paddle.greater_equal(freq, paddle.to_tensor(h)) # freq >= h + + # order of operations matter here (since there is overlapping frequency regions) + res[after_h] = high_freq + scale_right * (freq[after_h] - high_freq) + res[before_h] = scale * freq[before_h] + res[before_l] = low_freq + scale_left * (freq[before_l] - low_freq) + res[outside_low_high_freq] = freq[outside_low_high_freq] + + return res + + +def _vtln_warp_mel_freq(vtln_low_cutoff: float, + vtln_high_cutoff: float, + low_freq, + high_freq: float, + vtln_warp_factor: float, + mel_freq: Tensor) -> Tensor: + return _mel_scale( + _vtln_warp_freq(vtln_low_cutoff, vtln_high_cutoff, low_freq, high_freq, + vtln_warp_factor, _inverse_mel_scale(mel_freq))) + + +def _get_mel_banks(num_bins: int, + window_length_padded: int, + sample_freq: float, + low_freq: float, + high_freq: float, + vtln_low: float, + vtln_high: float, + 
vtln_warp_factor: float) -> Tuple[Tensor, Tensor]: + assert num_bins > 3, 'Must have at least 3 mel bins' + assert window_length_padded % 2 == 0 + num_fft_bins = window_length_padded / 2 + nyquist = 0.5 * sample_freq + + if high_freq <= 0.0: + high_freq += nyquist + + assert (0.0 <= low_freq < nyquist) and (0.0 < high_freq <= nyquist) and (low_freq < high_freq), \ + ('Bad values in options: low-freq {} and high-freq {} vs. nyquist {}'.format(low_freq, high_freq, nyquist)) + + # fft-bin width [think of it as Nyquist-freq / half-window-length] + fft_bin_width = sample_freq / window_length_padded + mel_low_freq = _mel_scale_scalar(low_freq) + mel_high_freq = _mel_scale_scalar(high_freq) + + # divide by num_bins+1 in next line because of end-effects where the bins + # spread out to the sides. + mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins + 1) + + if vtln_high < 0.0: + vtln_high += nyquist + + assert vtln_warp_factor == 1.0 or ((low_freq < vtln_low < high_freq) and + (0.0 < vtln_high < high_freq) and (vtln_low < vtln_high)), \ + ('Bad values in options: vtln-low {} and vtln-high {}, versus ' + 'low-freq {} and high-freq {}'.format(vtln_low, vtln_high, low_freq, high_freq)) + + bin = paddle.arange(num_bins).unsqueeze(1) + left_mel = mel_low_freq + bin * mel_freq_delta # size(num_bins, 1) + center_mel = mel_low_freq + (bin + 1.0 + ) * mel_freq_delta # size(num_bins, 1) + right_mel = mel_low_freq + (bin + 2.0) * mel_freq_delta # size(num_bins, 1) + + if vtln_warp_factor != 1.0: + left_mel = _vtln_warp_mel_freq(vtln_low, vtln_high, low_freq, high_freq, + vtln_warp_factor, left_mel) + center_mel = _vtln_warp_mel_freq(vtln_low, vtln_high, low_freq, + high_freq, vtln_warp_factor, + center_mel) + right_mel = _vtln_warp_mel_freq(vtln_low, vtln_high, low_freq, + high_freq, vtln_warp_factor, right_mel) + + center_freqs = _inverse_mel_scale(center_mel) # size (num_bins) + # size(1, num_fft_bins) + mel = _mel_scale(fft_bin_width * paddle.arange(num_fft_bins)).unsqueeze(0) + + # size (num_bins, num_fft_bins) + up_slope = (mel - left_mel) / (center_mel - left_mel) + down_slope = (right_mel - mel) / (right_mel - center_mel) + + if vtln_warp_factor == 1.0: + # left_mel < center_mel < right_mel so we can min the two slopes and clamp negative values + bins = paddle.maximum( + paddle.zeros([1]), paddle.minimum(up_slope, down_slope)) + else: + # warping can move the order of left_mel, center_mel, right_mel anywhere + bins = paddle.zeros_like(up_slope) + up_idx = paddle.greater_than(mel, left_mel) & paddle.less_than( + mel, center_mel) # left_mel < mel <= center_mel + down_idx = paddle.greater_than(mel, center_mel) & paddle.less_than( + mel, right_mel) # center_mel < mel < right_mel + bins[up_idx] = up_slope[up_idx] + bins[down_idx] = down_slope[down_idx] + + return bins, center_freqs + + +def fbank(waveform: Tensor, + blackman_coeff: float=0.42, + channel: int=-1, + dither: float=0.0, + energy_floor: float=1.0, + frame_length: float=25.0, + frame_shift: float=10.0, + high_freq: float=0.0, + htk_compat: bool=False, + low_freq: float=20.0, + min_duration: float=0.0, + num_mel_bins: int=23, + preemphasis_coefficient: float=0.97, + raw_energy: bool=True, + remove_dc_offset: bool=True, + round_to_power_of_two: bool=True, + sample_frequency: float=16000.0, + snip_edges: bool=True, + subtract_mean: bool=False, + use_energy: bool=False, + use_log_fbank: bool=True, + use_power: bool=True, + vtln_high: float=-500.0, + vtln_low: float=100.0, + vtln_warp: float=1.0, + window_type: str=POVEY) -> Tensor: + 
"""[summary] + + Args: + waveform (Tensor): [description] + blackman_coeff (float, optional): [description]. Defaults to 0.42. + channel (int, optional): [description]. Defaults to -1. + dither (float, optional): [description]. Defaults to 0.0. + energy_floor (float, optional): [description]. Defaults to 1.0. + frame_length (float, optional): [description]. Defaults to 25.0. + frame_shift (float, optional): [description]. Defaults to 10.0. + high_freq (float, optional): [description]. Defaults to 0.0. + htk_compat (bool, optional): [description]. Defaults to False. + low_freq (float, optional): [description]. Defaults to 20.0. + min_duration (float, optional): [description]. Defaults to 0.0. + num_mel_bins (int, optional): [description]. Defaults to 23. + preemphasis_coefficient (float, optional): [description]. Defaults to 0.97. + raw_energy (bool, optional): [description]. Defaults to True. + remove_dc_offset (bool, optional): [description]. Defaults to True. + round_to_power_of_two (bool, optional): [description]. Defaults to True. + sample_frequency (float, optional): [description]. Defaults to 16000.0. + snip_edges (bool, optional): [description]. Defaults to True. + subtract_mean (bool, optional): [description]. Defaults to False. + use_energy (bool, optional): [description]. Defaults to False. + use_log_fbank (bool, optional): [description]. Defaults to True. + use_power (bool, optional): [description]. Defaults to True. + vtln_high (float, optional): [description]. Defaults to -500.0. + vtln_low (float, optional): [description]. Defaults to 100.0. + vtln_warp (float, optional): [description]. Defaults to 1.0. + window_type (str, optional): [description]. Defaults to POVEY. + + Returns: + Tensor: [description] + """ + dtype = waveform.dtype + + waveform, window_shift, window_size, padded_window_size = _get_waveform_and_window_properties( + waveform, channel, sample_frequency, frame_shift, frame_length, + round_to_power_of_two, preemphasis_coefficient) + + if len(waveform) < min_duration * sample_frequency: + # signal is too short + return paddle.empty([0], dtype=dtype) + + # strided_input, size (m, padded_window_size) and signal_log_energy, size (m) + strided_input, signal_log_energy = _get_window( + waveform, padded_window_size, window_size, window_shift, window_type, + blackman_coeff, snip_edges, raw_energy, energy_floor, dither, + remove_dc_offset, preemphasis_coefficient) + + # size (m, padded_window_size // 2 + 1) + spectrum = paddle.fft.rfft(strided_input).abs() + if use_power: + spectrum = spectrum.pow(2.) 
+
+
+def _get_dct_matrix(num_ceps: int, num_mel_bins: int) -> Tensor:
+    # returns a dct matrix of size (num_mel_bins, num_ceps)
+    # size (num_mel_bins, num_mel_bins)
+    dct_matrix = create_dct(num_mel_bins, num_mel_bins, 'ortho')
+    # kaldi expects the first cepstral coefficient to be a weighted sum with factor sqrt(1/num_mel_bins)
+    # this would be the first column in the dct_matrix for torchaudio as it expects a
+    # right multiply (which would be the first column of the kaldi's dct_matrix as kaldi
+    # expects a left multiply e.g. dct_matrix * vector).
+    dct_matrix[:, 0] = math.sqrt(1 / float(num_mel_bins))
+    dct_matrix = dct_matrix[:, :num_ceps]
+    return dct_matrix
+
+
+def _get_lifter_coeffs(num_ceps: int, cepstral_lifter: float) -> Tensor:
+    # returns size (num_ceps)
+    # Compute liftering coefficients (scaling on cepstral coeffs)
+    # coeffs are numbered slightly differently from HTK: the zeroth index is C0, which is not affected.
+    i = paddle.arange(num_ceps)
+    return 1.0 + 0.5 * cepstral_lifter * paddle.sin(math.pi * i /
+                                                    cepstral_lifter)
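A standalone check of the liftering formula above, c_i = 1 + (Q/2) * sin(pi * i / Q) with Q = cepstral_lifter; C0 is left untouched while mid-order coefficients are boosted:

```python
import math

Q = 22.0  # the default cepstral_lifter
coeffs = [1.0 + 0.5 * Q * math.sin(math.pi * i / Q) for i in range(13)]
print(round(coeffs[0], 3), round(coeffs[6], 3))  # 1.0 and ~9.313
```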
+
+
+def mfcc(waveform: Tensor,
+         blackman_coeff: float=0.42,
+         cepstral_lifter: float=22.0,
+         channel: int=-1,
+         dither: float=0.0,
+         energy_floor: float=1.0,
+         frame_length: float=25.0,
+         frame_shift: float=10.0,
+         high_freq: float=0.0,
+         htk_compat: bool=False,
+         low_freq: float=20.0,
+         num_ceps: int=13,
+         min_duration: float=0.0,
+         num_mel_bins: int=23,
+         preemphasis_coefficient: float=0.97,
+         raw_energy: bool=True,
+         remove_dc_offset: bool=True,
+         round_to_power_of_two: bool=True,
+         sample_frequency: float=16000.0,
+         snip_edges: bool=True,
+         subtract_mean: bool=False,
+         use_energy: bool=False,
+         vtln_high: float=-500.0,
+         vtln_low: float=100.0,
+         vtln_warp: float=1.0,
+         window_type: str=POVEY) -> Tensor:
+    """Compute Kaldi-compatible mel-frequency cepstral coefficients (MFCCs) from a waveform.
+
+    Args:
+        waveform (Tensor): The audio signal, with shape (c, n), where c is the channel number.
+        blackman_coeff (float, optional): Constant coefficient for the generalized Blackman window. Defaults to 0.42.
+        cepstral_lifter (float, optional): Constant controlling the scaling of the cepstral coefficients. Defaults to 22.0.
+        channel (int, optional): Channel to extract (-1 means expect mono). Defaults to -1.
+        dither (float, optional): Dithering constant; 0.0 disables dithering. Defaults to 0.0.
+        energy_floor (float, optional): Absolute floor on energy. Defaults to 1.0.
+        frame_length (float, optional): Frame length in milliseconds. Defaults to 25.0.
+        frame_shift (float, optional): Frame shift in milliseconds. Defaults to 10.0.
+        high_freq (float, optional): High cutoff for mel bins; if <= 0, taken as an offset from the Nyquist frequency. Defaults to 0.0.
+        htk_compat (bool, optional): If True, put the energy in the last column for HTK compatibility. Defaults to False.
+        low_freq (float, optional): Low cutoff for mel bins. Defaults to 20.0.
+        num_ceps (int, optional): Number of cepstra to keep, including C0. Defaults to 13.
+        min_duration (float, optional): Minimum duration (in seconds) of segments to process. Defaults to 0.0.
+        num_mel_bins (int, optional): Number of triangular mel bins. Defaults to 23.
+        preemphasis_coefficient (float, optional): Coefficient for signal preemphasis. Defaults to 0.97.
+        raw_energy (bool, optional): If True, compute energy before preemphasis and windowing. Defaults to True.
+        remove_dc_offset (bool, optional): Subtract the mean from each frame. Defaults to True.
+        round_to_power_of_two (bool, optional): If True, zero-pad the window to the next power of two. Defaults to True.
+        sample_frequency (float, optional): Sample frequency of the waveform. Defaults to 16000.0.
+        snip_edges (bool, optional): If True, only output frames that fit completely in the signal. Defaults to True.
+        subtract_mean (bool, optional): Subtract the mean of each feature column. Defaults to False.
+        use_energy (bool, optional): Replace C0 with the frame energy. Defaults to False.
+        vtln_high (float, optional): High inflection point of the piecewise linear VTLN warp; if negative, taken as an offset from high_freq. Defaults to -500.0.
+        vtln_low (float, optional): Low inflection point of the VTLN warp. Defaults to 100.0.
+        vtln_warp (float, optional): VTLN warp factor (1.0 means no warping). Defaults to 1.0.
+        window_type (str, optional): Window type, one of 'hamming', 'hanning', 'povey', 'rectangular' or 'blackman'. Defaults to POVEY.
+
+    Returns:
+        Tensor: The MFCC features, with shape (m, num_ceps), where m is the number of frames.
+    """
+    assert num_ceps <= num_mel_bins, 'num_ceps cannot be larger than num_mel_bins: %d vs %d' % (
+        num_ceps, num_mel_bins)
+
+    dtype = waveform.dtype
+
+    # The mel_energies should not be squared (use_power=True), not have mean subtracted
+    # (subtract_mean=False), and use log (use_log_fbank=True).
+    # size (m, num_mel_bins + use_energy)
+    feature = fbank(
+        waveform=waveform,
+        blackman_coeff=blackman_coeff,
+        channel=channel,
+        dither=dither,
+        energy_floor=energy_floor,
+        frame_length=frame_length,
+        frame_shift=frame_shift,
+        high_freq=high_freq,
+        htk_compat=htk_compat,
+        low_freq=low_freq,
+        min_duration=min_duration,
+        num_mel_bins=num_mel_bins,
+        preemphasis_coefficient=preemphasis_coefficient,
+        raw_energy=raw_energy,
+        remove_dc_offset=remove_dc_offset,
+        round_to_power_of_two=round_to_power_of_two,
+        sample_frequency=sample_frequency,
+        snip_edges=snip_edges,
+        subtract_mean=False,
+        use_energy=use_energy,
+        use_log_fbank=True,
+        use_power=True,
+        vtln_high=vtln_high,
+        vtln_low=vtln_low,
+        vtln_warp=vtln_warp,
+        window_type=window_type)
+
+    if use_energy:
+        # size (m)
+        signal_log_energy = feature[:, num_mel_bins if htk_compat else 0]
+        # offset is 0 if htk_compat==True else 1
+        mel_offset = int(not htk_compat)
+        feature = feature[:, mel_offset:(num_mel_bins + mel_offset)]
+
+    # size (num_mel_bins, num_ceps)
+    dct_matrix = _get_dct_matrix(num_ceps, num_mel_bins).astype(dtype=dtype)
+
+    # size (m, num_ceps)
+    feature = feature.matmul(dct_matrix)
+
+    if cepstral_lifter != 0.0:
+        # size (1, num_ceps)
+        lifter_coeffs = _get_lifter_coeffs(num_ceps,
+                                           cepstral_lifter).unsqueeze(0)
+        feature *= lifter_coeffs.astype(dtype=dtype)
+
+    # if use_energy then replace the last column for htk_compat == true else first column
+    if use_energy:
+        feature[:, 0] = signal_log_energy
+
+    if htk_compat:
+        energy = feature[:, 0].unsqueeze(1)  # size (m, 1)
+        feature = feature[:, 1:]  # size (m, num_ceps - 1)
+        if not use_energy:
+            # scale on C0 (actually removing a scale we previously added that's
+            # part of one common definition of the cosine transform.)
+            energy *= math.sqrt(2)
+
+        feature = paddle.concat((feature, energy), axis=1)
+
+    feature = _subtract_column_mean(feature, subtract_mean)
+    return feature
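A hypothetical usage sketch for the two entry points added in this file; the import path (`paddleaudio.compliance.kaldi`) follows this patch and the mono `(c, n)` waveform layout is an assumption:

```python
import paddle
from paddleaudio.compliance import kaldi  # assumed module path per this patch

wav = paddle.randn([1, 16000])           # one second of fake mono audio at 16 kHz
fb = kaldi.fbank(wav, num_mel_bins=23)   # (m, 23) log-mel filterbank frames
cc = kaldi.mfcc(wav, num_ceps=13)        # (m, 13) cepstral coefficients
print(fb.shape, cc.shape)                # m == 98 with the default framing
```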
diff --git a/paddleaudio/paddleaudio/compliance/librosa.py b/paddleaudio/paddleaudio/compliance/librosa.py
new file mode 100644
index 00000000..167795c3
--- /dev/null
+++ b/paddleaudio/paddleaudio/compliance/librosa.py
@@ -0,0 +1,728 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Modified from librosa(https://github.com/librosa/librosa)
+import warnings
+from typing import List
+from typing import Optional
+from typing import Union
+
+import numpy as np
+import scipy
+from numpy import ndarray as array
+from numpy.lib.stride_tricks import as_strided
+from scipy import signal
+
+from ..backends import depth_convert
+from ..utils import ParameterError
+
+__all__ = [
+    # dsp
+    'stft',
+    'mfcc',
+    'hz_to_mel',
+    'mel_to_hz',
+    'split_frames',
+    'mel_frequencies',
+    'power_to_db',
+    'compute_fbank_matrix',
+    'melspectrogram',
+    'spectrogram',
+    'mu_encode',
+    'mu_decode',
+    # augmentation
+    'depth_augment',
+    'spect_augment',
+    'random_crop1d',
+    'random_crop2d',
+    'adaptive_spect_augment',
+]
+
+
+def pad_center(data: array, size: int, axis: int=-1, **kwargs) -> array:
+    """Pad an array to a target length along a target axis.
+
+    This differs from `np.pad` by centering the data prior to padding,
+    analogous to `str.center`
+    """
+
+    kwargs.setdefault("mode", "constant")
+    n = data.shape[axis]
+    lpad = int((size - n) // 2)
+    lengths = [(0, 0)] * data.ndim
+    lengths[axis] = (lpad, int(size - n - lpad))
+
+    if lpad < 0:
+        raise ParameterError(f"Target size ({size:d}) must be "
+                             f"at least input size ({n:d})")
+
+    return np.pad(data, lengths, **kwargs)
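What `pad_center` above computes for a 5-tap window padded to `n_fft = 8`, restated inline so the snippet is self-contained:

```python
import numpy as np

w, size = np.ones(5), 8
lpad = (size - len(w)) // 2
print(np.pad(w, (lpad, size - len(w) - lpad)))  # [0. 1. 1. 1. 1. 1. 0. 0.]
```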
+
+
+def split_frames(x: array, frame_length: int, hop_length: int,
+                 axis: int=-1) -> array:
+    """Slice a data array into (overlapping) frames.
+
+    This function is aligned with librosa.frame
+    """
+
+    if not isinstance(x, np.ndarray):
+        raise ParameterError(
+            f"Input must be of type numpy.ndarray, given type(x)={type(x)}")
+
+    if x.shape[axis] < frame_length:
+        raise ParameterError(f"Input is too short (n={x.shape[axis]:d})"
+                             f" for frame_length={frame_length:d}")
+
+    if hop_length < 1:
+        raise ParameterError(f"Invalid hop_length: {hop_length:d}")
+
+    if axis == -1 and not x.flags["F_CONTIGUOUS"]:
+        warnings.warn(f"librosa.util.frame called with axis={axis} "
+                      "on a non-contiguous input. This will result in a copy.")
+        x = np.asfortranarray(x)
+    elif axis == 0 and not x.flags["C_CONTIGUOUS"]:
+        warnings.warn(f"librosa.util.frame called with axis={axis} "
+                      "on a non-contiguous input. This will result in a copy.")
+        x = np.ascontiguousarray(x)
+
+    n_frames = 1 + (x.shape[axis] - frame_length) // hop_length
+    strides = np.asarray(x.strides)
+
+    new_stride = np.prod(strides[strides > 0] // x.itemsize) * x.itemsize
+
+    if axis == -1:
+        shape = list(x.shape)[:-1] + [frame_length, n_frames]
+        strides = list(strides) + [hop_length * new_stride]
+
+    elif axis == 0:
+        shape = [n_frames, frame_length] + list(x.shape)[1:]
+        strides = [hop_length * new_stride] + list(strides)
+
+    else:
+        raise ParameterError(f"Frame axis={axis} must be either 0 or -1")
+
+    return as_strided(x, shape=shape, strides=strides)
+
+
+def _check_audio(y, mono=True) -> bool:
+    """Determine whether a variable contains valid audio data.
+
+    The audio y must be a np.ndarray, either mono (1 channel) or stereo
+    (2 channels).
+    """
+    if not isinstance(y, np.ndarray):
+        raise ParameterError("Audio data must be of type numpy.ndarray")
+    if y.ndim > 2:
+        raise ParameterError(
+            f"Invalid shape for audio ndim={y.ndim:d}, shape={y.shape}")
+
+    if mono and y.ndim == 2:
+        raise ParameterError(
+            f"Invalid shape for mono audio ndim={y.ndim:d}, shape={y.shape}")
+
+    if (mono and len(y) == 0) or (not mono and y.shape[1] == 0):
+        raise ParameterError(f"Audio is empty ndim={y.ndim:d}, shape={y.shape}")
+
+    if not np.issubdtype(y.dtype, np.floating):
+        raise ParameterError("Audio data must be floating-point")
+
+    if not np.isfinite(y).all():
+        raise ParameterError("Audio buffer is not finite everywhere")
+
+    return True
+
+
+def hz_to_mel(frequencies: Union[float, List[float], array],
+              htk: bool=False) -> array:
+    """Convert Hz to Mels
+
+    This function is aligned with librosa.
+    """
+    freq = np.asanyarray(frequencies)
+
+    if htk:
+        return 2595.0 * np.log10(1.0 + freq / 700.0)
+
+    # Fill in the linear part
+    f_min = 0.0
+    f_sp = 200.0 / 3
+
+    mels = (freq - f_min) / f_sp
+
+    # Fill in the log-scale part
+
+    min_log_hz = 1000.0  # beginning of log region (Hz)
+    min_log_mel = (min_log_hz - f_min) / f_sp  # same (Mels)
+    logstep = np.log(6.4) / 27.0  # step size for log region
+
+    if freq.ndim:
+        # If we have array data, vectorize
+        log_t = freq >= min_log_hz
+        mels[log_t] = min_log_mel + \
+            np.log(freq[log_t] / min_log_hz) / logstep
+    elif freq >= min_log_hz:
+        # If we have scalar data, check directly
+        mels = min_log_mel + np.log(freq / min_log_hz) / logstep
+
+    return mels
+
+
+def mel_to_hz(mels: Union[float, List[float], array],
+              htk: bool=False) -> array:
+    """Convert mel bin numbers to frequencies.
+
+    This function is aligned with librosa.
+    """
+    mel_array = np.asanyarray(mels)
+
+    if htk:
+        return 700.0 * (10.0**(mel_array / 2595.0) - 1.0)
+
+    # Fill in the linear scale
+    f_min = 0.0
+    f_sp = 200.0 / 3
+    freqs = f_min + f_sp * mel_array
+
+    # And now the nonlinear scale
+    min_log_hz = 1000.0  # beginning of log region (Hz)
+    min_log_mel = (min_log_hz - f_min) / f_sp  # same (Mels)
+    logstep = np.log(6.4) / 27.0  # step size for log region
+
+    if mel_array.ndim:
+        # If we have vector data, vectorize
+        log_t = mel_array >= min_log_mel
+        freqs[log_t] = min_log_hz * \
+            np.exp(logstep * (mel_array[log_t] - min_log_mel))
+    elif mel_array >= min_log_mel:
+        # If we have scalar data, check directly
+        freqs = min_log_hz * np.exp(logstep * (mel_array - min_log_mel))
+
+    return freqs
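The Slaney-style scale used above is linear below 1 kHz (200/3 Hz per mel) and logarithmic above it. A standalone restatement of the conversion at the knee and beyond:

```python
import numpy as np

f_sp, min_log_hz = 200.0 / 3, 1000.0
min_log_mel = min_log_hz / f_sp          # 15.0 mel at the knee
logstep = np.log(6.4) / 27.0
freq = np.array([200.0, 1000.0, 4000.0])
mels = freq / f_sp
log_t = freq >= min_log_hz
mels[log_t] = min_log_mel + np.log(freq[log_t] / min_log_hz) / logstep
print(mels)  # approximately [ 3.  15.  35.16]
```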
+ """ + # 'Center freqs' of mel bands - uniformly spaced between limits + min_mel = hz_to_mel(fmin, htk=htk) + max_mel = hz_to_mel(fmax, htk=htk) + + mels = np.linspace(min_mel, max_mel, n_mels) + + return mel_to_hz(mels, htk=htk) + + +def fft_frequencies(sr: int, n_fft: int) -> array: + """Compute fourier frequencies. + + This function is aligned with librosa. + """ + return np.linspace(0, float(sr) / 2, int(1 + n_fft // 2), endpoint=True) + + +def compute_fbank_matrix(sr: int, + n_fft: int, + n_mels: int=128, + fmin: float=0.0, + fmax: Optional[float]=None, + htk: bool=False, + norm: str="slaney", + dtype: type=np.float32): + """Compute fbank matrix. + + This funciton is aligned with librosa. + """ + if norm != "slaney": + raise ParameterError('norm must set to slaney') + + if fmax is None: + fmax = float(sr) / 2 + + # Initialize the weights + n_mels = int(n_mels) + weights = np.zeros((n_mels, int(1 + n_fft // 2)), dtype=dtype) + + # Center freqs of each FFT bin + fftfreqs = fft_frequencies(sr=sr, n_fft=n_fft) + + # 'Center freqs' of mel bands - uniformly spaced between limits + mel_f = mel_frequencies(n_mels + 2, fmin=fmin, fmax=fmax, htk=htk) + + fdiff = np.diff(mel_f) + ramps = np.subtract.outer(mel_f, fftfreqs) + + for i in range(n_mels): + # lower and upper slopes for all bins + lower = -ramps[i] / fdiff[i] + upper = ramps[i + 2] / fdiff[i + 1] + + # .. then intersect them with each other and zero + weights[i] = np.maximum(0, np.minimum(lower, upper)) + + if norm == "slaney": + # Slaney-style mel is scaled to be approx constant energy per channel + enorm = 2.0 / (mel_f[2:n_mels + 2] - mel_f[:n_mels]) + weights *= enorm[:, np.newaxis] + + # Only check weights if f_mel[0] is positive + if not np.all((mel_f[:-2] == 0) | (weights.max(axis=1) > 0)): + # This means we have an empty channel somewhere + warnings.warn("Empty filters detected in mel frequency basis. " + "Some channels will produce empty responses. " + "Try increasing your sampling rate (and fmax) or " + "reducing n_mels.") + + return weights + + +def stft(x: array, + n_fft: int=2048, + hop_length: Optional[int]=None, + win_length: Optional[int]=None, + window: str="hann", + center: bool=True, + dtype: type=np.complex64, + pad_mode: str="reflect") -> array: + """Short-time Fourier transform (STFT). + + This function is aligned with librosa. + """ + _check_audio(x) + + # By default, use the entire frame + if win_length is None: + win_length = n_fft + + # Set the default hop, if it's not already specified + if hop_length is None: + hop_length = int(win_length // 4) + + fft_window = signal.get_window(window, win_length, fftbins=True) + + # Pad the window out to n_fft size + fft_window = pad_center(fft_window, n_fft) + + # Reshape so that the window can be broadcast + fft_window = fft_window.reshape((-1, 1)) + + # Pad the time series so that frames are centered + if center: + if n_fft > x.shape[-1]: + warnings.warn( + f"n_fft={n_fft} is too small for input signal of length={x.shape[-1]}" + ) + x = np.pad(x, int(n_fft // 2), mode=pad_mode) + + elif n_fft > x.shape[-1]: + raise ParameterError( + f"n_fft={n_fft} is too small for input signal of length={x.shape[-1]}" + ) + + # Window the time series. 
+ x_frames = split_frames(x, frame_length=n_fft, hop_length=hop_length) + # Pre-allocate the STFT matrix + stft_matrix = np.empty( + (int(1 + n_fft // 2), x_frames.shape[1]), dtype=dtype, order="F") + fft = np.fft # use numpy fft as default + # Constrain STFT block sizes to 256 KB + MAX_MEM_BLOCK = 2**8 * 2**10 + # how many columns can we fit within MAX_MEM_BLOCK? + n_columns = MAX_MEM_BLOCK // (stft_matrix.shape[0] * stft_matrix.itemsize) + n_columns = max(n_columns, 1) + + for bl_s in range(0, stft_matrix.shape[1], n_columns): + bl_t = min(bl_s + n_columns, stft_matrix.shape[1]) + stft_matrix[:, bl_s:bl_t] = fft.rfft( + fft_window * x_frames[:, bl_s:bl_t], axis=0) + + return stft_matrix + + +def power_to_db(spect: array, + ref: float=1.0, + amin: float=1e-10, + top_db: Optional[float]=80.0) -> array: + """Convert a power spectrogram (amplitude squared) to decibel (dB) units + + This computes the scaling ``10 * log10(spect / ref)`` in a numerically + stable way. + + This function is aligned with librosa. + """ + spect = np.asarray(spect) + + if amin <= 0: + raise ParameterError("amin must be strictly positive") + + if np.issubdtype(spect.dtype, np.complexfloating): + warnings.warn( + "power_to_db was called on complex input so phase " + "information will be discarded. To suppress this warning, " + "call power_to_db(np.abs(D)**2) instead.") + magnitude = np.abs(spect) + else: + magnitude = spect + + if callable(ref): + # User supplied a function to calculate reference power + ref_value = ref(magnitude) + else: + ref_value = np.abs(ref) + + log_spec = 10.0 * np.log10(np.maximum(amin, magnitude)) + log_spec -= 10.0 * np.log10(np.maximum(amin, ref_value)) + + if top_db is not None: + if top_db < 0: + raise ParameterError("top_db must be non-negative") + log_spec = np.maximum(log_spec, log_spec.max() - top_db) + + return log_spec + + +def mfcc(x, + sr: int=16000, + spect: Optional[array]=None, + n_mfcc: int=20, + dct_type: int=2, + norm: str="ortho", + lifter: int=0, + **kwargs) -> array: + """Mel-frequency cepstral coefficients (MFCCs) + + This function is NOT strictly aligned with librosa. The following example shows how to get the + same result with librosa: + + # mfcc: + kwargs = { + 'window_size':512, + 'hop_length':320, + 'mel_bins':64, + 'fmin':50, + 'to_db':False} + a = mfcc(x, + spect=None, + n_mfcc=20, + dct_type=2, + norm='ortho', + lifter=0, + **kwargs) + + # librosa mfcc: + spect = librosa.feature.melspectrogram(y=x,sr=16000,n_fft=512, + win_length=512, + hop_length=320, + n_mels=64, fmin=50) + b = librosa.feature.mfcc(y=x, + sr=16000, + S=spect, + n_mfcc=20, + dct_type=2, + norm='ortho', + lifter=0) + + assert np.mean( (a-b)**2) < 1e-8 + + """ + if spect is None: + spect = melspectrogram(x, sr=sr, **kwargs) + + M = scipy.fftpack.dct(spect, axis=0, type=dct_type, norm=norm)[:n_mfcc] + + if lifter > 0: + factor = np.sin(np.pi * np.arange(1, 1 + n_mfcc, dtype=M.dtype) / + lifter) + return M * factor[:, np.newaxis] + elif lifter == 0: + return M + else: + raise ParameterError( + f"MFCC lifter={lifter} must be a non-negative number") + + +def melspectrogram(x: array, + sr: int=16000, + window_size: int=512, + hop_length: int=320, + n_mels: int=64, + fmin: int=50, + fmax: Optional[float]=None, + window: str='hann', + center: bool=True, + pad_mode: str='reflect', + power: float=2.0, + to_db: bool=True, + ref: float=1.0, + amin: float=1e-10, + top_db: Optional[float]=None) -> array: + """Compute mel-spectrogram. 
+
+    Parameters:
+        x: numpy.ndarray
+            The input waveform is a numpy array [shape=(n,)]
+
+        window_size: int, typically 512, 1024, 2048, etc.
+            The window size for framing, also used as n_fft for stft
+
+    Returns:
+        The mel-spectrogram in power scale or db scale (default)
+
+    Notes:
+        1. sr defaults to 16000, which is commonly used in speech/speaker processing.
+        2. when fmax is None, it is set to sr//2.
+        3. this function converts the mel spectrum to db scale by default, which
+           differs from librosa.
+    """
+    _check_audio(x, mono=True)
+    if len(x) <= 0:
+        raise ParameterError('The input waveform is empty')
+
+    if fmax is None:
+        fmax = sr // 2
+    if fmin < 0 or fmin >= fmax:
+        raise ParameterError('fmin and fmax must satisfy 0 < fmin < fmax <= sr//2')
+
+    s = stft(
+        x,
+        n_fft=window_size,
+        hop_length=hop_length,
+        win_length=window_size,
+        window=window,
+        center=center,
+        pad_mode=pad_mode)
+
+    spect_power = np.abs(s)**power
+    fb_matrix = compute_fbank_matrix(
+        sr=sr, n_fft=window_size, n_mels=n_mels, fmin=fmin, fmax=fmax)
+    mel_spect = np.matmul(fb_matrix, spect_power)
+    if to_db:
+        return power_to_db(mel_spect, ref=ref, amin=amin, top_db=top_db)
+    else:
+        return mel_spect
+
+
+def spectrogram(x: array,
+                sr: int=16000,
+                window_size: int=512,
+                hop_length: int=320,
+                window: str='hann',
+                center: bool=True,
+                pad_mode: str='reflect',
+                power: float=2.0) -> array:
+    """Compute spectrogram from an input waveform.
+
+    This function is a wrapper around stft, with an additional step to
+    compute the magnitude of the complex spectrogram.
+    """
+
+    s = stft(
+        x,
+        n_fft=window_size,
+        hop_length=hop_length,
+        win_length=window_size,
+        window=window,
+        center=center,
+        pad_mode=pad_mode)
+
+    return np.abs(s)**power
+
+
+def mu_encode(x: array, mu: int=255, quantized: bool=True) -> array:
+    """Mu-law encoding.
+
+    Compute the mu-law encoding of the input waveform.
+    When quantized is True, the result will be converted to
+    integer in range [0, mu-1]. Otherwise, the resulting signal
+    is in range [-1, 1].
+
+    Reference:
+        https://en.wikipedia.org/wiki/%CE%9C-law_algorithm
+
+    """
+    y = np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)
+    if quantized:
+        y = np.floor((y + 1) / 2 * mu + 0.5)  # convert to [0, mu-1]
+    return y
+
+
+def mu_decode(y: array, mu: int=255, quantized: bool=True) -> array:
+    """Mu-law decoding.
+
+    Compute the mu-law decoding of an input code.
+
+    It assumes that the input y is in range [0, mu-1] when quantized is True
+    and in range [-1, 1] otherwise.
+
+    Reference:
+        https://en.wikipedia.org/wiki/%CE%9C-law_algorithm
+
+    """
+    if mu < 1:
+        raise ParameterError('mu is typically set as 2**k-1, k=1, 2, 3,...')
+
+    mu = mu - 1
+    if quantized:  # undo the quantization
+        y = y * 2 / mu - 1
+    x = np.sign(y) / mu * ((1 + mu)**np.abs(y) - 1)
+    return x
+
+
+def randint(high: int) -> int:
+    """Generate one random integer in range [0, high).
+
+    This is a helper function for random data augmentation.
+    """
+    return int(np.random.randint(0, high=high))
+
+
+def rand() -> float:
+    """Generate one floating-point number in range [0, 1).
+
+    This is a helper function for random data augmentation.
+    """
+    return float(np.random.rand(1))
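A quick round trip of the underlying mu-law formulas used by mu_encode/mu_decode above (continuous, quantized=False path), restated standalone with a matched mu on both sides:

```python
import numpy as np

mu = 255
x = np.linspace(-1.0, 1.0, 5)
y = np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)   # compand
x2 = np.sign(y) / mu * ((1 + mu)**np.abs(y) - 1)           # expand
print(np.allclose(x, x2))  # True: the companding is exactly invertible
```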
+ """ + assert len(probs) == len( + choices + ), 'number of choices {} must be equal to size of probs {}'.format( + len(choices), len(probs)) + depth = np.random.choice(choices, p=probs) + src_depth = y.dtype + y1 = depth_convert(y, depth) + y2 = depth_convert(y1, src_depth) + + return y2 + + +def adaptive_spect_augment(spect: array, tempo_axis: int=0, + level: float=0.1) -> array: + """Do adpative spectrogram augmentation + + The level of the augmentation is gowern by the paramter level, + ranging from 0 to 1, with 0 represents no augmentation。 + + """ + assert spect.ndim == 2., 'only supports 2d tensor or numpy array' + if tempo_axis == 0: + nt, nf = spect.shape + else: + nf, nt = spect.shape + + time_mask_width = int(nt * level * 0.5) + freq_mask_width = int(nf * level * 0.5) + + num_time_mask = int(10 * level) + num_freq_mask = int(10 * level) + + if tempo_axis == 0: + for _ in range(num_time_mask): + start = randint(nt - time_mask_width) + spect[start:start + time_mask_width, :] = 0 + for _ in range(num_freq_mask): + start = randint(nf - freq_mask_width) + spect[:, start:start + freq_mask_width] = 0 + else: + for _ in range(num_time_mask): + start = randint(nt - time_mask_width) + spect[:, start:start + time_mask_width] = 0 + for _ in range(num_freq_mask): + start = randint(nf - freq_mask_width) + spect[start:start + freq_mask_width, :] = 0 + + return spect + + +def spect_augment(spect: array, + tempo_axis: int=0, + max_time_mask: int=3, + max_freq_mask: int=3, + max_time_mask_width: int=30, + max_freq_mask_width: int=20) -> array: + """Do spectrogram augmentation in both time and freq axis + + Reference: + + """ + assert spect.ndim == 2., 'only supports 2d tensor or numpy array' + if tempo_axis == 0: + nt, nf = spect.shape + else: + nf, nt = spect.shape + + num_time_mask = randint(max_time_mask) + num_freq_mask = randint(max_freq_mask) + + time_mask_width = randint(max_time_mask_width) + freq_mask_width = randint(max_freq_mask_width) + + if tempo_axis == 0: + for _ in range(num_time_mask): + start = randint(nt - time_mask_width) + spect[start:start + time_mask_width, :] = 0 + for _ in range(num_freq_mask): + start = randint(nf - freq_mask_width) + spect[:, start:start + freq_mask_width] = 0 + else: + for _ in range(num_time_mask): + start = randint(nt - time_mask_width) + spect[:, start:start + time_mask_width] = 0 + for _ in range(num_freq_mask): + start = randint(nf - freq_mask_width) + spect[start:start + freq_mask_width, :] = 0 + + return spect + + +def random_crop1d(y: array, crop_len: int) -> array: + """ Do random cropping on 1d input signal + + The input is a 1d signal, typically a sound waveform + """ + if y.ndim != 1: + 'only accept 1d tensor or numpy array' + n = len(y) + idx = randint(n - crop_len) + return y[idx:idx + crop_len] + + +def random_crop2d(s: array, crop_len: int, tempo_axis: int=0) -> array: + """ Do random cropping for 2D array, typically a spectrogram. + + The cropping is done in temporal direction on the time-freq input signal. 
+ """ + if tempo_axis >= s.ndim: + raise ParameterError('axis out of range') + + n = s.shape[tempo_axis] + idx = randint(high=n - crop_len) + sli = [slice(None) for i in range(s.ndim)] + sli[tempo_axis] = slice(idx, idx + crop_len) + out = s[tuple(sli)] + return out diff --git a/paddleaudio/paddleaudio/features/librosa.py b/paddleaudio/paddleaudio/features/layers.py similarity index 59% rename from paddleaudio/paddleaudio/features/librosa.py rename to paddleaudio/paddleaudio/features/layers.py index 1cbd2d1a..69f814d6 100644 --- a/paddleaudio/paddleaudio/features/librosa.py +++ b/paddleaudio/paddleaudio/features/layers.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import math from functools import partial from typing import Optional from typing import Union @@ -19,225 +18,19 @@ from typing import Union import paddle import paddle.nn as nn +from ..functional import compute_fbank_matrix +from ..functional import create_dct +from ..functional import power_to_db from ..functional.window import get_window __all__ = [ 'Spectrogram', 'MelSpectrogram', 'LogMelSpectrogram', + 'MFCC', ] -def hz_to_mel(freq: Union[paddle.Tensor, float], - htk: bool=False) -> Union[paddle.Tensor, float]: - """Convert Hz to Mels. - Parameters: - freq: the input tensor of arbitrary shape, or a single floating point number. - htk: use HTK formula to do the conversion. - The default value is False. - Returns: - The frequencies represented in Mel-scale. - """ - - if htk: - if isinstance(freq, paddle.Tensor): - return 2595.0 * paddle.log10(1.0 + freq / 700.0) - else: - return 2595.0 * math.log10(1.0 + freq / 700.0) - - # Fill in the linear part - f_min = 0.0 - f_sp = 200.0 / 3 - - mels = (freq - f_min) / f_sp - - # Fill in the log-scale part - - min_log_hz = 1000.0 # beginning of log region (Hz) - min_log_mel = (min_log_hz - f_min) / f_sp # same (Mels) - logstep = math.log(6.4) / 27.0 # step size for log region - - if isinstance(freq, paddle.Tensor): - target = min_log_mel + paddle.log( - freq / min_log_hz + 1e-10) / logstep # prevent nan with 1e-10 - mask = (freq > min_log_hz).astype(freq.dtype) - mels = target * mask + mels * ( - 1 - mask) # will replace by masked_fill OP in future - else: - if freq >= min_log_hz: - mels = min_log_mel + math.log(freq / min_log_hz + 1e-10) / logstep - - return mels - - -def mel_to_hz(mel: Union[float, paddle.Tensor], - htk: bool=False) -> Union[float, paddle.Tensor]: - """Convert mel bin numbers to frequencies. - Parameters: - mel: the mel frequency represented as a tensor of arbitrary shape, or a floating point number. - htk: use HTK formula to do the conversion. - Returns: - The frequencies represented in hz. 
- """ - if htk: - return 700.0 * (10.0**(mel / 2595.0) - 1.0) - - f_min = 0.0 - f_sp = 200.0 / 3 - freqs = f_min + f_sp * mel - # And now the nonlinear scale - min_log_hz = 1000.0 # beginning of log region (Hz) - min_log_mel = (min_log_hz - f_min) / f_sp # same (Mels) - logstep = math.log(6.4) / 27.0 # step size for log region - if isinstance(mel, paddle.Tensor): - target = min_log_hz * paddle.exp(logstep * (mel - min_log_mel)) - mask = (mel > min_log_mel).astype(mel.dtype) - freqs = target * mask + freqs * ( - 1 - mask) # will replace by masked_fill OP in future - else: - if mel >= min_log_mel: - freqs = min_log_hz * math.exp(logstep * (mel - min_log_mel)) - - return freqs - - -def mel_frequencies(n_mels: int=64, - f_min: float=0.0, - f_max: float=11025.0, - htk: bool=False, - dtype: str=paddle.float32): - """Compute mel frequencies. - Parameters: - n_mels(int): number of Mel bins. - f_min(float): the lower cut-off frequency, below which the filter response is zero. - f_max(float): the upper cut-off frequency, above which the filter response is zero. - htk(bool): whether to use htk formula. - dtype(str): the datatype of the return frequencies. - Returns: - The frequencies represented in Mel-scale - """ - # 'Center freqs' of mel bands - uniformly spaced between limits - min_mel = hz_to_mel(f_min, htk=htk) - max_mel = hz_to_mel(f_max, htk=htk) - mels = paddle.linspace(min_mel, max_mel, n_mels, dtype=dtype) - freqs = mel_to_hz(mels, htk=htk) - return freqs - - -def fft_frequencies(sr: int, n_fft: int, dtype: str=paddle.float32): - """Compute fourier frequencies. - Parameters: - sr(int): the audio sample rate. - n_fft(float): the number of fft bins. - dtype(str): the datatype of the return frequencies. - Returns: - The frequencies represented in hz. - """ - return paddle.linspace(0, float(sr) / 2, int(1 + n_fft // 2), dtype=dtype) - - -def compute_fbank_matrix(sr: int, - n_fft: int, - n_mels: int=64, - f_min: float=0.0, - f_max: Optional[float]=None, - htk: bool=False, - norm: Union[str, float]='slaney', - dtype: str=paddle.float32): - """Compute fbank matrix. - Parameters: - sr(int): the audio sample rate. - n_fft(int): the number of fft bins. - n_mels(int): the number of Mel bins. - f_min(float): the lower cut-off frequency, below which the filter response is zero. - f_max(float): the upper cut-off frequency, above which the filter response is zero. - htk: whether to use htk formula. - return_complex(bool): whether to return complex matrix. If True, the matrix will - be complex type. Otherwise, the real and image part will be stored in the last - axis of returned tensor. - dtype(str): the datatype of the returned fbank matrix. - Returns: - The fbank matrix of shape (n_mels, int(1+n_fft//2)). - Shape: - output: (n_mels, int(1+n_fft//2)) - """ - - if f_max is None: - f_max = float(sr) / 2 - - # Initialize the weights - weights = paddle.zeros((n_mels, int(1 + n_fft // 2)), dtype=dtype) - - # Center freqs of each FFT bin - fftfreqs = fft_frequencies(sr=sr, n_fft=n_fft, dtype=dtype) - - # 'Center freqs' of mel bands - uniformly spaced between limits - mel_f = mel_frequencies( - n_mels + 2, f_min=f_min, f_max=f_max, htk=htk, dtype=dtype) - - fdiff = mel_f[1:] - mel_f[:-1] #np.diff(mel_f) - ramps = mel_f.unsqueeze(1) - fftfreqs.unsqueeze(0) - #ramps = np.subtract.outer(mel_f, fftfreqs) - - for i in range(n_mels): - # lower and upper slopes for all bins - lower = -ramps[i] / fdiff[i] - upper = ramps[i + 2] / fdiff[i + 1] - - # .. 
then intersect them with each other and zero
-        weights[i] = paddle.maximum(
-            paddle.zeros_like(lower), paddle.minimum(lower, upper))
-
-    # Slaney-style mel is scaled to be approx constant energy per channel
-    if norm == 'slaney':
-        enorm = 2.0 / (mel_f[2:n_mels + 2] - mel_f[:n_mels])
-        weights *= enorm.unsqueeze(1)
-    elif isinstance(norm, int) or isinstance(norm, float):
-        weights = paddle.nn.functional.normalize(weights, p=norm, axis=-1)
-
-    return weights
-
-
-def power_to_db(magnitude: paddle.Tensor,
-                ref_value: float=1.0,
-                amin: float=1e-10,
-                top_db: Optional[float]=None) -> paddle.Tensor:
-    """Convert a power spectrogram (amplitude squared) to decibel (dB) units.
-    The function computes the scaling ``10 * log10(x / ref)`` in a numerically
-    stable way.
-    Parameters:
-        magnitude(Tensor): the input magnitude tensor of any shape.
-        ref_value(float): the reference value. If smaller than 1.0, the db level
-            of the signal will be pulled up accordingly. Otherwise, the db level
-            is pushed down.
-        amin(float): the minimum value of input magnitude, below which the input
-            magnitude is clipped(to amin).
-        top_db(float): the maximum db value of resulting spectrum, above which the
-            spectrum is clipped(to top_db).
-    Returns:
-        The spectrogram in log-scale.
-    shape:
-        input: any shape
-        output: same as input
-    """
-    if amin <= 0:
-        raise Exception("amin must be strictly positive")
-
-    if ref_value <= 0:
-        raise Exception("ref_value must be strictly positive")
-
-    ones = paddle.ones_like(magnitude)
-    log_spec = 10.0 * paddle.log10(paddle.maximum(ones * amin, magnitude))
-    log_spec -= 10.0 * math.log10(max(ref_value, amin))
-
-    if top_db is not None:
-        if top_db < 0:
-            raise Exception("top_db must be non-negative")
-        log_spec = paddle.maximum(log_spec, ones * (log_spec.max() - top_db))
-
-    return log_spec
-
-
 class Spectrogram(nn.Layer):
     def __init__(self,
                  n_fft: int=512,
@@ -459,3 +252,29 @@ class LogMelSpectrogram(nn.Layer):
                 amin=self.amin,
                 top_db=self.top_db)
         return log_mel_feature
+
+
+class MFCC(nn.Layer):
+    def __init__(self,
+                 sr: int=22050,
+                 n_mfcc: int=40,
+                 norm: str='ortho',
+                 **kwargs):
+        """Compute mel-frequency cepstral coefficients from batched waveforms.
+        Parameters:
+            sr (int, optional): the audio sample rate. Defaults to 22050.
+            n_mfcc (int, optional): the number of cepstral coefficients to return. Defaults to 40.
+            norm (str, optional): the DCT normalization type. Defaults to 'ortho'.
+        """
+        super(MFCC, self).__init__()
+        self._log_melspectrogram = LogMelSpectrogram(sr=sr, **kwargs)
+        dct_matrix = create_dct(
+            n_mfcc=n_mfcc, n_mels=self._log_melspectrogram.n_mels, norm=norm)
+        self.register_buffer('dct_matrix', dct_matrix)
+
+    def forward(self, x):
+        log_mel_feature = self._log_melspectrogram(x)
+        mfcc = paddle.matmul(
+            log_mel_feature.transpose((0, 2, 1)), self.dct_matrix).transpose(
+                (0, 2, 1))  # (B, n_mfcc, L)
+        return mfcc
diff --git a/paddleaudio/paddleaudio/functional/__init__.py b/paddleaudio/paddleaudio/functional/__init__.py
index 97043fd7..c85232df 100644
--- a/paddleaudio/paddleaudio/functional/__init__.py
+++ b/paddleaudio/paddleaudio/functional/__init__.py
@@ -11,3 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from .functional import compute_fbank_matrix +from .functional import create_dct +from .functional import fft_frequencies +from .functional import hz_to_mel +from .functional import mel_frequencies +from .functional import mel_to_hz +from .functional import power_to_db diff --git a/paddleaudio/paddleaudio/functional/functional.py b/paddleaudio/paddleaudio/functional/functional.py index 167795c3..c07f14fd 100644 --- a/paddleaudio/paddleaudio/functional/functional.py +++ b/paddleaudio/paddleaudio/functional/functional.py @@ -12,146 +12,39 @@ # See the License for the specific language governing permissions and # limitations under the License. # Modified from librosa(https://github.com/librosa/librosa) -import warnings -from typing import List +import math from typing import Optional from typing import Union -import numpy as np -import scipy -from numpy import ndarray as array -from numpy.lib.stride_tricks import as_strided -from scipy import signal - -from ..backends import depth_convert -from ..utils import ParameterError +import paddle __all__ = [ - # dsp - 'stft', - 'mfcc', 'hz_to_mel', 'mel_to_hz', - 'split_frames', 'mel_frequencies', - 'power_to_db', + 'fft_frequencies', 'compute_fbank_matrix', - 'melspectrogram', - 'spectrogram', - 'mu_encode', - 'mu_decode', - # augmentation - 'depth_augment', - 'spect_augment', - 'random_crop1d', - 'random_crop2d', - 'adaptive_spect_augment', + 'power_to_db', + 'create_dct', ] -def pad_center(data: array, size: int, axis: int=-1, **kwargs) -> array: - """Pad an array to a target length along a target axis. - - This differs from `np.pad` by centering the data prior to padding, - analogous to `str.center` - """ - - kwargs.setdefault("mode", "constant") - n = data.shape[axis] - lpad = int((size - n) // 2) - lengths = [(0, 0)] * data.ndim - lengths[axis] = (lpad, int(size - n - lpad)) - - if lpad < 0: - raise ParameterError(("Target size ({size:d}) must be " - "at least input size ({n:d})")) - - return np.pad(data, lengths, **kwargs) - - -def split_frames(x: array, frame_length: int, hop_length: int, - axis: int=-1) -> array: - """Slice a data array into (overlapping) frames. - - This function is aligned with librosa.frame - """ - - if not isinstance(x, np.ndarray): - raise ParameterError( - f"Input must be of type numpy.ndarray, given type(x)={type(x)}") - - if x.shape[axis] < frame_length: - raise ParameterError(f"Input is too short (n={x.shape[axis]:d})" - f" for frame_length={frame_length:d}") - - if hop_length < 1: - raise ParameterError(f"Invalid hop_length: {hop_length:d}") - - if axis == -1 and not x.flags["F_CONTIGUOUS"]: - warnings.warn(f"librosa.util.frame called with axis={axis} " - "on a non-contiguous input. This will result in a copy.") - x = np.asfortranarray(x) - elif axis == 0 and not x.flags["C_CONTIGUOUS"]: - warnings.warn(f"librosa.util.frame called with axis={axis} " - "on a non-contiguous input. 
This will result in a copy.") - x = np.ascontiguousarray(x) - - n_frames = 1 + (x.shape[axis] - frame_length) // hop_length - strides = np.asarray(x.strides) - - new_stride = np.prod(strides[strides > 0] // x.itemsize) * x.itemsize - - if axis == -1: - shape = list(x.shape)[:-1] + [frame_length, n_frames] - strides = list(strides) + [hop_length * new_stride] - - elif axis == 0: - shape = [n_frames, frame_length] + list(x.shape)[1:] - strides = [hop_length * new_stride] + list(strides) - - else: - raise ParameterError(f"Frame axis={axis} must be either 0 or -1") - - return as_strided(x, shape=shape, strides=strides) - - -def _check_audio(y, mono=True) -> bool: - """Determine whether a variable contains valid audio data. - - The audio y must be a np.ndarray, ether 1-channel or two channel - """ - if not isinstance(y, np.ndarray): - raise ParameterError("Audio data must be of type numpy.ndarray") - if y.ndim > 2: - raise ParameterError( - f"Invalid shape for audio ndim={y.ndim:d}, shape={y.shape}") - - if mono and y.ndim == 2: - raise ParameterError( - f"Invalid shape for mono audio ndim={y.ndim:d}, shape={y.shape}") - - if (mono and len(y) == 0) or (not mono and y.shape[1] < 0): - raise ParameterError(f"Audio is empty ndim={y.ndim:d}, shape={y.shape}") - - if not np.issubdtype(y.dtype, np.floating): - raise ParameterError("Audio data must be floating-point") - - if not np.isfinite(y).all(): - raise ParameterError("Audio buffer is not finite everywhere") - - return True - - -def hz_to_mel(frequencies: Union[float, List[float], array], - htk: bool=False) -> array: - """Convert Hz to Mels - - This function is aligned with librosa. +def hz_to_mel(freq: Union[paddle.Tensor, float], + htk: bool=False) -> Union[paddle.Tensor, float]: + """Convert Hz to Mels. + Parameters: + freq: the input tensor of arbitrary shape, or a single floating point number. + htk: use HTK formula to do the conversion. + The default value is False. + Returns: + The frequencies represented in Mel-scale. """ - freq = np.asanyarray(frequencies) if htk: - return 2595.0 * np.log10(1.0 + freq / 700.0) + if isinstance(freq, paddle.Tensor): + return 2595.0 * paddle.log10(1.0 + freq / 700.0) + else: + return 2595.0 * math.log10(1.0 + freq / 700.0) # Fill in the linear part f_min = 0.0 @@ -163,107 +56,129 @@ def hz_to_mel(frequencies: Union[float, List[float], array], min_log_hz = 1000.0 # beginning of log region (Hz) min_log_mel = (min_log_hz - f_min) / f_sp # same (Mels) - logstep = np.log(6.4) / 27.0 # step size for log region - - if freq.ndim: - # If we have array data, vectorize - log_t = freq >= min_log_hz - mels[log_t] = min_log_mel + \ - np.log(freq[log_t] / min_log_hz) / logstep - elif freq >= min_log_hz: - # If we have scalar data, heck directly - mels = min_log_mel + np.log(freq / min_log_hz) / logstep + logstep = math.log(6.4) / 27.0 # step size for log region + + if isinstance(freq, paddle.Tensor): + target = min_log_mel + paddle.log( + freq / min_log_hz + 1e-10) / logstep # prevent nan with 1e-10 + mask = (freq > min_log_hz).astype(freq.dtype) + mels = target * mask + mels * ( + 1 - mask) # will replace by masked_fill OP in future + else: + if freq >= min_log_hz: + mels = min_log_mel + math.log(freq / min_log_hz + 1e-10) / logstep return mels -def mel_to_hz(mels: Union[float, List[float], array], htk: int=False) -> array: +def mel_to_hz(mel: Union[float, paddle.Tensor], + htk: bool=False) -> Union[float, paddle.Tensor]: """Convert mel bin numbers to frequencies. - - This function is aligned with librosa. 
+ Parameters: + mel: the mel frequency represented as a tensor of arbitrary shape, or a floating point number. + htk: use HTK formula to do the conversion. + Returns: + The frequencies represented in hz. """ - mel_array = np.asanyarray(mels) - if htk: - return 700.0 * (10.0**(mel_array / 2595.0) - 1.0) + return 700.0 * (10.0**(mel / 2595.0) - 1.0) - # Fill in the linear scale f_min = 0.0 f_sp = 200.0 / 3 - freqs = f_min + f_sp * mel_array - + freqs = f_min + f_sp * mel # And now the nonlinear scale min_log_hz = 1000.0 # beginning of log region (Hz) min_log_mel = (min_log_hz - f_min) / f_sp # same (Mels) - logstep = np.log(6.4) / 27.0 # step size for log region - - if mel_array.ndim: - # If we have vector data, vectorize - log_t = mel_array >= min_log_mel - freqs[log_t] = min_log_hz * \ - np.exp(logstep * (mel_array[log_t] - min_log_mel)) - elif mel_array >= min_log_mel: - # If we have scalar data, check directly - freqs = min_log_hz * np.exp(logstep * (mel_array - min_log_mel)) + logstep = math.log(6.4) / 27.0 # step size for log region + if isinstance(mel, paddle.Tensor): + target = min_log_hz * paddle.exp(logstep * (mel - min_log_mel)) + mask = (mel > min_log_mel).astype(mel.dtype) + freqs = target * mask + freqs * ( + 1 - mask) # will replace by masked_fill OP in future + else: + if mel >= min_log_mel: + freqs = min_log_hz * math.exp(logstep * (mel - min_log_mel)) return freqs -def mel_frequencies(n_mels: int=128, - fmin: float=0.0, - fmax: float=11025.0, - htk: bool=False) -> array: - """Compute mel frequencies - - This function is aligned with librosa. +def mel_frequencies(n_mels: int=64, + f_min: float=0.0, + f_max: float=11025.0, + htk: bool=False, + dtype: str=paddle.float32): + """Compute mel frequencies. + Parameters: + n_mels(int): number of Mel bins. + f_min(float): the lower cut-off frequency, below which the filter response is zero. + f_max(float): the upper cut-off frequency, above which the filter response is zero. + htk(bool): whether to use htk formula. + dtype(str): the datatype of the return frequencies. + Returns: + The frequencies represented in Mel-scale """ # 'Center freqs' of mel bands - uniformly spaced between limits - min_mel = hz_to_mel(fmin, htk=htk) - max_mel = hz_to_mel(fmax, htk=htk) - - mels = np.linspace(min_mel, max_mel, n_mels) - - return mel_to_hz(mels, htk=htk) + min_mel = hz_to_mel(f_min, htk=htk) + max_mel = hz_to_mel(f_max, htk=htk) + mels = paddle.linspace(min_mel, max_mel, n_mels, dtype=dtype) + freqs = mel_to_hz(mels, htk=htk) + return freqs -def fft_frequencies(sr: int, n_fft: int) -> array: +def fft_frequencies(sr: int, n_fft: int, dtype: str=paddle.float32): """Compute fourier frequencies. - - This function is aligned with librosa. + Parameters: + sr(int): the audio sample rate. + n_fft(float): the number of fft bins. + dtype(str): the datatype of the return frequencies. + Returns: + The frequencies represented in hz. """ - return np.linspace(0, float(sr) / 2, int(1 + n_fft // 2), endpoint=True) + return paddle.linspace(0, float(sr) / 2, int(1 + n_fft // 2), dtype=dtype) def compute_fbank_matrix(sr: int, n_fft: int, - n_mels: int=128, - fmin: float=0.0, - fmax: Optional[float]=None, + n_mels: int=64, + f_min: float=0.0, + f_max: Optional[float]=None, htk: bool=False, - norm: str="slaney", - dtype: type=np.float32): + norm: Union[str, float]='slaney', + dtype: str=paddle.float32): """Compute fbank matrix. - - This funciton is aligned with librosa. + Parameters: + sr(int): the audio sample rate. + n_fft(int): the number of fft bins. 
+        n_mels(int): the number of Mel bins.
+        f_min(float): the lower cut-off frequency, below which the filter response is zero.
+        f_max(float): the upper cut-off frequency, above which the filter response is zero.
+        htk: whether to use htk formula.
+        norm(str|float): the normalization type. Use 'slaney' for Slaney-style
+            area normalization, or a float p to apply p-norm normalization to
+            each filter. Defaults to 'slaney'.
+        dtype(str): the datatype of the returned fbank matrix.
+    Returns:
+        The fbank matrix of shape (n_mels, int(1+n_fft//2)).
+    Shape:
+        output: (n_mels, int(1+n_fft//2))
     """
-    if norm != "slaney":
-        raise ParameterError('norm must set to slaney')
-
-    if fmax is None:
-        fmax = float(sr) / 2
+    if f_max is None:
+        f_max = float(sr) / 2
 
     # Initialize the weights
-    n_mels = int(n_mels)
-    weights = np.zeros((n_mels, int(1 + n_fft // 2)), dtype=dtype)
+    weights = paddle.zeros((n_mels, int(1 + n_fft // 2)), dtype=dtype)
 
     # Center freqs of each FFT bin
-    fftfreqs = fft_frequencies(sr=sr, n_fft=n_fft)
+    fftfreqs = fft_frequencies(sr=sr, n_fft=n_fft, dtype=dtype)
 
     # 'Center freqs' of mel bands - uniformly spaced between limits
-    mel_f = mel_frequencies(n_mels + 2, fmin=fmin, fmax=fmax, htk=htk)
+    mel_f = mel_frequencies(
+        n_mels + 2, f_min=f_min, f_max=f_max, htk=htk, dtype=dtype)
 
-    fdiff = np.diff(mel_f)
-    ramps = np.subtract.outer(mel_f, fftfreqs)
+    fdiff = mel_f[1:] - mel_f[:-1]  #np.diff(mel_f)
+    ramps = mel_f.unsqueeze(1) - fftfreqs.unsqueeze(0)
+    #ramps = np.subtract.outer(mel_f, fftfreqs)
 
     for i in range(n_mels):
         # lower and upper slopes for all bins
@@ -271,458 +186,79 @@ def compute_fbank_matrix(sr: int,
         upper = ramps[i + 2] / fdiff[i + 1]
 
         # .. then intersect them with each other and zero
-        weights[i] = np.maximum(0, np.minimum(lower, upper))
+        weights[i] = paddle.maximum(
+            paddle.zeros_like(lower), paddle.minimum(lower, upper))
 
-    if norm == "slaney":
-        # Slaney-style mel is scaled to be approx constant energy per channel
+    # Slaney-style mel is scaled to be approx constant energy per channel
+    if norm == 'slaney':
         enorm = 2.0 / (mel_f[2:n_mels + 2] - mel_f[:n_mels])
-        weights *= enorm[:, np.newaxis]
-
-    # Only check weights if f_mel[0] is positive
-    if not np.all((mel_f[:-2] == 0) | (weights.max(axis=1) > 0)):
-        # This means we have an empty channel somewhere
-        warnings.warn("Empty filters detected in mel frequency basis. "
-                      "Some channels will produce empty responses. "
-                      "Try increasing your sampling rate (and fmax) or "
-                      "reducing n_mels.")
+        weights *= enorm.unsqueeze(1)
+    elif isinstance(norm, int) or isinstance(norm, float):
+        weights = paddle.nn.functional.normalize(weights, p=norm, axis=-1)
 
     return weights
 
 
-def stft(x: array,
-         n_fft: int=2048,
-         hop_length: Optional[int]=None,
-         win_length: Optional[int]=None,
-         window: str="hann",
-         center: bool=True,
-         dtype: type=np.complex64,
-         pad_mode: str="reflect") -> array:
-    """Short-time Fourier transform (STFT).
-
-    This function is aligned with librosa.
- """ - _check_audio(x) - - # By default, use the entire frame - if win_length is None: - win_length = n_fft - - # Set the default hop, if it's not already specified - if hop_length is None: - hop_length = int(win_length // 4) - - fft_window = signal.get_window(window, win_length, fftbins=True) - - # Pad the window out to n_fft size - fft_window = pad_center(fft_window, n_fft) - - # Reshape so that the window can be broadcast - fft_window = fft_window.reshape((-1, 1)) - - # Pad the time series so that frames are centered - if center: - if n_fft > x.shape[-1]: - warnings.warn( - f"n_fft={n_fft} is too small for input signal of length={x.shape[-1]}" - ) - x = np.pad(x, int(n_fft // 2), mode=pad_mode) - - elif n_fft > x.shape[-1]: - raise ParameterError( - f"n_fft={n_fft} is too small for input signal of length={x.shape[-1]}" - ) - - # Window the time series. - x_frames = split_frames(x, frame_length=n_fft, hop_length=hop_length) - # Pre-allocate the STFT matrix - stft_matrix = np.empty( - (int(1 + n_fft // 2), x_frames.shape[1]), dtype=dtype, order="F") - fft = np.fft # use numpy fft as default - # Constrain STFT block sizes to 256 KB - MAX_MEM_BLOCK = 2**8 * 2**10 - # how many columns can we fit within MAX_MEM_BLOCK? - n_columns = MAX_MEM_BLOCK // (stft_matrix.shape[0] * stft_matrix.itemsize) - n_columns = max(n_columns, 1) - - for bl_s in range(0, stft_matrix.shape[1], n_columns): - bl_t = min(bl_s + n_columns, stft_matrix.shape[1]) - stft_matrix[:, bl_s:bl_t] = fft.rfft( - fft_window * x_frames[:, bl_s:bl_t], axis=0) - - return stft_matrix - - -def power_to_db(spect: array, - ref: float=1.0, +def power_to_db(magnitude: paddle.Tensor, + ref_value: float=1.0, amin: float=1e-10, - top_db: Optional[float]=80.0) -> array: - """Convert a power spectrogram (amplitude squared) to decibel (dB) units - - This computes the scaling ``10 * log10(spect / ref)`` in a numerically + top_db: Optional[float]=None) -> paddle.Tensor: + """Convert a power spectrogram (amplitude squared) to decibel (dB) units. + The function computes the scaling ``10 * log10(x / ref)`` in a numerically stable way. - - This function is aligned with librosa. + Parameters: + magnitude(Tensor): the input magnitude tensor of any shape. + ref_value(float): the reference value. If smaller than 1.0, the db level + of the signal will be pulled up accordingly. Otherwise, the db level + is pushed down. + amin(float): the minimum value of input magnitude, below which the input + magnitude is clipped(to amin). + top_db(float): the maximum db value of resulting spectrum, above which the + spectrum is clipped(to top_db). + Returns: + The spectrogram in log-scale. + shape: + input: any shape + output: same as input """ - spect = np.asarray(spect) - if amin <= 0: - raise ParameterError("amin must be strictly positive") - - if np.issubdtype(spect.dtype, np.complexfloating): - warnings.warn( - "power_to_db was called on complex input so phase " - "information will be discarded. 
To suppress this warning, "
-            "call power_to_db(np.abs(D)**2) instead.")
-        magnitude = np.abs(spect)
-    else:
-        magnitude = spect
+        raise Exception("amin must be strictly positive")
 
-    if callable(ref):
-        # User supplied a function to calculate reference power
-        ref_value = ref(magnitude)
-    else:
-        ref_value = np.abs(ref)
+    if ref_value <= 0:
+        raise Exception("ref_value must be strictly positive")
 
-    log_spec = 10.0 * np.log10(np.maximum(amin, magnitude))
-    log_spec -= 10.0 * np.log10(np.maximum(amin, ref_value))
+    ones = paddle.ones_like(magnitude)
+    log_spec = 10.0 * paddle.log10(paddle.maximum(ones * amin, magnitude))
+    log_spec -= 10.0 * math.log10(max(ref_value, amin))
 
     if top_db is not None:
         if top_db < 0:
-            raise ParameterError("top_db must be non-negative")
-        log_spec = np.maximum(log_spec, log_spec.max() - top_db)
+            raise Exception("top_db must be non-negative")
+        log_spec = paddle.maximum(log_spec, ones * (log_spec.max() - top_db))
 
     return log_spec
 
 
-def mfcc(x,
-         sr: int=16000,
-         spect: Optional[array]=None,
-         n_mfcc: int=20,
-         dct_type: int=2,
-         norm: str="ortho",
-         lifter: int=0,
-         **kwargs) -> array:
-    """Mel-frequency cepstral coefficients (MFCCs)
-
-    This function is NOT strictly aligned with librosa. The following example shows how to get the
-    same result with librosa:
-
-    # mfcc:
-    kwargs = {
-        'window_size':512,
-        'hop_length':320,
-        'mel_bins':64,
-        'fmin':50,
-        'to_db':False}
-    a = mfcc(x,
-        spect=None,
-        n_mfcc=20,
-        dct_type=2,
-        norm='ortho',
-        lifter=0,
-        **kwargs)
-
-    # librosa mfcc:
-    spect = librosa.feature.melspectrogram(y=x,sr=16000,n_fft=512,
-        win_length=512,
-        hop_length=320,
-        n_mels=64, fmin=50)
-    b = librosa.feature.mfcc(y=x,
-        sr=16000,
-        S=spect,
-        n_mfcc=20,
-        dct_type=2,
-        norm='ortho',
-        lifter=0)
-
-    assert np.mean( (a-b)**2) < 1e-8
-
-    """
-    if spect is None:
-        spect = melspectrogram(x, sr=sr, **kwargs)
-
-    M = scipy.fftpack.dct(spect, axis=0, type=dct_type, norm=norm)[:n_mfcc]
-
-    if lifter > 0:
-        factor = np.sin(np.pi * np.arange(1, 1 + n_mfcc, dtype=M.dtype) /
-                        lifter)
-        return M * factor[:, np.newaxis]
-    elif lifter == 0:
-        return M
-    else:
-        raise ParameterError(
-            f"MFCC lifter={lifter} must be a non-negative number")
-
-
-def melspectrogram(x: array,
-                   sr: int=16000,
-                   window_size: int=512,
-                   hop_length: int=320,
-                   n_mels: int=64,
-                   fmin: int=50,
-                   fmax: Optional[float]=None,
-                   window: str='hann',
-                   center: bool=True,
-                   pad_mode: str='reflect',
-                   power: float=2.0,
-                   to_db: bool=True,
-                   ref: float=1.0,
-                   amin: float=1e-10,
-                   top_db: Optional[float]=None) -> array:
-    """Compute mel-spectrogram.
-
+def create_dct(n_mfcc: int,
+               n_mels: int,
+               norm: Optional[str]='ortho',
+               dtype: Optional[str]=paddle.float32):
+    """Create a DCT transformation matrix.
     Parameters:
-        x: numpy.ndarray
-            The input wavform is a numpy array [shape=(n,)]
-
-        window_size: int, typically 512, 1024, 2048, etc.
-            The window size for framing, also used as n_fft for stft
-
-
+        n_mfcc (int): the number of cepstral coefficients.
+        n_mels (int): the number of mel filterbanks.
+        norm (str, optional): the normalization type ('ortho' or None). Defaults to 'ortho'.
     Returns:
-        The mel-spectrogram in power scale or db scale(default)
-
-
-    Notes:
-        1. sr is default to 16000, which is commonly used in speech/speaker processing.
-        2. when fmax is None, it is set to sr//2.
-        3. this function will convert mel spectgrum to db scale by default. This is different
-        that of librosa.
- - """ - _check_audio(x, mono=True) - if len(x) <= 0: - raise ParameterError('The input waveform is empty') - - if fmax is None: - fmax = sr // 2 - if fmin < 0 or fmin >= fmax: - raise ParameterError('fmin and fmax must statisfy 0 array: - """Compute spectrogram from an input waveform. - - This function is a wrapper for librosa.feature.stft, with addition step to - compute the magnitude of the complex spectrogram. - """ - - s = stft( - x, - n_fft=window_size, - hop_length=hop_length, - win_length=window_size, - window=window, - center=center, - pad_mode=pad_mode) - - return np.abs(s)**power - - -def mu_encode(x: array, mu: int=255, quantized: bool=True) -> array: - """Mu-law encoding. - - Compute the mu-law decoding given an input code. - When quantized is True, the result will be converted to - integer in range [0,mu-1]. Otherwise, the resulting signal - is in range [-1,1] - - - Reference: - https://en.wikipedia.org/wiki/%CE%9C-law_algorithm - - """ - mu = 255 - y = np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu) - if quantized: - y = np.floor((y + 1) / 2 * mu + 0.5) # convert to [0 , mu-1] - return y - - -def mu_decode(y: array, mu: int=255, quantized: bool=True) -> array: - """Mu-law decoding. - - Compute the mu-law decoding given an input code. - - it assumes that the input y is in - range [0,mu-1] when quantize is True and [-1,1] otherwise - - Reference: - https://en.wikipedia.org/wiki/%CE%9C-law_algorithm - - """ - if mu < 1: - raise ParameterError('mu is typically set as 2**k-1, k=1, 2, 3,...') - - mu = mu - 1 - if quantized: # undo the quantization - y = y * 2 / mu - 1 - x = np.sign(y) / mu * ((1 + mu)**np.abs(y) - 1) - return x - - -def randint(high: int) -> int: - """Generate one random integer in range [0 high) - - This is a helper function for random data augmentaiton - """ - return int(np.random.randint(0, high=high)) - - -def rand() -> float: - """Generate one floating-point number in range [0 1) - - This is a helper function for random data augmentaiton - """ - return float(np.random.rand(1)) - - -def depth_augment(y: array, - choices: List=['int8', 'int16'], - probs: List[float]=[0.5, 0.5]) -> array: - """ Audio depth augmentation - - Do audio depth augmentation to simulate the distortion brought by quantization. 
- """ - assert len(probs) == len( - choices - ), 'number of choices {} must be equal to size of probs {}'.format( - len(choices), len(probs)) - depth = np.random.choice(choices, p=probs) - src_depth = y.dtype - y1 = depth_convert(y, depth) - y2 = depth_convert(y1, src_depth) - - return y2 - - -def adaptive_spect_augment(spect: array, tempo_axis: int=0, - level: float=0.1) -> array: - """Do adpative spectrogram augmentation - - The level of the augmentation is gowern by the paramter level, - ranging from 0 to 1, with 0 represents no augmentation。 - - """ - assert spect.ndim == 2., 'only supports 2d tensor or numpy array' - if tempo_axis == 0: - nt, nf = spect.shape - else: - nf, nt = spect.shape - - time_mask_width = int(nt * level * 0.5) - freq_mask_width = int(nf * level * 0.5) - - num_time_mask = int(10 * level) - num_freq_mask = int(10 * level) - - if tempo_axis == 0: - for _ in range(num_time_mask): - start = randint(nt - time_mask_width) - spect[start:start + time_mask_width, :] = 0 - for _ in range(num_freq_mask): - start = randint(nf - freq_mask_width) - spect[:, start:start + freq_mask_width] = 0 - else: - for _ in range(num_time_mask): - start = randint(nt - time_mask_width) - spect[:, start:start + time_mask_width] = 0 - for _ in range(num_freq_mask): - start = randint(nf - freq_mask_width) - spect[start:start + freq_mask_width, :] = 0 - - return spect - - -def spect_augment(spect: array, - tempo_axis: int=0, - max_time_mask: int=3, - max_freq_mask: int=3, - max_time_mask_width: int=30, - max_freq_mask_width: int=20) -> array: - """Do spectrogram augmentation in both time and freq axis - - Reference: - + [type]: [description] """ - assert spect.ndim == 2., 'only supports 2d tensor or numpy array' - if tempo_axis == 0: - nt, nf = spect.shape - else: - nf, nt = spect.shape - - num_time_mask = randint(max_time_mask) - num_freq_mask = randint(max_freq_mask) - - time_mask_width = randint(max_time_mask_width) - freq_mask_width = randint(max_freq_mask_width) - - if tempo_axis == 0: - for _ in range(num_time_mask): - start = randint(nt - time_mask_width) - spect[start:start + time_mask_width, :] = 0 - for _ in range(num_freq_mask): - start = randint(nf - freq_mask_width) - spect[:, start:start + freq_mask_width] = 0 + n = paddle.arange(n_mels, dtype=dtype) + k = paddle.arange(n_mfcc, dtype=dtype).unsqueeze(1) + dct = paddle.cos(math.pi / float(n_mels) * (n + 0.5) * + k) # size (n_mfcc, n_mels) + if norm is None: + dct *= 2.0 else: - for _ in range(num_time_mask): - start = randint(nt - time_mask_width) - spect[:, start:start + time_mask_width] = 0 - for _ in range(num_freq_mask): - start = randint(nf - freq_mask_width) - spect[start:start + freq_mask_width, :] = 0 - - return spect - - -def random_crop1d(y: array, crop_len: int) -> array: - """ Do random cropping on 1d input signal - - The input is a 1d signal, typically a sound waveform - """ - if y.ndim != 1: - 'only accept 1d tensor or numpy array' - n = len(y) - idx = randint(n - crop_len) - return y[idx:idx + crop_len] - - -def random_crop2d(s: array, crop_len: int, tempo_axis: int=0) -> array: - """ Do random cropping for 2D array, typically a spectrogram. - - The cropping is done in temporal direction on the time-freq input signal. 
- """ - if tempo_axis >= s.ndim: - raise ParameterError('axis out of range') - - n = s.shape[tempo_axis] - idx = randint(high=n - crop_len) - sli = [slice(None) for i in range(s.ndim)] - sli[tempo_axis] = slice(idx, idx + crop_len) - out = s[tuple(sli)] - return out + assert norm == "ortho" + dct[0] *= 1.0 / math.sqrt(2.0) + dct *= math.sqrt(2.0 / float(n_mels)) + return dct.T diff --git a/paddleaudio/paddleaudio/io/__init__.py b/paddleaudio/paddleaudio/io/__init__.py index cc2538f7..185a92b8 100644 --- a/paddleaudio/paddleaudio/io/__init__.py +++ b/paddleaudio/paddleaudio/io/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,9 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from .audio import depth_convert -from .audio import load -from .audio import normalize -from .audio import resample -from .audio import save_wav -from .audio import to_mono diff --git a/paddleaudio/paddleaudio/io/audio.py b/paddleaudio/paddleaudio/io/audio.py deleted file mode 100644 index 4127570e..00000000 --- a/paddleaudio/paddleaudio/io/audio.py +++ /dev/null @@ -1,303 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import warnings -from typing import Optional -from typing import Tuple -from typing import Union - -import numpy as np -import resampy -import soundfile as sf -from numpy import ndarray as array -from scipy.io import wavfile - -from ..utils import ParameterError - -__all__ = [ - 'resample', - 'to_mono', - 'depth_convert', - 'normalize', - 'save_wav', - 'load', -] -NORMALMIZE_TYPES = ['linear', 'gaussian'] -MERGE_TYPES = ['ch0', 'ch1', 'random', 'average'] -RESAMPLE_MODES = ['kaiser_best', 'kaiser_fast'] -EPS = 1e-8 - - -def resample(y: array, src_sr: int, target_sr: int, - mode: str='kaiser_fast') -> array: - """ Audio resampling - - This function is the same as using resampy.resample(). - - Notes: - The default mode is kaiser_fast. For better audio quality, use mode = 'kaiser_fast' - - """ - - if mode == 'kaiser_best': - warnings.warn( - f'Using resampy in kaiser_best to {src_sr}=>{target_sr}. 
This function is pretty slow, \ - we recommend the mode kaiser_fast in large scale audio trainning') - - if not isinstance(y, np.ndarray): - raise ParameterError( - 'Only support numpy array, but received y in {type(y)}') - - if mode not in RESAMPLE_MODES: - raise ParameterError(f'resample mode must in {RESAMPLE_MODES}') - - return resampy.resample(y, src_sr, target_sr, filter=mode) - - -def to_mono(y: array, merge_type: str='average') -> array: - """ convert sterior audio to mono - """ - if merge_type not in MERGE_TYPES: - raise ParameterError( - f'Unsupported merge type {merge_type}, available types are {MERGE_TYPES}' - ) - if y.ndim > 2: - raise ParameterError( - f'Unsupported audio array, y.ndim > 2, the shape is {y.shape}') - if y.ndim == 1: # nothing to merge - return y - - if merge_type == 'ch0': - return y[0] - if merge_type == 'ch1': - return y[1] - if merge_type == 'random': - return y[np.random.randint(0, 2)] - - # need to do averaging according to dtype - - if y.dtype == 'float32': - y_out = (y[0] + y[1]) * 0.5 - elif y.dtype == 'int16': - y_out = y.astype('int32') - y_out = (y_out[0] + y_out[1]) // 2 - y_out = np.clip(y_out, np.iinfo(y.dtype).min, - np.iinfo(y.dtype).max).astype(y.dtype) - - elif y.dtype == 'int8': - y_out = y.astype('int16') - y_out = (y_out[0] + y_out[1]) // 2 - y_out = np.clip(y_out, np.iinfo(y.dtype).min, - np.iinfo(y.dtype).max).astype(y.dtype) - else: - raise ParameterError(f'Unsupported dtype: {y.dtype}') - return y_out - - -def _safe_cast(y: array, dtype: Union[type, str]) -> array: - """ data type casting in a safe way, i.e., prevent overflow or underflow - - This function is used internally. - """ - return np.clip(y, np.iinfo(dtype).min, np.iinfo(dtype).max).astype(dtype) - - -def depth_convert(y: array, dtype: Union[type, str], - dithering: bool=True) -> array: - """Convert audio array to target dtype safely - - This function convert audio waveform to a target dtype, with addition steps of - preventing overflow/underflow and preserving audio range. - - """ - - SUPPORT_DTYPE = ['int16', 'int8', 'float32', 'float64'] - if y.dtype not in SUPPORT_DTYPE: - raise ParameterError( - 'Unsupported audio dtype, ' - f'y.dtype is {y.dtype}, supported dtypes are {SUPPORT_DTYPE}') - - if dtype not in SUPPORT_DTYPE: - raise ParameterError( - 'Unsupported audio dtype, ' - f'target dtype is {dtype}, supported dtypes are {SUPPORT_DTYPE}') - - if dtype == y.dtype: - return y - - if dtype == 'float64' and y.dtype == 'float32': - return _safe_cast(y, dtype) - if dtype == 'float32' and y.dtype == 'float64': - return _safe_cast(y, dtype) - - if dtype == 'int16' or dtype == 'int8': - if y.dtype in ['float64', 'float32']: - factor = np.iinfo(dtype).max - y = np.clip(y * factor, np.iinfo(dtype).min, - np.iinfo(dtype).max).astype(dtype) - y = y.astype(dtype) - else: - if dtype == 'int16' and y.dtype == 'int8': - factor = np.iinfo('int16').max / np.iinfo('int8').max - EPS - y = y.astype('float32') * factor - y = y.astype('int16') - - else: # dtype == 'int8' and y.dtype=='int16': - y = y.astype('int32') * np.iinfo('int8').max / \ - np.iinfo('int16').max - y = y.astype('int8') - - if dtype in ['float32', 'float64']: - org_dtype = y.dtype - y = y.astype(dtype) / np.iinfo(org_dtype).max - return y - - -def sound_file_load(file: str, - offset: Optional[float]=None, - dtype: str='int16', - duration: Optional[int]=None) -> Tuple[array, int]: - """Load audio using soundfile library - - This function load audio file using libsndfile. 
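A quick sketch of the removed `depth_convert` round trip (float32 to int16 and back, per the deleted code above):

```python
import numpy as np

y = np.random.uniform(-1.0, 1.0, 16000).astype('float32')
y_i16 = depth_convert(y, 'int16')        # scaled and clipped to the int16 range
y_f32 = depth_convert(y_i16, 'float32')  # rescaled back into [-1, 1]
print(y_i16.dtype, y_f32.dtype)          # int16 float32
```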
- - Reference: - http://www.mega-nerd.com/libsndfile/#Features - - """ - with sf.SoundFile(file) as sf_desc: - sr_native = sf_desc.samplerate - if offset: - sf_desc.seek(int(offset * sr_native)) - if duration is not None: - frame_duration = int(duration * sr_native) - else: - frame_duration = -1 - y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T - - return y, sf_desc.samplerate - - -def audio_file_load(): - """Load audio using audiofile library - - This function load audio file using audiofile. - - Reference: - https://audiofile.68k.org/ - - """ - raise NotImplementedError() - - -def sox_file_load(): - """Load audio using sox library - - This function load audio file using sox. - - Reference: - http://sox.sourceforge.net/ - """ - raise NotImplementedError() - - -def normalize(y: array, norm_type: str='linear', - mul_factor: float=1.0) -> array: - """ normalize an input audio with additional multiplier. - - """ - - if norm_type == 'linear': - amax = np.max(np.abs(y)) - factor = 1.0 / (amax + EPS) - y = y * factor * mul_factor - elif norm_type == 'gaussian': - amean = np.mean(y) - astd = np.std(y) - astd = max(astd, EPS) - y = mul_factor * (y - amean) / astd - else: - raise NotImplementedError(f'norm_type should be in {NORMALMIZE_TYPES}') - - return y - - -def save_wav(y: array, sr: int, file: str) -> None: - """Save audio file to disk. - This function saves audio to disk using scipy.io.wavfile, with additional step - to convert input waveform to int16 unless it already is int16 - - Notes: - It only support raw wav format. - - """ - if not file.endswith('.wav'): - raise ParameterError( - f'only .wav file supported, but dst file name is: {file}') - - if sr <= 0: - raise ParameterError( - f'Sample rate should be larger than 0, recieved sr = {sr}') - - if y.dtype not in ['int16', 'int8']: - warnings.warn( - f'input data type is {y.dtype}, will convert data to int16 format before saving' - ) - y_out = depth_convert(y, 'int16') - else: - y_out = y - - wavfile.write(file, sr, y_out) - - -def load( - file: str, - sr: Optional[int]=None, - mono: bool=True, - merge_type: str='average', # ch0,ch1,random,average - normal: bool=True, - norm_type: str='linear', - norm_mul_factor: float=1.0, - offset: float=0.0, - duration: Optional[int]=None, - dtype: str='float32', - resample_mode: str='kaiser_fast') -> Tuple[array, int]: - """Load audio file from disk. - This function loads audio from disk using using audio beackend. 
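The removed `normalize` and `save_wav` helpers compose as below (a sketch; `demo.wav` is a hypothetical output path):

```python
import numpy as np

y = np.random.uniform(-0.1, 0.1, 16000).astype('float32')
y = normalize(y, norm_type='linear', mul_factor=0.95)  # peak-normalize to ~0.95
save_wav(y, sr=16000, file='demo.wav')  # float input is converted to int16 first
```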
- - Parameters: - - Notes: - - """ - - y, r = sound_file_load(file, offset=offset, dtype=dtype, duration=duration) - - if not ((y.ndim == 1 and len(y) > 0) or (y.ndim == 2 and len(y[0]) > 0)): - raise ParameterError(f'audio file {file} looks empty') - - if mono: - y = to_mono(y, merge_type) - - if sr is not None and sr != r: - y = resample(y, r, sr, mode=resample_mode) - r = sr - - if normal: - y = normalize(y, norm_type, norm_mul_factor) - elif dtype in ['int8', 'int16']: - # still need to do normalization, before depth convertion - y = normalize(y, 'linear', 1.0) - - y = depth_convert(y, dtype) - return y, r From f4c720544013d0eb28b7f8cfb858b355a1a5e6ef Mon Sep 17 00:00:00 2001 From: KP <109694228@qq.com> Date: Wed, 2 Mar 2022 12:42:20 +0800 Subject: [PATCH 35/45] refactor --- paddleaudio/paddleaudio/compliance/kaldi.py | 2 +- paddleaudio/paddleaudio/datasets/__init__.py | 7 ------- paddleaudio/paddleaudio/datasets/dataset.py | 4 ++-- paddleaudio/paddleaudio/features/__init__.py | 7 ++++--- paddleaudio/paddleaudio/metric/mcd.py | 1 + 5 files changed, 8 insertions(+), 13 deletions(-) diff --git a/paddleaudio/paddleaudio/compliance/kaldi.py b/paddleaudio/paddleaudio/compliance/kaldi.py index 61ca4e3d..e4192e81 100644 --- a/paddleaudio/paddleaudio/compliance/kaldi.py +++ b/paddleaudio/paddleaudio/compliance/kaldi.py @@ -17,8 +17,8 @@ from typing import Tuple import paddle from paddle import Tensor +from ..functional import create_dct from ..functional.window import get_window -from .spectrum import create_dct __all__ = [ 'spectrogram', diff --git a/paddleaudio/paddleaudio/datasets/__init__.py b/paddleaudio/paddleaudio/datasets/__init__.py index 8d2fdab4..5c5f0369 100644 --- a/paddleaudio/paddleaudio/datasets/__init__.py +++ b/paddleaudio/paddleaudio/datasets/__init__.py @@ -15,10 +15,3 @@ from .esc50 import ESC50 from .gtzan import GTZAN from .tess import TESS from .urban_sound import UrbanSound8K - -__all__ = [ - 'ESC50', - 'UrbanSound8K', - 'GTZAN', - 'TESS', -] diff --git a/paddleaudio/paddleaudio/datasets/dataset.py b/paddleaudio/paddleaudio/datasets/dataset.py index 7a57fd6c..06e2df6d 100644 --- a/paddleaudio/paddleaudio/datasets/dataset.py +++ b/paddleaudio/paddleaudio/datasets/dataset.py @@ -17,8 +17,8 @@ import numpy as np import paddle from ..backends import load as load_audio -from ..features import melspectrogram -from ..features import mfcc +from ..compliance.librosa import melspectrogram +from ..compliance.librosa import mfcc feat_funcs = { 'raw': None, diff --git a/paddleaudio/paddleaudio/features/__init__.py b/paddleaudio/paddleaudio/features/__init__.py index 469b4c9b..00781397 100644 --- a/paddleaudio/paddleaudio/features/__init__.py +++ b/paddleaudio/paddleaudio/features/__init__.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from .librosa import LogMelSpectrogram -from .librosa import MelSpectrogram -from .librosa import Spectrogram +from .layers import LogMelSpectrogram +from .layers import MelSpectrogram +from .layers import MFCC +from .layers import Spectrogram diff --git a/paddleaudio/paddleaudio/metric/mcd.py b/paddleaudio/paddleaudio/metric/mcd.py index 281e5765..465cd5a4 100644 --- a/paddleaudio/paddleaudio/metric/mcd.py +++ b/paddleaudio/paddleaudio/metric/mcd.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import mcd.metrics_fast as mt +import numpy as np from mcd import dtw __all__ = [ From 4d2f2191a817d3d3db2d4562d5844387c659c819 Mon Sep 17 00:00:00 2001 From: TianYuan Date: Wed, 2 Mar 2022 08:44:07 +0000 Subject: [PATCH 36/45] fix gbk encode bug --- paddlespeech/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/paddlespeech/__init__.py b/paddlespeech/__init__.py index 185a92b8..b781c4a8 100644 --- a/paddlespeech/__init__.py +++ b/paddlespeech/__init__.py @@ -11,3 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import _locale + +_locale._getdefaultlocale = (lambda *args: ['en_US', 'utf8']) From 504c2c9d50ca360aab23c78162a5b0e2ce5b53fe Mon Sep 17 00:00:00 2001 From: KP <109694228@qq.com> Date: Wed, 2 Mar 2022 16:35:11 +0800 Subject: [PATCH 37/45] refactor --- paddleaudio/paddleaudio/__init__.py | 7 +++++++ paddleaudio/paddleaudio/features/layers.py | 6 ++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/paddleaudio/paddleaudio/__init__.py b/paddleaudio/paddleaudio/__init__.py index 2dab610c..6184c1dd 100644 --- a/paddleaudio/paddleaudio/__init__.py +++ b/paddleaudio/paddleaudio/__init__.py @@ -11,5 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from . import compliance +from . import datasets +from . import features +from . import functional +from . import io +from . import metric +from . import sox_effects from .backends import load from .backends import save diff --git a/paddleaudio/paddleaudio/features/layers.py b/paddleaudio/paddleaudio/features/layers.py index 69f814d6..69f46254 100644 --- a/paddleaudio/paddleaudio/features/layers.py +++ b/paddleaudio/paddleaudio/features/layers.py @@ -71,15 +71,17 @@ class Spectrogram(nn.Layer): if win_length is None: win_length = n_fft - fft_window = get_window(window, win_length, fftbins=True, dtype=dtype) + self.fft_window = get_window( + window, win_length, fftbins=True, dtype=dtype) self._stft = partial( paddle.signal.stft, n_fft=n_fft, hop_length=hop_length, win_length=win_length, - window=fft_window, + window=self.fft_window, center=center, pad_mode=pad_mode) + self.register_buffer('fft_window', self.fft_window) def forward(self, x): stft = self._stft(x) From 959408bafe70fab8f096a5393daabf81405a27e6 Mon Sep 17 00:00:00 2001 From: KP <109694228@qq.com> Date: Thu, 3 Mar 2022 17:22:21 +0800 Subject: [PATCH 38/45] Refactor and add doc string. 
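The `register_buffer` change above makes the precomputed FFT window part of the layer's state, so it is saved in checkpoints and follows the layer across devices. A minimal sketch (assuming the refactored layer is importable and accepts these constructor arguments):

```python
import paddle
from paddleaudio.features import Spectrogram

spec = Spectrogram(n_fft=512, hop_length=160)
# The window should now appear as a persistent buffer on the layer.
print([name for name, _ in spec.named_buffers()])  # expect 'fft_window' listed
```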
--- paddleaudio/paddleaudio/compliance/kaldi.py | 341 +++++++++----------- paddleaudio/paddleaudio/features/layers.py | 14 +- 2 files changed, 155 insertions(+), 200 deletions(-) diff --git a/paddleaudio/paddleaudio/compliance/kaldi.py b/paddleaudio/paddleaudio/compliance/kaldi.py index e4192e81..35d7072c 100644 --- a/paddleaudio/paddleaudio/compliance/kaldi.py +++ b/paddleaudio/paddleaudio/compliance/kaldi.py @@ -105,7 +105,7 @@ def _get_log_energy(strided_input: Tensor, epsilon: Tensor, def _get_waveform_and_window_properties( waveform: Tensor, channel: int, - sample_frequency: float, + sr: int, frame_shift: float, frame_length: float, round_to_power_of_two: bool, @@ -115,9 +115,9 @@ def _get_waveform_and_window_properties( 'Invalid channel {} for size {}'.format(channel, waveform.shape[0])) waveform = waveform[channel, :] # size (n) window_shift = int( - sample_frequency * frame_shift * + sr * frame_shift * 0.001) # pass frame_shift and frame_length in milliseconds - window_size = int(sample_frequency * frame_length * 0.001) + window_size = int(sr * frame_length * 0.001) padded_window_size = _next_power_of_2( window_size) if round_to_power_of_two else window_size @@ -128,7 +128,7 @@ def _get_waveform_and_window_properties( assert padded_window_size % 2 == 0, 'the padded `window_size` must be divisible by two.' \ ' use `round_to_power_of_two` or change `frame_length`' assert 0. <= preemphasis_coefficient <= 1.0, '`preemphasis_coefficient` must be between [0,1]' - assert sample_frequency > 0, '`sample_frequency` must be greater than zero' + assert sr > 0, '`sr` must be greater than zero' return waveform, window_shift, window_size, padded_window_size @@ -147,45 +147,38 @@ def _get_window(waveform: Tensor, dtype = waveform.dtype epsilon = _get_epsilon(dtype) - # size (m, window_size) + # (m, window_size) strided_input = _get_strided(waveform, window_size, window_shift, snip_edges) if dither != 0.0: - # Returns a random number strictly between 0 and 1 x = paddle.maximum(epsilon, paddle.rand(strided_input.shape, dtype=dtype)) rand_gauss = paddle.sqrt(-2 * x.log()) * paddle.cos(2 * math.pi * x) strided_input = strided_input + rand_gauss * dither if remove_dc_offset: - # Subtract each row/frame by its mean - row_means = paddle.mean( - strided_input, axis=1).unsqueeze(1) # size (m, 1) + row_means = paddle.mean(strided_input, axis=1).unsqueeze(1) # (m, 1) strided_input = strided_input - row_means if raw_energy: - # Compute the log energy of each row/frame before applying preemphasis and - # window function signal_log_energy = _get_log_energy(strided_input, epsilon, - energy_floor) # size (m) + energy_floor) # (m) if preemphasis_coefficient != 0.0: - # strided_input[i,j] -= preemphasis_coefficient * strided_input[i, max(0, j-1)] for all i,j offset_strided_input = paddle.nn.functional.pad( strided_input.unsqueeze(0), (1, 0), data_format='NCL', - mode='replicate').squeeze(0) # size (m, window_size + 1) + mode='replicate').squeeze(0) # (m, window_size + 1) strided_input = strided_input - preemphasis_coefficient * offset_strided_input[:, : -1] - # Apply window_function to each row/frame window_function = _feature_window_function( window_type, window_size, blackman_coeff, - dtype).unsqueeze(0) # size (1, window_size) - strided_input = strided_input * window_function # size (m, window_size) + dtype).unsqueeze(0) # (1, window_size) + strided_input = strided_input * window_function # (m, window_size) - # Pad columns with zero until we reach size (m, padded_window_size) + # (m, padded_window_size) if 
padded_window_size != window_size: padding_right = padded_window_size - window_size strided_input = paddle.nn.functional.pad( @@ -194,7 +187,6 @@ def _get_window(waveform: Tensor, mode='constant', value=0).squeeze(0) - # Compute energy after window function (not the raw one) if not raw_energy: signal_log_energy = _get_log_energy(strided_input, epsilon, energy_floor) # size (m) @@ -203,8 +195,6 @@ def _get_window(waveform: Tensor, def _subtract_column_mean(tensor: Tensor, subtract_mean: bool) -> Tensor: - # subtracts the column mean of the tensor size (m, n) if subtract_mean=True - # it returns size (m, n) if subtract_mean: col_means = paddle.mean(tensor, axis=0).unsqueeze(0) tensor = tensor - col_means @@ -218,61 +208,56 @@ def spectrogram(waveform: Tensor, energy_floor: float=1.0, frame_length: float=25.0, frame_shift: float=10.0, - min_duration: float=0.0, preemphasis_coefficient: float=0.97, raw_energy: bool=True, remove_dc_offset: bool=True, round_to_power_of_two: bool=True, - sample_frequency: float=16000.0, + sr: int=16000, snip_edges: bool=True, subtract_mean: bool=False, window_type: str=POVEY) -> Tensor: - """[summary] + """Compute and return a spectrogram from a waveform. The output is identical to Kaldi's. Args: - waveform (Tensor): [description] - blackman_coeff (float, optional): [description]. Defaults to 0.42. - channel (int, optional): [description]. Defaults to -1. - dither (float, optional): [description]. Defaults to 0.0. - energy_floor (float, optional): [description]. Defaults to 1.0. - frame_length (float, optional): [description]. Defaults to 25.0. - frame_shift (float, optional): [description]. Defaults to 10.0. - min_duration (float, optional): [description]. Defaults to 0.0. - preemphasis_coefficient (float, optional): [description]. Defaults to 0.97. - raw_energy (bool, optional): [description]. Defaults to True. - remove_dc_offset (bool, optional): [description]. Defaults to True. - round_to_power_of_two (bool, optional): [description]. Defaults to True. - sample_frequency (float, optional): [description]. Defaults to 16000.0. - snip_edges (bool, optional): [description]. Defaults to True. - subtract_mean (bool, optional): [description]. Defaults to False. - window_type (str, optional): [description]. Defaults to POVEY. + waveform (Tensor): A waveform tensor with shape [C, T]. + blackman_coeff (float, optional): Coefficient for Blackman window. Defaults to 0.42. + channel (int, optional): Select the channel of waveform. Defaults to -1. + dither (float, optional): Dithering constant. Defaults to 0.0. + energy_floor (float, optional): Floor on energy of the output Spectrogram. Defaults to 1.0. + frame_length (float, optional): Frame length in milliseconds. Defaults to 25.0. + frame_shift (float, optional): Shift between adjacent frames in milliseconds. Defaults to 10.0. + preemphasis_coefficient (float, optional): Preemphasis coefficient for input waveform. Defaults to 0.97. + raw_energy (bool, optional): Whether to compute energy before preemphasis and windowing. Defaults to True. + remove_dc_offset (bool, optional): Whether to subtract mean from waveform on frames. Defaults to True. + round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input + to FFT. Defaults to True. + sr (int, optional): Sample rate of input waveform. Defaults to 16000. + snip_edges (bool, optional): Drop samples at the end of waveform that can't fit a single frame when it + is set True. Otherwise performs reflect padding to the end of waveform.
Defaults to True. + subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False. + window_type (str, optional): Choose type of window for FFT computation. Defaults to POVEY. Returns: - Tensor: [description] + Tensor: A spectrogram tensor with shape (m, padded_window_size // 2 + 1) where m is the number of frames + depends on frame_length and frame_shift. """ dtype = waveform.dtype epsilon = _get_epsilon(dtype) waveform, window_shift, window_size, padded_window_size = _get_waveform_and_window_properties( - waveform, channel, sample_frequency, frame_shift, frame_length, - round_to_power_of_two, preemphasis_coefficient) - - if len(waveform) < min_duration * sample_frequency: - # signal is too short - return paddle.empty([0]) + waveform, channel, sr, frame_shift, frame_length, round_to_power_of_two, + preemphasis_coefficient) strided_input, signal_log_energy = _get_window( waveform, padded_window_size, window_size, window_shift, window_type, blackman_coeff, snip_edges, raw_energy, energy_floor, dither, remove_dc_offset, preemphasis_coefficient) - # size (m, padded_window_size // 2 + 1, 2) + # (m, padded_window_size // 2 + 1, 2) fft = paddle.fft.rfft(strided_input) - # Convert the FFT into a power spectrum power_spectrum = paddle.maximum( - fft.abs().pow(2.), - epsilon).log() # size (m, padded_window_size // 2 + 1) + fft.abs().pow(2.), epsilon).log() # (m, padded_window_size // 2 + 1) power_spectrum[:, 0] = signal_log_energy power_spectrum = _subtract_column_mean(power_spectrum, subtract_mean) @@ -306,25 +291,19 @@ def _vtln_warp_freq(vtln_low_cutoff: float, l = vtln_low_cutoff * max(1.0, vtln_warp_factor) h = vtln_high_cutoff * min(1.0, vtln_warp_factor) scale = 1.0 / vtln_warp_factor - Fl = scale * l # F(l) - Fh = scale * h # F(h) + Fl = scale * l + Fh = scale * h assert l > low_freq and h < high_freq - # slope of left part of the 3-piece linear function scale_left = (Fl - low_freq) / (l - low_freq) - # [slope of center part is just "scale"] - - # slope of right part of the 3-piece linear function scale_right = (high_freq - Fh) / (high_freq - h) - res = paddle.empty_like(freq) outside_low_high_freq = paddle.less_than(freq, paddle.to_tensor(low_freq)) \ - | paddle.greater_than(freq, paddle.to_tensor(high_freq)) # freq < low_freq || freq > high_freq - before_l = paddle.less_than(freq, paddle.to_tensor(l)) # freq < l - before_h = paddle.less_than(freq, paddle.to_tensor(h)) # freq < h - after_h = paddle.greater_equal(freq, paddle.to_tensor(h)) # freq >= h + | paddle.greater_than(freq, paddle.to_tensor(high_freq)) + before_l = paddle.less_than(freq, paddle.to_tensor(l)) + before_h = paddle.less_than(freq, paddle.to_tensor(h)) + after_h = paddle.greater_equal(freq, paddle.to_tensor(h)) - # order of operations matter here (since there is overlapping frequency regions) res[after_h] = high_freq + scale_right * (freq[after_h] - high_freq) res[before_h] = scale * freq[before_h] res[before_l] = low_freq + scale_left * (freq[before_l] - low_freq) @@ -363,13 +342,10 @@ def _get_mel_banks(num_bins: int, assert (0.0 <= low_freq < nyquist) and (0.0 < high_freq <= nyquist) and (low_freq < high_freq), \ ('Bad values in options: low-freq {} and high-freq {} vs. 
nyquist {}'.format(low_freq, high_freq, nyquist)) - # fft-bin width [think of it as Nyquist-freq / half-window-length] fft_bin_width = sample_freq / window_length_padded mel_low_freq = _mel_scale_scalar(low_freq) mel_high_freq = _mel_scale_scalar(high_freq) - # divide by num_bins+1 in next line because of end-effects where the bins - # spread out to the sides. mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins + 1) if vtln_high < 0.0: @@ -381,10 +357,9 @@ def _get_mel_banks(num_bins: int, 'low-freq {} and high-freq {}'.format(vtln_low, vtln_high, low_freq, high_freq)) bin = paddle.arange(num_bins).unsqueeze(1) - left_mel = mel_low_freq + bin * mel_freq_delta # size(num_bins, 1) - center_mel = mel_low_freq + (bin + 1.0 - ) * mel_freq_delta # size(num_bins, 1) - right_mel = mel_low_freq + (bin + 2.0) * mel_freq_delta # size(num_bins, 1) + left_mel = mel_low_freq + bin * mel_freq_delta # (num_bins, 1) + center_mel = mel_low_freq + (bin + 1.0) * mel_freq_delta # (num_bins, 1) + right_mel = mel_low_freq + (bin + 2.0) * mel_freq_delta # (num_bins, 1) if vtln_warp_factor != 1.0: left_mel = _vtln_warp_mel_freq(vtln_low, vtln_high, low_freq, high_freq, @@ -395,25 +370,23 @@ def _get_mel_banks(num_bins: int, right_mel = _vtln_warp_mel_freq(vtln_low, vtln_high, low_freq, high_freq, vtln_warp_factor, right_mel) - center_freqs = _inverse_mel_scale(center_mel) # size (num_bins) - # size(1, num_fft_bins) + center_freqs = _inverse_mel_scale(center_mel) # (num_bins) + # (1, num_fft_bins) mel = _mel_scale(fft_bin_width * paddle.arange(num_fft_bins)).unsqueeze(0) - # size (num_bins, num_fft_bins) + # (num_bins, num_fft_bins) up_slope = (mel - left_mel) / (center_mel - left_mel) down_slope = (right_mel - mel) / (right_mel - center_mel) if vtln_warp_factor == 1.0: - # left_mel < center_mel < right_mel so we can min the two slopes and clamp negative values bins = paddle.maximum( paddle.zeros([1]), paddle.minimum(up_slope, down_slope)) else: - # warping can move the order of left_mel, center_mel, right_mel anywhere bins = paddle.zeros_like(up_slope) up_idx = paddle.greater_than(mel, left_mel) & paddle.less_than( - mel, center_mel) # left_mel < mel <= center_mel + mel, center_mel) down_idx = paddle.greater_than(mel, center_mel) & paddle.less_than( - mel, right_mel) # center_mel < mel < right_mel + mel, right_mel) bins[up_idx] = up_slope[up_idx] bins[down_idx] = down_slope[down_idx] @@ -430,13 +403,12 @@ def fbank(waveform: Tensor, high_freq: float=0.0, htk_compat: bool=False, low_freq: float=20.0, - min_duration: float=0.0, - num_mel_bins: int=23, + n_mels: int=23, preemphasis_coefficient: float=0.97, raw_energy: bool=True, remove_dc_offset: bool=True, round_to_power_of_two: bool=True, - sample_frequency: float=16000.0, + sr: int=16000, snip_edges: bool=True, subtract_mean: bool=False, use_energy: bool=False, @@ -446,83 +418,75 @@ def fbank(waveform: Tensor, vtln_low: float=100.0, vtln_warp: float=1.0, window_type: str=POVEY) -> Tensor: - """[summary] + """Compute and return filter banks from a waveform. The output is identical to Kaldi's. Args: - waveform (Tensor): [description] - blackman_coeff (float, optional): [description]. Defaults to 0.42. - channel (int, optional): [description]. Defaults to -1. - dither (float, optional): [description]. Defaults to 0.0. - energy_floor (float, optional): [description]. Defaults to 1.0. - frame_length (float, optional): [description]. Defaults to 25.0. - frame_shift (float, optional): [description]. Defaults to 10.0. - high_freq (float, optional): [description]. 
Defaults to 0.0. - htk_compat (bool, optional): [description]. Defaults to False. - low_freq (float, optional): [description]. Defaults to 20.0. - min_duration (float, optional): [description]. Defaults to 0.0. - num_mel_bins (int, optional): [description]. Defaults to 23. - preemphasis_coefficient (float, optional): [description]. Defaults to 0.97. - raw_energy (bool, optional): [description]. Defaults to True. - remove_dc_offset (bool, optional): [description]. Defaults to True. - round_to_power_of_two (bool, optional): [description]. Defaults to True. - sample_frequency (float, optional): [description]. Defaults to 16000.0. - snip_edges (bool, optional): [description]. Defaults to True. - subtract_mean (bool, optional): [description]. Defaults to False. - use_energy (bool, optional): [description]. Defaults to False. - use_log_fbank (bool, optional): [description]. Defaults to True. - use_power (bool, optional): [description]. Defaults to True. - vtln_high (float, optional): [description]. Defaults to -500.0. - vtln_low (float, optional): [description]. Defaults to 100.0. - vtln_warp (float, optional): [description]. Defaults to 1.0. - window_type (str, optional): [description]. Defaults to POVEY. + waveform (Tensor): A waveform tensor with shape [C, T]. + blackman_coeff (float, optional): Coefficient for Blackman window. Defaults to 0.42. + channel (int, optional): Select the channel of waveform. Defaults to -1. + dither (float, optional): Dithering constant. Defaults to 0.0. + energy_floor (float, optional): Floor on energy of the output Spectrogram. Defaults to 1.0. + frame_length (float, optional): Frame length in milliseconds. Defaults to 25.0. + frame_shift (float, optional): Shift between adjacent frames in milliseconds. Defaults to 10.0. + high_freq (float, optional): The upper cut-off frequency. Defaults to 0.0. + htk_compat (bool, optional): Put energy at the end when it is set True. Defaults to False. + low_freq (float, optional): The lower cut-off frequency. Defaults to 20.0. + n_mels (int, optional): Number of output mel bins. Defaults to 23. + preemphasis_coefficient (float, optional): Preemphasis coefficient for input waveform. Defaults to 0.97. + raw_energy (bool, optional): Whether to compute energy before preemphasis and windowing. Defaults to True. + remove_dc_offset (bool, optional): Whether to subtract mean from waveform on frames. Defaults to True. + round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input + to FFT. Defaults to True. + sr (int, optional): Sample rate of input waveform. Defaults to 16000. + snip_edges (bool, optional): Drop samples at the end of waveform that can't fit a single frame when it + is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True. + subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False. + use_energy (bool, optional): Add a dimension with the energy of the spectrogram to the output. Defaults to False. + use_log_fbank (bool, optional): Return log fbank when it is set True. Defaults to True. + use_power (bool, optional): Whether to use power instead of magnitude. Defaults to True. + vtln_high (float, optional): High inflection point in piecewise linear VTLN warping function. Defaults to -500.0. + vtln_low (float, optional): Low inflection point in piecewise linear VTLN warping function. Defaults to 100.0. + vtln_warp (float, optional): VTLN warp factor. Defaults to 1.0.
+ window_type (str, optional): Choose type of window for FFT computation. Defaults to POVEY. Returns: - Tensor: [description] + Tensor: A filter banks tensor with shape (m, n_mels). """ dtype = waveform.dtype waveform, window_shift, window_size, padded_window_size = _get_waveform_and_window_properties( - waveform, channel, sample_frequency, frame_shift, frame_length, - round_to_power_of_two, preemphasis_coefficient) - - if len(waveform) < min_duration * sample_frequency: - # signal is too short - return paddle.empty([0], dtype=dtype) + waveform, channel, sr, frame_shift, frame_length, round_to_power_of_two, + preemphasis_coefficient) - # strided_input, size (m, padded_window_size) and signal_log_energy, size (m) strided_input, signal_log_energy = _get_window( waveform, padded_window_size, window_size, window_shift, window_type, blackman_coeff, snip_edges, raw_energy, energy_floor, dither, remove_dc_offset, preemphasis_coefficient) - # size (m, padded_window_size // 2 + 1) + # (m, padded_window_size // 2 + 1) spectrum = paddle.fft.rfft(strided_input).abs() if use_power: spectrum = spectrum.pow(2.) - # size (num_mel_bins, padded_window_size // 2) - mel_energies, _ = _get_mel_banks(num_mel_bins, padded_window_size, - sample_frequency, low_freq, high_freq, - vtln_low, vtln_high, vtln_warp) + # (n_mels, padded_window_size // 2) + mel_energies, _ = _get_mel_banks(n_mels, padded_window_size, sr, low_freq, + high_freq, vtln_low, vtln_high, vtln_warp) mel_energies = mel_energies.astype(dtype) - # pad right column with zeros and add dimension, size (num_mel_bins, padded_window_size // 2 + 1) + # (n_mels, padded_window_size // 2 + 1) mel_energies = paddle.nn.functional.pad( mel_energies.unsqueeze(0), (0, 1), data_format='NCL', mode='constant', value=0).squeeze(0) - # sum with mel fiterbanks over the power spectrum, size (m, num_mel_bins) + # (m, n_mels) mel_energies = paddle.mm(spectrum, mel_energies.T) if use_log_fbank: - # avoid log of zero (which should be prevented anyway by dithering) mel_energies = paddle.maximum(mel_energies, _get_epsilon(dtype)).log() - # if use_energy then add it as the last column for htk_compat == true else first column if use_energy: - signal_log_energy = signal_log_energy.unsqueeze(1) # size (m, 1) - # returns size (m, num_mel_bins + 1) + signal_log_energy = signal_log_energy.unsqueeze(1) if htk_compat: mel_energies = paddle.concat( (mel_energies, signal_log_energy), axis=1) @@ -530,28 +494,20 @@ def fbank(waveform: Tensor, mel_energies = paddle.concat( (signal_log_energy, mel_energies), axis=1) + # (m, n_mels + 1) mel_energies = _subtract_column_mean(mel_energies, subtract_mean) return mel_energies -def _get_dct_matrix(num_ceps: int, num_mel_bins: int) -> Tensor: - # returns a dct matrix of size (num_mel_bins, num_ceps) - # size (num_mel_bins, num_mel_bins) - dct_matrix = create_dct(num_mel_bins, num_mel_bins, 'ortho') - # kaldi expects the first cepstral to be weighted sum of factor sqrt(1/num_mel_bins) - # this would be the first column in the dct_matrix for torchaudio as it expects a - # right multiply (which would be the first column of the kaldi's dct_matrix as kaldi - # expects a left multiply e.g. dct_matrix * vector). 
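With the renamed arguments (`sr`, `n_mels`), the refactored Kaldi-compatible `fbank` can be exercised like this (a sketch with random audio; assumes the package layout above):

```python
import paddle
from paddleaudio.compliance import kaldi

waveform = paddle.randn([1, 16000])  # (C, T): one second of mono audio
feat = kaldi.fbank(waveform, sr=16000, n_mels=23)
print(feat.shape)  # (m, 23), where m follows from frame_shift/frame_length
```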
- dct_matrix[:, 0] = math.sqrt(1 / float(num_mel_bins)) - dct_matrix = dct_matrix[:, :num_ceps] +def _get_dct_matrix(n_mfcc: int, n_mels: int) -> Tensor: + dct_matrix = create_dct(n_mels, n_mels, 'ortho') + dct_matrix[:, 0] = math.sqrt(1 / float(n_mels)) + dct_matrix = dct_matrix[:, :n_mfcc] # (n_mels, n_mfcc) return dct_matrix -def _get_lifter_coeffs(num_ceps: int, cepstral_lifter: float) -> Tensor: - # returns size (num_ceps) - # Compute liftering coefficients (scaling on cepstral coeffs) - # coeffs are numbered slightly differently from HTK: the zeroth index is C0, which is not affected. - i = paddle.arange(num_ceps) +def _get_lifter_coeffs(n_mfcc: int, cepstral_lifter: float) -> Tensor: + i = paddle.arange(n_mfcc) return 1.0 + 0.5 * cepstral_lifter * paddle.sin(math.pi * i / cepstral_lifter) @@ -567,14 +523,13 @@ def mfcc(waveform: Tensor, high_freq: float=0.0, htk_compat: bool=False, low_freq: float=20.0, - num_ceps: int=13, - min_duration: float=0.0, - num_mel_bins: int=23, + n_mfcc: int=13, + n_mels: int=23, preemphasis_coefficient: float=0.97, raw_energy: bool=True, remove_dc_offset: bool=True, round_to_power_of_two: bool=True, - sample_frequency: float=16000.0, + sr: int=16000, snip_edges: bool=True, subtract_mean: bool=False, use_energy: bool=False, @@ -582,47 +537,47 @@ def mfcc(waveform: Tensor, vtln_low: float=100.0, vtln_warp: float=1.0, window_type: str=POVEY) -> Tensor: - """[summary] + """Compute and return mel frequency cepstral coefficients from a waveform. The output is + identical to Kaldi's. Args: - waveform (Tensor): [description] - blackman_coeff (float, optional): [description]. Defaults to 0.42. - cepstral_lifter (float, optional): [description]. Defaults to 22.0. - channel (int, optional): [description]. Defaults to -1. - dither (float, optional): [description]. Defaults to 0.0. - energy_floor (float, optional): [description]. Defaults to 1.0. - frame_length (float, optional): [description]. Defaults to 25.0. - frame_shift (float, optional): [description]. Defaults to 10.0. - high_freq (float, optional): [description]. Defaults to 0.0. - htk_compat (bool, optional): [description]. Defaults to False. - low_freq (float, optional): [description]. Defaults to 20.0. - num_ceps (int, optional): [description]. Defaults to 13. - min_duration (float, optional): [description]. Defaults to 0.0. - num_mel_bins (int, optional): [description]. Defaults to 23. - preemphasis_coefficient (float, optional): [description]. Defaults to 0.97. - raw_energy (bool, optional): [description]. Defaults to True. - remove_dc_offset (bool, optional): [description]. Defaults to True. - round_to_power_of_two (bool, optional): [description]. Defaults to True. - sample_frequency (float, optional): [description]. Defaults to 16000.0. - snip_edges (bool, optional): [description]. Defaults to True. - subtract_mean (bool, optional): [description]. Defaults to False. - use_energy (bool, optional): [description]. Defaults to False. - vtln_high (float, optional): [description]. Defaults to -500.0. - vtln_low (float, optional): [description]. Defaults to 100.0. - vtln_warp (float, optional): [description]. Defaults to 1.0. - window_type (str, optional): [description]. Defaults to POVEY. + waveform (Tensor): A waveform tensor with shape [C, T]. + blackman_coeff (float, optional): Coefficient for Blackman window. Defaults to 0.42. + cepstral_lifter (float, optional): Scaling of output MFCCs. Defaults to 22.0. + channel (int, optional): Select the channel of waveform. Defaults to -1. + dither (float, optional): Dithering constant. Defaults to 0.0. + energy_floor (float, optional): Floor on energy of the output Spectrogram. Defaults to 1.0. + frame_length (float, optional): Frame length in milliseconds. Defaults to 25.0. + frame_shift (float, optional): Shift between adjacent frames in milliseconds. Defaults to 10.0. + high_freq (float, optional): The upper cut-off frequency. Defaults to 0.0. + htk_compat (bool, optional): Put energy at the end when it is set True. Defaults to False. + low_freq (float, optional): The lower cut-off frequency. Defaults to 20.0. + n_mfcc (int, optional): Number of cepstra in MFCC. Defaults to 13. + n_mels (int, optional): Number of output mel bins. Defaults to 23. + preemphasis_coefficient (float, optional): Preemphasis coefficient for input waveform. Defaults to 0.97. + raw_energy (bool, optional): Whether to compute energy before preemphasis and windowing. Defaults to True. + remove_dc_offset (bool, optional): Whether to subtract mean from waveform on frames. Defaults to True. + round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input + to FFT. Defaults to True. + sr (int, optional): Sample rate of input waveform. Defaults to 16000. + snip_edges (bool, optional): Drop samples at the end of waveform that can't fit a single frame when it + is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True. + subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False. + use_energy (bool, optional): Add a dimension with the energy of the spectrogram to the output. Defaults to False. + vtln_high (float, optional): High inflection point in piecewise linear VTLN warping function. Defaults to -500.0. + vtln_low (float, optional): Low inflection point in piecewise linear VTLN warping function. Defaults to 100.0. + vtln_warp (float, optional): VTLN warp factor. Defaults to 1.0. + window_type (str, optional): Choose type of window for FFT computation. Defaults to POVEY. Returns: Tensor: A mel frequency cepstral coefficients tensor with shape (m, n_mfcc). """ assert n_mfcc <= n_mels, 'n_mfcc cannot be larger than n_mels: %d vs %d' % ( n_mfcc, n_mels) dtype = waveform.dtype - # The mel_energies should not be squared (use_power=True), not have mean subtracted - # (subtract_mean=False), and use log (use_log_fbank=True).
- # size (m, num_mel_bins + use_energy) + # (m, n_mels + use_energy) feature = fbank( waveform=waveform, blackman_coeff=blackman_coeff, @@ -634,13 +589,12 @@ def mfcc(waveform: Tensor, high_freq=high_freq, htk_compat=htk_compat, low_freq=low_freq, - min_duration=min_duration, - num_mel_bins=num_mel_bins, + n_mels=n_mels, preemphasis_coefficient=preemphasis_coefficient, raw_energy=raw_energy, remove_dc_offset=remove_dc_offset, round_to_power_of_two=round_to_power_of_two, - sample_frequency=sample_frequency, + sr=sr, snip_edges=snip_edges, subtract_mean=False, use_energy=use_energy, @@ -652,34 +606,29 @@ def mfcc(waveform: Tensor, window_type=window_type) if use_energy: - # size (m) - signal_log_energy = feature[:, num_mel_bins if htk_compat else 0] - # offset is 0 if htk_compat==True else 1 + # (m) + signal_log_energy = feature[:, n_mels if htk_compat else 0] mel_offset = int(not htk_compat) - feature = feature[:, mel_offset:(num_mel_bins + mel_offset)] + feature = feature[:, mel_offset:(n_mels + mel_offset)] - # size (num_mel_bins, num_ceps) - dct_matrix = _get_dct_matrix(num_ceps, num_mel_bins).astype(dtype=dtype) + # (n_mels, n_mfcc) + dct_matrix = _get_dct_matrix(n_mfcc, n_mels).astype(dtype=dtype) - # size (m, num_ceps) + # (m, n_mfcc) feature = feature.matmul(dct_matrix) if cepstral_lifter != 0.0: - # size (1, num_ceps) - lifter_coeffs = _get_lifter_coeffs(num_ceps, - cepstral_lifter).unsqueeze(0) + # (1, n_mfcc) + lifter_coeffs = _get_lifter_coeffs(n_mfcc, cepstral_lifter).unsqueeze(0) feature *= lifter_coeffs.astype(dtype=dtype) - # if use_energy then replace the last column for htk_compat == true else first column if use_energy: feature[:, 0] = signal_log_energy if htk_compat: - energy = feature[:, 0].unsqueeze(1) # size (m, 1) - feature = feature[:, 1:] # size (m, num_ceps - 1) + energy = feature[:, 0].unsqueeze(1) # (m, 1) + feature = feature[:, 1:] # (m, n_mfcc - 1) if not use_energy: - # scale on C0 (actually removing a scale we previously added that's - # part of one common definition of the cosine transform.) energy *= math.sqrt(2) feature = paddle.concat((feature, energy), axis=1) diff --git a/paddleaudio/paddleaudio/features/layers.py b/paddleaudio/paddleaudio/features/layers.py index 69f46254..16fa0081 100644 --- a/paddleaudio/paddleaudio/features/layers.py +++ b/paddleaudio/paddleaudio/features/layers.py @@ -261,12 +261,18 @@ class MFCC(nn.Layer): sr: int=22050, n_mfcc: int=40, norm: str='ortho', + dtype: str=paddle.float32, **kwargs): - """[summary] + """Compute mel frequency cepstral coefficients(MFCCs) feature of given waveforms. + Parameters: - sr (int, optional): [description]. Defaults to 22050. - n_mfcc (int, optional): [description]. Defaults to 40. - norm (str, optional): [description]. Defaults to 'ortho'. + sr(int): the audio sample rate. + The default value is 22050. + n_mfcc (int, optional): Number of cepstra in MFCC. Defaults to 40. + norm(str|float): the normalization type in computing fbank matrix. Slaney-style is used by default. + You can specify norm=1.0/2.0 to use customized p-norm normalization. + dtype(str): the datatype of fbank matrix used in the transform. Use float64 to increase numerical + accuracy. Note that the final transform will be conducted in float32 regardless of dtype of fbank matrix. 
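And similarly for the Kaldi-compatible `mfcc` with its renamed `n_mfcc`/`n_mels`/`sr` arguments (a sketch; note the `n_mfcc <= n_mels` constraint asserted above):

```python
import paddle
from paddleaudio.compliance import kaldi

waveform = paddle.randn([1, 16000])  # (C, T)
feat = kaldi.mfcc(waveform, sr=16000, n_mfcc=13, n_mels=23)
print(feat.shape)  # (m, 13)
```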
""" super(MFCC, self).__init__() self._log_melspectrogram = LogMelSpectrogram(sr=sr, **kwargs) From e50c1b3b1d61695369478e81ab9f5280416d7ba2 Mon Sep 17 00:00:00 2001 From: lym0302 Date: Thu, 3 Mar 2022 20:39:26 +0800 Subject: [PATCH 39/45] add server test, test=doc --- demos/speech_server/README.md | 5 +- demos/speech_server/README_cn.md | 2 + demos/speech_server/conf/application.yaml | 2 +- paddlespeech/server/conf/application.yaml | 2 +- tests/unit/server/change_yaml.py | 114 ++++++++++++++ tests/unit/server/conf/application.yaml | 27 ++++ tests/unit/server/conf/asr/asr.yaml | 8 + tests/unit/server/conf/asr/asr_pd.yaml | 26 +++ tests/unit/server/conf/tts/tts.yaml | 32 ++++ tests/unit/server/conf/tts/tts_pd.yaml | 42 +++++ tests/unit/server/test_server_client.sh | 184 ++++++++++++++++++++++ 11 files changed, 440 insertions(+), 4 deletions(-) create mode 100644 tests/unit/server/change_yaml.py create mode 100644 tests/unit/server/conf/application.yaml create mode 100644 tests/unit/server/conf/asr/asr.yaml create mode 100644 tests/unit/server/conf/asr/asr_pd.yaml create mode 100644 tests/unit/server/conf/tts/tts.yaml create mode 100644 tests/unit/server/conf/tts/tts_pd.yaml create mode 100644 tests/unit/server/test_server_client.sh diff --git a/demos/speech_server/README.md b/demos/speech_server/README.md index ac5cc4b0..515abaf6 100644 --- a/demos/speech_server/README.md +++ b/demos/speech_server/README.md @@ -10,12 +10,13 @@ This demo is an implementation of starting the voice service and accessing the s ### 1. Installation see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). +It is recommended to use **paddlepaddle 2.2.1** or above. You can choose one way from easy, meduim and hard to install paddlespeech. ### 2. Prepare config File The configuration file contains the service-related configuration files and the model configuration related to the voice tasks contained in the service. They are all under the `conf` folder. -**Note: The configuration of `engine_backend` in `application.yaml` represents all speech tasks included in the started service. ** +**Note: The configuration of `engine_backend` in `application.yaml` represents all speech tasks included in the started service.** If the service you want to start contains only a certain speech task, then you need to comment out the speech tasks that do not need to be included. For example, if you only want to use the speech recognition (ASR) service, then you can comment out the speech synthesis (TTS) service, as in the following example: ```bash engine_backend: @@ -23,7 +24,7 @@ engine_backend: #tts: 'conf/tts/tts.yaml' ``` -**Note: The configuration file of `engine_backend` in `application.yaml` needs to match the configuration type of `engine_type`. ** +**Note: The configuration file of `engine_backend` in `application.yaml` needs to match the configuration type of `engine_type`.** When the configuration file of `engine_backend` is `XXX.yaml`, the configuration type of `engine_type` needs to be set to `python`; when the configuration file of `engine_backend` is `XXX_pd.yaml`, the configuration of `engine_type` needs to be set type is `inference`; The input of ASR client demo should be a WAV file(`.wav`), and the sample rate must be the same as the model. diff --git a/demos/speech_server/README_cn.md b/demos/speech_server/README_cn.md index f202a30c..05884fbd 100644 --- a/demos/speech_server/README_cn.md +++ b/demos/speech_server/README_cn.md @@ -10,8 +10,10 @@ ### 1. 
安装 请看 [安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). +推荐使用 **paddlepaddle 2.2.1** 或以上版本。 你可以从 easy,medium,hard 三种方式中选择一种方式安装 PaddleSpeech。 + ### 2. 准备配置文件 配置文件包含服务相关的配置文件和服务中包含的语音任务相关的模型配置。 它们都在 `conf` 文件夹下。 **注意:`application.yaml` 中 `engine_backend` 的配置表示启动的服务中包含的所有语音任务。** diff --git a/demos/speech_server/conf/application.yaml b/demos/speech_server/conf/application.yaml index 6dcae74a..aba33a51 100644 --- a/demos/speech_server/conf/application.yaml +++ b/demos/speech_server/conf/application.yaml @@ -3,7 +3,7 @@ ################################################################## # SERVER SETTING # ################################################################## -host: '127.0.0.1' +host: 127.0.0.1 port: 8090 ################################################################## diff --git a/paddlespeech/server/conf/application.yaml b/paddlespeech/server/conf/application.yaml index 6dcae74a..aba33a51 100644 --- a/paddlespeech/server/conf/application.yaml +++ b/paddlespeech/server/conf/application.yaml @@ -3,7 +3,7 @@ ################################################################## # SERVER SETTING # ################################################################## -host: '127.0.0.1' +host: 127.0.0.1 port: 8090 ################################################################## diff --git a/tests/unit/server/change_yaml.py b/tests/unit/server/change_yaml.py new file mode 100644 index 00000000..5a5d9ae0 --- /dev/null +++ b/tests/unit/server/change_yaml.py @@ -0,0 +1,114 @@ +#!/usr/bin/python +import argparse +import os + +import yaml + + +def change_speech_yaml(yaml_name: str, device: str): + """Change the device setting in a speech task configuration file. + + Args: + yaml_name (str): asr or asr_pd or tts or tts_pd + device (str): 'cpu' or 'gpu' + """ + if "asr" in yaml_name: + dirpath = "./conf/asr/" + elif 'tts' in yaml_name: + dirpath = "./conf/tts/" + yamlfile = dirpath + yaml_name + ".yaml" + tmp_yamlfile = dirpath + yaml_name + "_tmp.yaml" + os.system("cp %s %s" % (yamlfile, tmp_yamlfile)) + + with open(tmp_yamlfile) as f, open(yamlfile, "w+", encoding="utf-8") as fw: + y = yaml.safe_load(f) + if device == 'cpu': + print("Set device: cpu") + if yaml_name == 'asr': + y['device'] = 'cpu' + elif yaml_name == 'asr_pd': + y['am_predictor_conf']['device'] = 'cpu' + elif yaml_name == 'tts': + y['device'] = 'cpu' + elif yaml_name == 'tts_pd': + y['am_predictor_conf']['device'] = 'cpu' + y['voc_predictor_conf']['device'] = 'cpu' + elif device == 'gpu': + print("Set device: gpu") + if yaml_name == 'asr': + y['device'] = 'gpu:0' + elif yaml_name == 'asr_pd': + y['am_predictor_conf']['device'] = 'gpu:0' + elif yaml_name == 'tts': + y['device'] = 'gpu:0' + elif yaml_name == 'tts_pd': + y['am_predictor_conf']['device'] = 'gpu:0' + y['voc_predictor_conf']['device'] = 'gpu:0' + else: + print("Please set correct device: cpu or gpu.") + + print("The content of '%s': " % (yamlfile)) + print(yaml.dump(y, default_flow_style=False, sort_keys=False)) + yaml.dump(y, fw, allow_unicode=True) + os.system("rm %s" % (tmp_yamlfile)) + print("Change %s successfully."
% (yamlfile)) + + +def change_app_yaml(task: str, engine_type: str): + """Change the engine type and corresponding configuration file of the speech task in application.yaml + + Args: + task (str): asr or tts + """ + yamlfile = "./conf/application.yaml" + tmp_yamlfile = "./conf/application_tmp.yaml" + os.system("cp %s %s" % (yamlfile, tmp_yamlfile)) + with open(tmp_yamlfile) as f, open(yamlfile, "w+", encoding="utf-8") as fw: + y = yaml.safe_load(f) + y['engine_type'][task] = engine_type + path_list = ["./conf/", task, "/", task] + if engine_type == 'python': + path_list.append(".yaml") + + elif engine_type == 'inference': + path_list.append("_pd.yaml") + y['engine_backend'][task] = ''.join(path_list) + print("The content of './conf/application.yaml': ") + print(yaml.dump(y, default_flow_style=False, sort_keys=False)) + yaml.dump(y, fw, allow_unicode=True) + os.system("rm %s" % (tmp_yamlfile)) + print("Change %s successfully." % (yamlfile)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + '--change_task', + type=str, + default=None, + help='Change task', + choices=[ + 'app-asr-python', + 'app-asr-inference', + 'app-tts-python', + 'app-tts-inference', + 'speech-asr-cpu', + 'speech-asr-gpu', + 'speech-asr_pd-cpu', + 'speech-asr_pd-gpu', + 'speech-tts-cpu', + 'speech-tts-gpu', + 'speech-tts_pd-cpu', + 'speech-tts_pd-gpu', + ], + required=True) + args = parser.parse_args() + + types = args.change_task.split("-") + if types[0] == "app": + change_app_yaml(types[1], types[2]) + elif types[0] == "speech": + change_speech_yaml(types[1], types[2]) + else: + print("Error change task, please check change_task.") diff --git a/tests/unit/server/conf/application.yaml b/tests/unit/server/conf/application.yaml new file mode 100644 index 00000000..aba33a51 --- /dev/null +++ b/tests/unit/server/conf/application.yaml @@ -0,0 +1,27 @@ +# This is the parameter configuration file for PaddleSpeech Serving. + +################################################################## +# SERVER SETTING # +################################################################## +host: 127.0.0.1 +port: 8090 + +################################################################## +# CONFIG FILE # +################################################################## +# add engine backend type (Options: asr, tts) and config file here. +# Adding a speech task to engine_backend means starting the service. +engine_backend: + asr: 'conf/asr/asr.yaml' + tts: 'conf/tts/tts.yaml' + +# The engine_type of speech task needs to keep the same type as the config file of speech task. 
+# E.g: The engine_type of asr is 'python', the engine_backend of asr is 'XX/asr.yaml' +# E.g: The engine_type of asr is 'inference', the engine_backend of asr is 'XX/asr_pd.yaml' +# +# add engine type (Options: python, inference) +engine_type: + asr: 'python' + tts: 'python' + + diff --git a/tests/unit/server/conf/asr/asr.yaml b/tests/unit/server/conf/asr/asr.yaml new file mode 100644 index 00000000..a6743b77 --- /dev/null +++ b/tests/unit/server/conf/asr/asr.yaml @@ -0,0 +1,8 @@ +model: 'conformer_wenetspeech' +lang: 'zh' +sample_rate: 16000 +cfg_path: # [optional] +ckpt_path: # [optional] +decode_method: 'attention_rescoring' +force_yes: True +device: # set 'gpu:id' or 'cpu' diff --git a/tests/unit/server/conf/asr/asr_pd.yaml b/tests/unit/server/conf/asr/asr_pd.yaml new file mode 100644 index 00000000..4c415ac7 --- /dev/null +++ b/tests/unit/server/conf/asr/asr_pd.yaml @@ -0,0 +1,26 @@ +# This is the parameter configuration file for ASR server. +# These are the static models that support paddle inference. + +################################################################## +# ACOUSTIC MODEL SETTING # +# am choices=['deepspeech2offline_aishell'] TODO +################################################################## +model_type: 'deepspeech2offline_aishell' +am_model: # the pdmodel file of am static model [optional] +am_params: # the pdiparams file of am static model [optional] +lang: 'zh' +sample_rate: 16000 +cfg_path: +decode_method: +force_yes: True + +am_predictor_conf: + device: # set 'gpu:id' or 'cpu' + switch_ir_optim: True + glog_info: False # True -> print glog + summary: True # False -> do not show predictor config + + +################################################################## +# OTHERS # +################################################################## diff --git a/tests/unit/server/conf/tts/tts.yaml b/tests/unit/server/conf/tts/tts.yaml new file mode 100644 index 00000000..19207f0b --- /dev/null +++ b/tests/unit/server/conf/tts/tts.yaml @@ -0,0 +1,32 @@ +# This is the parameter configuration file for TTS server. + +################################################################## +# ACOUSTIC MODEL SETTING # +# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc', +# 'fastspeech2_ljspeech', 'fastspeech2_aishell3', +# 'fastspeech2_vctk'] +################################################################## +am: 'fastspeech2_csmsc' +am_config: +am_ckpt: +am_stat: +phones_dict: +tones_dict: +speaker_dict: +spk_id: 0 + +################################################################## +# VOCODER SETTING # +# voc choices=['pwgan_csmsc', 'pwgan_ljspeech', 'pwgan_aishell3', +# 'pwgan_vctk', 'mb_melgan_csmsc'] +################################################################## +voc: 'pwgan_csmsc' +voc_config: +voc_ckpt: +voc_stat: + +################################################################## +# OTHERS # +################################################################## +lang: 'zh' +device: # set 'gpu:id' or 'cpu' diff --git a/tests/unit/server/conf/tts/tts_pd.yaml b/tests/unit/server/conf/tts/tts_pd.yaml new file mode 100644 index 00000000..e27b9665 --- /dev/null +++ b/tests/unit/server/conf/tts/tts_pd.yaml @@ -0,0 +1,42 @@ +# This is the parameter configuration file for TTS server. +# These are the static models that support paddle inference. 
+
+##################################################################
+#                     ACOUSTIC MODEL SETTING                     #
+# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc']
+##################################################################
+am: 'fastspeech2_csmsc'
+am_model:   # the pdmodel file of your am static model (XX.pdmodel)
+am_params:  # the pdiparams file of your am static model (XX.pdiparams)
+am_sample_rate: 24000
+phones_dict:
+tones_dict:
+speaker_dict:
+spk_id: 0
+
+am_predictor_conf:
+    device:  # set 'gpu:id' or 'cpu'
+    switch_ir_optim: True
+    glog_info: False  # True -> print glog
+    summary: True  # False -> do not show predictor config
+
+
+##################################################################
+#                        VOCODER SETTING                         #
+# voc choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc']
+##################################################################
+voc: 'pwgan_csmsc'
+voc_model:   # the pdmodel file of your vocoder static model (XX.pdmodel)
+voc_params:  # the pdiparams file of your vocoder static model (XX.pdiparams)
+voc_sample_rate: 24000
+
+voc_predictor_conf:
+    device:  # set 'gpu:id' or 'cpu'
+    switch_ir_optim: True
+    glog_info: False  # True -> print glog
+    summary: True  # False -> do not show predictor config
+
+##################################################################
+#                           OTHERS                               #
+##################################################################
+lang: 'zh'
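For orientation, the `*_predictor_conf` blocks above correspond to Paddle Inference options. The sketch below is an editorial illustration, not part of the patch; the actual wiring lives in `paddlespeech.server.utils.paddle_predictor`, and details such as the initial GPU memory size are assumptions.

```python
from paddle import inference

# Rough sketch: turn a *_predictor_conf block into a Paddle Inference predictor.
def build_predictor(model_file: str, params_file: str, predictor_conf: dict):
    config = inference.Config(model_file, params_file)
    device = predictor_conf['device'] or 'cpu'   # empty value falls back to cpu
    if device.startswith('gpu'):
        config.enable_use_gpu(100, 0)  # assumed: 100 MB initial GPU memory, device id 0
    config.switch_ir_optim(predictor_conf['switch_ir_optim'])
    if not predictor_conf['glog_info']:
        config.disable_glog_info()
    return inference.create_predictor(config)
```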
diff --git a/tests/unit/server/test_server_client.sh b/tests/unit/server/test_server_client.sh
new file mode 100644
index 00000000..8f6a1368
--- /dev/null
+++ b/tests/unit/server/test_server_client.sh
@@ -0,0 +1,184 @@
+#!/bin/bash
+
+StartService(){
+    # Start service
+    paddlespeech_server start --config_file $config_file 1>>log/server.log 2>>log/server.log.wf &
+    echo $! > pid
+
+    start_num=$(cat log/server.log.wf | grep "INFO: Uvicorn running on http://" -c)
+    flag="normal"
+    while [[ $start_num -lt $target_start_num && $flag == "normal" ]]
+    do
+        start_num=$(cat log/server.log.wf | grep "INFO: Uvicorn running on http://" -c)
+        # start service failed
+        if [ $(cat log/server.log.wf | grep -i "error" -c) -gt $error_time ];then
+            echo "Service failed to start." | tee -a ./log/test_result.log
+            error_time=$(cat log/server.log.wf | grep -i "error" -c)
+            flag="abnormal"
+        fi
+    done
+}
+
+ClientTest(){
+    # Client test
+    # test asr client
+    paddlespeech_client asr --server_ip $server_ip --port $port --input ./zh.wav
+    ((test_times+=1))
+    paddlespeech_client asr --server_ip $server_ip --port $port --input ./zh.wav
+    ((test_times+=1))
+
+    # test tts client
+    paddlespeech_client tts --server_ip $server_ip --port $port --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.wav
+    ((test_times+=1))
+    paddlespeech_client tts --server_ip $server_ip --port $port --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.wav
+    ((test_times+=1))
+}
+
+GetTestResult() {
+    # Determine if the test was successful
+    response_success_time=$(cat log/server.log | grep "200 OK" -c)
+    if (( $response_success_time == $test_times )) ; then
+        echo "Testing succeeded. The service configuration is: asr engine type: $1; tts engine type: $1; device: $2." | tee -a ./log/test_result.log
+    else
+        echo "Testing failed. The service configuration is: asr engine type: $1; tts engine type: $1; device: $2." | tee -a ./log/test_result.log
+    fi
+    test_times=$response_success_time
+}
+
+
+mkdir -p log
+rm -rf log/server.log.wf
+rm -rf log/server.log
+rm -rf log/test_result.log
+
+config_file=./conf/application.yaml
+server_ip=$(cat $config_file | grep "host" | awk -F " " '{print $2}')
+port=$(cat $config_file | grep "port" | awk '/port:/ {print $2}')
+
+echo "Service ip: $server_ip" | tee ./log/test_result.log
+echo "Service port: $port" | tee -a ./log/test_result.log
+
+# whether a process is listening on $port
+pid=`lsof -i :"$port"|grep -v "PID" | awk '{print $2}'`
+if [ "$pid" != "" ]; then
+    echo "The port: $port is occupied, please change another port"
+    exit
+fi
+
+# download test audios for ASR client
+wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav
+
+
+target_start_num=0  # the number of service starts
+test_times=0  # the number of client tests
+error_time=0  # the number of error occurrences in the server.log.wf file on startup failure
+
+# start server: asr engine type: python; tts engine type: python; device: gpu
+echo "Start the service: asr engine type: python; tts engine type: python; device: gpu" | tee -a ./log/test_result.log
+((target_start_num+=1))
+StartService
+
+if [[ $start_num -eq $target_start_num && $flag == "normal" ]]; then
+    echo "Service started successfully." | tee -a ./log/test_result.log
+    ClientTest
+    echo "This round of testing is over." | tee -a ./log/test_result.log
+
+    GetTestResult python gpu
+else
+    echo "Service failed to start, no client test."
+    target_start_num=$start_num
+
+fi
+
+kill -9 `cat pid`
+rm -rf pid
+sleep 2s
+echo "**************************************************************************************" | tee -a ./log/test_result.log
+
+
+
+# start server: asr engine type: python; tts engine type: python; device: cpu
+python change_yaml.py --change_task speech-asr-cpu  # change asr.yaml device: cpu
+python change_yaml.py --change_task speech-tts-cpu  # change tts.yaml device: cpu
+
+echo "Start the service: asr engine type: python; tts engine type: python; device: cpu" | tee -a ./log/test_result.log
+((target_start_num+=1))
+StartService
+
+if [[ $start_num -eq $target_start_num && $flag == "normal" ]]; then
+    echo "Service started successfully." | tee -a ./log/test_result.log
+    ClientTest
+    echo "This round of testing is over." | tee -a ./log/test_result.log
+
+    GetTestResult python cpu
+else
+    echo "Service failed to start, no client test."
+    target_start_num=$start_num
+
+fi
+
+kill -9 `cat pid`
+rm -rf pid
+sleep 2s
+echo "**************************************************************************************" | tee -a ./log/test_result.log
+
+
+# start server: asr engine type: inference; tts engine type: inference; device: gpu
+python change_yaml.py --change_task app-asr-inference  # change application.yaml, asr engine_type: inference; asr engine_backend: asr_pd.yaml
+python change_yaml.py --change_task app-tts-inference  # change application.yaml, tts engine_type: inference; tts engine_backend: tts_pd.yaml
+
+echo "Start the service: asr engine type: inference; tts engine type: inference; device: gpu" | tee -a ./log/test_result.log
+((target_start_num+=1))
+StartService
+
+if [[ $start_num -eq $target_start_num && $flag == "normal" ]]; then
+    echo "Service started successfully." | tee -a ./log/test_result.log
+    ClientTest
+    echo "This round of testing is over." | tee -a ./log/test_result.log
+
+    GetTestResult inference gpu
+else
+    echo "Service failed to start, no client test."
+    target_start_num=$start_num
+
+fi
+
+kill -9 `cat pid`
+rm -rf pid
+sleep 2s
+echo "**************************************************************************************" | tee -a ./log/test_result.log
+
+
+# start server: asr engine type: inference; tts engine type: inference; device: cpu
+python change_yaml.py --change_task speech-asr_pd-cpu  # change asr_pd.yaml device: cpu
+python change_yaml.py --change_task speech-tts_pd-cpu  # change tts_pd.yaml device: cpu
+
+echo "Start the service: asr engine type: inference; tts engine type: inference; device: cpu" | tee -a ./log/test_result.log
+((target_start_num+=1))
+StartService
+
+if [[ $start_num -eq $target_start_num && $flag == "normal" ]]; then
+    echo "Service started successfully." | tee -a ./log/test_result.log
+    ClientTest
+    echo "This round of testing is over." | tee -a ./log/test_result.log
+
+    GetTestResult inference cpu
+else
+    echo "Service failed to start, no client test."
+    target_start_num=$start_num
+
+fi
+
+kill -9 `cat pid`
+rm -rf pid
+sleep 2s
+echo "**************************************************************************************" | tee -a ./log/test_result.log
+
+echo "All tests completed." | tee -a ./log/test_result.log
+
+# show all the test results
+echo "***************** Here are all the test results ********************"
+cat ./log/test_result.log
+
+# Restore conf to be the same as demos/speech_server
+cp ../../../demos/speech_server/conf/ ./ -rf
\ No newline at end of file

From 933f879a2835446ebbd47f9c3b78c86b790931a8 Mon Sep 17 00:00:00 2001
From: lym0302
Date: Thu, 3 Mar 2022 20:43:13 +0800
Subject: [PATCH 40/45] add usage, test=doc

---
 tests/unit/server/test_server_client.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/unit/server/test_server_client.sh b/tests/unit/server/test_server_client.sh
index 8f6a1368..795a23e0 100644
--- a/tests/unit/server/test_server_client.sh
+++ b/tests/unit/server/test_server_client.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+# bash test_server_client.sh
 
 StartService(){
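A side note on the harness above: the pass/fail decision reduces to comparing the count of `200 OK` lines in the server log against the number of client calls made. A rough Python restatement of the core check in `GetTestResult`, for illustration only:

```python
# Hypothetical equivalent of the shell helper's core check.
def test_passed(log_path: str, expected_requests: int) -> bool:
    with open(log_path, errors="ignore") as f:
        ok_responses = sum("200 OK" in line for line in f)
    return ok_responses == expected_requests

# After one round (two asr calls + two tts calls):
# test_passed("log/server.log", expected_requests=4)
```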
From 4eb780ad2b1ed91c4e170699b742cc10b894ec24 Mon Sep 17 00:00:00 2001
From: KP <109694228@qq.com>
Date: Fri, 4 Mar 2022 11:28:14 +0800
Subject: [PATCH 41/45] Add reference and doc string.

---
 paddleaudio/paddleaudio/compliance/kaldi.py |   1 +
 paddleaudio/paddleaudio/features/layers.py  | 116 +++++++++++++-----
 .../paddleaudio/functional/functional.py    |  13 +-
 3 files changed, 94 insertions(+), 36 deletions(-)

diff --git a/paddleaudio/paddleaudio/compliance/kaldi.py b/paddleaudio/paddleaudio/compliance/kaldi.py
index 35d7072c..8cb9b666 100644
--- a/paddleaudio/paddleaudio/compliance/kaldi.py
+++ b/paddleaudio/paddleaudio/compliance/kaldi.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# Modified from torchaudio(https://github.com/pytorch/audio)
 import math
 from typing import Tuple
 
diff --git a/paddleaudio/paddleaudio/features/layers.py b/paddleaudio/paddleaudio/features/layers.py
index 16fa0081..4a2c1673 100644
--- a/paddleaudio/paddleaudio/features/layers.py
+++ b/paddleaudio/paddleaudio/features/layers.py
@@ -44,22 +44,22 @@ class Spectrogram(nn.Layer):
     The spectrogram is defined as the complex norm of the short-time Fourier transformation.
     Parameters:
-        n_fft(int): the number of frequency components of the discrete Fourier transform.
+        n_fft (int): the number of frequency components of the discrete Fourier transform.
             The default value is 2048,
-        hop_length(int|None): the hop length of the short time FFT. If None, it is set to win_length//4.
+        hop_length (int|None): the hop length of the short time FFT. If None, it is set to win_length//4.
            The default value is None.
        win_length: the window length of the short time FFT. If None, it is set to same as n_fft.
            The default value is None.
-        window(str): the name of the window function applied to the single before the Fourier transform.
+        window (str): the name of the window function applied to the signal before the Fourier transform.
            The following window names are supported: 'hamming','hann','kaiser','gaussian',
            'exponential','triang','bohman','blackman','cosine','tukey','taylor'.
            The default value is 'hann'
-        center(bool): if True, the signal is padded so that frame t is centered at x[t * hop_length].
+        center (bool): if True, the signal is padded so that frame t is centered at x[t * hop_length].
            If False, frame t begins at x[t * hop_length]
            The default value is True
-        pad_mode(str): the mode to pad the signal if necessary. The supported modes are 'reflect'
+        pad_mode (str): the mode to pad the signal if necessary. The supported modes are 'reflect'
            and 'constant'.
            The default value is 'reflect'.
-        dtype(str): the data type of input and window.
+        dtype (str): the data type of input and window.
     Notes:
        The Spectrogram transform relies on STFT transform to compute the spectrogram.
        By default, the weights are not learnable. To fine-tune the Fourier coefficients,
@@ -190,39 +190,39 @@ class LogMelSpectrogram(nn.Layer):
     """Compute log-mel-spectrogram(also known as LogFBank) feature of a given signal,
        typically an audio waveform.
     Parameters:
-        sr(int): the audio sample rate.
+        sr (int): the audio sample rate.
            The default value is 22050.
-        n_fft(int): the number of frequency components of the discrete Fourier transform.
+        n_fft (int): the number of frequency components of the discrete Fourier transform.
            The default value is 2048,
-        hop_length(int|None): the hop length of the short time FFT. If None, it is set to win_length//4.
+        hop_length (int|None): the hop length of the short time FFT. If None, it is set to win_length//4.
            The default value is None.
        win_length: the window length of the short time FFT. If None, it is set to same as n_fft.
            The default value is None.
-        window(str): the name of the window function applied to the single before the Fourier transform.
+        window (str): the name of the window function applied to the signal before the Fourier transform.
            The following window names are supported: 'hamming','hann','kaiser','gaussian',
            'exponential','triang','bohman','blackman','cosine','tukey','taylor'.
            The default value is 'hann'
-        center(bool): if True, the signal is padded so that frame t is centered at x[t * hop_length].
+        center (bool): if True, the signal is padded so that frame t is centered at x[t * hop_length].
            If False, frame t begins at x[t * hop_length]
            The default value is True
-        pad_mode(str): the mode to pad the signal if necessary. The supported modes are 'reflect'
+        pad_mode (str): the mode to pad the signal if necessary. The supported modes are 'reflect'
            and 'constant'.
            The default value is 'reflect'.
-        n_mels(int): the mel bins.
-        f_min(float): the lower cut-off frequency, below which the filter response is zero.
-        f_max(float): the upper cut-off frequency, above which the filter response is zeros.
-        ref_value(float): the reference value. If smaller than 1.0, the db level
-        htk(bool): whether to use HTK formula in computing fbank matrix.
-        norm(str|float): the normalization type in computing fbank matrix. Slaney-style is used by default.
+        n_mels (int): the mel bins.
+        f_min (float): the lower cut-off frequency, below which the filter response is zero.
+        f_max (float): the upper cut-off frequency, above which the filter response is zeros.
+        htk (bool): whether to use HTK formula in computing fbank matrix.
+        norm (str|float): the normalization type in computing fbank matrix. Slaney-style is used by default.
            You can specify norm=1.0/2.0 to use customized p-norm normalization.
-        dtype(str): the datatype of fbank matrix used in the transform. Use float64 to increase numerical
-            accuracy. Note that the final transform will be conducted in float32 regardless of dtype of fbank matrix.
-        amin(float): the minimum value of input magnitude, below which the input of the signal will be pulled up accordingly.
-            Otherwise, the db level is pushed down.
-            magnitude is clipped(to amin). For numerical stability, set amin to a larger value,
-            e.g., 1e-3.
+        ref_value (float): the reference value. If smaller than 1.0, the db level
+            of the signal will be pulled up accordingly. Otherwise, the db level
+            is pushed down.
+        amin (float): the minimum value of input magnitude, below which the input
+            magnitude is clipped(to amin). For numerical stability, set amin to a larger value,
+            e.g., 1e-3.
-        top_db(float): the maximum db value of resulting spectrum, above which the
+        top_db (float): the maximum db value of resulting spectrum, above which the
            spectrum is clipped(to top_db).
+        dtype (str): the datatype of fbank matrix used in the transform. Use float64 to increase numerical
+            accuracy. Note that the final transform will be conducted in float32 regardless of dtype of fbank matrix.
     """
     super(LogMelSpectrogram, self).__init__()
@@ -260,24 +260,80 @@ class MFCC(nn.Layer):
     def __init__(self,
                  sr: int=22050,
                  n_mfcc: int=40,
-                 norm: str='ortho',
-                 dtype: str=paddle.float32,
-                 **kwargs):
+                 n_fft: int=512,
+                 hop_length: Optional[int]=None,
+                 win_length: Optional[int]=None,
+                 window: str='hann',
+                 center: bool=True,
+                 pad_mode: str='reflect',
+                 n_mels: int=64,
+                 f_min: float=50.0,
+                 f_max: Optional[float]=None,
+                 htk: bool=False,
+                 norm: Union[str, float]='slaney',
+                 ref_value: float=1.0,
+                 amin: float=1e-10,
+                 top_db: Optional[float]=None,
+                 dtype: str=paddle.float32):
         """Compute mel frequency cepstral coefficients(MFCCs) feature of given waveforms.
 
         Parameters:
            sr(int): the audio sample rate.
                The default value is 22050.
            n_mfcc (int, optional): Number of cepstra in MFCC. Defaults to 40.
-            norm(str|float): the normalization type in computing fbank matrix. Slaney-style is used by default.
+            n_fft (int): the number of frequency components of the discrete Fourier transform.
+                The default value is 512,
+            hop_length (int|None): the hop length of the short time FFT. If None, it is set to win_length//4.
+                The default value is None.
+            win_length: the window length of the short time FFT. If None, it is set to same as n_fft.
+                The default value is None.
+            window (str): the name of the window function applied to the signal before the Fourier transform.
+                The following window names are supported: 'hamming','hann','kaiser','gaussian',
+                'exponential','triang','bohman','blackman','cosine','tukey','taylor'.
+                The default value is 'hann'
+            center (bool): if True, the signal is padded so that frame t is centered at x[t * hop_length].
+                If False, frame t begins at x[t * hop_length]
+                The default value is True
+            pad_mode (str): the mode to pad the signal if necessary. The supported modes are 'reflect'
+                and 'constant'.
+                The default value is 'reflect'.
+            n_mels (int): the mel bins.
+            f_min (float): the lower cut-off frequency, below which the filter response is zero.
+            f_max (float): the upper cut-off frequency, above which the filter response is zeros.
+            htk (bool): whether to use HTK formula in computing fbank matrix.
+            norm (str|float): the normalization type in computing fbank matrix. Slaney-style is used by default.
+                You can specify norm=1.0/2.0 to use customized p-norm normalization.
+            ref_value (float): the reference value. If smaller than 1.0, the db level
+                of the signal will be pulled up accordingly. Otherwise, the db level
+                is pushed down.
+            amin (float): the minimum value of input magnitude, below which the input
+                magnitude is clipped(to amin). For numerical stability, set amin to a larger value,
+                e.g., 1e-3.
+            top_db (float): the maximum db value of resulting spectrum, above which the
+                spectrum is clipped(to top_db).
+            dtype (str): the datatype of fbank matrix used in the transform. Use float64 to increase numerical
+                accuracy. Note that the final transform will be conducted in float32 regardless of dtype of fbank matrix.
         """
         super(MFCC, self).__init__()
-        self._log_melspectrogram = LogMelSpectrogram(sr=sr, **kwargs)
-        self.dct_matrix = create_dct(
-            n_mfcc=n_mfcc, n_mels=self._log_melspectrogram.n_mels, norm=norm)
+        assert n_mfcc <= n_mels, 'n_mfcc cannot be larger than n_mels: %d vs %d' % (
+            n_mfcc, n_mels)
+        self._log_melspectrogram = LogMelSpectrogram(
+            sr=sr,
+            n_fft=n_fft,
+            hop_length=hop_length,
+            win_length=win_length,
+            window=window,
+            center=center,
+            pad_mode=pad_mode,
+            n_mels=n_mels,
+            f_min=f_min,
+            f_max=f_max,
+            htk=htk,
+            norm=norm,
+            ref_value=ref_value,
+            amin=amin,
+            top_db=top_db,
+            dtype=dtype)
+        self.dct_matrix = create_dct(n_mfcc=n_mfcc, n_mels=n_mels, dtype=dtype)
         self.register_buffer('dct_matrix', self.dct_matrix)
 
     def forward(self, x):
diff --git a/paddleaudio/paddleaudio/functional/functional.py b/paddleaudio/paddleaudio/functional/functional.py
index c07f14fd..c5ab3045 100644
--- a/paddleaudio/paddleaudio/functional/functional.py
+++ b/paddleaudio/paddleaudio/functional/functional.py
@@ -242,14 +242,15 @@ def power_to_db(magnitude: paddle.Tensor,
 def create_dct(n_mfcc: int,
                n_mels: int,
                norm: Optional[str]='ortho',
-               dtype: Optional[str]=paddle.float32):
-    """[summary]
+               dtype: Optional[str]=paddle.float32) -> paddle.Tensor:
+    """Create a discrete cosine transform (DCT) matrix.
+
     Parameters:
-        n_mfcc (int): [description]
-        n_mels (int): [description]
-        norm (str, optional): [description]. Defaults to 'ortho'.
+        n_mfcc (int): Number of mel frequency cepstral coefficients.
+        n_mels (int): Number of mel filterbanks.
+        norm (str, optional): Normalization type. Defaults to 'ortho'.
     Returns:
-        [type]: [description]
+        Tensor: The DCT matrix with shape (n_mels, n_mfcc).
     """
     n = paddle.arange(n_mels, dtype=dtype)
     k = paddle.arange(n_mfcc, dtype=dtype).unsqueeze(1)

From 0bb9c3eaf2500c60d13c56c8069877ef1c504b3e Mon Sep 17 00:00:00 2001
From: TianYuan
Date: Fri, 4 Mar 2022 14:37:58 +0800
Subject: [PATCH 42/45] Update README_cn.md

---
 demos/speech_server/README_cn.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/demos/speech_server/README_cn.md b/demos/speech_server/README_cn.md
index 05884fbd..14f25e81 100644
--- a/demos/speech_server/README_cn.md
+++ b/demos/speech_server/README_cn.md
@@ -86,7 +86,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee
 ```
 ### 4.
ASR客户端使用方法 -**注意:**初次使用客户端时响应时间会略长 +**注意:** 初次使用客户端时响应时间会略长 - 命令行 (推荐使用) ``` paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input ./zh.wav From a3b789512a73568829a3171a1f31bb5aa42a2b65 Mon Sep 17 00:00:00 2001 From: TianYuan Date: Fri, 4 Mar 2022 14:38:39 +0800 Subject: [PATCH 43/45] Update README_cn.md --- demos/speech_server/README_cn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demos/speech_server/README_cn.md b/demos/speech_server/README_cn.md index 14f25e81..e4e50c0b 100644 --- a/demos/speech_server/README_cn.md +++ b/demos/speech_server/README_cn.md @@ -135,7 +135,7 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee ``` ### 5. TTS客户端使用方法 -**注意:**初次使用客户端时响应时间会略长 +**注意:** 初次使用客户端时响应时间会略长 ```bash paddlespeech_client tts --server_ip 127.0.0.1 --port 8090 --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.wav ``` From 10ab7aabfed63ef431f66f301630cd61e828194c Mon Sep 17 00:00:00 2001 From: TianYuan Date: Fri, 4 Mar 2022 14:41:39 +0800 Subject: [PATCH 44/45] Update README_cn.md --- demos/speech_server/README_cn.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/demos/speech_server/README_cn.md b/demos/speech_server/README_cn.md index e4e50c0b..da05b686 100644 --- a/demos/speech_server/README_cn.md +++ b/demos/speech_server/README_cn.md @@ -136,9 +136,11 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespee ### 5. TTS客户端使用方法 **注意:** 初次使用客户端时响应时间会略长 - ```bash - paddlespeech_client tts --server_ip 127.0.0.1 --port 8090 --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.wav - ``` +- 命令行 (推荐使用) + + ```bash + paddlespeech_client tts --server_ip 127.0.0.1 --port 8090 --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.wav + ``` 使用帮助: ```bash From 3b304544f6187b91368c66e5a5b16840f69d175c Mon Sep 17 00:00:00 2001 From: lym0302 Date: Mon, 7 Mar 2022 18:19:17 +0800 Subject: [PATCH 45/45] modify yaml, test=doc --- demos/speech_server/README.md | 17 +-- demos/speech_server/README_cn.md | 17 +-- demos/speech_server/conf/application.yaml | 120 +++++++++++++++--- demos/speech_server/conf/asr/asr.yaml | 8 -- demos/speech_server/conf/asr/asr_pd.yaml | 26 ---- demos/speech_server/conf/tts/tts.yaml | 32 ----- demos/speech_server/conf/tts/tts_pd.yaml | 42 ------ demos/speech_server/server.sh | 2 +- paddlespeech/server/bin/main.py | 2 +- .../server/bin/paddlespeech_server.py | 2 +- paddlespeech/server/conf/application.yaml | 120 +++++++++++++++--- paddlespeech/server/conf/asr/asr.yaml | 8 -- paddlespeech/server/conf/asr/asr_pd.yaml | 26 ---- paddlespeech/server/conf/tts/tts.yaml | 32 ----- paddlespeech/server/conf/tts/tts_pd.yaml | 42 ------ .../engine/asr/paddleinference/asr_engine.py | 5 +- .../server/engine/asr/python/asr_engine.py | 6 +- paddlespeech/server/engine/engine_pool.py | 10 +- .../engine/tts/paddleinference/tts_engine.py | 5 +- .../server/engine/tts/python/tts_engine.py | 5 +- tests/unit/server/change_yaml.py | 109 ++++++++-------- tests/unit/server/conf/application.yaml | 120 +++++++++++++++--- tests/unit/server/conf/asr/asr.yaml | 8 -- tests/unit/server/conf/asr/asr_pd.yaml | 26 ---- tests/unit/server/conf/tts/tts.yaml | 32 ----- tests/unit/server/conf/tts/tts_pd.yaml | 42 ------ tests/unit/server/test_server_client.sh | 13 +- 27 files changed, 385 insertions(+), 492 deletions(-) delete mode 100644 demos/speech_server/conf/asr/asr.yaml delete mode 100644 demos/speech_server/conf/asr/asr_pd.yaml delete mode 100644 demos/speech_server/conf/tts/tts.yaml delete mode 100644 
demos/speech_server/conf/tts/tts_pd.yaml
 delete mode 100644 paddlespeech/server/conf/asr/asr.yaml
 delete mode 100644 paddlespeech/server/conf/asr/asr_pd.yaml
 delete mode 100644 paddlespeech/server/conf/tts/tts.yaml
 delete mode 100644 paddlespeech/server/conf/tts/tts_pd.yaml
 delete mode 100644 tests/unit/server/conf/asr/asr.yaml
 delete mode 100644 tests/unit/server/conf/asr/asr_pd.yaml
 delete mode 100644 tests/unit/server/conf/tts/tts.yaml
 delete mode 100644 tests/unit/server/conf/tts/tts_pd.yaml

diff --git a/demos/speech_server/README.md b/demos/speech_server/README.md
index 515abaf6..a2f6f221 100644
--- a/demos/speech_server/README.md
+++ b/demos/speech_server/README.md
@@ -11,21 +11,14 @@ This demo is an implementation of starting the voice service and accessing the s
 see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).
 It is recommended to use **paddlepaddle 2.2.1** or above.
-You can choose one way from easy, medium and hard to install paddlespeech.
+You can choose one way from medium and hard to install paddlespeech.
 
 ### 2. Prepare config File
-The configuration file contains the service-related configuration files and the model configuration related to the voice tasks contained in the service. They are all under the `conf` folder.
+The configuration file can be found in `conf/application.yaml`.
+Among them, `engine_list` indicates the speech engine that will be included in the service to be started, in the format of <speech task>_<engine type>.
+At present, the speech tasks integrated by the service include: asr (speech recognition) and tts (speech synthesis).
+Currently the engine type supports two forms: python and inference (Paddle Inference).
 
-**Note: The configuration of `engine_backend` in `application.yaml` represents all speech tasks included in the started service.**
-If the service you want to start contains only a certain speech task, then you need to comment out the speech tasks that do not need to be included. For example, if you only want to use the speech recognition (ASR) service, then you can comment out the speech synthesis (TTS) service, as in the following example:
-```bash
-engine_backend:
-    asr: 'conf/asr/asr.yaml'
-    #tts: 'conf/tts/tts.yaml'
-```
-
-**Note: The configuration file of `engine_backend` in `application.yaml` needs to match the configuration type of `engine_type`.**
-When the configuration file of `engine_backend` is `XXX.yaml`, the configuration type of `engine_type` needs to be set to `python`; when the configuration file of `engine_backend` is `XXX_pd.yaml`, the configuration type of `engine_type` needs to be set to `inference`.
 
 The input of ASR client demo should be a WAV file(`.wav`), and the sample rate must be the same as the model.
diff --git a/demos/speech_server/README_cn.md b/demos/speech_server/README_cn.md
index da05b686..762248a1 100644
--- a/demos/speech_server/README_cn.md
+++ b/demos/speech_server/README_cn.md
@@ -11,20 +11,15 @@
 请看 [安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).
 推荐使用 **paddlepaddle 2.2.1** 或以上版本。
-你可以从 easy,medium,hard 三种方式中选择一种方式安装 PaddleSpeech。
+你可以从 medium,hard 两种方式中选择一种方式安装 PaddleSpeech。
 
 ### 2. 准备配置文件
-配置文件包含服务相关的配置文件和服务中包含的语音任务相关的模型配置。 它们都在 `conf` 文件夹下。
-**注意:`application.yaml` 中 `engine_backend` 的配置表示启动的服务中包含的所有语音任务。**
-如果你想启动的服务中只包含某项语音任务,那么你需要注释掉不需要包含的语音任务。例如你只想使用语音识别(ASR)服务,那么你可以将语音合成(TTS)服务注释掉,如下示例:
-```bash
-engine_backend:
-    asr: 'conf/asr/asr.yaml'
-    #tts: 'conf/tts/tts.yaml'
-```
-**注意:`application.yaml` 中 `engine_backend` 的配置文件需要和 `engine_type` 的配置类型匹配。**
-当`engine_backend` 的配置文件为`XXX.yaml`时,需要设置`engine_type`的配置类型为`python`;当`engine_backend` 的配置文件为`XXX_pd.yaml`时,需要设置`engine_type`的配置类型为`inference`;
+配置文件可参见 `conf/application.yaml`。
+其中,`engine_list` 表示即将启动的服务将会包含的语音引擎,格式为 <语音任务>_<引擎类型>。
+目前服务集成的语音任务有:asr(语音识别)、tts(语音合成)。
+目前引擎类型支持两种形式:python 及 inference (Paddle Inference)。
 
 这个 ASR client 的输入应该是一个 WAV 文件(`.wav`),并且采样率必须与模型的采样率相同。
diff --git a/demos/speech_server/conf/application.yaml b/demos/speech_server/conf/application.yaml
index aba33a51..6048450b 100644
--- a/demos/speech_server/conf/application.yaml
+++ b/demos/speech_server/conf/application.yaml
@@ -1,27 +1,107 @@
 # This is the parameter configuration file for PaddleSpeech Serving.
 
-##################################################################
-#                       SERVER SETTING                           #
-##################################################################
+#################################################################################
+#                                 SERVER SETTING                                #
+#################################################################################
 host: 127.0.0.1
 port: 8090
 
-##################################################################
-#                        CONFIG FILE                             #
-##################################################################
-# add engine backend type (Options: asr, tts) and config file here.
-# Adding a speech task to engine_backend means starting the service.
-engine_backend:
-    asr: 'conf/asr/asr.yaml'
-    tts: 'conf/tts/tts.yaml'
-
-# The engine_type of speech task needs to keep the same type as the config file of speech task.
-# E.g: The engine_type of asr is 'python', the engine_backend of asr is 'XX/asr.yaml'
-# E.g: The engine_type of asr is 'inference', the engine_backend of asr is 'XX/asr_pd.yaml'
-#
-# add engine type (Options: python, inference)
-engine_type:
-    asr: 'python'
-    tts: 'python'
+# The task format in the engine_list is: <speech task>_<engine type>
+# task choices = ['asr_python', 'asr_inference', 'tts_python', 'tts_inference']
+engine_list: ['asr_python', 'tts_python']
+
+
+#################################################################################
+#                                 ENGINE CONFIG                                 #
+#################################################################################
+################### speech task: asr; engine_type: python #######################
+asr_python:
+    model: 'conformer_wenetspeech'
+    lang: 'zh'
+    sample_rate: 16000
+    cfg_path:  # [optional]
+    ckpt_path:  # [optional]
+    decode_method: 'attention_rescoring'
+    force_yes: True
+    device:  # set 'gpu:id' or 'cpu'
+
+
+################### speech task: asr; engine_type: inference ####################
+asr_inference:
+    # model_type choices=['deepspeech2offline_aishell']
+    model_type: 'deepspeech2offline_aishell'
+    am_model:   # the pdmodel file of am static model [optional]
+    am_params:  # the pdiparams file of am static model [optional]
+    lang: 'zh'
+    sample_rate: 16000
+    cfg_path:
+    decode_method:
+    force_yes: True
+
+    am_predictor_conf:
+        device:  # set 'gpu:id' or 'cpu'
+        switch_ir_optim: True
+        glog_info: False  # True -> print glog
+        summary: True  # False -> do not show predictor config
+
+
+################### speech task: tts; engine_type: python #######################
+tts_python:
+    # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc',
+    #                              'fastspeech2_ljspeech', 'fastspeech2_aishell3',
+    #                              'fastspeech2_vctk']
+    am: 'fastspeech2_csmsc'
+    am_config:
+    am_ckpt:
+    am_stat:
+    phones_dict:
+    tones_dict:
+    speaker_dict:
+    spk_id: 0
+
+    # voc (vocoder) choices=['pwgan_csmsc', 'pwgan_ljspeech', 'pwgan_aishell3',
+    #                        'pwgan_vctk', 'mb_melgan_csmsc']
+    voc: 'pwgan_csmsc'
+    voc_config:
+    voc_ckpt:
+    voc_stat:
+
+    # others
+    lang: 'zh'
+    device:  # set 'gpu:id' or 'cpu'
+
+
+################### speech task: tts; engine_type: inference ####################
+tts_inference:
+    # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc']
+    am: 'fastspeech2_csmsc'
+    am_model:   # the pdmodel file of your am static model (XX.pdmodel)
+    am_params:  # the pdiparams file of your am static model (XX.pdiparams)
+    am_sample_rate: 24000
+    phones_dict:
+    tones_dict:
+    speaker_dict:
+    spk_id: 0
+
+    am_predictor_conf:
+        device:  # set 'gpu:id' or 'cpu'
+        switch_ir_optim: True
+        glog_info: False  # True -> print glog
+        summary: True  # False -> do not show predictor config
+
+    # voc (vocoder) choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc']
+    voc: 'pwgan_csmsc'
+    voc_model:   # the pdmodel file of your vocoder static model (XX.pdmodel)
+    voc_params:  # the pdiparams file of your vocoder static model (XX.pdiparams)
+    voc_sample_rate: 24000
+
+    voc_predictor_conf:
+        device:  # set 'gpu:id' or 'cpu'
+        switch_ir_optim: True
+        glog_info: False  # True -> print glog
+        summary: True  # False -> do not show predictor config
+
+    # others
+    lang: 'zh'
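The new flat layout above is consumed by `init_engine_pool` (changed later in this patch): each `<speech task>_<engine type>` entry in `engine_list` both names an engine and selects its own top-level section of the same YAML file. A minimal editorial sketch of that lookup, assuming PyYAML; not part of the patch itself:

```python
import yaml

with open("conf/application.yaml") as f:
    config = yaml.safe_load(f)

for engine_and_type in config["engine_list"]:      # e.g. 'asr_python'
    task, engine_type = engine_and_type.split("_", 1)
    engine_conf = config[engine_and_type]           # e.g. the asr_python: block
    print(task, engine_type, sorted(engine_conf))
```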
diff --git a/demos/speech_server/conf/asr/asr.yaml b/demos/speech_server/conf/asr/asr.yaml
deleted file mode 100644
index a6743b77..00000000
--- a/demos/speech_server/conf/asr/asr.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-model: 'conformer_wenetspeech'
-lang: 'zh'
-sample_rate: 16000
-cfg_path:  # [optional]
-ckpt_path:  # [optional]
-decode_method: 'attention_rescoring'
-force_yes: True
-device:  # set 'gpu:id' or 'cpu'
diff --git a/demos/speech_server/conf/asr/asr_pd.yaml b/demos/speech_server/conf/asr/asr_pd.yaml
deleted file mode 100644
index 4c415ac7..00000000
--- a/demos/speech_server/conf/asr/asr_pd.yaml
+++ /dev/null
@@ -1,26 +0,0 @@
-# This is the parameter configuration file for ASR server.
-# These are the static models that support paddle inference.
-
-##################################################################
-#                     ACOUSTIC MODEL SETTING                     #
-# am choices=['deepspeech2offline_aishell'] TODO
-##################################################################
-model_type: 'deepspeech2offline_aishell'
-am_model:   # the pdmodel file of am static model [optional]
-am_params:  # the pdiparams file of am static model [optional]
-lang: 'zh'
-sample_rate: 16000
-cfg_path:
-decode_method:
-force_yes: True
-
-am_predictor_conf:
-    device:  # set 'gpu:id' or 'cpu'
-    switch_ir_optim: True
-    glog_info: False  # True -> print glog
-    summary: True  # False -> do not show predictor config
-
-
-##################################################################
-#                            OTHERS                              #
-##################################################################
diff --git a/demos/speech_server/conf/tts/tts.yaml b/demos/speech_server/conf/tts/tts.yaml
deleted file mode 100644
index 19207f0b..00000000
--- a/demos/speech_server/conf/tts/tts.yaml
+++ /dev/null
@@ -1,32 +0,0 @@
-# This is the parameter configuration file for TTS server.
-
-##################################################################
-#                     ACOUSTIC MODEL SETTING                     #
-# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc',
-#             'fastspeech2_ljspeech', 'fastspeech2_aishell3',
-#             'fastspeech2_vctk']
-##################################################################
-am: 'fastspeech2_csmsc'
-am_config:
-am_ckpt:
-am_stat:
-phones_dict:
-tones_dict:
-speaker_dict:
-spk_id: 0
-
-##################################################################
-#                         VOCODER SETTING                        #
-# voc choices=['pwgan_csmsc', 'pwgan_ljspeech', 'pwgan_aishell3',
-#              'pwgan_vctk', 'mb_melgan_csmsc']
-##################################################################
-voc: 'pwgan_csmsc'
-voc_config:
-voc_ckpt:
-voc_stat:
-
-##################################################################
-#                            OTHERS                              #
-##################################################################
-lang: 'zh'
-device:  # set 'gpu:id' or 'cpu'
diff --git a/demos/speech_server/conf/tts/tts_pd.yaml b/demos/speech_server/conf/tts/tts_pd.yaml
deleted file mode 100644
index e27b9665..00000000
--- a/demos/speech_server/conf/tts/tts_pd.yaml
+++ /dev/null
@@ -1,42 +0,0 @@
-# This is the parameter configuration file for TTS server.
-# These are the static models that support paddle inference.
- -################################################################## -# ACOUSTIC MODEL SETTING # -# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc'] -################################################################## -am: 'fastspeech2_csmsc' -am_model: # the pdmodel file of your am static model (XX.pdmodel) -am_params: # the pdiparams file of your am static model (XX.pdipparams) -am_sample_rate: 24000 -phones_dict: -tones_dict: -speaker_dict: -spk_id: 0 - -am_predictor_conf: - device: # set 'gpu:id' or 'cpu' - switch_ir_optim: True - glog_info: False # True -> print glog - summary: True # False -> do not show predictor config - - -################################################################## -# VOCODER SETTING # -# voc choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc'] -################################################################## -voc: 'pwgan_csmsc' -voc_model: # the pdmodel file of your vocoder static model (XX.pdmodel) -voc_params: # the pdiparams file of your vocoder static model (XX.pdipparams) -voc_sample_rate: 24000 - -voc_predictor_conf: - device: # set 'gpu:id' or 'cpu' - switch_ir_optim: True - glog_info: False # True -> print glog - summary: True # False -> do not show predictor config - -################################################################## -# OTHERS # -################################################################## -lang: 'zh' diff --git a/demos/speech_server/server.sh b/demos/speech_server/server.sh index d9367ec0..e5961286 100644 --- a/demos/speech_server/server.sh +++ b/demos/speech_server/server.sh @@ -1,3 +1,3 @@ #!/bin/bash -paddlespeech_server start --config_file ./conf/application.yaml \ No newline at end of file +paddlespeech_server start --config_file ./conf/application.yaml diff --git a/paddlespeech/server/bin/main.py b/paddlespeech/server/bin/main.py index 360d295e..de528299 100644 --- a/paddlespeech/server/bin/main.py +++ b/paddlespeech/server/bin/main.py @@ -34,7 +34,7 @@ def init(config): bool: """ # init api - api_list = list(config.engine_backend) + api_list = list(engine.split("_")[0] for engine in config.engine_list) api_router = setup_router(api_list) app.include_router(api_router) diff --git a/paddlespeech/server/bin/paddlespeech_server.py b/paddlespeech/server/bin/paddlespeech_server.py index 21fc5c65..3d71f091 100644 --- a/paddlespeech/server/bin/paddlespeech_server.py +++ b/paddlespeech/server/bin/paddlespeech_server.py @@ -62,7 +62,7 @@ class ServerExecutor(BaseExecutor): bool: """ # init api - api_list = list(config.engine_backend) + api_list = list(engine.split("_")[0] for engine in config.engine_list) api_router = setup_router(api_list) app.include_router(api_router) diff --git a/paddlespeech/server/conf/application.yaml b/paddlespeech/server/conf/application.yaml index aba33a51..6048450b 100644 --- a/paddlespeech/server/conf/application.yaml +++ b/paddlespeech/server/conf/application.yaml @@ -1,27 +1,107 @@ # This is the parameter configuration file for PaddleSpeech Serving. 
-##################################################################
-#                       SERVER SETTING                           #
-##################################################################
+#################################################################################
+#                                 SERVER SETTING                                #
+#################################################################################
 host: 127.0.0.1
 port: 8090
 
-##################################################################
-#                        CONFIG FILE                             #
-##################################################################
-# add engine backend type (Options: asr, tts) and config file here.
-# Adding a speech task to engine_backend means starting the service.
-engine_backend:
-    asr: 'conf/asr/asr.yaml'
-    tts: 'conf/tts/tts.yaml'
-
-# The engine_type of speech task needs to keep the same type as the config file of speech task.
-# E.g: The engine_type of asr is 'python', the engine_backend of asr is 'XX/asr.yaml'
-# E.g: The engine_type of asr is 'inference', the engine_backend of asr is 'XX/asr_pd.yaml'
-#
-# add engine type (Options: python, inference)
-engine_type:
-    asr: 'python'
-    tts: 'python'
+# The task format in the engine_list is: <speech task>_<engine type>
+# task choices = ['asr_python', 'asr_inference', 'tts_python', 'tts_inference']
+engine_list: ['asr_python', 'tts_python']
+
+
+#################################################################################
+#                                 ENGINE CONFIG                                 #
+#################################################################################
+################### speech task: asr; engine_type: python #######################
+asr_python:
+    model: 'conformer_wenetspeech'
+    lang: 'zh'
+    sample_rate: 16000
+    cfg_path:  # [optional]
+    ckpt_path:  # [optional]
+    decode_method: 'attention_rescoring'
+    force_yes: True
+    device:  # set 'gpu:id' or 'cpu'
+
+
+################### speech task: asr; engine_type: inference ####################
+asr_inference:
+    # model_type choices=['deepspeech2offline_aishell']
+    model_type: 'deepspeech2offline_aishell'
+    am_model:   # the pdmodel file of am static model [optional]
+    am_params:  # the pdiparams file of am static model [optional]
+    lang: 'zh'
+    sample_rate: 16000
+    cfg_path:
+    decode_method:
+    force_yes: True
+
+    am_predictor_conf:
+        device:  # set 'gpu:id' or 'cpu'
+        switch_ir_optim: True
+        glog_info: False  # True -> print glog
+        summary: True  # False -> do not show predictor config
+
+
+################### speech task: tts; engine_type: python #######################
+tts_python:
+    # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc',
+    #                              'fastspeech2_ljspeech', 'fastspeech2_aishell3',
+    #                              'fastspeech2_vctk']
+    am: 'fastspeech2_csmsc'
+    am_config:
+    am_ckpt:
+    am_stat:
+    phones_dict:
+    tones_dict:
+    speaker_dict:
+    spk_id: 0
+
+    # voc (vocoder) choices=['pwgan_csmsc', 'pwgan_ljspeech', 'pwgan_aishell3',
+    #                        'pwgan_vctk', 'mb_melgan_csmsc']
+    voc: 'pwgan_csmsc'
+    voc_config:
+    voc_ckpt:
+    voc_stat:
+
+    # others
+    lang: 'zh'
+    device:  # set 'gpu:id' or 'cpu'
+
+
+################### speech task: tts; engine_type: inference ####################
+tts_inference:
+    # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc']
+    am: 'fastspeech2_csmsc'
+    am_model:   # the pdmodel file of your am static model (XX.pdmodel)
+    am_params:  # the pdiparams file of your am static model (XX.pdiparams)
+    am_sample_rate: 24000
+    phones_dict:
+    tones_dict:
+    speaker_dict:
+    spk_id: 0
+
+    am_predictor_conf:
+        device:  # set 'gpu:id' or 'cpu'
+        switch_ir_optim: True
+        glog_info: False  # True -> print glog
+        summary: True  # False -> do not show predictor config
+
+    # voc (vocoder) choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc']
+    voc: 'pwgan_csmsc'
+    voc_model:   # the pdmodel file of your vocoder static model (XX.pdmodel)
+    voc_params:  # the pdiparams file of your vocoder static model (XX.pdiparams)
+    voc_sample_rate: 24000
+
+    voc_predictor_conf:
+        device:  # set 'gpu:id' or 'cpu'
+        switch_ir_optim: True
+        glog_info: False  # True -> print glog
+        summary: True  # False -> do not show predictor config
+
+    # others
+    lang: 'zh'
diff --git a/paddlespeech/server/conf/asr/asr.yaml b/paddlespeech/server/conf/asr/asr.yaml
deleted file mode 100644
index a6743b77..00000000
--- a/paddlespeech/server/conf/asr/asr.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-model: 'conformer_wenetspeech'
-lang: 'zh'
-sample_rate: 16000
-cfg_path:  # [optional]
-ckpt_path:  # [optional]
-decode_method: 'attention_rescoring'
-force_yes: True
-device:  # set 'gpu:id' or 'cpu'
diff --git a/paddlespeech/server/conf/asr/asr_pd.yaml b/paddlespeech/server/conf/asr/asr_pd.yaml
deleted file mode 100644
index 4c415ac7..00000000
--- a/paddlespeech/server/conf/asr/asr_pd.yaml
+++ /dev/null
@@ -1,26 +0,0 @@
-# This is the parameter configuration file for ASR server.
-# These are the static models that support paddle inference.
-
-##################################################################
-#                     ACOUSTIC MODEL SETTING                     #
-# am choices=['deepspeech2offline_aishell'] TODO
-##################################################################
-model_type: 'deepspeech2offline_aishell'
-am_model:   # the pdmodel file of am static model [optional]
-am_params:  # the pdiparams file of am static model [optional]
-lang: 'zh'
-sample_rate: 16000
-cfg_path:
-decode_method:
-force_yes: True
-
-am_predictor_conf:
-    device:  # set 'gpu:id' or 'cpu'
-    switch_ir_optim: True
-    glog_info: False  # True -> print glog
-    summary: True  # False -> do not show predictor config
-
-
-##################################################################
-#                            OTHERS                              #
-##################################################################
diff --git a/paddlespeech/server/conf/tts/tts.yaml b/paddlespeech/server/conf/tts/tts.yaml
deleted file mode 100644
index 19207f0b..00000000
--- a/paddlespeech/server/conf/tts/tts.yaml
+++ /dev/null
@@ -1,32 +0,0 @@
-# This is the parameter configuration file for TTS server.
-
-##################################################################
-#                     ACOUSTIC MODEL SETTING                     #
-# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc',
-#             'fastspeech2_ljspeech', 'fastspeech2_aishell3',
-#             'fastspeech2_vctk']
-##################################################################
-am: 'fastspeech2_csmsc'
-am_config:
-am_ckpt:
-am_stat:
-phones_dict:
-tones_dict:
-speaker_dict:
-spk_id: 0
-
-##################################################################
-#                         VOCODER SETTING                        #
-# voc choices=['pwgan_csmsc', 'pwgan_ljspeech', 'pwgan_aishell3',
-#              'pwgan_vctk', 'mb_melgan_csmsc']
-##################################################################
-voc: 'pwgan_csmsc'
-voc_config:
-voc_ckpt:
-voc_stat:
-
-##################################################################
-#                            OTHERS                              #
-##################################################################
-lang: 'zh'
-device:  # set 'gpu:id' or 'cpu'
diff --git a/paddlespeech/server/conf/tts/tts_pd.yaml b/paddlespeech/server/conf/tts/tts_pd.yaml
deleted file mode 100644
index e27b9665..00000000
--- a/paddlespeech/server/conf/tts/tts_pd.yaml
+++ /dev/null
@@ -1,42 +0,0 @@
-# This is the parameter configuration file for TTS server.
-# These are the static models that support paddle inference. - -################################################################## -# ACOUSTIC MODEL SETTING # -# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc'] -################################################################## -am: 'fastspeech2_csmsc' -am_model: # the pdmodel file of your am static model (XX.pdmodel) -am_params: # the pdiparams file of your am static model (XX.pdipparams) -am_sample_rate: 24000 -phones_dict: -tones_dict: -speaker_dict: -spk_id: 0 - -am_predictor_conf: - device: # set 'gpu:id' or 'cpu' - switch_ir_optim: True - glog_info: False # True -> print glog - summary: True # False -> do not show predictor config - - -################################################################## -# VOCODER SETTING # -# voc choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc'] -################################################################## -voc: 'pwgan_csmsc' -voc_model: # the pdmodel file of your vocoder static model (XX.pdmodel) -voc_params: # the pdiparams file of your vocoder static model (XX.pdipparams) -voc_sample_rate: 24000 - -voc_predictor_conf: - device: # set 'gpu:id' or 'cpu' - switch_ir_optim: True - glog_info: False # True -> print glog - summary: True # False -> do not show predictor config - -################################################################## -# OTHERS # -################################################################## -lang: 'zh' diff --git a/paddlespeech/server/engine/asr/paddleinference/asr_engine.py b/paddlespeech/server/engine/asr/paddleinference/asr_engine.py index cb973e92..1925bf1d 100644 --- a/paddlespeech/server/engine/asr/paddleinference/asr_engine.py +++ b/paddlespeech/server/engine/asr/paddleinference/asr_engine.py @@ -26,7 +26,6 @@ from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer from paddlespeech.s2t.modules.ctc import CTCDecoder from paddlespeech.s2t.utils.utility import UpdateConfig from paddlespeech.server.engine.base_engine import BaseEngine -from paddlespeech.server.utils.config import get_config from paddlespeech.server.utils.paddle_predictor import init_predictor from paddlespeech.server.utils.paddle_predictor import run_model @@ -184,7 +183,7 @@ class ASREngine(BaseEngine): def __init__(self): super(ASREngine, self).__init__() - def init(self, config_file: str) -> bool: + def init(self, config: dict) -> bool: """init engine resource Args: @@ -196,7 +195,7 @@ class ASREngine(BaseEngine): self.input = None self.output = None self.executor = ASRServerExecutor() - self.config = get_config(config_file) + self.config = config self.executor._init_from_path( model_type=self.config.model_type, diff --git a/paddlespeech/server/engine/asr/python/asr_engine.py b/paddlespeech/server/engine/asr/python/asr_engine.py index 1e2c5cc2..e76c49a7 100644 --- a/paddlespeech/server/engine/asr/python/asr_engine.py +++ b/paddlespeech/server/engine/asr/python/asr_engine.py @@ -19,7 +19,6 @@ import paddle from paddlespeech.cli.asr.infer import ASRExecutor from paddlespeech.cli.log import logger from paddlespeech.server.engine.base_engine import BaseEngine -from paddlespeech.server.utils.config import get_config __all__ = ['ASREngine'] @@ -40,7 +39,7 @@ class ASREngine(BaseEngine): def __init__(self): super(ASREngine, self).__init__() - def init(self, config_file: str) -> bool: + def init(self, config: dict) -> bool: """init engine resource Args: @@ -52,8 +51,7 @@ class ASREngine(BaseEngine): self.input = None self.output = None self.executor = 
ASRServerExecutor() - - self.config = get_config(config_file) + self.config = config try: if self.config.device: self.device = self.config.device diff --git a/paddlespeech/server/engine/engine_pool.py b/paddlespeech/server/engine/engine_pool.py index f6a4d2aa..9de73567 100644 --- a/paddlespeech/server/engine/engine_pool.py +++ b/paddlespeech/server/engine/engine_pool.py @@ -28,11 +28,13 @@ def init_engine_pool(config) -> bool: """ Init engine pool """ global ENGINE_POOL - for engine in config.engine_backend: + + for engine_and_type in config.engine_list: + engine = engine_and_type.split("_")[0] + engine_type = engine_and_type.split("_")[1] ENGINE_POOL[engine] = EngineFactory.get_engine( - engine_name=engine, engine_type=config.engine_type[engine]) - if not ENGINE_POOL[engine].init( - config_file=config.engine_backend[engine]): + engine_name=engine, engine_type=engine_type) + if not ENGINE_POOL[engine].init(config=config[engine_and_type]): return False return True diff --git a/paddlespeech/server/engine/tts/paddleinference/tts_engine.py b/paddlespeech/server/engine/tts/paddleinference/tts_engine.py index 5955c1a2..1bbbe0ea 100644 --- a/paddlespeech/server/engine/tts/paddleinference/tts_engine.py +++ b/paddlespeech/server/engine/tts/paddleinference/tts_engine.py @@ -29,7 +29,6 @@ from paddlespeech.cli.utils import download_and_decompress from paddlespeech.cli.utils import MODEL_HOME from paddlespeech.server.engine.base_engine import BaseEngine from paddlespeech.server.utils.audio_process import change_speed -from paddlespeech.server.utils.config import get_config from paddlespeech.server.utils.errors import ErrorCode from paddlespeech.server.utils.exception import ServerBaseException from paddlespeech.server.utils.paddle_predictor import init_predictor @@ -357,11 +356,11 @@ class TTSEngine(BaseEngine): """ super(TTSEngine, self).__init__() - def init(self, config_file: str) -> bool: + def init(self, config: dict) -> bool: self.executor = TTSServerExecutor() try: - self.config = get_config(config_file) + self.config = config self.executor._init_from_path( am=self.config.am, am_model=self.config.am_model, diff --git a/paddlespeech/server/engine/tts/python/tts_engine.py b/paddlespeech/server/engine/tts/python/tts_engine.py index 7dd57669..8d6c7fd1 100644 --- a/paddlespeech/server/engine/tts/python/tts_engine.py +++ b/paddlespeech/server/engine/tts/python/tts_engine.py @@ -25,7 +25,6 @@ from paddlespeech.cli.log import logger from paddlespeech.cli.tts.infer import TTSExecutor from paddlespeech.server.engine.base_engine import BaseEngine from paddlespeech.server.utils.audio_process import change_speed -from paddlespeech.server.utils.config import get_config from paddlespeech.server.utils.errors import ErrorCode from paddlespeech.server.utils.exception import ServerBaseException @@ -50,11 +49,11 @@ class TTSEngine(BaseEngine): """ super(TTSEngine, self).__init__() - def init(self, config_file: str) -> bool: + def init(self, config: dict) -> bool: self.executor = TTSServerExecutor() try: - self.config = get_config(config_file) + self.config = config if self.config.device: self.device = self.config.device else: diff --git a/tests/unit/server/change_yaml.py b/tests/unit/server/change_yaml.py index 5a5d9ae0..1f063d8f 100644 --- a/tests/unit/server/change_yaml.py +++ b/tests/unit/server/change_yaml.py @@ -5,7 +5,7 @@ import os import yaml -def change_speech_yaml(yaml_name: str, device: str): +def change_device(yamlfile: str, engine: str, device: str): """Change the settings of the device under the 
 
     Args:
@@ -13,68 +13,55 @@
         cpu (bool): True means set device to "cpu"
         model_type (dict): change model type
     """
-    if "asr" in yaml_name:
-        dirpath = "./conf/asr/"
-    elif 'tts' in yaml_name:
-        dirpath = "./conf/tts/"
-    yamlfile = dirpath + yaml_name + ".yaml"
-    tmp_yamlfile = dirpath + yaml_name + "_tmp.yaml"
+    tmp_yamlfile = yamlfile.split(".yaml")[0] + "_tmp.yaml"
     os.system("cp %s %s" % (yamlfile, tmp_yamlfile))
 
+    if device == 'cpu':
+        set_device = 'cpu'
+    elif device == 'gpu':
+        set_device = 'gpu:0'
+    else:
+        print("Please set correct device: cpu or gpu.")
+        return
+
     with open(tmp_yamlfile) as f, open(yamlfile, "w+", encoding="utf-8") as fw:
         y = yaml.safe_load(f)
-        if device == 'cpu':
-            print("Set device: cpu")
-            if yaml_name == 'asr':
-                y['device'] = 'cpu'
-            elif yaml_name == 'asr_pd':
-                y['am_predictor_conf']['device'] = 'cpu'
-            elif yaml_name == 'tts':
-                y['device'] = 'cpu'
-            elif yaml_name == 'tts_pd':
-                y['am_predictor_conf']['device'] = 'cpu'
-                y['voc_predictor_conf']['device'] = 'cpu'
-        elif device == 'gpu':
-            print("Set device: gpu")
-            if yaml_name == 'asr':
-                y['device'] = 'gpu:0'
-            elif yaml_name == 'asr_pd':
-                y['am_predictor_conf']['device'] = 'gpu:0'
-            elif yaml_name == 'tts':
-                y['device'] = 'gpu:0'
-            elif yaml_name == 'tts_pd':
-                y['am_predictor_conf']['device'] = 'gpu:0'
-                y['voc_predictor_conf']['device'] = 'gpu:0'
+        if engine == 'asr_python' or engine == 'tts_python':
+            y[engine]['device'] = set_device
+        elif engine == 'asr_inference':
+            y[engine]['am_predictor_conf']['device'] = set_device
+        elif engine == 'tts_inference':
+            y[engine]['am_predictor_conf']['device'] = set_device
+            y[engine]['voc_predictor_conf']['device'] = set_device
         else:
-            print("Please set correct device: cpu or gpu.")
+            print(
+                "Please set correct engine: asr_python, tts_python, asr_inference, tts_inference."
+            )
 
-        print("The content of '%s': " % (yamlfile))
         print(yaml.dump(y, default_flow_style=False, sort_keys=False))
         yaml.dump(y, fw, allow_unicode=True)
     os.system("rm %s" % (tmp_yamlfile))
     print("Change %s successfully." % (yamlfile))
 
 
-def change_app_yaml(task: str, engine_type: str):
+def change_engine_type(yamlfile: str, engine_type: str):
     """Change the engine type and corresponding configuration file of the speech task in application.yaml
 
     Args:
         task (str): asr or tts
     """
-    yamlfile = "./conf/application.yaml"
-    tmp_yamlfile = "./conf/application_tmp.yaml"
+    tmp_yamlfile = yamlfile.split(".yaml")[0] + "_tmp.yaml"
     os.system("cp %s %s" % (yamlfile, tmp_yamlfile))
 
+    speech_task = engine_type.split("_")[0]
+
     with open(tmp_yamlfile) as f, open(yamlfile, "w+", encoding="utf-8") as fw:
         y = yaml.safe_load(f)
-        y['engine_type'][task] = engine_type
-        path_list = ["./conf/", task, "/", task]
-        if engine_type == 'python':
-            path_list.append(".yaml")
-
-        elif engine_type == 'inference':
-            path_list.append("_pd.yaml")
-        y['engine_backend'][task] = ''.join(path_list)
-        print("The content of './conf/application.yaml': ")
+        engine_list = y['engine_list']
+        for engine in engine_list[:]:  # iterate over a copy: remove() mutates the list
+            if speech_task in engine:
+                engine_list.remove(engine)
+        engine_list.append(engine_type)
+        y['engine_list'] = engine_list
         print(yaml.dump(y, default_flow_style=False, sort_keys=False))
         yaml.dump(y, fw, allow_unicode=True)
     os.system("rm %s" % (tmp_yamlfile))
@@ -83,32 +69,37 @@
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '--config_file',
+        type=str,
+        default='./conf/application.yaml',
+        help='server yaml file.')
     parser.add_argument(
         '--change_task',
         type=str,
         default=None,
        help='Change task',
         choices=[
-            'app-asr-python',
-            'app-asr-inference',
-            'app-tts-python',
-            'app-tts-inference',
-            'speech-asr-cpu',
-            'speech-asr-gpu',
-            'speech-asr_pd-cpu',
-            'speech-asr_pd-gpu',
-            'speech-tts-cpu',
-            'speech-tts-gpu',
-            'speech-tts_pd-cpu',
-            'speech-tts_pd-gpu',
+            'enginetype-asr_python',
+            'enginetype-asr_inference',
+            'enginetype-tts_python',
+            'enginetype-tts_inference',
+            'device-asr_python-cpu',
+            'device-asr_python-gpu',
+            'device-asr_inference-cpu',
+            'device-asr_inference-gpu',
+            'device-tts_python-cpu',
+            'device-tts_python-gpu',
+            'device-tts_inference-cpu',
+            'device-tts_inference-gpu',
         ],
         required=True)
     args = parser.parse_args()
 
     types = args.change_task.split("-")
-    if types[0] == "app":
-        change_app_yaml(types[1], types[2])
-    elif types[0] == "speech":
-        change_speech_yaml(types[1], types[2])
+    if types[0] == "enginetype":
+        change_engine_type(args.config_file, types[1])
+    elif types[0] == "device":
+        change_device(args.config_file, types[1], types[2])
     else:
         print("Error change task, please check change_task.")
diff --git a/tests/unit/server/conf/application.yaml b/tests/unit/server/conf/application.yaml
index aba33a51..6048450b 100644
--- a/tests/unit/server/conf/application.yaml
+++ b/tests/unit/server/conf/application.yaml
@@ -1,27 +1,107 @@
 # This is the parameter configuration file for PaddleSpeech Serving.
 
-##################################################################
-#                        SERVER SETTING                          #
-##################################################################
+#################################################################################
+#                               SERVER SETTING                                  #
+#################################################################################
 host: 127.0.0.1
 port: 8090
 
-##################################################################
-#                         CONFIG FILE                            #
-##################################################################
-# add engine backend type (Options: asr, tts) and config file here.
-# Adding a speech task to engine_backend means starting the service.
-engine_backend:
-    asr: 'conf/asr/asr.yaml'
-    tts: 'conf/tts/tts.yaml'
-
-# The engine_type of speech task needs to keep the same type as the config file of speech task.
-# E.g: The engine_type of asr is 'python', the engine_backend of asr is 'XX/asr.yaml'
-# E.g: The engine_type of asr is 'inference', the engine_backend of asr is 'XX/asr_pd.yaml'
-#
-# add engine type (Options: python, inference)
-engine_type:
-    asr: 'python'
-    tts: 'python'
+# The task format in the engine_list is: <speech task>_<engine type>
+# task choices = ['asr_python', 'asr_inference', 'tts_python', 'tts_inference']
+engine_list: ['asr_python', 'tts_python']
+
+
+#################################################################################
+#                               ENGINE CONFIG                                   #
+#################################################################################
+################### speech task: asr; engine_type: python #######################
+asr_python:
+    model: 'conformer_wenetspeech'
+    lang: 'zh'
+    sample_rate: 16000
+    cfg_path:  # [optional]
+    ckpt_path:  # [optional]
+    decode_method: 'attention_rescoring'
+    force_yes: True
+    device:  # set 'gpu:id' or 'cpu'
+
+
+################### speech task: asr; engine_type: inference #######################
+asr_inference:
+    # model_type choices=['deepspeech2offline_aishell']
+    model_type: 'deepspeech2offline_aishell'
+    am_model:    # the pdmodel file of am static model [optional]
+    am_params:   # the pdiparams file of am static model [optional]
+    lang: 'zh'
+    sample_rate: 16000
+    cfg_path:
+    decode_method:
+    force_yes: True
+
+    am_predictor_conf:
+        device:      # set 'gpu:id' or 'cpu'
+        switch_ir_optim: True
+        glog_info: False # True -> print glog
+        summary: True # False -> do not show predictor config
+
+
+################### speech task: tts; engine_type: python #######################
+tts_python:
+    # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc',
+    #                              'fastspeech2_ljspeech', 'fastspeech2_aishell3',
+    #                              'fastspeech2_vctk']
+    am: 'fastspeech2_csmsc'
+    am_config:
+    am_ckpt:
+    am_stat:
+    phones_dict:
+    tones_dict:
+    speaker_dict:
+    spk_id: 0
+
+    # voc (vocoder) choices=['pwgan_csmsc', 'pwgan_ljspeech', 'pwgan_aishell3',
+    #                        'pwgan_vctk', 'mb_melgan_csmsc']
+    voc: 'pwgan_csmsc'
+    voc_config:
+    voc_ckpt:
+    voc_stat:
+
+    # others
+    lang: 'zh'
+    device:  # set 'gpu:id' or 'cpu'
+
+
+################### speech task: tts; engine_type: inference #######################
+tts_inference:
+    # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc']
+    am: 'fastspeech2_csmsc'
+    am_model:   # the pdmodel file of your am static model (XX.pdmodel)
+    am_params:  # the pdiparams file of your am static model (XX.pdiparams)
+    am_sample_rate: 24000
+    phones_dict:
+    tones_dict:
+    speaker_dict:
+    spk_id: 0
+
+    am_predictor_conf:
+        device:      # set 'gpu:id' or 'cpu'
+        switch_ir_optim: True
+        glog_info: False # True -> print glog
+        summary: True # False -> do not show predictor config
+
+    # voc (vocoder) choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc']
+    voc: 'pwgan_csmsc'
+    voc_model:  # the pdmodel file of your vocoder static model (XX.pdmodel)
+    voc_params: # the pdiparams file of your vocoder static model (XX.pdiparams)
+    voc_sample_rate: 24000
+
+    voc_predictor_conf:
+        device:      # set 'gpu:id' or 'cpu'
+        switch_ir_optim: True
+        glog_info: False # True -> print glog
+        summary: True # False -> do not show predictor config
+
+    # others
+    lang: 'zh'
diff --git a/tests/unit/server/conf/asr/asr.yaml b/tests/unit/server/conf/asr/asr.yaml
deleted file mode 100644
index a6743b77..00000000
--- a/tests/unit/server/conf/asr/asr.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-model: 'conformer_wenetspeech'
-lang: 'zh'
-sample_rate: 16000
-cfg_path:  # [optional]
-ckpt_path:  # [optional]
-decode_method: 'attention_rescoring'
-force_yes: True
-device:  # set 'gpu:id' or 'cpu'
diff --git a/tests/unit/server/conf/asr/asr_pd.yaml b/tests/unit/server/conf/asr/asr_pd.yaml
deleted file mode 100644
index 4c415ac7..00000000
--- a/tests/unit/server/conf/asr/asr_pd.yaml
+++ /dev/null
@@ -1,26 +0,0 @@
-# This is the parameter configuration file for ASR server.
-# These are the static models that support paddle inference.
-
-##################################################################
-#                     ACOUSTIC MODEL SETTING                     #
-# am choices=['deepspeech2offline_aishell'] TODO
-##################################################################
-model_type: 'deepspeech2offline_aishell'
-am_model:    # the pdmodel file of am static model [optional]
-am_params:   # the pdiparams file of am static model [optional]
-lang: 'zh'
-sample_rate: 16000
-cfg_path:
-decode_method:
-force_yes: True
-
-am_predictor_conf:
-    device:      # set 'gpu:id' or 'cpu'
-    switch_ir_optim: True
-    glog_info: False # True -> print glog
-    summary: True # False -> do not show predictor config
-
-
-##################################################################
-#                            OTHERS                              #
-##################################################################
diff --git a/tests/unit/server/conf/tts/tts.yaml b/tests/unit/server/conf/tts/tts.yaml
deleted file mode 100644
index 19207f0b..00000000
--- a/tests/unit/server/conf/tts/tts.yaml
+++ /dev/null
@@ -1,32 +0,0 @@
-# This is the parameter configuration file for TTS server.
-
-##################################################################
-#                     ACOUSTIC MODEL SETTING                     #
-# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc',
-#             'fastspeech2_ljspeech', 'fastspeech2_aishell3',
-#             'fastspeech2_vctk']
-##################################################################
-am: 'fastspeech2_csmsc'
-am_config:
-am_ckpt:
-am_stat:
-phones_dict:
-tones_dict:
-speaker_dict:
-spk_id: 0
-
-##################################################################
-#                        VOCODER SETTING                         #
-# voc choices=['pwgan_csmsc', 'pwgan_ljspeech', 'pwgan_aishell3',
-#              'pwgan_vctk', 'mb_melgan_csmsc']
-##################################################################
-voc: 'pwgan_csmsc'
-voc_config:
-voc_ckpt:
-voc_stat:
-
-##################################################################
-#                            OTHERS                              #
-##################################################################
-lang: 'zh'
-device:  # set 'gpu:id' or 'cpu'
diff --git a/tests/unit/server/conf/tts/tts_pd.yaml b/tests/unit/server/conf/tts/tts_pd.yaml
deleted file mode 100644
index e27b9665..00000000
--- a/tests/unit/server/conf/tts/tts_pd.yaml
+++ /dev/null
@@ -1,42 +0,0 @@
-# This is the parameter configuration file for TTS server.
-# These are the static models that support paddle inference.
-
-##################################################################
-#                     ACOUSTIC MODEL SETTING                     #
-# am choices=['speedyspeech_csmsc', 'fastspeech2_csmsc']
-##################################################################
-am: 'fastspeech2_csmsc'
-am_model:   # the pdmodel file of your am static model (XX.pdmodel)
-am_params:  # the pdiparams file of your am static model (XX.pdipparams)
-am_sample_rate: 24000
-phones_dict:
-tones_dict:
-speaker_dict:
-spk_id: 0
-
-am_predictor_conf:
-    device:      # set 'gpu:id' or 'cpu'
-    switch_ir_optim: True
-    glog_info: False # True -> print glog
-    summary: True # False -> do not show predictor config
-
-
-##################################################################
-#                        VOCODER SETTING                         #
-# voc choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc']
-##################################################################
-voc: 'pwgan_csmsc'
-voc_model:  # the pdmodel file of your vocoder static model (XX.pdmodel)
-voc_params: # the pdiparams file of your vocoder static model (XX.pdipparams)
-voc_sample_rate: 24000
-
-voc_predictor_conf:
-    device:      # set 'gpu:id' or 'cpu'
-    switch_ir_optim: True
-    glog_info: False # True -> print glog
-    summary: True # False -> do not show predictor config
-
-##################################################################
-#                            OTHERS                              #
-##################################################################
-lang: 'zh'
diff --git a/tests/unit/server/test_server_client.sh b/tests/unit/server/test_server_client.sh
index 795a23e0..b48e7111 100644
--- a/tests/unit/server/test_server_client.sh
+++ b/tests/unit/server/test_server_client.sh
@@ -99,8 +99,8 @@ echo "**************************************************************************************"
 
 # start server: asr engine type: python; tts engine type: python; device: cpu
-python change_yaml.py --change_task speech-asr-cpu      # change asr.yaml device: cpu
-python change_yaml.py --change_task speech-tts-cpu      # change tts.yaml device: cpu
+python change_yaml.py --change_task device-asr_python-cpu      # change asr_python device: cpu
+python change_yaml.py --change_task device-tts_python-cpu      # change tts_python device: cpu
 
 echo "Start the service: asr engine type: python; tts engine type: python; device: cpu" | tee -a ./log/test_result.log
 ((target_start_num+=1))
@@ -125,8 +125,8 @@ echo "**************************************************************************************"
 
 # start server: asr engine type: inference; tts engine type: inference; device: gpu
-python change_yaml.py --change_task app-asr-inference      # change application.yaml, asr engine_type: inference; asr engine_backend: asr_pd.yaml
-python change_yaml.py --change_task app-tts-inference      # change application.yaml, tts engine_type: inference; tts engine_backend: tts_pd.yaml
+python change_yaml.py --change_task enginetype-asr_inference      # change application.yaml: asr engine type -> inference
+python change_yaml.py --change_task enginetype-tts_inference      # change application.yaml: tts engine type -> inference
 
 echo "Start the service: asr engine type: inference; tts engine type: inference; device: gpu" | tee -a ./log/test_result.log
 ((target_start_num+=1))
@@ -151,8 +151,8 @@ echo "**************************************************************************************"
 
 # start server: asr engine type: inference; tts engine type: inference; device: cpu
-python change_yaml.py --change_task speech-asr_pd-cpu      # change asr_pd.yaml device: cpu
-python change_yaml.py --change_task speech-tts_pd-cpu      # change tts_pd.yaml device: cpu
+python change_yaml.py --change_task device-asr_inference-cpu      # change asr_inference device: cpu
+python change_yaml.py --change_task device-tts_inference-cpu      # change tts_inference device: cpu
 
 echo "start the service: asr engine type: inference; tts engine type: inference; device: cpu" | tee -a ./log/test_result.log
 ((target_start_num+=1))
@@ -182,4 +182,5 @@ echo "***************** Here are all the test results ********************"
 cat ./log/test_result.log
 
 # Restoring conf is the same as demos/speech_server
+rm -rf ./conf
 cp ../../../demos/speech_server/conf/ ./ -rf
\ No newline at end of file
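
Note on the refactor above: the per-task engine_backend/engine_type maps and the separate conf/asr/*.yaml and conf/tts/*.yaml files are folded into a single engine_list, where every entry is named <speech task>_<engine type> and points at a same-named section inside application.yaml. Below is a minimal sketch of how init_engine_pool() resolves such a list; the dict literal stands in for the parsed application.yaml, and EngineFactory plus the engine classes are elided, so this is an illustration rather than the actual server code.

    # Illustration: resolving engine_list entries into (task, type, per-engine config).
    config = {
        "engine_list": ["asr_python", "tts_inference"],
        "asr_python": {"model": "conformer_wenetspeech", "device": "cpu"},
        "tts_inference": {"am": "fastspeech2_csmsc", "voc": "pwgan_csmsc"},
    }

    for engine_and_type in config["engine_list"]:
        # '<speech task>_<engine type>', e.g. 'asr_python' -> ('asr', 'python')
        engine, engine_type = engine_and_type.split("_")
        # each engine's init() now receives its own config section directly,
        # instead of a path to a separate per-task yaml file
        engine_config = config[engine_and_type]
        print(engine, engine_type, engine_config)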
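
With every engine section living in one file, change_device() in change_yaml.py only has to patch the device field of the selected section. A sketch of the writes it performs, with an inline YAML fragment standing in for ./conf/application.yaml (the keys mirror the new config above; the values are placeholders):

    import yaml  # PyYAML, already a dependency of change_yaml.py

    doc = """
    asr_python:
        device:
    tts_inference:
        am_predictor_conf:
            device:
        voc_predictor_conf:
            device:
    """
    y = yaml.safe_load(doc)

    set_device = "gpu:0"  # change_device maps 'cpu' -> 'cpu' and 'gpu' -> 'gpu:0'
    y["asr_python"]["device"] = set_device                           # python engines: one device key
    y["tts_inference"]["am_predictor_conf"]["device"] = set_device   # inference engines: one device
    y["tts_inference"]["voc_predictor_conf"]["device"] = set_device  # key per predictor

    print(yaml.dump(y, default_flow_style=False, sort_keys=False))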
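
Likewise, change_engine_type() swaps the engine_list entry for one speech task: it drops the old <task>_* entry and appends the requested one. Iterating over a copy (engine_list[:], as in the patched code above) keeps the in-loop remove() from skipping elements. A self-contained sketch with hypothetical values:

    engine_list = ["asr_python", "tts_python"]   # what application.yaml starts with
    engine_type = "tts_inference"                # requested new entry
    speech_task = engine_type.split("_")[0]      # 'tts'

    for engine in engine_list[:]:                # iterate over a copy; remove() mutates the original
        if speech_task in engine:
            engine_list.remove(engine)           # drop the stale 'tts_python' entry
    engine_list.append(engine_type)

    print(engine_list)                           # ['asr_python', 'tts_inference']

From the test script this is driven as, e.g., python change_yaml.py --change_task enginetype-tts_inference, with --config_file defaulting to ./conf/application.yaml.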