diff --git a/speechx/examples/ds2_ol/onnx/README.md b/speechx/examples/ds2_ol/onnx/README.md
index 8ad625a0..e6ab953c 100644
--- a/speechx/examples/ds2_ol/onnx/README.md
+++ b/speechx/examples/ds2_ol/onnx/README.md
@@ -51,4 +51,4 @@ Acoustic Model | Model Size | engine | decoding_method | ctc_weight | decoding_c
 | deepspeech2online_wenetspeech | 166MB | onnx quant | ctc_prefix_beam_search | - | 1 | - | 0.44507715475808385 (utts=80) |
 
 > quant is machine-dependent; not every machine supports it. Instruction-set flags of the machine used for ONNX quant testing:
-> Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl xtopology eagerfpu pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 arat umip pku ospke avx512_vnni spec_ctrl
\ No newline at end of file
+> Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl xtopology eagerfpu pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 arat umip pku ospke avx512_vnni spec_ctrl
diff --git a/speechx/examples/ds2_ol/onnx/local/onnx_convert_opset.py b/speechx/examples/ds2_ol/onnx/local/onnx_convert_opset.py
index 53efc3db..00b5cf77 100755
--- a/speechx/examples/ds2_ol/onnx/local/onnx_convert_opset.py
+++ b/speechx/examples/ds2_ol/onnx/local/onnx_convert_opset.py
@@ -1,15 +1,21 @@
 #!/usr/bin/env python3
 import argparse
 
-import onnx
-from onnx import version_converter, helper
+import onnx
+from onnx import version_converter
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(prog=__doc__)
-    parser.add_argument("--model-file", type=str, required=True, help='path/to/the/model.onnx.')
-    parser.add_argument("--save-model", type=str, required=True, help='path/to/saved/model.onnx.')
+    parser.add_argument(
+        "--model-file", type=str, required=True, help='path/to/the/model.onnx.')
+    parser.add_argument(
+        "--save-model",
+        type=str,
+        required=True,
+        help='path/to/saved/model.onnx.')
 
     # Models must be opset10 or higher to be quantized.
- parser.add_argument("--target-opset", type=int, default=11, help='path/to/the/model.onnx.') + parser.add_argument( + "--target-opset", type=int, default=11, help='path/to/the/model.onnx.') args = parser.parse_args() @@ -24,7 +30,8 @@ if __name__ == '__main__': # A full list of supported adapters can be found here: # https://github.com/onnx/onnx/blob/main/onnx/version_converter.py#L21 # Apply the version conversion on the original model - converted_model = version_converter.convert_version(original_model, args.target_opset) + converted_model = version_converter.convert_version(original_model, + args.target_opset) # print('The model after conversion:\n{}'.format(converted_model)) onnx.save(converted_model, args.save_model) diff --git a/speechx/examples/ds2_ol/onnx/local/onnx_infer_shape.py b/speechx/examples/ds2_ol/onnx/local/onnx_infer_shape.py index 2d364c25..4426d1be 100755 --- a/speechx/examples/ds2_ol/onnx/local/onnx_infer_shape.py +++ b/speechx/examples/ds2_ol/onnx/local/onnx_infer_shape.py @@ -494,6 +494,8 @@ class SymbolicShapeInference: # contrib ops + + 'Attention', 'BiasGelu', \ 'EmbedLayerNormalization', \ 'FastGelu', 'Gelu', 'LayerNormalization', \ diff --git a/speechx/examples/ds2_ol/onnx/local/ort_dyanmic_quant.py b/speechx/examples/ds2_ol/onnx/local/ort_dyanmic_quant.py index 331f0821..2c569236 100755 --- a/speechx/examples/ds2_ol/onnx/local/ort_dyanmic_quant.py +++ b/speechx/examples/ds2_ol/onnx/local/ort_dyanmic_quant.py @@ -1,13 +1,20 @@ #!/usr/bin/env python3 import argparse -import onnx -from onnxruntime.quantization import quantize_dynamic, QuantType -def quantize_onnx_model(onnx_model_path, quantized_model_path, nodes_to_exclude=[]): +from onnxruntime.quantization import quantize_dynamic +from onnxruntime.quantization import QuantType + + +def quantize_onnx_model(onnx_model_path, + quantized_model_path, + nodes_to_exclude=[]): print("Starting quantization...") - from onnxruntime.quantization import QuantType, quantize_dynamic - quantize_dynamic(onnx_model_path, quantized_model_path, weight_type=QuantType.QInt8, nodes_to_exclude=nodes_to_exclude) + quantize_dynamic( + onnx_model_path, + quantized_model_path, + weight_type=QuantType.QInt8, + nodes_to_exclude=nodes_to_exclude) print(f"Quantized model saved to: {quantized_model_path}") @@ -18,26 +25,24 @@ def main(): "--model-in", type=str, required=True, - help="ONNX model", - ) + help="ONNX model", ) parser.add_argument( "--model-out", type=str, required=True, default='model.quant.onnx', - help="ONNX model", - ) + help="ONNX model", ) parser.add_argument( "--nodes-to-exclude", type=str, required=True, - help="nodes to exclude. e.g. conv,linear.", - ) + help="nodes to exclude. e.g. conv,linear.", ) args = parser.parse_args() - + nodes_to_exclude = args.nodes_to_exclude.split(',') quantize_onnx_model(args.model_in, args.model_out, nodes_to_exclude) + if __name__ == "__main__": - main() \ No newline at end of file + main()