@@ -464,18 +464,97 @@ def voc_to_static(voc_inference,
 # inference
-def get_predictor(model_dir: Optional[os.PathLike]=None,
-                  model_file: Optional[os.PathLike]=None,
-                  params_file: Optional[os.PathLike]=None,
-                  device: str='cpu'):
+def get_predictor(
+        model_dir: Optional[os.PathLike]=None,
+        model_file: Optional[os.PathLike]=None,
+        params_file: Optional[os.PathLike]=None,
+        device: str='cpu',
+        # for gpu
+        use_trt: bool=False,
+        # for trt
+        use_dynamic_shape: bool=True,
+        min_subgraph_size: int=5,
+        # for cpu
+        cpu_threads: int=1,
+        use_mkldnn: bool=False,
+        # for trt or mkldnn
+        precision: str="fp32"):
"""
|
|
|
|
|
Args:
|
|
|
|
|
model_dir (os.PathLike): root path of model.pdmodel and model.pdiparams.
|
|
|
|
|
model_file (os.PathLike): name of model_file.
|
|
|
|
|
params_file (os.PathLike): name of params_file.
|
|
|
|
|
device (str): Choose the device you want to run, it can be: cpu/gpu, default is cpu.
|
|
|
|
|
use_trt (bool): whether to use TensorRT or not in GPU.
|
|
|
|
|
use_dynamic_shape (bool): use dynamic shape or not in TensorRT.
|
|
|
|
|
use_mkldnn (bool): whether to use MKLDNN or not in CPU.
|
|
|
|
|
cpu_threads (int): num of thread when use CPU.
|
|
|
|
|
precision (str): mode of running (fp32/fp16/bf16/int8).
|
|
|
|
|
"""
|
|
|
|
|
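+    # When TensorRT dynamic shapes still need to be collected, the branches
+    # below switch to a CPU collection pass and rerun_flag skips the engine
+    # build, so the caller should run one inference and build again.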
+    rerun_flag = False
+    if device != "gpu" and use_trt:
+        raise ValueError(
+            "Predict by TensorRT mode: {}, expect device=='gpu', but device == {}".
+            format(precision, device))
+
     config = inference.Config(
         str(Path(model_dir) / model_file), str(Path(model_dir) / params_file))
+    config.enable_memory_optim()
+    config.switch_ir_optim(True)
     if device == "gpu":
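+        # enable_use_gpu(memory_pool_init_size_mb, device_id):
+        # 100 MB initial memory pool on GPU 0.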
         config.enable_use_gpu(100, 0)
-    elif device == "cpu":
+    else:
         config.disable_gpu()
-    config.enable_memory_optim()
+        config.set_cpu_math_library_num_threads(cpu_threads)
+        if use_mkldnn:
+            # fp32 is the default once MKLDNN is enabled
+            config.enable_mkldnn()
+            if precision == "int8":
+                # op types to quantize under MKLDNN int8
+                config.enable_mkldnn_int8({
+                    "conv2d_transpose", "conv2d", "depthwise_conv2d", "pool2d",
+                    "transpose2", "elementwise_mul"
+                })
+            elif precision in {"fp16", "bf16"}:
+                # MKLDNN has no fp16 path here; fp16 falls back to bfloat16
+                config.enable_mkldnn_bfloat16()
+            print("MKLDNN with {}".format(precision))
+    if use_trt:
+        if precision == "bf16":
+            print("Paddle-TRT does not support bf16, switching to fp16.")
+            precision = "fp16"
+        precision_map = {
+            "int8": inference.Config.Precision.Int8,
+            "fp32": inference.Config.Precision.Float32,
+            "fp16": inference.Config.Precision.Half,
+        }
+        assert precision in precision_map
+        pdtxt_name = model_file.split(".")[0] + "_" + precision + ".txt"
+        if use_dynamic_shape:
+            dynamic_shape_file = os.path.join(model_dir, pdtxt_name)
+            if os.path.exists(dynamic_shape_file):
+                config.enable_tuned_tensorrt_dynamic_shape(dynamic_shape_file,
+                                                           True)
+                # for fastspeech2: keep reshape2 out of the TensorRT subgraph
+                config.exp_disable_tensorrt_ops(["reshape2"])
+                print("trt set dynamic shape done!")
+            else:
+                # Collect dynamic shapes on CPU to avoid GPU memory overflow.
+                config.disable_gpu()
+                config.set_cpu_math_library_num_threads(10)
+                config.collect_shape_range_info(dynamic_shape_file)
+                print("Start collecting dynamic shapes...")
+                rerun_flag = True
+
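+        # use_calib_mode=False below assumes an int8 model already carries
+        # quantization scales (e.g. from offline PTQ), rather than relying on
+        # TensorRT's online calibration.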
+        if not rerun_flag:
+            print("TensorRT with {}".format(precision))
+            config.enable_tensorrt_engine(
+                workspace_size=1 << 30,  # 1 GiB TensorRT workspace
+                max_batch_size=1,
+                min_subgraph_size=min_subgraph_size,
+                precision_mode=precision_map[precision],
+                use_static=True,
+                use_calib_mode=False, )
+
     predictor = inference.create_predictor(config)
     return predictor
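Usage note (not part of the diff): a minimal sketch of the two-pass TensorRT flow above. The directory and model names are examples only; `get_predictor` and its imports are assumed to come from this module.

# Example only: paths and model names below are hypothetical.
kwargs = dict(
    model_dir="inference_dir",
    model_file="fastspeech2_csmsc.pdmodel",
    params_file="fastspeech2_csmsc.pdiparams",
    device="gpu",
    use_trt=True,
    precision="fp16")
predictor = get_predictor(**kwargs)
# If the fastspeech2_csmsc_fp16.txt shape file did not exist yet, this first
# predictor only collects dynamic shapes on CPU: run one warm-up inference
# with it, then call get_predictor(**kwargs) again to get the TRT engine.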