diff --git a/ppdiffusers/deploy/export_model.py b/ppdiffusers/deploy/export_model.py
index 230fbd447..583bf6fe0 100644
--- a/ppdiffusers/deploy/export_model.py
+++ b/ppdiffusers/deploy/export_model.py
@@ -42,6 +42,10 @@ def convert_ppdiffusers_pipeline_to_fastdeploy_pipeline(
     pipeline = StableDiffusionPipeline.from_pretrained(
         model_path, unet=unet_model, safety_checker=None, feature_extractor=None
     )
+    # make sure we disable xformers
+    pipeline.disable_xformers_memory_efficient_attention()
+    pipeline.unet.set_default_attn_processor()
+    pipeline.vae.set_default_attn_processor()
     output_path = Path(output_path)
     # calculate latent's H and W
     latent_height = height // 8 if height is not None else None
diff --git a/ppdiffusers/deploy/infer.py b/ppdiffusers/deploy/infer.py
index 3b3e1bb63..d90dd7165 100644
--- a/ppdiffusers/deploy/infer.py
+++ b/ppdiffusers/deploy/infer.py
@@ -161,10 +161,13 @@ def create_paddle_inference_runtime(
     disable_paddle_pass=[],
     paddle_stream=None,
     workspace=None,
+    show_log=False,
 ):
     assert not use_fp16 or not use_bf16, "use_fp16 and use_bf16 are mutually exclusive"
     option = fd.RuntimeOption()
     option.use_paddle_backend()
+    if show_log:
+        option.enable_paddle_log_info()
     if device_id == -1:
         option.use_cpu()
     else:
@@ -378,6 +381,15 @@ def main(args):
     elif args.backend == "paddle" or args.backend == "paddle_tensorrt":
         args.use_trt = args.backend == "paddle_tensorrt"
         runtime_options = dict(
+            unet=create_paddle_inference_runtime(
+                use_trt=args.use_trt,
+                dynamic_shape=unet_dynamic_shape,
+                use_fp16=args.use_fp16,
+                use_bf16=args.use_bf16,
+                device_id=args.device_id,
+                paddle_stream=paddle_stream,
+                workspace=10 * 1024 * 1024 * 1024,
+            ),
             text_encoder=create_paddle_inference_runtime(
                 use_trt=args.use_trt,
                 dynamic_shape=text_encoder_dynamic_shape,
@@ -386,6 +398,7 @@ def main(args):
                 device_id=args.device_id,
                 disable_paddle_trt_ops=["arg_max", "range", "lookup_table_v2"],
                 paddle_stream=paddle_stream,
+                workspace=10 * 1024 * 1024 * 1024,
             ),
             vae_encoder=create_paddle_inference_runtime(
                 use_trt=args.use_trt,
@@ -394,6 +407,7 @@ def main(args):
                 use_bf16=args.use_bf16,
                 device_id=args.device_id,
                 paddle_stream=paddle_stream,
+                workspace=10 * 1024 * 1024 * 1024,
             ),
             vae_decoder=create_paddle_inference_runtime(
                 use_trt=args.use_trt,
@@ -402,14 +416,7 @@ def main(args):
                 use_bf16=args.use_bf16,
                 device_id=args.device_id,
                 paddle_stream=paddle_stream,
-            ),
-            unet=create_paddle_inference_runtime(
-                use_trt=args.use_trt,
-                dynamic_shape=unet_dynamic_shape,
-                use_fp16=args.use_fp16,
-                use_bf16=args.use_bf16,
-                device_id=args.device_id,
-                paddle_stream=paddle_stream,
+                workspace=10 * 1024 * 1024 * 1024,
             ),
         )
     pipe = FastDeployStableDiffusionMegaPipeline.from_pretrained(
diff --git a/ppdiffusers/ppdiffusers/pipelines/stable_diffusion/pipeline_fastdeploy_stable_diffusion.py b/ppdiffusers/ppdiffusers/pipelines/stable_diffusion/pipeline_fastdeploy_stable_diffusion.py
index 319346858..a29bb65ee 100644
--- a/ppdiffusers/ppdiffusers/pipelines/stable_diffusion/pipeline_fastdeploy_stable_diffusion.py
+++ b/ppdiffusers/ppdiffusers/pipelines/stable_diffusion/pipeline_fastdeploy_stable_diffusion.py
@@ -272,7 +272,7 @@ def __call__(

         unet_inputs = dict(
             sample=latent_model_input,
-            timestep=t,
+            timestep=paddle.to_tensor(t).reshape([1]),
             encoder_hidden_states=prompt_embeds,
             infer_op=infer_op_dict.get("unet", None),
             output_shape=latent_model_input.shape,
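
For the `infer.py` hunks, a minimal sketch of how the two new `create_paddle_inference_runtime` knobs compose. It mirrors the call sites in `main` above; the standalone import path and the `GiB` constant are illustrative assumptions, and any omitted keyword arguments are left at the helper's defaults:

```python
# Hypothetical standalone usage of the patched helper; assumes it is run
# from ppdiffusers/deploy/, where infer.py lives.
from infer import create_paddle_inference_runtime

GiB = 1024 * 1024 * 1024  # the patch writes this out as 10 * 1024 * 1024 * 1024

unet_option = create_paddle_inference_runtime(
    use_trt=True,        # i.e. the paddle_tensorrt backend
    use_fp16=True,
    device_id=0,
    workspace=10 * GiB,  # now pinned for every runtime in main()
    show_log=True,       # new flag: calls option.enable_paddle_log_info()
)
```

Passing `workspace` explicitly (rather than leaving the default `workspace=None`) presumably keeps TensorRT from building engines for the large UNet with an undersized workspace, and moving `unet` to the front of `runtime_options` puts the most expensive runtime's settings where they are easiest to spot.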
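
On the last hunk: depending on the scheduler, `t` can arrive as a Python scalar or a 0-D tensor, and the reshape normalizes both cases, presumably because the exported FastDeploy UNet expects a rank-1 `timestep` input. A quick illustration (the value 981 is arbitrary):

```python
import paddle

for t in (981, paddle.to_tensor(981)):           # Python scalar or 0-D tensor
    timestep = paddle.to_tensor(t).reshape([1])  # always ends up rank-1
    print(timestep.shape)                        # -> [1] in both cases
```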