
Merge pull request #252 from mrhan1993:patch
After running for a long time, GPU memory and system memory usage keep growing because of how models are loaded, eventually causing OOM. Add manual release logic while avoiding repeated model loads as much as possible.

Thanks to @PeakLee for his code: #245 (comment)
mrhan1993 committed Mar 20, 2024
2 parents b2c2377 + 858eed6 commit d9869be
Showing 1 changed file with 12 additions and 0 deletions.
12 changes: 12 additions & 0 deletions fooocusapi/worker.py
@@ -12,6 +12,7 @@
from fooocusapi.task_queue import QueueTask, TaskQueue, TaskOutputs

worker_queue: TaskQueue = None
last_model_name = None

def process_top():
import ldm_patched.modules.model_management
@@ -118,6 +119,17 @@ def yield_result(_, imgs, tasks, extension='png'):

try:
print(f"[Task Queue] Task queue start task, job_id={async_task.job_id}")
# clear memory
global last_model_name

if last_model_name is None:
    last_model_name = async_task.req_param.base_model_name
if last_model_name != async_task.req_param.base_model_name:
    model_management.cleanup_models()  # free models that no longer have live references
    model_management.unload_all_models()  # unload every remaining model from the GPU
    model_management.soft_empty_cache()  # release cached allocator memory back to the device
    last_model_name = async_task.req_param.base_model_name

worker_queue.start_task(async_task.job_id)

execution_start_time = time.perf_counter()
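The guard above only flushes memory when the requested base model actually differs from the one the previous task used, so back-to-back requests for the same model avoid a reload. Below is a minimal, self-contained sketch of the same pattern; the helper name release_if_model_changed and the standalone module layout are illustrative, while the three model_management calls are exactly the ones this commit adds.

    # Sketch of the release-on-model-switch pattern from this commit.
    # Assumes Fooocus' ldm_patched package is importable; the helper
    # name release_if_model_changed is hypothetical.
    import ldm_patched.modules.model_management as model_management

    _last_model_name = None  # base model used by the previous task


    def release_if_model_changed(base_model_name: str) -> None:
        """Free GPU memory only when the requested base model differs
        from the one the previous task loaded."""
        global _last_model_name
        if _last_model_name is None:
            # first task: nothing to release yet, just remember the model
            _last_model_name = base_model_name
        if _last_model_name != base_model_name:
            model_management.cleanup_models()     # free models with no remaining references
            model_management.unload_all_models()  # unload every remaining model from the GPU
            model_management.soft_empty_cache()   # hand cached allocator memory back to the device
            _last_model_name = base_model_name

Repeated requests for the same model skip the release branch entirely, so the common case stays cheap; only an actual model switch pays the unload-and-reload cost.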
