fffiloni committed on
Commit
9bd0658
1 Parent(s): f351c05

ensure no other thread is running

Browse files
Files changed (1) hide show
  1. app.py +43 -43
app.py CHANGED
@@ -102,77 +102,77 @@ def generate_image(setup_args, num_iterations):
102
  steps_completed = []
103
  result_container = {"best_image": None, "total_init_rewards": None, "total_best_rewards": None}
104
  error_status = {"error_occurred": False} # Shared dictionary to track error status
 
105
 
106
- # Define progress_callback that updates steps_completed
107
  def progress_callback(step):
108
  # Limit redundant prints by checking the step number
109
  if not steps_completed or step > steps_completed[-1]:
110
  steps_completed.append(step)
111
  print(f"Progress: Step {step} completed.")
112
-
113
- # Function to run main in a separate thread
114
  def run_main():
 
115
  try:
116
- # Call main and handle any potential OOM errors
117
- result_container["best_image"], result_container["total_init_rewards"], result_container["total_best_rewards"] = execute_task(args, trainer, device, dtype, shape, enable_grad, settings, progress_callback)
118
  except torch.cuda.OutOfMemoryError as e:
119
- # Handle CUDA OOM error
120
- print("CUDA Out of Memory Error: ", e)
121
- error_status["error_occurred"] = True # Update status on error
122
  except RuntimeError as e:
123
  if 'out of memory' in str(e):
124
- error_status["error_occurred"] = True # Update status on error
 
125
  else:
126
- raise # Reraise if it's not a CUDA OOM error
 
 
127
 
128
- # Start main in a separate thread
129
- main_thread = threading.Thread(target=run_main)
130
- main_thread.start()
131
-
132
- last_step_yielded = 0
133
- while main_thread.is_alive() and not error_status["error_occurred"]:
134
- # Check if new steps have been completed
135
- if steps_completed and steps_completed[-1] > last_step_yielded:
136
- last_step_yielded = steps_completed[-1]
137
- png_number = last_step_yielded - 1
138
- # Get the image for this step
139
- image_path = os.path.join(save_dir, f"{png_number}.png")
140
- if os.path.exists(image_path):
141
- yield (image_path, f"Iteration {last_step_yielded}/{num_iterations} - Image saved", None)
 
 
142
  else:
143
- yield (None, f"Iteration {last_step_yielded}/{num_iterations} - Image not found", None)
144
- else:
145
- # Small sleep to prevent busy waiting
146
- time.sleep(0.1)
147
 
148
- # If an error occurred, clean up resources and stop
149
- if error_status["error_occurred"]:
150
- torch.cuda.empty_cache() # Free up cached memory
151
- yield (None, "CUDA out of memory. Please reduce your batch size or image resolution.", None)
152
- else:
153
- main_thread.join()
154
-
155
- # After main is complete, yield the final image
156
- final_image_path = os.path.join(save_dir, "best_image.png")
157
- if os.path.exists(final_image_path):
158
- iter_images = list_iter_images(save_dir)
159
  torch.cuda.empty_cache() # Free up cached memory
160
- yield (final_image_path, f"Final image saved at {final_image_path}", iter_images)
161
  else:
162
- torch.cuda.empty_cache() # Free up cached memory
163
- yield (None, "Image generation completed, but no final image was found.", None)
 
 
 
 
 
 
 
164
 
165
  torch.cuda.empty_cache() # Free up cached memory
166
 
167
  except torch.cuda.OutOfMemoryError as e:
168
- # Handle CUDA OOM error globally
169
  yield (None, "CUDA out of memory.", None)
170
  except RuntimeError as e:
171
  if 'out of memory' in str(e):
 
172
  yield (None, "CUDA out of memory.", None)
173
  else:
174
  yield (None, f"An error occurred: {str(e)}", None)
175
  except Exception as e:
 
176
  yield (None, f"An unexpected error occurred: {str(e)}", None)
177
 
178
  def show_gallery_output(gallery_state):
 
102
  steps_completed = []
103
  result_container = {"best_image": None, "total_init_rewards": None, "total_best_rewards": None}
104
  error_status = {"error_occurred": False} # Shared dictionary to track error status
105
+ thread_status = {"running": False} # Track whether a thread is already running
106
 
 
107
  def progress_callback(step):
108
  # Limit redundant prints by checking the step number
109
  if not steps_completed or step > steps_completed[-1]:
110
  steps_completed.append(step)
111
  print(f"Progress: Step {step} completed.")
112
+
 
113
  def run_main():
114
+ thread_status["running"] = True # Mark thread as running
115
  try:
116
+ result_container["best_image"], result_container["total_init_rewards"], result_container["total_best_rewards"] = execute_task(
117
+ args, trainer, device, dtype, shape, enable_grad, settings, progress_callback)
118
  except torch.cuda.OutOfMemoryError as e:
119
+ print(f"CUDA Out of Memory Error: {e}")
120
+ error_status["error_occurred"] = True
 
121
  except RuntimeError as e:
122
  if 'out of memory' in str(e):
123
+ print(f"Runtime Error: {e}")
124
+ error_status["error_occurred"] = True
125
  else:
126
+ raise
127
+ finally:
128
+ thread_status["running"] = False # Mark thread as completed
129
 
130
+ if not thread_status["running"]: # Ensure no other thread is running
131
+ main_thread = threading.Thread(target=run_main)
132
+ main_thread.start()
133
+
134
+ last_step_yielded = 0
135
+ while main_thread.is_alive() and not error_status["error_occurred"]:
136
+ # Check if new steps have been completed
137
+ if steps_completed and steps_completed[-1] > last_step_yielded:
138
+ last_step_yielded = steps_completed[-1]
139
+ png_number = last_step_yielded - 1
140
+ # Get the image for this step
141
+ image_path = os.path.join(save_dir, f"{png_number}.png")
142
+ if os.path.exists(image_path):
143
+ yield (image_path, f"Iteration {last_step_yielded}/{num_iterations} - Image saved", None)
144
+ else:
145
+ yield (None, f"Iteration {last_step_yielded}/{num_iterations} - Image not found", None)
146
  else:
147
+ time.sleep(0.1) # Sleep to prevent busy waiting
 
 
 
148
 
149
+ if error_status["error_occurred"]:
 
 
 
 
 
 
 
 
 
 
150
  torch.cuda.empty_cache() # Free up cached memory
151
+ yield (None, "CUDA out of memory. Please reduce your batch size or image resolution.", None)
152
  else:
153
+ main_thread.join() # Ensure thread completion
154
+ final_image_path = os.path.join(save_dir, "best_image.png")
155
+ if os.path.exists(final_image_path):
156
+ iter_images = list_iter_images(save_dir)
157
+ torch.cuda.empty_cache() # Free up cached memory
158
+ yield (final_image_path, f"Final image saved at {final_image_path}", iter_images)
159
+ else:
160
+ torch.cuda.empty_cache() # Free up cached memory
161
+ yield (None, "Image generation completed, but no final image was found.", None)
162
 
163
  torch.cuda.empty_cache() # Free up cached memory
164
 
165
  except torch.cuda.OutOfMemoryError as e:
166
+ print(f"Global CUDA Out of Memory Error: {e}")
167
  yield (None, "CUDA out of memory.", None)
168
  except RuntimeError as e:
169
  if 'out of memory' in str(e):
170
+ print(f"Runtime Error: {e}")
171
  yield (None, "CUDA out of memory.", None)
172
  else:
173
  yield (None, f"An error occurred: {str(e)}", None)
174
  except Exception as e:
175
+ print(f"Unexpected Error: {e}")
176
  yield (None, f"An unexpected error occurred: {str(e)}", None)
177
 
178
  def show_gallery_output(gallery_state):