davidberenstein1957 HF staff committed on
Commit
4d1c962
1 Parent(s): c0c68e7

feat: added duration for run

Browse files
.gitignore CHANGED
@@ -160,3 +160,4 @@ cython_debug/
160
  # and can be added to the global gitignore or merged into this file. For a more nuclear
161
  # option (not recommended) you can uncomment the following to ignore the entire idea folder.
162
  #.idea/
 
 
160
  # and can be added to the global gitignore or merged into this file. For a more nuclear
161
  # option (not recommended) you can uncomment the following to ignore the entire idea folder.
162
  #.idea/
163
+ .DS_Store
src/distilabel_dataset_generator/sft.py CHANGED
@@ -232,16 +232,29 @@ def generate_dataset(
232
  )
233
  num_rows = 5000
234
 
 
 
 
 
 
 
 
 
 
235
  gr.Info(
236
- "Started pipeline execution. This might take a while, depending on the number of rows and turns you have selected. Don't close this page."
 
237
  )
238
  result_queue = multiprocessing.Queue()
239
  p = multiprocessing.Process(
240
  target=_run_pipeline,
241
  args=(result_queue, num_turns, num_rows, system_prompt),
242
  )
243
- p.start()
244
- p.join()
 
 
 
245
  distiset = result_queue.get()
246
 
247
  if dataset_name is not None:
 
232
  )
233
  num_rows = 5000
234
 
235
+ if num_rows < 50:
236
+ duration = 60
237
+ elif num_rows < 250:
238
+ duration = 300
239
+ elif num_rows < 1000:
240
+ duration = 500
241
+ else:
242
+ duration = 1000
243
+
244
  gr.Info(
245
+ "Started pipeline execution. This might take a while, depending on the number of rows and turns you have selected. Don't close this page.",
246
+ duration=duration,
247
  )
248
  result_queue = multiprocessing.Queue()
249
  p = multiprocessing.Process(
250
  target=_run_pipeline,
251
  args=(result_queue, num_turns, num_rows, system_prompt),
252
  )
253
+ try:
254
+ p.start()
255
+ p.join()
256
+ except Exception as e:
257
+ raise gr.Error(f"An error occurred during dataset generation: {str(e)}")
258
  distiset = result_queue.get()
259
 
260
  if dataset_name is not None: