asoria HF staff committed on
Commit
90bcf2d
•
1 Parent(s): 117da13

Render notebook as HTML instead

Browse files
Files changed (2) hide show
  1. app.py +23 -33
  2. requirements.txt +2 -1
app.py CHANGED
@@ -13,6 +13,7 @@ from utils.notebook_utils import (
13
  )
14
  from dotenv import load_dotenv
15
  import os
 
16
 
17
  # TODOs:
18
  # Improve UI code preview
@@ -64,6 +65,9 @@ def create_notebook_file(cells, notebook_name):
64
  with open(notebook_name, "w") as f:
65
  nbf.write(nb, f)
66
  logging.info(f"Notebook {notebook_name} created successfully")
 
 
 
67
 
68
 
69
  def get_first_rows_as_df(dataset: str, config: str, split: str, limit: int):
@@ -96,15 +100,15 @@ def longest_string_column(df):
96
 
97
 
98
  def generate_eda_cells(dataset_id):
99
- yield from generate_cells(dataset_id, eda_cells, "eda")
100
 
101
 
102
  def generate_rag_cells(dataset_id):
103
- yield from generate_cells(dataset_id, rag_cells, "rag")
104
 
105
 
106
  def generate_embedding_cells(dataset_id):
107
- yield from generate_cells(dataset_id, embeggins_cells, "embeddings")
108
 
109
 
110
  def _push_to_hub(
@@ -135,20 +139,18 @@ def generate_cells(dataset_id, cells, notebook_type="eda"):
135
  except Exception as err:
136
  gr.Error("Unable to retrieve dataset info from HF Hub.")
137
  logging.error(f"Failed to fetch compatible libraries: {err}")
138
- return []
139
 
140
  if not libraries:
141
  logging.error(f"Dataset not compatible with pandas library - not libraries")
142
- yield "", "## ❌ This dataset is not compatible with pandas library ❌"
143
- return
144
  pandas_library = next(
145
  (lib for lib in libraries.get("libraries", []) if lib["library"] == "pandas"),
146
  None,
147
  )
148
  if not pandas_library:
149
  logging.error("Dataset not compatible with pandas library - not pandas library")
150
- yield "", "## ❌ This dataset is not compatible with pandas library ❌"
151
- return
152
  first_config_loading_code = pandas_library["loading_codes"][0]
153
  first_code = first_config_loading_code["code"]
154
  first_config = first_config_loading_code["config_name"]
@@ -166,48 +168,38 @@ def generate_cells(dataset_id, cells, notebook_type="eda"):
166
  logging.error(
167
  "Dataset does not have categorical columns, which are required for RAG generation."
168
  )
169
- yield (
170
  "",
171
  "## ❌ This dataset does not have categorical columns, which are required for Embeddings/RAG generation ❌",
172
  )
173
- return
174
  if notebook_type == "eda" and not (has_categoric_columns or has_numeric_columns):
175
  logging.error(
176
  "Dataset does not have categorical or numeric columns, which are required for EDA generation."
177
  )
178
- yield (
179
  "",
180
  "## ❌ This dataset does not have categorical or numeric columns, which are required for EDA generation ❌",
181
  )
182
- return
183
 
184
  cells = replace_wildcards(
185
  cells, wildcards, replacements, has_numeric_columns, has_categoric_columns
186
  )
187
- generated_text = ""
188
- # Show only the first 30 lines, would like to have a scroll in gr.Code https://github.com/gradio-app/gradio/issues/9192
189
- for cell in cells:
190
- if cell["cell_type"] == "markdown":
191
- continue
192
- generated_text += cell["source"] + "\n\n"
193
- yield generated_text, ""
194
- if generated_text.count("\n") > 30:
195
- generated_text += (
196
- f"## See more lines available in the generated notebook 🤗 ......"
197
- )
198
- yield generated_text, ""
199
- break
200
  notebook_name = f"{dataset_id.replace('/', '-')}-{notebook_type}.ipynb"
201
- create_notebook_file(cells, notebook_name=notebook_name)
202
  _push_to_hub(dataset_id, notebook_name)
203
  notebook_link = f"https://colab.research.google.com/#fileId=https%3A//huggingface.co/datasets/asoria/dataset-notebook-creator-content/blob/main/{notebook_name}"
204
- yield (
205
- generated_text,
206
- f"## ✅ Here you have the [generated notebook]({notebook_link}) ✅",
207
  )
208
 
209
 
210
- with gr.Blocks(fill_height=True, fill_width=True) as demo:
 
 
 
 
211
  gr.Markdown("# 🤖 Dataset notebook creator 🕵️")
212
  with gr.Row(equal_height=True):
213
  with gr.Column(scale=2):
@@ -262,9 +254,7 @@ with gr.Blocks(fill_height=True, fill_width=True) as demo:
262
  )
263
 
264
  with gr.Column(scale=2):
265
- code_component = gr.Code(
266
- language="python", label="Notebook Code Preview", lines=40
267
- )
268
  go_to_notebook = gr.Markdown("", visible=True)
269
 
270
  generate_eda_btn.click(
 
13
  )
14
  from dotenv import load_dotenv
15
  import os
16
+ from nbconvert import HTMLExporter
17
 
18
  # TODOs:
19
  # Improve UI code preview
 
65
  with open(notebook_name, "w") as f:
66
  nbf.write(nb, f)
67
  logging.info(f"Notebook {notebook_name} created successfully")
68
+ html_exporter = HTMLExporter()
69
+ html_data, _ = html_exporter.from_notebook_node(nb)
70
+ return html_data
71
 
72
 
73
  def get_first_rows_as_df(dataset: str, config: str, split: str, limit: int):
 
100
 
101
 
102
  def generate_eda_cells(dataset_id):
103
+ return generate_cells(dataset_id, eda_cells, "eda")
104
 
105
 
106
  def generate_rag_cells(dataset_id):
107
+ return generate_cells(dataset_id, rag_cells, "rag")
108
 
109
 
110
  def generate_embedding_cells(dataset_id):
111
+ return generate_cells(dataset_id, embeggins_cells, "embeddings")
112
 
113
 
114
  def _push_to_hub(
 
139
  except Exception as err:
140
  gr.Error("Unable to retrieve dataset info from HF Hub.")
141
  logging.error(f"Failed to fetch compatible libraries: {err}")
142
+ return "", "## ❌ This dataset is not accessible from the Hub ❌"
143
 
144
  if not libraries:
145
  logging.error(f"Dataset not compatible with pandas library - not libraries")
146
+ return "", "## ❌ This dataset is not compatible with pandas library ❌"
 
147
  pandas_library = next(
148
  (lib for lib in libraries.get("libraries", []) if lib["library"] == "pandas"),
149
  None,
150
  )
151
  if not pandas_library:
152
  logging.error("Dataset not compatible with pandas library - not pandas library")
153
+ return "", "## ❌ This dataset is not compatible with pandas library ❌"
 
154
  first_config_loading_code = pandas_library["loading_codes"][0]
155
  first_code = first_config_loading_code["code"]
156
  first_config = first_config_loading_code["config_name"]
 
168
  logging.error(
169
  "Dataset does not have categorical columns, which are required for RAG generation."
170
  )
171
+ return (
172
  "",
173
  "## ❌ This dataset does not have categorical columns, which are required for Embeddings/RAG generation ❌",
174
  )
 
175
  if notebook_type == "eda" and not (has_categoric_columns or has_numeric_columns):
176
  logging.error(
177
  "Dataset does not have categorical or numeric columns, which are required for EDA generation."
178
  )
179
+ return (
180
  "",
181
  "## ❌ This dataset does not have categorical or numeric columns, which are required for EDA generation ❌",
182
  )
 
183
 
184
  cells = replace_wildcards(
185
  cells, wildcards, replacements, has_numeric_columns, has_categoric_columns
186
  )
187
+
 
 
 
 
 
 
 
 
 
 
 
 
188
  notebook_name = f"{dataset_id.replace('/', '-')}-{notebook_type}.ipynb"
189
+ html_content = create_notebook_file(cells, notebook_name=notebook_name)
190
  _push_to_hub(dataset_id, notebook_name)
191
  notebook_link = f"https://colab.research.google.com/#fileId=https%3A//huggingface.co/datasets/asoria/dataset-notebook-creator-content/blob/main/{notebook_name}"
192
+ return (
193
+ html_content,
194
+ f"## 🎉 Ready to explore? Play and run the generated notebook 👉 [here]({notebook_link})!",
195
  )
196
 
197
 
198
+ with gr.Blocks(
199
+ fill_height=True,
200
+ fill_width=True,
201
+ css="#box { height: 650px; overflow-y: scroll !important}",
202
+ ) as demo:
203
  gr.Markdown("# 🤖 Dataset notebook creator 🕵️")
204
  with gr.Row(equal_height=True):
205
  with gr.Column(scale=2):
 
254
  )
255
 
256
  with gr.Column(scale=2):
257
+ code_component = gr.HTML(elem_id="box")
 
 
258
  go_to_notebook = gr.Markdown("", visible=True)
259
 
260
  generate_eda_btn.click(
requirements.txt CHANGED
@@ -3,4 +3,5 @@ huggingface_hub
3
  nbformat
4
  httpx
5
  outlines
6
- python-dotenv
 
 
3
  nbformat
4
  httpx
5
  outlines
6
+ python-dotenv
7
+ nbconvert