simonduerr committed on
Commit
e6f46d3
•
1 Parent(s): 780cc53

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -5
app.py CHANGED
@@ -144,8 +144,9 @@ def update_protGPT2(inp, length,repetitionPenalty, top_k_poolsize, max_seqs):
144
  sequencestxt = ""
145
  for i, seq in enumerate(gen_seqs):
146
  s = seq.replace("\n","")
 
147
  s = "\n".join([s[i:i+70] for i in range(0, len(s), 70)])
148
- sequencestxt +=f">seq{i}\n{s}\n"
149
  return sequencestxt
150
 
151
 
@@ -349,7 +350,7 @@ with proteindream:
349
  gr.Markdown("# GradioFold")
350
  gr.Markdown(
351
  """GradioFold is a web-based tool that combines a large language model trained on natural protein sequence (protGPT2) with structure prediction using AlphaFold.
352
- Type a start sequence that protGPT2 can complete or let protGPT2 generate a complete sequence."""
353
  )
354
  gr.Markdown("## protGPT2")
355
  gr.Markdown(
@@ -364,7 +365,7 @@ with proteindream:
364
  with gr.Row():
365
  repetitionPenalty = gr.Slider(minimum=1, maximum=5,value=1.2, label="Repetition penalty")
366
  top_k_poolsize = gr.Slider(minimum=700, maximum=52056,value=950, label="Top-K sampling pool size")
367
- max_seqs = gr.Slider(minimum=2, maximum=20,value=5, label="Number of sequences to generate")
368
  btn = gr.Button("Predict sequences using protGPT2")
369
 
370
  results = gr.Textbox(label="Results", lines=15)
@@ -372,7 +373,7 @@ with proteindream:
372
 
373
  gr.Markdown("## AlphaFold")
374
  gr.Markdown(
375
- "Select a generated sequence above and copy it in the field below for structure prediction using AlphaFold2."
376
  )
377
  with gr.Group():
378
  chosenSeq = gr.Textbox(label="Chosen sequence")
@@ -384,7 +385,7 @@ with proteindream:
384
  plot = gr.Plot(label="pLDDT")
385
  gr.Markdown(
386
  """## Acknowledgements
387
- This was a fun demo using Gradio, Huggingface Spaces and ColabFold as inspiration. More information about the used algorithms can be found below.
388
 
389
  All code is available on [Huggingface](https://huggingface.co/spaces/simonduerr/protGPT2_gradioFold/blob/main) and licensed under MIT license.
390
 
@@ -394,6 +395,7 @@ with proteindream:
394
  - 3Dmol.js: Rego & Koes 📄[Paper](https://academic.oup.com/bioinformatics/article/31/8/1322/213186) 💻 [Code](https://github.com/3dmol/3Dmol.js)
395
 
396
  Created by [@simonduerr](https://twitter.com/simonduerr)
 
397
  """
398
  )
399
  #seqChoice.change(fn=update_seqs, inputs=seqChoice, outputs=chosenSeq)
 
144
  sequencestxt = ""
145
  for i, seq in enumerate(gen_seqs):
146
  s = seq.replace("\n","")
147
+ seqlen = len(s)
148
  s = "\n".join([s[i:i+70] for i in range(0, len(s), 70)])
149
+ sequencestxt +=f">seq{i}, {seqlen} residues \n{s}\n\n"
150
  return sequencestxt
151
 
152
 
 
350
  gr.Markdown("# GradioFold")
351
  gr.Markdown(
352
  """GradioFold is a web-based tool that combines a large language model trained on natural protein sequence (protGPT2) with structure prediction using AlphaFold.
353
+ Type a start sequence that protGPT2 can complete or let protGPT2 generate a complete sequence without a start token."""
354
  )
355
  gr.Markdown("## protGPT2")
356
  gr.Markdown(
 
365
  with gr.Row():
366
  repetitionPenalty = gr.Slider(minimum=1, maximum=5,value=1.2, label="Repetition penalty")
367
  top_k_poolsize = gr.Slider(minimum=700, maximum=52056,value=950, label="Top-K sampling pool size")
368
+ max_seqs = gr.Slider(minimum=2, maximum=20,value=5, step=1, label="Number of sequences to generate")
369
  btn = gr.Button("Predict sequences using protGPT2")
370
 
371
  results = gr.Textbox(label="Results", lines=15)
 
373
 
374
  gr.Markdown("## AlphaFold")
375
  gr.Markdown(
376
+ "Select a generated sequence above and copy it in the field below for structure prediction using AlphaFold2. You can also edit the sequence. Predictions will take around 2-5 minutes to be processed. Proteins larger than about 1000 residues will not fit into memory."
377
  )
378
  with gr.Group():
379
  chosenSeq = gr.Textbox(label="Chosen sequence")
 
385
  plot = gr.Plot(label="pLDDT")
386
  gr.Markdown(
387
  """## Acknowledgements
388
+ More information about the used algorithms can be found below.
389
 
390
  All code is available on [Huggingface](https://huggingface.co/spaces/simonduerr/protGPT2_gradioFold/blob/main) and licensed under MIT license.
391
 
 
395
  - 3Dmol.js: Rego & Koes 📄[Paper](https://academic.oup.com/bioinformatics/article/31/8/1322/213186) 💻 [Code](https://github.com/3dmol/3Dmol.js)
396
 
397
  Created by [@simonduerr](https://twitter.com/simonduerr)
398
+ Thanks to the Huggingface team for sponsoring a free GPU for this demo.
399
  """
400
  )
401
  #seqChoice.change(fn=update_seqs, inputs=seqChoice, outputs=chosenSeq)