fffiloni committed on
Commit
3ca7acb
1 Parent(s): 4b738f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -8
app.py CHANGED
@@ -71,7 +71,7 @@ def infer(audio_file):
71
 
72
  print(result)
73
 
74
- images = pipe(prompt=result).images[0]
75
 
76
  #return cap_result, result, images
77
  return images
@@ -95,16 +95,17 @@ with gr.Blocks(css=css) as demo:
95
  </h1>
96
  </div>
97
  <p style="margin-bottom: 10px; font-size: 94%">
98
- Sends an audio in to <a href="https://huggingface.co/spaces/seungheondoh/LP-Music-Caps-demo" target="_blank">LP-Music-Caps</a>
99
- to generate a audio cpation which is then translated to an illustrative image description with Llama2, then run through
100
- Stable Diffusion XL to generate an image from the audio !
 
101
  </p>
102
  </div>""")
103
  audio_input = gr.Audio(type="filepath", source="upload")
104
- infer_btn = gr.Button("Generate")
105
- lpmc_cap = gr.Textbox(label="Lp Music Caps caption")
106
- llama_trans_cap = gr.Textbox(label="Llama translation")
107
- img_result = gr.Image(label="Result")
108
 
109
  #infer_btn.click(fn=infer, inputs=[audio_input], outputs=[lpmc_cap, llama_trans_cap, img_result])
110
  infer_btn.click(fn=infer, inputs=[audio_input], outputs=[img_result])
 
71
 
72
  print(result)
73
 
74
+ images = pipe(prompt=result).images
75
 
76
  #return cap_result, result, images
77
  return images
 
95
  </h1>
96
  </div>
97
  <p style="margin-bottom: 10px; font-size: 94%">
98
+ Sends an audio into <a href="https://huggingface.co/spaces/seungheondoh/LP-Music-Caps-demo" target="_blank">LP-Music-Caps</a>
99
+ to generate a audio caption which is then translated to an illustrative image description with Llama2, and finally run through
100
+ Stable Diffusion XL to generate an image from the audio ! <br /><br />
101
+ Note: Only the first 30 seconds of your audio will be used for inference.
102
  </p>
103
  </div>""")
104
  audio_input = gr.Audio(type="filepath", source="upload")
105
+ infer_btn = gr.Button("Generate Image from Music")
106
+ #lpmc_cap = gr.Textbox(label="Lp Music Caps caption")
107
+ #llama_trans_cap = gr.Textbox(label="Llama translation")
108
+ img_result = gr.Gallery(label="Result", grid=2)
109
 
110
  #infer_btn.click(fn=infer, inputs=[audio_input], outputs=[lpmc_cap, llama_trans_cap, img_result])
111
  infer_btn.click(fn=infer, inputs=[audio_input], outputs=[img_result])