import json

import gradio as gr
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import requests

from helpers import make_header, upload_file, request_transcript, make_polling_endpoint, wait_for_completion, \
    make_html_from_topics, make_paras_string, create_highlighted_list, make_summary, \
    make_sentiment_output, make_entity_dict, make_entity_html, make_true_dict, make_final_json, make_content_safety_fig

from helpers import transcription_options_headers, audio_intelligence_headers, language_headers
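
# NOTE: the helpers above live in helpers.py. From its usage in this file, make_header is assumed to
# build the AssemblyAI request header (roughly {'authorization': api_key}); this is inferred from how
# `header` is passed to requests.get below, not from a documented contract.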


def change_audio_source(radio, plot, file_data, mic_data):
    """When the audio source radio selector is changed, update the wave plot and change the audio selector accordingly"""

    # Empty plot
    plot.update_traces(go.Line(y=[]))
    # Update plot with appropriate data and change visibility of audio components
    if radio == "Audio File":
        sample_rate, audio_data = file_data
        plot.update_traces(go.Line(y=audio_data, x=np.arange(len(audio_data)) / sample_rate))
        return [gr.Audio.update(visible=True),
                gr.Audio.update(visible=False),
                plot,
                plot]
    elif radio == "Record Audio":
        sample_rate, audio_data = mic_data
        plot.update_traces(go.Line(y=audio_data, x=np.arange(len(audio_data)) / sample_rate))
        return [gr.Audio.update(visible=False),
                gr.Audio.update(visible=True),
                plot,
                plot]
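
# Note: the four values returned above map, in order, to the outputs of radio.change in the
# Functionality section below: [audio_file, mic_recording, audio_wave, plot].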


def plot_data(audio_data, plot):
    """Updates plot and appropriate state variable when audio is uploaded/recorded or deleted"""
    # If the current audio file is deleted
    if audio_data is None:
        # Replace the state variable for the audio source with placeholder values
        sample_rate, audio_data = [0, np.array([])]
        # Update the plot to be empty
        plot.update_traces(go.Line(y=[]))
    # If new audio is uploaded/recorded
    else:
        # Replace the current state variable with new
        sample_rate, audio_data = audio_data
        # Plot the new data
        plot.update_traces(go.Line(y=audio_data, x=np.arange(len(audio_data)) / sample_rate))

    # Update the plot component and data state variable
    return [plot, [sample_rate, audio_data], plot]
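
# Gradio Audio components with the default type="numpy" deliver audio as a (sample_rate, numpy_array)
# tuple; that is the shape plot_data unpacks and the shape the placeholder [0, np.array([])] mimics
# when a recording is deleted.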


def set_lang_vis(transcription_options):
    """Sets visibility of language selector/warning when automatic language detection is (de)selected"""
    if 'Automatic Language Detection' in transcription_options:
        text = w  # 'w' is the warning-banner HTML string defined in the UI layout below
        return [gr.Dropdown.update(visible=False),
                gr.Textbox.update(value=text, visible=True)]
    else:
        text = ""
        return [gr.Dropdown.update(visible=True),
                gr.Textbox.update(value=text, visible=False)]


def option_verif(language, selected_tran_opts, selected_audint_opts):
    """When the language is changed, this function automatically deselects options that are not allowed for that
    language."""

    not_available_tran, not_available_audint = get_unavailable_opts(language)

    current_tran_opts = list(set(selected_tran_opts) - set(not_available_tran))
    current_audint_opts = list(set(selected_audint_opts) - set(not_available_audint))

    return [current_tran_opts,
            current_audint_opts,
            current_tran_opts,
            current_audint_opts]
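
# option_verif returns each list twice because the language.change event below routes them to both
# the visible CheckboxGroup components and the backing gr.State variables.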


# Get tran/audint opts that are not available by language
def get_unavailable_opts(language):
    """Get transcription and audio intelligence options that are unavailable for a given language"""
    if language in ['Spanish', 'French', 'German', 'Portuguese']:
        not_available_tran = ['Speaker Labels']
        not_available_audint = ['PII Redaction', 'Auto Highlights', 'Sentiment Analysis', 'Summarization',
                                'Entity Detection']

    # Italian/Dutch and Hindi/Japanese share the same set of unavailable options
    elif language in ['Italian', 'Dutch', 'Hindi', 'Japanese']:
        not_available_tran = ['Speaker Labels']
        not_available_audint = ['PII Redaction', 'Auto Highlights', 'Content Moderation', 'Topic Detection',
                                'Sentiment Analysis', 'Summarization', 'Entity Detection']

    else:
        not_available_tran = []
        not_available_audint = []

    return not_available_tran, not_available_audint
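
# Illustrative examples of the mapping above:
#   get_unavailable_opts('Italian')
#   -> (['Speaker Labels'],
#       ['PII Redaction', 'Auto Highlights', 'Content Moderation', 'Topic Detection',
#        'Sentiment Analysis', 'Summarization', 'Entity Detection'])
#   get_unavailable_opts('US English')
#   -> ([], [])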


# When selecting new tran option, checks to make sure allowed by language and
# then adds to selected_tran_opts and updates
def tran_selected(language, transcription_options):
    """When a transcription option is selected, """
    unavailable, _ = get_unavailable_opts(language)
    selected_tran_opts = list(set(transcription_options) - set(unavailable))

    return [selected_tran_opts, selected_tran_opts]


# When selecting new audint option, checks to make sure allowed by language and
# then adds to selected_audint_opts and updates
def audint_selected(language, audio_intelligence_selector):
    """Deselected"""
    _, unavailable = get_unavailable_opts(language)
    selected_audint_opts = list(set(audio_intelligence_selector) - set(unavailable))

    return [selected_audint_opts, selected_audint_opts]


def create_output(r, paras, language, transc_opts=None, audint_opts=None):
    """From a transcript response, return all outputs for audio intelligence"""
    if transc_opts is None:
        transc_opts = ['Automatic Language Detection', 'Speaker Labels', 'Filter Profanity']

    if audint_opts is None:
        audint_opts = ['Summarization', 'Auto Highlights', 'Topic Detection', 'Entity Detection',
         'Sentiment Analysis', 'PII Redaction', 'Content Moderation']

    # DIARIZATION
    if "Speaker Labels" in transc_opts:
        utts = '\n\n\n'.join([f"Speaker {utt['speaker']}:\n\n" + utt['text'] for utt in r['utterances']])
    else:
        utts = " NOT ANALYZED"

    # HIGHLIGHTS
    if 'Auto Highlights' in audint_opts:
        highlight_dict = create_highlighted_list(paras, r['auto_highlights_result']['results'])
    else:
        highlight_dict = [["NOT ANALYZED", 0]]

    # SUMMARIZATION
    if 'Summarization' in audint_opts:
        chapters = r['chapters']
        summary_html = make_summary(chapters)
    else:
        summary_html = "<p>NOT ANALYZED</p>"

    # TOPIC DETECTION
    if "Topic Detection" in audint_opts:
        topics = r['iab_categories_result']['summary']
        topics_html = make_html_from_topics(topics)
    else:
        topics_html = "<p>NOT ANALYZED</p>"

    # SENTIMENT
    if "Sentiment Analysis" in audint_opts:
        sent_results = r['sentiment_analysis_results']
        sent = make_sentiment_output(sent_results)
    else:
        sent = "<p>NOT ANALYZED</p>"

    # ENTITY
    if "Entity Detection" in audint_opts:
        entities = r['entities']
        t = r['text']
        d = make_entity_dict(entities, t)
        entity_html = make_entity_html(d)
    else:
        entity_html = "<p>NOT ANALYZED</p>"

    # CONTENT SAFETY
    if "Content Moderation" in audint_opts:
        cont = r['content_safety_labels']['summary']
        content_fig = make_content_safety_fig(cont)
    else:
        content_fig = go.Figure()

    return [language, paras, utts, highlight_dict, summary_html, topics_html, sent, entity_html, content_fig]
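
# The order of create_output's return list must match the `outputs` lists of submit.click and
# example.click in the Functionality section below.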


def submit_to_AAI(api_key,
                  transcription_options,
                  audio_intelligence_selector,
                  language,
                  radio,
                  audio_file,
                  mic_recording):
    """Upload the selected audio to AssemblyAI, request a transcript with the selected
    transcription/audio intelligence options, wait for completion, and return all dashboard outputs."""
    # Make request header
    header = make_header(api_key)

    # Map transcription/audio intelligence options to AssemblyAI API request JSON dict
    true_dict = make_true_dict(transcription_options, audio_intelligence_selector)

    final_json, language = make_final_json(true_dict, language)
    final_json = {**true_dict, **final_json}
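
    # At this point final_json is assumed to hold the AssemblyAI request fields for the selected
    # options, e.g. roughly {'speaker_labels': True, 'auto_highlights': True, 'language_code': 'en_us'};
    # the exact keys come from make_true_dict/make_final_json in helpers.py and are not verified here.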

    # Select which audio to use
    if radio == "Audio File":
        audio_data = audio_file
    elif radio == "Record Audio":
        audio_data = mic_recording

    # Upload the audio
    upload_url = upload_file(audio_data, header, is_file=False)

    # Request transcript
    transcript_response = request_transcript(upload_url, header, **final_json)

    # Wait for the transcription to complete
    polling_endpoint = make_polling_endpoint(transcript_response)
    wait_for_completion(polling_endpoint, header)

    # Fetch results JSON
    r = requests.get(polling_endpoint, headers=header, json=final_json).json()

    # Fetch paragraphs of transcript
    transc_id = r['id']
    paras = make_paras_string(transc_id, header)
    return create_output(r, paras, language, transcription_options, audio_intelligence_selector)


def example_output(language):
    """Displays example output"""
    with open("example_data/paras.txt", 'r') as f:
        paras = f.read()

    with open('example_data/response.json', 'r') as f:
        r = json.load(f)

    return create_output(r, paras, language)
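
# example_output replays a saved API response so the dashboard can be demonstrated without an API key;
# the files under example_data/ are assumed to contain the paragraphs string and transcript JSON that
# a real request would produce.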


with open('app/styles.css', 'r') as f:
    css = f.read()

with gr.Blocks(css=css) as demo:
    # Load image
    gr.HTML('<a href="https://www.assemblyai.com/"><img src="file/app/images/logo.png" class="logo"></a>')

    # Load descriptions
    # www.assemblyai.com/blog/how-to-build-an-audio-intelligence-dashboard-with-gradio/
    gr.HTML("<h1 class='title'>Audio Intelligence Dashboard</h1>"
            "<br>"
            "<p>Check out the <a href=\"https://www.assemblyai.com/blog/getting-started-with-huggingfaces-gradio/\">Getting Started with Hugging Face's Gradio</a> blog to learn how to build this dashboard.</p>")

    gr.HTML("<h1 class='title'>Directions</h1>"
            "<p>To use this dashboard:</p>"
            "<ul>"
            "<li>1)  Paste your AssemblyAI API Key into the box below - you can copy it from <a href=\"https://app.assemblyai.com/signup\">here</a> (or get one for free if you don't already have one)</li>"
            "<li>2)  Choose an audio source and upload or record audio</li>"
            "<li>3)  Select the types of analyses you would like to perform on the audio</li>"
            "<li>4)  Click <i>Submit</i></li>"
            "<li>5)  View the results at the bottom of the page</li>"
            "<ul>"
            "<br>"
            "<p>You may also click <b>Show Example Output</b> below to see an example without having to enter an API key.")

    gr.HTML('<div class="alert alert__warning"><span>'
            'Note that this dashboard is not an official AssemblyAI product and is intended for educational purposes.'
            '</span></div>')

    # API Key title
    with gr.Box():
        gr.HTML("<p class=\"apikey\">API Key:</p>")
        # API key textbox (password-style)
        api_key = gr.Textbox(label="", elem_id="pw")

    # Gradio states for - plotly Figure object, audio data for file source, and audio data for mic source
    plot = gr.State(px.line(labels={'x': 'Time (s)', 'y': ''}))
    file_data = gr.State([1, [0]])  # [sample rate, [data]]
    mic_data = gr.State([1, [0]])  # [Sample rate, [data]]

    # Options that the user wants
    selected_tran_opts = gr.State(list(transcription_options_headers.keys()))
    selected_audint_opts = gr.State(list(audio_intelligence_headers.keys()))

    # Current options = selected options - unavailable options for specified language
    current_tran_opts = gr.State([])
    current_audint_opts = gr.State([])

    # Selector for audio source
    radio = gr.Radio(["Audio File", "Record Audio"], label="Audio Source", value="Audio File")

    # Audio object for both file and microphone data
    audio_file = gr.Audio()
    mic_recording = gr.Audio(source="microphone", visible=False)

    # Audio wave plot
    audio_wave = gr.Plot(plot.value)

    # Checkbox for transcription options
    transcription_options = gr.CheckboxGroup(
        choices=list(transcription_options_headers.keys()),
        value=list(transcription_options_headers.keys()),
        label="Transcription Options",
    )

    # Warning for using Automatic Language detection
    w = "<div class='alert alert__warning'>" \
        "<p>Automatic Language Detection not available for Hindi or Japanese. For best results on non-US " \
        "English audio, specify the dialect instead of using Automatic Language Detection. " \
        "<br>" \
        "Some Audio Intelligence features are not available in some languages. See " \
        "<a href='https://airtable.com/shr53TWU5reXkAmt2/tblf7O4cffFndmsCH?backgroundColor=green'>here</a> " \
        "for more details.</p>" \
        "</div>"

    auto_lang_detect_warning = gr.HTML(w)

    # Checkbox for Audio Intelligence options
    audio_intelligence_selector = gr.CheckboxGroup(
        choices=list(audio_intelligence_headers.keys()),
        value=list(audio_intelligence_headers.keys()),
        label='Audio Intelligence Options'
    )

    # Language selector for manual language selection
    language = gr.Dropdown(
        choices=list(language_headers.keys()),
        value="US English",
        label="Language Specification",
        visible=False,
    )

    # Button to submit audio for processing with selected options
    submit = gr.Button('Submit')

    # Button to display example output without requiring an API key
    example = gr.Button('Show Example Output')

    # Results tab group
    phl = 10  # placeholder height, in lines, for the results tabs
    with gr.Tab('Transcript'):
        trans_tab = gr.Textbox(placeholder="Your formatted transcript will appear here ...",
                               lines=phl,
                               max_lines=25,
                               show_label=False)
    with gr.Tab('Speaker Labels'):
        diarization_tab = gr.Textbox(placeholder="Your diarized transcript will appear here ...",
                                     lines=phl,
                                     max_lines=25,
                                     show_label=False)
    with gr.Tab('Auto Highlights'):
        highlights_tab = gr.HighlightedText()
    with gr.Tab('Summary'):
        summary_tab = gr.HTML("<br>" * phl)
    with gr.Tab("Detected Topics"):
        topics_tab = gr.HTML("<br>" * phl)
    with gr.Tab("Sentiment Analysis"):
        sentiment_tab = gr.HTML("<br>" * phl)
    with gr.Tab("Entity Detection"):
        entity_tab = gr.HTML("<br>" * phl)
    with gr.Tab("Content Safety"):
        content_tab = gr.Plot()

    ####################################### Functionality ######################################################

    # Changing audio source changes Audio input component
    radio.change(fn=change_audio_source,
                 inputs=[
                     radio,
                     plot,
                     file_data,
                     mic_data],
                 outputs=[
                     audio_file,
                     mic_recording,
                     audio_wave,
                     plot])

    # Inputting audio updates plot
    audio_file.change(fn=plot_data,
                      inputs=[audio_file, plot],
                      outputs=[audio_wave, file_data, plot]
                      )
    mic_recording.change(fn=plot_data,
                         inputs=[mic_recording, plot],
                         outputs=[audio_wave, mic_data, plot])

    # Deselecting Automatic Language Detection shows Language Selector
    transcription_options.change(
        fn=set_lang_vis,
        inputs=transcription_options,
        outputs=[language, auto_lang_detect_warning])

    # Changing language deselects certain Tran / Audio Intelligence options
    language.change(
        fn=option_verif,
        inputs=[language,
                selected_tran_opts,
                selected_audint_opts],
        outputs=[transcription_options, audio_intelligence_selector, current_tran_opts, current_audint_opts]
    )

    # Selecting Tran options adds it to selected if language allows it
    transcription_options.change(
        fn=tran_selected,
        inputs=[language, transcription_options],
        outputs=[transcription_options, selected_tran_opts]
    )

    # Selecting audio intelligence options adds it to selected if language allows it
    audio_intelligence_selector.change(
        fn=audint_selected,
        inputs=[language, audio_intelligence_selector],
        outputs=[audio_intelligence_selector, selected_audint_opts]
    )

    # Clicking "submit" uploads selected audio to AssemblyAI, performs requested analyses, and displays results
    submit.click(fn=submit_to_AAI,
                 inputs=[api_key,
                         transcription_options,
                         audio_intelligence_selector,
                         language,
                         radio,
                         audio_file,
                         mic_recording],
                 outputs=[language,
                          trans_tab,
                          diarization_tab,
                          highlights_tab,
                          summary_tab,
                          topics_tab,
                          sentiment_tab,
                          entity_tab,
                          content_tab])

    # Clicking "Show Example Output" displays example results
    example.click(fn=example_output,
                  inputs=language,
                  outputs=[language,
                           trans_tab,
                           diarization_tab,
                           highlights_tab,
                           summary_tab,
                           topics_tab,
                           sentiment_tab,
                           entity_tab,
                           content_tab])

# Launch the application
demo.launch()  # pass share=True to generate a temporary public link