Spaces:

zkcpku
/

CodeAgentBench

Running

File size: 11,216 Bytes

a52a4e5

"""
Usage:
python3 qa_browser.py --share
"""

# import argparse
from collections import defaultdict
import re

import gradio as gr
import json

# NOTE(review): this constant does not appear to influence any behaviour in
# the visible source -- confirm before relying on it or removing it.
MAX_py_file_paths_depth = 3




# Maps a file path to that file's content; replaced by load_py_file_paths().
py_file_paths = {}

# Dataset records indexed two ways; replaced by load_comment():
#   "title2comment": record["title"]      -> record
#   "path2comment":  record["class_link"] -> record
comment = {"title2comment": {}, "path2comment": {}}
# NOTE(review): the two dicts below are not referenced anywhere else in the
# visible source -- presumably leftovers from an earlier version; confirm.
model_judgments_normal = {}
model_judgments_math = {}

def load_py_file_paths(data_path):
    """Load the path -> content mapping from a JSON Lines file.

    Every non-blank line of ``data_path`` must be a JSON object with a
    ``"path"`` key and a ``"content"`` key. The resulting mapping is also
    stored in the module-level ``py_file_paths`` global.

    Args:
        data_path: Path of the JSON Lines file to read.

    Returns:
        A dict mapping each record's ``"path"`` to its ``"content"``. When a
        path occurs more than once, the last record wins.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks ``"path"`` or ``"content"``.
    """
    global py_file_paths
    records = []
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(data_path, "r", encoding="utf-8") as f:
        for line in f:
            # Tolerate blank lines (e.g. an empty line at the end of the file)
            # instead of crashing inside json.loads.
            if line.strip():
                records.append(json.loads(line))
    py_file_paths = {record["path"]: record["content"] for record in records}
    return py_file_paths

def load_comment(data_path):
    """Load the dataset records from a JSON Lines file and index them.

    Every non-blank line of ``data_path`` must be a JSON object with at least
    a ``"title"`` key and a ``"class_link"`` key. The resulting index is also
    stored in the module-level ``comment`` global.

    Args:
        data_path: Path of the JSON Lines file to read.

    Returns:
        A dict with two entries:
        ``"title2comment"`` maps each record's ``"title"`` to the record and
        ``"path2comment"`` maps each record's ``"class_link"`` to the record.
        When a key occurs more than once, the last record wins.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks ``"title"`` or ``"class_link"``.
    """
    global comment
    title2comment = {}
    path2comment = {}
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(data_path, "r", encoding="utf-8") as f:
        for line in f:
            # Tolerate blank lines (e.g. an empty line at the end of the file).
            if not line.strip():
                continue
            record = json.loads(line)
            title2comment[record["title"]] = record
            path2comment[record["class_link"]] = record
    comment = {"title2comment": title2comment, "path2comment": path2comment}
    return comment

def display_question(question_selector, file_path_selector, request: gr.Request):
    """Callback for the question dropdown: show a question and its code.

    Args:
        question_selector: Title of the selected question, or ``None`` when
            nothing is selected.
        file_path_selector: Current value of the file-path dropdown.
        request: Gradio request object (unused).

    Returns:
        A list of five values: an update for the file-path dropdown, the
        question text, and the three code-pane strings (highlighted snippet,
        full file, test file). With no question selected, the dropdown keeps
        its current value and the four text values are empty strings.

    Raises:
        KeyError: If ``question_selector`` is not a known title, or the
            record refers to a file that is not in ``py_file_paths``.
    """
    if question_selector is None:
        return [gr.Dropdown.update(value=file_path_selector)] + [""] * 4

    entry = comment['title2comment'][question_selector]
    class_link = entry['class_link']

    # class_link has the form "<file path>#L<first>-L<last>".
    link_parts = class_link.split("#")
    code_path = link_parts[0]
    line_spec = link_parts[1].replace("L", "") if len(link_parts) > 1 else ""

    highlight_lines = None
    if line_spec:
        highlight_lines = [int(number) for number in line_spec.split("-")]
        if len(highlight_lines) == 1:
            # Single-line link such as "#L10": expand it to an explicit
            # [first, last] pair so the snippet covers exactly that line.
            highlight_lines.append(highlight_lines[0])

    question_mds, code_mds = to_gradio_chat_mds(
        entry['comment'],
        code_path,
        highlight_lines,
        class_link,
        entry['test_file_path'],
    )
    return [gr.Dropdown.update(value=code_path)] + question_mds + code_mds

def display_answer(question_selector, file_path_selector, request: gr.Request):
    """Callback for the file-path dropdown: refresh the three code panes.

    When the selected file is the one the current question points at, the
    question's line range is highlighted and its test file is shown.
    Otherwise the selected file is shown without highlight or test code.

    Args:
        question_selector: Title of the selected question, or ``None``.
        file_path_selector: Path of the file to display, or ``None``.
        request: Gradio request object (unused).

    Returns:
        A list of three strings: snippet pane, full-file pane, test pane.
        All three are empty when no file is selected.

    Raises:
        KeyError: If ``question_selector`` is not ``None`` and not a known
            title, or a referenced file is not in ``py_file_paths``.
    """
    if file_path_selector is None:
        # Nothing to show; previously this crashed with a KeyError.
        return [""] * 3

    if question_selector is None:
        # A file can be picked before any question is selected.
        _, code_mds = to_gradio_chat_mds("", file_path_selector)
        return code_mds

    entry = comment['title2comment'][question_selector]
    class_link = entry['class_link']
    link_parts = class_link.split("#")

    if link_parts[0] != file_path_selector:
        # Browsing a file unrelated to the question: plain view only.
        _, code_mds = to_gradio_chat_mds(entry['comment'], file_path_selector)
        return code_mds

    # class_link has the form "<file path>#L<first>-L<last>".
    line_spec = link_parts[1].replace("L", "") if len(link_parts) > 1 else ""
    highlight_lines = None
    if line_spec:
        highlight_lines = [int(number) for number in line_spec.split("-")]
        if len(highlight_lines) == 1:
            # Single-line link such as "#L10": expand to [first, last].
            highlight_lines.append(highlight_lines[0])

    _, code_mds = to_gradio_chat_mds(
        entry['comment'],
        file_path_selector,
        highlight_lines,
        class_link,
        entry['test_file_path'],
    )
    return code_mds

# def display_answer(
#     file_path_selector, request: gr.Request
# ):
#     # if comment['title2comment'][question_selector]['class_link'].split("#")[0] == file_path_selector:
#     #     choice = question_selector
#     # else:
#     #     choice = None

#     code_path = file_path_selector
#     question = ""
#     highlight_lines = None
    
#     question_mds,code_mds = to_gradio_chat_mds(question, code_path, highlight_lines)

#     return code_mds


# Match a blank line ("\n\n") followed by a list marker, captured as group 1:
# a numbered marker such as "1. " (pattern1) or a bullet "- " (pattern2).
# Raw strings keep "\d" from being treated as an invalid string escape.
newline_pattern1 = re.compile(r"\n\n(\d+\. )")
newline_pattern2 = re.compile(r"\n\n(- )")


def to_gradio_chat_mds(question, code_path, highlight_lines = None, class_link = None, test_path = None):
    """Build the text shown in the question pane and the three code panes.

    File contents are looked up in the module-level ``py_file_paths`` mapping
    and are assumed to be sequences of line strings (they are sliced by line
    number and joined with ``""``).

    Args:
        question: Text for the question pane.
        code_path: Key into ``py_file_paths`` of the source file to show.
        highlight_lines: Optional ``[first, last]`` 1-based inclusive line
            range to extract as the snippet. A one-element sequence selects a
            single line. The argument is not modified.
        class_link: Optional link text shown in the snippet header.
        test_path: Optional key into ``py_file_paths`` of the test file.

    Returns:
        A pair ``(question_mds, code_mds)``: ``question_mds`` is
        ``[question]`` and ``code_mds`` is ``[snippet, full_source,
        test_source]``. Without ``highlight_lines`` the snippet is a banner
        pointing to the full-code tab; without ``test_path`` the test source
        is an empty string.

    Raises:
        KeyError: If ``code_path`` or a truthy ``test_path`` is not in
            ``py_file_paths``.
    """
    source_code = py_file_paths[code_path]
    test_code = py_file_paths[test_path] if test_path else [""]
    rule = "#################################################################\n"

    if not highlight_lines:
        snippet = rule + "# Please Check the `Full Code in the File` column\n" + rule
    else:
        # Work on local copies: the caller's list must not be changed.
        first_line = highlight_lines[0]
        last_line = highlight_lines[1] if len(highlight_lines) > 1 else first_line
        header = [rule]
        if class_link is not None:
            header.append("# From " + class_link + "\n")
        header.append(f"# From Line {first_line} to Line {last_line}\n")
        header.append(rule)
        # Line numbers are 1-based and inclusive; slices are 0-based and
        # end-exclusive, hence only the start is shifted by one.
        snippet = "".join(header) + "".join(source_code[first_line - 1:last_line])

    return [question], [snippet, "".join(source_code), "".join(test_code)]


def build_pairwise_browser_tab():
    """Create the browser widgets and wire their change callbacks.

    Layout, top to bottom:
      * a row with the question dropdown and the file-path dropdown;
      * a row with the question pane on the left and, on the right, three
        tabs: "Infilling Code", "Full Code in the File" and "Test Code".

    The dropdown choices are read from the module-level ``comment`` and
    ``py_file_paths`` globals, so both must be loaded before this is called.
    ``build_demo`` calls this function inside its ``gr.Blocks`` context.

    Returns:
        A one-element tuple ``(question_selector,)`` holding the question
        dropdown, so the caller can set its initial value.
    """
    file_path_list = list(py_file_paths.keys())
    question_list = list(comment['title2comment'].keys())

    # Selector row: question on the left, file path on the right.
    with gr.Row():
        with gr.Column():
            question_selector = gr.Dropdown(
                choices=question_list,
                label="Question",
                container=False
            )
        with gr.Column():
            file_path_selector = gr.Dropdown(
                choices=file_path_list,
                label="File Path",
                container=False
            )

    # Content row: one question pane and three code panes.
    question_mds = []
    code_mds = []
    with gr.Row():
        with gr.Column():
            question_mds.append(gr.Code(language="markdown"))
        with gr.Column():
            with gr.Tab("Infilling Code"):
                # elem_id lets the page CSS target the snippet pane.
                code_mds.append(gr.Code(language="python", elem_id="locate_code"))
            with gr.Tab("Full Code in the File"):
                code_mds.append(gr.Code(language="python"))
            with gr.Tab("Test Code"):
                code_mds.append(gr.Code(language="python"))

    # Callbacks: output order must match what the handlers return.
    question_selector.change(
        display_question,
        [question_selector, file_path_selector],
        [file_path_selector] + question_mds + code_mds,
    )
    file_path_selector.change(
        display_answer,
        [question_selector, file_path_selector],
        code_mds,
    )

    return (question_selector,)




code_highlight_css = """
#chatbot .hll { background-color: #ffffcc }
#chatbot .c { color: #408080; font-style: italic }
#chatbot .err { border: 1px solid #FF0000 }
#chatbot .k { color: #008000; font-weight: bold }
#chatbot .o { color: #666666 }
#chatbot .ch { color: #408080; font-style: italic }
#chatbot .cm { color: #408080; font-style: italic }
#chatbot .cp { color: #BC7A00 }
#chatbot .cpf { color: #408080; font-style: italic }
#chatbot .c1 { color: #408080; font-style: italic }
#chatbot .cs { color: #408080; font-style: italic }
#chatbot .gd { color: #A00000 }
#chatbot .ge { font-style: italic }
#chatbot .gr { color: #FF0000 }
#chatbot .gh { color: #000080; font-weight: bold }
#chatbot .gi { color: #00A000 }
#chatbot .go { color: #888888 }
#chatbot .gp { color: #000080; font-weight: bold }
#chatbot .gs { font-weight: bold }
#chatbot .gu { color: #800080; font-weight: bold }
#chatbot .gt { color: #0044DD }
#chatbot .kc { color: #008000; font-weight: bold }
#chatbot .kd { color: #008000; font-weight: bold }
#chatbot .kn { color: #008000; font-weight: bold }
#chatbot .kp { color: #008000 }
#chatbot .kr { color: #008000; font-weight: bold }
#chatbot .kt { color: #B00040 }
#chatbot .m { color: #666666 }
#chatbot .s { color: #BA2121 }
#chatbot .na { color: #7D9029 }
#chatbot .nb { color: #008000 }
#chatbot .nc { color: #0000FF; font-weight: bold }
#chatbot .no { color: #880000 }
#chatbot .nd { color: #AA22FF }
#chatbot .ni { color: #999999; font-weight: bold }
#chatbot .ne { color: #D2413A; font-weight: bold }
#chatbot .nf { color: #0000FF }
#chatbot .nl { color: #A0A000 }
#chatbot .nn { color: #0000FF; font-weight: bold }
#chatbot .nt { color: #008000; font-weight: bold }
#chatbot .nv { color: #19177C }
#chatbot .ow { color: #AA22FF; font-weight: bold }
#chatbot .w { color: #bbbbbb }
#chatbot .mb { color: #666666 }
#chatbot .mf { color: #666666 }
#chatbot .mh { color: #666666 }
#chatbot .mi { color: #666666 }
#chatbot .mo { color: #666666 }
#chatbot .sa { color: #BA2121 }
#chatbot .sb { color: #BA2121 }
#chatbot .sc { color: #BA2121 }
#chatbot .dl { color: #BA2121 }
#chatbot .sd { color: #BA2121; font-style: italic }
#chatbot .s2 { color: #BA2121 }
#chatbot .se { color: #BB6622; font-weight: bold }
#chatbot .sh { color: #BA2121 }
#chatbot .si { color: #BB6688; font-weight: bold }
#chatbot .sx { color: #008000 }
#chatbot .sr { color: #BB6688 }
#chatbot .s1 { color: #BA2121 }
#chatbot .ss { color: #19177C }
#chatbot .bp { color: #008000 }
#chatbot .fm { color: #0000FF }
#chatbot .vc { color: #19177C }
#chatbot .vg { color: #19177C }
#chatbot .vi { color: #19177C }
#chatbot .vm { color: #19177C }
#chatbot .il { color: #666666 }
"""
# .highlight  { background: #f8f8f8; }

table_css = """
table {
    line-height: 0em
}
"""
old_block_css = (
    code_highlight_css
    + """
pre {
    white-space: pre-wrap;       /* Since CSS 2.1 */
    white-space: -moz-pre-wrap;  /* Mozilla, since 1999 */
    white-space: -pre-wrap;      /* Opera 4-6 */
    white-space: -o-pre-wrap;    /* Opera 7 */
    word-wrap: break-word;       /* Internet Explorer 5.5+ */
}
#notice_markdown th {
    display: none;
}
#notice_markdown td {
    padding-top: 8px;
    padding-bottom: 8px;
}
#leaderboard_markdown td {
    padding-top: 8px;
    padding-bottom: 8px;
}
"""
)
block_css = old_block_css + (
    """
#locate_code {
    background-color: #DEEBF7;
}
#user_question {
    background-color: #E2F0D9;
}
#reference {
    background-color: #FFF2CC;
}
#model_explanation {
    background-color: #FBE5D6;
}
"""
)


def load_demo():
    """Return the dropdown update applied to the question selector on page load.

    Selects the first question title in ``comment['title2comment']``. When the
    dataset is empty the value is set to ``None`` instead of raising
    ``IndexError``.

    Returns:
        The ``gr.Dropdown.update(...)`` value for the question dropdown.
    """
    first_title = next(iter(comment['title2comment']), None)
    return gr.Dropdown.update(value=first_title)


def build_demo():
    """Assemble the complete Gradio app.

    Creates a ``gr.Blocks`` page styled with ``block_css``, adds the browser
    widgets via ``build_pairwise_browser_tab``, registers ``load_demo`` to
    set the question dropdown on page load, and appends an acknowledgement
    section.

    Returns:
        The ``gr.Blocks`` instance; the caller is responsible for launching it.
    """
    with gr.Blocks(
        title="Class-level Repo Code Generation Benchmark (numpy-ml)",
        theme=gr.themes.Base(text_size=gr.themes.sizes.text_lg),
        css=block_css,
    ) as demo:

        (question_selector,) = build_pairwise_browser_tab()

        # Pre-select a question when the page loads; this in turn triggers
        # the dropdown's change callback.
        demo.load(load_demo, [], [question_selector])

        # The component registers itself with the enclosing Blocks context,
        # so no reference to it needs to be kept.
        gr.Markdown(
            """
            # Acknowledgement
            This benchmark is based on the [numpy-ml](https://github.com/ddbourgin/numpy-ml) project.
            """
        )

    return demo


# if __name__ == "__main__":
#     parser = argparse.ArgumentParser()
#     parser.add_argument("--host", type=str, default="0.0.0.0")
#     parser.add_argument("--port", type=int)
#     parser.add_argument("--share", action="store_true")
#     parser.add_argument("--bench-name", type=str, default="mt_bench")
#     args = parser.parse_args()
#     print(args)

py_file_paths_path = "numpyml-display/all_py_content.jsonl"
comment_path = "numpyml-display/final_dataset.jsonl"

# Load py_file_paths
py_file_paths = load_py_file_paths(py_file_paths_path)

# Load answers
comment = load_comment(comment_path)

demo = build_demo()
# demo.queue(concurrency_count=10, status_update_rate=10, api_open=False).launch()
demo.launch()