File size: 6,649 Bytes
b1c8f17
eb92a4f
b5b2e6a
b1c8f17
 
 
 
b5b2e6a
b1c8f17
7bd26ee
b1c8f17
 
 
 
 
 
 
 
 
576d9e2
 
 
 
 
 
 
b5b2e6a
270e05e
b1c8f17
 
270e05e
db9256f
7bd26ee
 
 
 
 
 
 
 
 
24a6044
 
 
7bd26ee
 
 
 
41880ba
b1c8f17
 
 
7bd26ee
 
 
b5b2e6a
b1c8f17
7bd26ee
41880ba
b1c8f17
 
7bd26ee
6fe2220
7bd26ee
6fe2220
b1c8f17
6fe2220
54775b1
a56efa8
6fe2220
 
 
b5b2e6a
6fe2220
 
270e05e
6fe2220
 
8d2a747
b1c8f17
6fe2220
 
 
 
 
 
b1c8f17
6fe2220
b1c8f17
 
 
 
 
 
 
 
 
 
949bf2b
c8510e0
 
 
 
 
7bd26ee
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
from fastapi import FastAPI, HTTPException, Request, Query
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List, Dict, Any
from helper_functions_api import md_to_html, search_brave, fetch_and_extract_content, limit_tokens, together_response, insert_data
import os
from dotenv import load_dotenv, find_dotenv

# Configuration is read from the process environment. Loading from a .env
# file is currently disabled; uncomment the call below to enable it.
#load_dotenv("keys.env")

# ASGI application object; the route and middleware further down attach to it.
app = FastAPI()

# Provider keys looked up leniently: each one is None when its variable is unset.
TOGETHER_API_KEY = os.environ.get('TOGETHER_API_KEY')
BRAVE_API_KEY = os.environ.get('BRAVE_API_KEY')
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
HELICON_API_KEY = os.environ.get("HELICON_API_KEY")

# Supabase credentials looked up strictly: a missing variable raises KeyError
# while this module is being imported.
SUPABASE_USER = os.environ['SUPABASE_USER']
SUPABASE_PASSWORD = os.environ['SUPABASE_PASSWORD']

# Default model identifiers (Groq naming).
llm_default_small = "llama3-8b-8192"
llm_default_medium = "llama3-70b-8192"

# Fallback model identifiers (Together naming).
llm_fallback_small = "meta-llama/Llama-3-8b-chat-hf"
llm_fallback_medium = "meta-llama/Llama-3-70b-chat-hf"

# Stand-alone system prompts. Each literal is split at sentence boundaries for
# readability only; the concatenated text is exactly what is sent at runtime,
# so spacing quirks (e.g. "request.The") are intentionally left as they are.
SysPromptJson = (
    "You are now in the role of an expert AI who can extract structured information from user request. "
    "Both key and value pairs must be in double quotes. "
    "You must respond ONLY with a valid JSON file. "
    "Do not add any additional comments."
)
SysPromptList = (
    "You are now in the role of an expert AI who can extract structured information from user request. "
    "All elements must be in double quotes. "
    "You must respond ONLY with a valid python List. "
    "Do not add any additional comments."
)
SysPromptDefault = (
    "You are an expert AI, complete the given task. "
    "Do not add any additional comments."
)
SysPromptMd = (
    "You are an expert AI who can create a structured report using information provided in the context from user request."
    "The report should be in markdown format consists of markdown tables structured into subtopics. "
    "Do not add any additional comments."
)
SysPromptSearch = (
    "Extract from the input text to form a concise Google search query, "
    "focusing only on the main topic and omitting additional redundant details in max 8 words, "
    "include year if necessory, 2024, "
    "Do not add any additional comments."
)

# Report-writing system prompts, indexed first by mode ("offline" / "online")
# and then by the requested output format.
sys_prompts = {
    "offline": {
        # Same text as SysPromptDefault.
        "Chat": SysPromptDefault,
        "Full Text Report": (
            "You are an expert AI who can create a detailed report from user request. "
            "The report should be in markdown format. "
            "Do not add any additional comments."
        ),
        "Tabular Report": (
            "You are an expert AI who can create a structured report from user request."
            "The report should be in markdown format structured into subtopics/tables/lists. "
            "Do not add any additional comments."
        ),
        "Tables only": (
            "You are an expert AI who can create a structured tabular report from user request."
            "The report should be in markdown format consists of only markdown tables. "
            "Do not add any additional comments."
        ),
    },
    "online": {
        "Chat": (
            "You are an expert AI, complete the given task using the provided context. "
            "Do not add any additional comments."
        ),
        "Full Text Report": (
            "You are an expert AI who can create a detailed report using information scraped from the internet. "
            "You should decide which information is relevant to the given task and use it to create a report. "
            "The report should be in markdown format. "
            "Do not add any additional comments."
        ),
        "Tabular Report": (
            "You are an expert AI who can create a structured report using information scraped from the internet. "
            "You should decide which information is relevant to the given task and use it to create a report. "
            "The report should be in markdown format structured into subtopics/tables/lists. "
            "Do not add any additional comments."
        ),
        "Tables only": (
            "You are an expert AI who can create a structured tabular report using information scraped from the internet. "
            "You should decide which information is relevant to the given task. "
            "The report should be in markdown format consists of only markdown tables. "
            "Do not add any additional comments."
        ),
    },
}

# Request body accepted by the /generate_report endpoint.
# Every field has a default, so an empty JSON object is a valid body.
# NOTE(review): the `enum` lists are passed as extra field metadata while the
# annotations stay plain `str` - confirm whether values are actually validated.
class QueryModel(BaseModel):
    # Subject of the report.
    topic: str = Query(
        default="market research",
        description="input query to generate Report",
    )
    # Free-text details accompanying the topic.
    description: str = Query(
        default="",
        description="additional context for report",
    )
    # Caller identity fields.
    user_id: str = Query(
        default="",
        description="unique user id",
    )
    user_name: str = Query(
        default="",
        description="user name",
    )
    # Whether the endpoint takes its web-search branch.
    internet: bool = Query(
        default=True,
        description="Enable Internet search",
    )
    # Used by the endpoint as the key into the system-prompt table.
    output_format: str = Query(
        default="Tabular Report",
        description="Output format for the report",
        enum=[
            "Chat",
            "Full Text Report",
            "Tabular Report",
            "Tables only",
        ],
    )
    # Forwarded by the endpoint to the content-extraction helper.
    data_format: str = Query(
        default="Structured data",
        description="Type of data to extract from the internet",
        enum=[
            "No presets",
            "Structured data",
            "Quantitative data",
        ],
    )

@app.post("/generate_report")
async def generate_report(request: Request, query: QueryModel):
    """Build a report for ``query.topic`` and return it rendered as HTML.

    When ``query.internet`` is true, a short search query is sent to
    ``search_brave``, the resulting URLs are passed to
    ``fetch_and_extract_content``, and the token-limited result is embedded in
    the LLM prompt. Otherwise the prompt is built from the topic and
    description alone. The prompt, the collected source text and the generated
    report are stored through ``insert_data`` before the response is returned.

    Args:
        request: Incoming HTTP request (currently unused).
        query: Report parameters supplied in the request body.

    Returns:
        A dict with ``"report"`` (the report passed through ``md_to_html``)
        and ``"references"`` (a mapping of source URL to its text passed
        through ``md_to_html``).

    Raises:
        HTTPException: 422 when ``query.output_format`` has no matching system
            prompt for the selected mode.
    """
    topic = query.topic
    description = query.description
    mode = "online" if query.internet else "offline"

    # An unknown output format used to escape as a KeyError (HTTP 500); report
    # it as a client error instead.
    try:
        system_prompt = sys_prompts[mode][query.output_format]
    except KeyError:
        raise HTTPException(
            status_code=422,
            detail=(
                f"Unsupported output_format {query.output_format!r}; "
                f"expected one of {list(sys_prompts[mode])}"
            ),
        ) from None

    if query.internet:
        # The LLM-written search query (SysPromptSearch) is currently disabled;
        # the first 20 characters of the description stand in for it. The
        # description defaults to "", so fall back to the topic rather than
        # issuing an empty search.
        search_query = (description or topic)[:20]
        print(f"query:{description}, formatted query:{search_query}")

        # Search for relevant URLs, then pull their content.
        urls = search_brave(search_query, num_results=4)
        all_text_with_urls = fetch_and_extract_content(query.data_format, urls, topic)

        # Cap the scraped material before embedding it in the prompt.
        additional_context = limit_tokens(str(all_text_with_urls))
        prompt = f"#### COMPLETE THE TASK: {description} #### IN THE CONTEXT OF ### CONTEXT: {topic} USING THE #### SCRAPED DATA:{additional_context}"
    else:
        # NOTE(review): topic and description swap roles relative to the online
        # prompt above - confirm this is intentional.
        prompt = f"#### COMPLETE THE TASK: {topic} #### IN THE CONTEXT OF ### CONTEXT: {description}"
        # Placeholder pair so the storage and reference steps below still run.
        all_text_with_urls = [("", "")]

    # NOTE(review): the helper calls in this handler are not awaited, so they
    # presumably block the event loop while running - confirm and consider a
    # threadpool if latency matters.
    md_report = together_response(prompt, model=llm_default_medium, SysPrompt=system_prompt)

    # Persist the request, the gathered sources and the generated report.
    insert_data(query.user_id, topic, description, str(all_text_with_urls), md_report)

    # all_text_with_urls is iterated as (text, url) pairs; a repeated URL keeps
    # the last text seen for it.
    references_html = {
        url: str(md_to_html(text)) for text, url in all_text_with_urls
    }

    return {
        "report": md_to_html(md_report),
        "references": references_html,
    }

# Cross-origin policy: every origin, HTTP method and request header is
# accepted, and credentialed requests are allowed.
# NOTE(review): wildcard origins combined with allow_credentials=True is a very
# permissive setup; confirm it is intended or list the trusted origins explicitly.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_headers=["*"],
    allow_methods=["*"],
)