File size: 3,363 Bytes
5932572
 
 
 
 
bebc451
5932572
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2778ad7
 
614c5c3
5932572
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
655c4c5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import pandas as pd
from PyPDF2 import PdfReader
from docx import Document
import os
from groq import Groq
import gradio as gr

# Read an uploaded document and return its contents as plain text
def read_document(file):
    """Extract the text content of an uploaded document.

    The format is chosen from the file extension (case-insensitive):
    ``.txt``, ``.pdf``, ``.docx``, ``.csv``, ``.xls`` and ``.xlsx`` are
    handled; anything else yields ``"Unsupported file format"``.

    Args:
        file: Either a filesystem path (``str`` / ``os.PathLike``) or a
            file-like object exposing ``name`` and ``read()``.

    Returns:
        The extracted text, ``"Unsupported file format"`` for an unknown
        extension, or an ``"Error processing file: ..."`` message when
        reading fails. Errors are reported in the return value, not raised.
    """
    # Resolve the display name once, up front, so the error handler below
    # never has to touch attributes that may not exist on ``file``.
    is_path = isinstance(file, (str, os.PathLike))
    file_name = os.fspath(file) if is_path else str(getattr(file, 'name', ''))

    try:
        file_extension = os.path.splitext(file_name)[-1].lower()
        print(f"Processing file: {file_name} with extension {file_extension}")

        if file_extension == '.txt':
            if is_path:
                with open(file, 'rb') as handle:
                    raw = handle.read()
            else:
                raw = file.read()
            # A handle opened in text mode already yields str.
            return raw.decode('utf-8') if isinstance(raw, bytes) else raw

        if file_extension == '.pdf':
            reader = PdfReader(file)
            # Tolerate pages that yield no text instead of failing the
            # whole document.
            return ''.join(page.extract_text() or '' for page in reader.pages)

        if file_extension == '.docx':
            doc = Document(file)
            return '\n'.join(paragraph.text for paragraph in doc.paragraphs)

        if file_extension in ('.csv', '.xls', '.xlsx'):
            if file_extension == '.csv':
                df = pd.read_csv(file)
            else:
                df = pd.read_excel(file)
            return df.to_string(index=False)

        return "Unsupported file format"
    except Exception as e:
        # Best-effort boundary: report the failure as text so the UI can
        # display it rather than crashing the request.
        message = f"Error processing file: {file_name} - {str(e)}"
        print(message)
        return message

# Read the Groq API key from the environment so the secret never lives in
# source control. Set GROQ_API_KEY before starting the app.
# NOTE: a key was previously hard-coded on this line; treat it as exposed
# and revoke it.
api_key = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=api_key)

# Function to validate and truncate content to prevent API errors
def validate_content(text, max_length=8000):
    """Strip non-alphanumeric characters and cap the length of *text*.

    Args:
        text: The raw document text. ``None`` or an empty string yields
            an empty string.
        max_length: Maximum number of characters kept before truncation.
            Defaults to 8000, the limit this function always used.

    Returns:
        The text with every character that is neither alphanumeric nor
        whitespace removed. If the cleaned text is longer than
        *max_length* it is cut to that length and ``"..."`` is appended.
    """
    if not text:
        return ''

    # Keep letters, digits and whitespace only; punctuation is dropped.
    validated_text = ''.join(ch for ch in text if ch.isalnum() or ch.isspace())

    # The length check runs after filtering so removed characters do not
    # count against the limit.
    if len(validated_text) > max_length:
        validated_text = validated_text[:max_length] + "..."
    return validated_text

# Function to get an answer from the Groq API
def get_answer(question, model="llama3-8b-8192"):
    """Send *question* to the Groq chat-completions API and return the reply.

    Args:
        question: The full prompt, sent as a single user message.
        model: Name of the model to query.

    Returns:
        The content of the first choice's message, or an
        ``"Error in API call: ..."`` string if the request fails. Failures
        are reported in the return value, not raised.
    """
    try:
        chat_completion = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": question}],
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        print(f"Error in Groq API call: {str(e)}")
        # The extra response dump is diagnostic only. The attribute may be
        # missing or None and the body may not be JSON, so a failure here
        # must not mask the original error.
        response = getattr(e, 'response', None)
        if response is not None:
            try:
                print(f"Full response: {response.json()}")
            except Exception as detail_error:
                print(f"Could not decode error response: {detail_error}")
        return f"Error in API call: {str(e)}"

# Function to interface with the Gradio UI
def chatbot_interface(documents, question):
    """Answer *question* using the text of the uploaded *documents*.

    Each document is read with ``read_document``, cleaned and truncated
    with ``validate_content``, and the results are concatenated ahead of
    the question to form the prompt passed to ``get_answer``.

    Args:
        documents: Iterable of uploaded files, or ``None`` when nothing
            was uploaded (treated as no documents).
        question: The user's question.

    Returns:
        Whatever ``get_answer`` returns for the assembled prompt.
    """
    # ``documents or []`` keeps the "no upload" case from raising when
    # iterating over None.
    sections = [
        validate_content(read_document(doc)) + "\n\n"
        for doc in (documents or [])
    ]
    text = ''.join(sections)

    return get_answer(f"{text}\n\nQuestion: {question}")

# Gradio interface: builds the Blocks UI that wires the upload and question
# inputs to chatbot_interface, then starts the app.
with gr.Blocks(theme=gr.themes.Default(primary_hue="slate")) as demo:
    # Page heading and short usage hint.
    gr.Markdown("# RAG-based Q/A Chatbot with Document Support", elem_id="title")
    gr.Markdown("Upload documents and ask questions related to them.", elem_id="description")

    with gr.Row():
        # First column: the inputs (multi-file upload and the question box).
        with gr.Column():
            doc_input = gr.File(file_count="multiple", label="Upload Documents")
            question_input = gr.Textbox(label="Ask a Question")

        # Second column: the text box that receives the answer.
        with gr.Column():
            output = gr.Textbox(label="Answer")

    # Clicking the button calls chatbot_interface(documents, question) and
    # writes its return value into the answer box.
    submit_button = gr.Button("Get Answer")
    submit_button.click(chatbot_interface, inputs=[doc_input, question_input], outputs=output)

# Runs at module level, i.e. whenever this file is executed or imported.
demo.launch()