Almahfouz's picture
Update app.py
63e9c23 verified
raw
history blame contribute delete
No virus
4.13 kB
import pandas as pd
from transformers import pipeline
import gradio as gr
import seaborn as sns
import matplotlib.pyplot as plt
from fuzzywuzzy import fuzz
# Restaurant reviews dataset, read once when the module is loaded.
# The path is relative, so it is resolved against the current working directory.
reviews_df = pd.read_csv(filepath_or_buffer='Restaurant_reviews.csv')

# Hugging Face sentiment-analysis pipeline, built once and reused for every request
sentiment_model = pipeline(
    task="sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
# Classify a user's review via dataset matching plus sentiment analysis
def classify_review(user_review):
    """Classify a review using its closest dataset entry and the sentiment model.

    The review is fuzzy-matched (token sort ratio, case-insensitive) against
    every non-missing entry of ``reviews_df['Review']``. When the best score is
    above 80, the matched row's ``Rating`` is turned into a positive/negative
    label (positive when the rating is >= 4) and the sentiment model is run on
    the user's text.

    Args:
        user_review: Free-text review entered by the user.

    Returns:
        A human-readable string: the combined classification report, a
        "not found" message when no dataset review scores above 80, a prompt
        to enter a valid review when the input is missing or blank, or an
        error message if anything unexpected goes wrong.
    """
    try:
        # Treat a missing review like a blank one instead of reporting an
        # AttributeError to the user.
        if user_review is None or not user_review.strip():
            return "Please enter a valid review."

        # Lower-case the query once rather than once per dataset row.
        query = user_review.lower()

        best_position = None
        best_score = 0
        # Only the review text is needed for matching, so iterate that column
        # instead of materialising every full row.
        for position, candidate in enumerate(reviews_df['Review']):
            if pd.isna(candidate):
                continue
            # Use fuzzy matching to find the most similar review in the dataset
            score = fuzz.token_sort_ratio(query, str(candidate).lower())
            if score > best_score:
                best_score = score
                best_position = position
                if best_score == 100:
                    # Later rows can only replace the match with a strictly
                    # higher score, so nothing can beat a perfect one.
                    break

        if best_score <= 80:
            return "Review not found in the dataset."

        best_match = reviews_df.iloc[best_position]
        rating = best_match['Rating']

        # Ratings may be stored as floats or strings (e.g. "4.5") or be
        # missing; int() would raise on those, so parse leniently.
        numeric_rating = pd.to_numeric(rating, errors='coerce')
        if pd.isna(numeric_rating):
            rating_based_classification = (
                f"Rating-based classification unavailable (rating: {rating})"
            )
        elif numeric_rating >= 4:
            rating_based_classification = f"Positive review based on rating: {rating}"
        else:
            rating_based_classification = f"Negative review based on rating: {rating}"

        sentiment_result = sentiment_model(user_review)[0]
        sentiment = sentiment_result['label']
        confidence = sentiment_result['score']
        sentiment_based_classification = (
            f"Model prediction: {sentiment} with confidence: {confidence:.2f}"
        )

        return (
            f"{rating_based_classification}\n"
            f"{sentiment_based_classification}\n"
            f"Matching Score: {best_score}%\n"
            f"Best Match\n{best_match}"
        )
    except Exception as e:
        # This function is the UI boundary: report the failure as text rather
        # than letting the exception propagate to the interface.
        return f"An error occurred: {str(e)}"
# Plot the distribution of ratings from the dataset
def plot_rating_distribution():
    """Build a count plot of the ``Rating`` column of ``reviews_df``.

    Returns:
        The matplotlib figure holding the plot, with one bar slot for each of
        the ratings 1 through 5.
    """
    # Work on an explicit figure/axes pair instead of pyplot's implicit
    # "current figure" state.
    fig, ax = plt.subplots(figsize=(8, 6))
    # NOTE(review): `order` lists the integers 1-5; ratings stored in another
    # form (strings, half-steps) would not get a bar — confirm against the data.
    sns.countplot(x='Rating', data=reviews_df, order=[1, 2, 3, 4, 5], ax=ax)
    ax.set_title('Distribution of Ratings')
    ax.set_xlabel('Rating')
    ax.set_ylabel('Count')
    fig.tight_layout()
    # Deregister the figure from pyplot so repeated calls do not accumulate
    # open figures; the returned Figure object itself remains usable.
    plt.close(fig)
    return fig
# Let users preview the dataset (first 10 rows)
def preview_dataset():
    """Return the first 10 rows of ``reviews_df`` for display.

    The row count matches what the preview interface's description promises.
    """
    return reviews_df.head(10)
# Create the Gradio interface for classifying reviews.
# Defined exactly once: a second identical definition would only rebuild the
# same Interface and rebind the name.
review_interface = gr.Interface(
    fn=classify_review,
    inputs=gr.Textbox(lines=2, placeholder="Enter your review here", label="Reviews"),
    outputs="text",
    title="Review Classifier Based on Rating and Hugging Face Model",
    description="Enter a restaurant review. The system will classify it based on the dataset rating and use a sentiment analysis model.",
)
# Gradio interface that renders the rating-distribution plot.
# It takes no user input; the figure comes from plot_rating_distribution.
plot_interface = gr.Interface(
    title="Rating Distribution",
    description="Shows the distribution of ratings in the dataset.",
    fn=plot_rating_distribution,
    inputs=[],
    outputs="plot",
)
# Gradio interface that shows a preview of the dataset as a dataframe.
# It takes no user input; the rows come from preview_dataset.
preview_interface = gr.Interface(
    title="Preview Restaurant Reviews Dataset",
    description="Displays the first 10 rows of the dataset for preview.",
    fn=preview_dataset,
    inputs=[],
    outputs="dataframe",
)
# Combine the three interfaces (Review Classifier, Rating Distribution,
# Dataset Preview) into tabs. The name list must have exactly one entry per
# interface, in the same order.
tabbed_interface = gr.TabbedInterface(
    [review_interface, plot_interface, preview_interface],
    ["Review Classifier", "Rating Distribution", "Dataset Preview"],
)

# Launch the Gradio interface
tabbed_interface.launch()