"""Gradio app that classifies restaurant reviews.

A submitted review is fuzzy-matched against the reviews in
``Restaurant_reviews.csv``.  When a close match is found, the app reports a
rating-based classification taken from the matched row together with the
prediction of a Hugging Face sentiment model.  Two extra tabs show the
distribution of ratings and a preview of the dataset.
"""

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from fuzzywuzzy import fuzz
from transformers import pipeline

# A fuzzy-match score (0-100) must be strictly greater than this for the
# submitted review to count as "found" in the dataset.
MATCH_THRESHOLD = 80

# Ratings at or above this value are reported as positive.
POSITIVE_RATING_THRESHOLD = 4

# Number of rows shown in the "Dataset Preview" tab.
PREVIEW_ROWS = 10

# Load the restaurant reviews dataset
reviews_df = pd.read_csv('Restaurant_reviews.csv')

# Load the Hugging Face sentiment analysis model
sentiment_model = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)


def _find_best_match(user_review):
    """Return ``(row, score)`` for the dataset review closest to *user_review*.

    Rows whose ``Review`` cell is missing are skipped.  On ties the earliest
    row wins, because only a strictly higher score replaces the current best.
    Returns ``(None, 0)`` when no row scores above zero.
    """
    query = user_review.lower()  # lower-case once instead of once per row
    best_position = None
    best_score = 0
    for position, text in enumerate(reviews_df['Review']):
        if pd.isna(text):
            continue
        score = fuzz.token_sort_ratio(query, str(text).lower())
        if score > best_score:
            best_score = score
            best_position = position
    if best_position is None:
        return None, 0
    return reviews_df.iloc[best_position], best_score


def _describe_rating(rating):
    """Return the rating-based classification line for a matched row.

    The rating is coerced with ``pd.to_numeric`` so that values such as
    ``"4.5"`` are handled; a rating that cannot be read as a number yields an
    explanatory line instead of raising.
    """
    numeric_rating = pd.to_numeric(rating, errors='coerce')
    if pd.isna(numeric_rating):
        return f"Rating unavailable for the matched review: {rating}"
    if numeric_rating >= POSITIVE_RATING_THRESHOLD:
        return f"Positive review based on rating: {rating}"
    return f"Negative review based on rating: {rating}"


def classify_review(user_review):
    """Classify a user's review via dataset matching and sentiment analysis.

    Args:
        user_review: Free-text review entered in the UI.

    Returns:
        A human-readable string: a prompt when the input is empty, a
        "not found" message when no dataset review scores above
        ``MATCH_THRESHOLD``, otherwise the rating-based classification, the
        model prediction with its confidence, the match score and the
        matched row.  Unexpected failures are reported as a string rather
        than raised, so the UI always has text to display.
    """
    try:
        # Also covers None, on which .strip() would raise AttributeError.
        if not user_review or not user_review.strip():
            return "Please enter a valid review."

        best_match, best_score = _find_best_match(user_review)
        if best_match is None or best_score <= MATCH_THRESHOLD:
            return "Review not found in the dataset."

        rating_based_classification = _describe_rating(best_match['Rating'])

        # truncation=True keeps over-long reviews within the model's
        # maximum input length instead of failing inside the model.
        sentiment_result = sentiment_model(user_review, truncation=True)[0]
        sentiment = sentiment_result['label']
        confidence = sentiment_result['score']
        sentiment_based_classification = (
            f"Model prediction: {sentiment} with confidence: {confidence:.2f}"
        )

        return (
            f"{rating_based_classification}\n"
            f"{sentiment_based_classification}\n"
            f"Matching Score: {best_score}%\n"
            f"Best Match\n{best_match}"
        )
    except Exception as e:  # UI boundary: surface the error as text
        return f"An error occurred: {str(e)}"


def plot_rating_distribution():
    """Return a matplotlib figure with the count of each rating from 1 to 5."""
    # Coerce to numbers so the integer `order` below matches the values even
    # if the column was read as strings; unreadable ratings become NaN.
    ratings = pd.to_numeric(reviews_df['Rating'], errors='coerce')

    fig, ax = plt.subplots(figsize=(8, 6))
    sns.countplot(x=ratings, order=[1, 2, 3, 4, 5], ax=ax)
    ax.set_title('Distribution of Ratings')
    ax.set_xlabel('Rating')
    ax.set_ylabel('Count')
    fig.tight_layout()
    return fig


def preview_dataset():
    """Return the first ``PREVIEW_ROWS`` rows of the dataset."""
    return reviews_df.head(PREVIEW_ROWS)


# Create the Gradio interface for classifying reviews
review_interface = gr.Interface(
    fn=classify_review,
    inputs=gr.Textbox(lines=2, placeholder="Enter your review here", label="Reviews"),
    outputs="text",
    title="Review Classifier Based on Rating and Hugging Face Model",
    description="Enter a restaurant review. The system will classify it based on the dataset rating and use a sentiment analysis model.",
)

# Create the Gradio interface for plotting the rating distribution
plot_interface = gr.Interface(
    fn=plot_rating_distribution,
    inputs=[],
    outputs="plot",
    title="Rating Distribution",
    description="Shows the distribution of ratings in the dataset.",
)

# Create the Gradio interface for previewing the dataset
preview_interface = gr.Interface(
    fn=preview_dataset,
    inputs=[],
    outputs="dataframe",
    title="Preview Restaurant Reviews Dataset",
    description=f"Displays the first {PREVIEW_ROWS} rows of the dataset for preview.",
)

# Combine all interfaces (Review Classifier, Rating Distribution, Dataset
# Preview) into tabs; there is exactly one tab name per interface.
tabbed_interface = gr.TabbedInterface(
    [review_interface, plot_interface, preview_interface],
    ["Review Classifier", "Rating Distribution", "Dataset Preview"],
)

# Launch the Gradio interface only when run as a script, so importing this
# module does not start a web server.
if __name__ == "__main__":
    tabbed_interface.launch()