Spaces:
Sleeping
Sleeping
File size: 4,259 Bytes
d19290a 7d9573f d19290a 7d9573f d19290a 7d9573f d19290a 7d9573f d19290a 7d9573f d19290a 7d9573f d19290a 7d9573f d19290a 7d9573f d19290a 7d9573f d19290a 7d9573f d19290a 7d9573f d19290a 7d9573f d19290a 7d9573f d19290a 7d9573f d19290a 7d9573f d19290a 7d9573f d19290a 7d9573f d19290a 7d9573f d19290a 7d9573f 751c133 7d9573f 751c133 d19290a 751c133 d19290a 7d9573f d19290a 7d9573f d19290a e03dfb5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import VotingClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, auc, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
# Streamlit app: upload a CSV, train a stacking and a soft-voting ensemble on
# it, and compare them via accuracy, classification reports, confusion
# matrices and (for binary targets) ROC curves.

# Name of the CSV column that holds the class label to predict.
TARGET_COLUMN = 'Target_goal'


def _report_metrics(model_name, y_true, y_pred):
    """Write the accuracy and the classification report of one model."""
    st.write(f'{model_name} Accuracy: {accuracy_score(y_true, y_pred):.2f}')
    st.write(f"{model_name} Classification Report:")
    st.text(classification_report(y_true, y_pred))


def _show_confusion_matrix(model_name, y_true, y_pred):
    """Render the confusion matrix of one model as an annotated heatmap."""
    st.write(f"{model_name} Confusion Matrix:")
    fig, ax = plt.subplots()
    sns.heatmap(
        confusion_matrix(y_true, y_pred),
        annot=True,
        fmt='d',
        cmap='Blues',
        ax=ax,
    )
    ax.set_title(f'{model_name} Confusion Matrix')
    st.pyplot(fig)
    # pyplot keeps every figure it created until it is closed explicitly;
    # close it so figures do not pile up across script reruns.
    plt.close(fig)


def _show_roc_curves(y_true, scored_models, pos_label):
    """Plot one ROC curve per model on a shared axis.

    ``scored_models`` is a sequence of ``(model_name, color, scores)`` where
    ``scores`` are the predicted probabilities of ``pos_label``.
    """
    fig, ax = plt.subplots()
    for model_name, color, scores in scored_models:
        fpr, tpr, _ = roc_curve(y_true, scores, pos_label=pos_label)
        ax.plot(
            fpr,
            tpr,
            color=color,
            lw=2,
            label='%s (AUC = %0.2f)' % (model_name, auc(fpr, tpr)),
        )
    # Diagonal reference line of a random classifier.
    ax.plot([0, 1], [0, 1], color='gray', lw=1, linestyle='--')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('ROC Curve')
    ax.legend(loc="lower right")
    st.pyplot(fig)
    plt.close(fig)


# Set Streamlit interface title
st.title('Classification Model Comparison: Stacking and Voting Classifiers')

# Allow user to upload data
uploaded_file = st.file_uploader("Please upload a CSV file", type=["csv"])

if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)

    # Fail with a readable message instead of a raw KeyError traceback.
    if TARGET_COLUMN not in df.columns:
        st.error(f"The uploaded CSV must contain a '{TARGET_COLUMN}' column.")
        st.stop()

    # Define features and target variable
    X = df.drop(columns=[TARGET_COLUMN])
    y = df[TARGET_COLUMN]

    # Split dataset
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Standardize data; the scaler is fitted on the training split only and
    # then applied to the test split.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Define base models (shared by both ensembles)
    estimators = [
        ('lr', LogisticRegression()),
        ('dt', DecisionTreeClassifier()),
        ('rf', RandomForestClassifier()),
        ('gb', GradientBoostingClassifier()),
        # probability=True is required for soft voting / predict_proba.
        ('svc', SVC(probability=True)),
    ]

    # Stacking classifier
    stacking_clf = StackingClassifier(
        estimators=estimators,
        final_estimator=LogisticRegression(),
    )
    stacking_clf.fit(X_train, y_train)
    y_pred_stack = stacking_clf.predict(X_test)
    _report_metrics('Stacking Classifier', y_test, y_pred_stack)

    # Voting classifier
    voting_clf = VotingClassifier(
        estimators=estimators,
        voting='soft',
    )
    voting_clf.fit(X_train, y_train)
    y_pred_vote = voting_clf.predict(X_test)
    _report_metrics('Voting Classifier', y_test, y_pred_vote)

    # Confusion matrix visualization
    _show_confusion_matrix('Stacking Classifier', y_test, y_pred_stack)
    _show_confusion_matrix('Voting Classifier', y_test, y_pred_vote)

    # ROC curve
    classes = stacking_clf.classes_
    if len(classes) == 2:
        # Column 1 of predict_proba is the probability of classes[1], so that
        # class (not a hard-coded label value) must be the positive label
        # when building the ROC curve.
        positive_label = classes[1]
        _show_roc_curves(
            y_test,
            [
                (
                    'Stacking Classifier',
                    'blue',
                    stacking_clf.predict_proba(X_test)[:, 1],
                ),
                (
                    'Voting Classifier',
                    'red',
                    voting_clf.predict_proba(X_test)[:, 1],
                ),
            ],
            pos_label=positive_label,
        )
    else:
        # A single probability column is not a valid score for a target with
        # more than two classes, so no ROC curve is drawn in that case.
        st.info(
            "ROC curve is only shown for binary targets "
            f"(found {len(classes)} classes)."
        )