File size: 4,259 Bytes
d19290a
 
 
 
 
 
 
 
 
 
 
 
 
7d9573f
 
d19290a
7d9573f
 
d19290a
 
 
 
7d9573f
d19290a
 
 
7d9573f
d19290a
 
7d9573f
d19290a
 
 
 
7d9573f
d19290a
 
 
 
 
 
 
 
7d9573f
d19290a
 
 
 
 
 
 
 
7d9573f
d19290a
7d9573f
d19290a
7d9573f
 
d19290a
 
7d9573f
d19290a
 
 
 
 
 
 
 
7d9573f
d19290a
7d9573f
d19290a
7d9573f
 
d19290a
 
7d9573f
 
d19290a
 
 
7d9573f
d19290a
 
7d9573f
d19290a
 
 
7d9573f
d19290a
 
7d9573f
 
 
751c133
7d9573f
751c133
d19290a
 
751c133
d19290a
 
 
7d9573f
 
d19290a
 
 
7d9573f
 
 
d19290a
e03dfb5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import VotingClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, auc, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# Column of the uploaded CSV that holds the class labels.
TARGET_COLUMN = 'Target_goal'


def _report_classifier(name, clf, X_train, y_train, X_test, y_test):
    """Fit *clf* and display its accuracy and classification report.

    Args:
        name: Display name used in the Streamlit output.
        clf: Unfitted classifier exposing fit / predict / predict_proba.
        X_train, y_train: Training features and labels.
        X_test, y_test: Held-out features and labels.

    Returns:
        A ``(y_pred, y_proba)`` tuple: predicted labels and the full
        class-probability matrix for ``X_test``.
    """
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    y_proba = clf.predict_proba(X_test)

    st.write(f'{name} Accuracy: {accuracy_score(y_test, y_pred):.2f}')
    st.write(f"{name} Classification Report:")
    st.text(classification_report(y_test, y_pred))
    return y_pred, y_proba


def _show_confusion_matrix(name, y_true, y_pred):
    """Render the confusion matrix of one classifier as a heatmap."""
    st.write(f"{name} Confusion Matrix:")
    fig, ax = plt.subplots()
    sns.heatmap(confusion_matrix(y_true, y_pred), annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title(f'{name} Confusion Matrix')
    st.pyplot(fig)
    # Streamlit re-executes the script on every interaction; close the figure
    # so matplotlib does not accumulate open figures across reruns.
    plt.close(fig)


def _show_roc_curves(y_true, classes, curves):
    """Plot ROC curves for several classifiers on one set of axes.

    Args:
        y_true: Test labels (pandas Series).
        classes: Fitted ``classes_`` array of the classifiers; its order
            defines the column order of the probability matrices.
        curves: Iterable of ``(name, colour, y_proba)`` tuples.

    The ROC is only drawn for binary targets. The positive label is taken
    from ``classes[1]`` so that it always matches probability column 1,
    instead of assuming a particular label value.
    """
    if len(classes) != 2:
        st.warning(
            f'ROC curve is only shown for binary targets; found {len(classes)} classes.'
        )
        return

    positive_label = classes[1]
    y_binary = (y_true == positive_label).astype(int)
    if y_binary.nunique() < 2:
        # roc_curve is undefined when only one class is present.
        st.warning('ROC curve skipped: the test split contains only one class.')
        return

    fig, ax = plt.subplots()
    for name, colour, y_proba in curves:
        fpr, tpr, _ = roc_curve(y_binary, y_proba[:, 1])
        ax.plot(fpr, tpr, color=colour, lw=2, label=f'{name} (AUC = {auc(fpr, tpr):.2f})')
    ax.plot([0, 1], [0, 1], color='gray', lw=1, linestyle='--')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('ROC Curve')
    ax.legend(loc="lower right")
    st.pyplot(fig)
    plt.close(fig)


# Set Streamlit interface title
st.title('Classification Model Comparison: Stacking and Voting Classifiers')

# Allow user to upload data
uploaded_file = st.file_uploader("Please upload a CSV file", type=["csv"])

if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)

    # Fail with a readable message instead of a KeyError traceback when the
    # uploaded file lacks the label column.
    if TARGET_COLUMN not in df.columns:
        st.error(f"The uploaded CSV must contain a '{TARGET_COLUMN}' column.")
        st.stop()

    # Define features and target variable
    X = df.drop(columns=[TARGET_COLUMN])
    y = df[TARGET_COLUMN]

    # Split dataset
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Standardize data (scaler is fitted on the training split only)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Define base models shared by both ensembles
    estimators = [
        ('lr', LogisticRegression()),
        ('dt', DecisionTreeClassifier()),
        ('rf', RandomForestClassifier()),
        ('gb', GradientBoostingClassifier()),
        ('svc', SVC(probability=True)),  # probability=True is needed for predict_proba
    ]

    # Stacking classifier: accuracy and classification report
    stacking_clf = StackingClassifier(
        estimators=estimators,
        final_estimator=LogisticRegression()
    )
    y_pred_stack, y_proba_stack = _report_classifier(
        'Stacking Classifier', stacking_clf, X_train, y_train, X_test, y_test
    )

    # Voting classifier: accuracy and classification report
    voting_clf = VotingClassifier(
        estimators=estimators,
        voting='soft'
    )
    y_pred_vote, y_proba_vote = _report_classifier(
        'Voting Classifier', voting_clf, X_train, y_train, X_test, y_test
    )

    # Confusion matrix visualization
    _show_confusion_matrix('Stacking Classifier', y_test, y_pred_stack)
    _show_confusion_matrix('Voting Classifier', y_test, y_pred_vote)

    # ROC curve (binary targets only)
    _show_roc_curves(
        y_test,
        stacking_clf.classes_,
        [
            ('Stacking Classifier', 'blue', y_proba_stack),
            ('Voting Classifier', 'red', y_proba_vote),
        ],
    )