File size: 4,224 Bytes
b4f6507
 
 
 
ba42d6d
b4f6507
 
 
 
 
d74d254
b4f6507
07c839f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba42d6d
 
 
 
 
 
 
708526e
 
 
 
 
07c839f
 
708526e
07c839f
 
 
708526e
 
 
 
 
 
 
 
 
 
 
07c839f
 
ba42d6d
 
07c839f
d74d254
 
 
 
 
7eb6b34
d74d254
 
 
 
 
 
 
 
 
07c839f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from io import BytesIO

# Let the user upload a CSV file through the Streamlit file-upload widget.
uploaded_file = st.file_uploader("上傳一個 CSV 檔案", type="csv")

if uploaded_file is not None:
    # Read the uploaded CSV into a DataFrame.
    df = pd.read_csv(uploaded_file)
    
    # Continue only when the data has a "target" column.
    if 'target' in df.columns:
        # Features are every column except "target"; "target" itself is the label.
        X = df.drop('target', axis=1)
        y = df['target']

        # Hold out 20% of the rows as a test split; the fixed random_state
        # makes the split identical on every run.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Standardize the features: the scaler is fitted on the training split
        # only and the same fitted transform is then applied to the test split.
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # Compute per-feature importance scores with three different models.
        def calculate_importance():
            """Fit three models on the training split and return their feature scores.

            The linear regression is fitted on the standardized features and is
            scored by the absolute value of its coefficients. The decision tree
            (CART) and the random forest are fitted on the unscaled features and
            are scored by their ``feature_importances_``.

            Returns:
                A 3-tuple ``(linear, cart, random_forest)`` of per-feature scores,
                in that order.
            """
            linear_model = LinearRegression().fit(X_train_scaled, y_train)

            # Both tree-based models share the same fit/readout steps, so they
            # are handled in one pass; the order (CART, then forest) is kept.
            tree_models = (
                DecisionTreeClassifier(random_state=42),
                RandomForestClassifier(n_estimators=100, random_state=42),
            )
            cart_scores, forest_scores = (
                model.fit(X_train, y_train).feature_importances_
                for model in tree_models
            )

            return np.abs(linear_model.coef_), cart_scores, forest_scores

        # Build a table with one row per feature and one column per model,
        # ordered from highest to lowest random-forest importance.
        lr_importance, cart_importance, rf_importance = calculate_importance()
        importance_columns = {
            'Feature': X.columns,
            'Linear Regression': lr_importance,
            'CART': cart_importance,
            'Random Forest': rf_importance,
        }
        feature_importance = pd.DataFrame(importance_columns).sort_values(
            by='Random Forest', ascending=False
        )

        # Draw the correlation matrix of the uploaded data as an annotated heatmap.
        st.write("### 相關矩陣")
        corr_matrix = df.corr()
        # Use an explicit Figure/Axes instead of pyplot's global state: Streamlit
        # expects a Figure object, and the figure is closed after rendering so
        # figures do not pile up in memory each time the script reruns.
        corr_fig, corr_ax = plt.subplots(figsize=(10, 8))
        sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5, ax=corr_ax)
        st.pyplot(corr_fig)
        plt.close(corr_fig)

        # Plot each model's feature importances as its own bar chart.
        def plot_individual_model(model_name):
            """Render a bar chart of one model's feature importances in the app.

            Args:
                model_name: Name of the ``feature_importance`` column to plot;
                    it is also used in the chart title.
            """
            # An explicit Figure/Axes is used rather than pyplot's implicit
            # current figure, so the exact figure is handed to Streamlit and can
            # be closed afterwards instead of accumulating across reruns.
            fig, ax = plt.subplots(figsize=(10, 6))
            ax.bar(feature_importance['Feature'], feature_importance[model_name])
            ax.set_title(f'{model_name} Feature Importance')
            ax.set_xlabel('Features')
            ax.set_ylabel('Importance')
            # Tilt the feature names so long labels do not overlap.
            plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
            st.pyplot(fig)
            plt.close(fig)

        # Streamlit UI: section title, then one heading + chart per model.
        st.write("### 特徵重要性分析")

        model_sections = (
            ("#### Linear Regression", 'Linear Regression'),
            ("#### CART (Decision Tree)", 'CART'),
            ("#### Random Forest", 'Random Forest'),
        )
        for heading, column in model_sections:
            st.write(heading)
            plot_individual_model(column)

        # Show the underlying importance table below the charts.
        st.write("### 特徵重要性數據表")
        st.dataframe(feature_importance)

        # Let the user download the feature-importance table as an Excel file.
        def to_excel(df):
            """Serialize a DataFrame to an in-memory .xlsx workbook.

            Args:
                df: DataFrame to write; the index is not included.

            Returns:
                The bytes of a workbook with a single sheet named
                'Feature Importance'.
            """
            output = BytesIO()
            # The context manager finalizes and closes the workbook even if
            # writing fails; the buffer holds the full file only after it exits.
            with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
                df.to_excel(writer, index=False, sheet_name='Feature Importance')
            return output.getvalue()

        # Serialize the importance table and offer it as an .xlsx download.
        excel_data = to_excel(feature_importance)
        st.download_button(label='下載特徵重要性數據為 Excel 檔案',
                           data=excel_data,
                           file_name='feature_importance.xlsx',
                           mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')

    else:
        # The uploaded data has no "target" column: show an error in the app.
        st.error("上傳的檔案中找不到 'target' 欄位,請確認檔案格式。")