Update app.py
Browse files
app.py
CHANGED
@@ -8,72 +8,81 @@ from sklearn.tree import DecisionTreeClassifier
|
|
8 |
from sklearn.ensemble import RandomForestClassifier
|
9 |
from sklearn.preprocessing import StandardScaler
|
10 |
|
11 |
-
#
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
#
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
#
|
78 |
-
st.
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
from sklearn.ensemble import RandomForestClassifier
|
9 |
from sklearn.preprocessing import StandardScaler
|
10 |
|
11 |
+
# Let the user upload a CSV file.
uploaded_file = st.file_uploader("上傳一個 CSV 檔案", type="csv")

if uploaded_file is not None:
    # Read the uploaded CSV file into a DataFrame.
    df = pd.read_csv(uploaded_file)

    # The analysis requires a "target" column to use as the label.
    if 'target' in df.columns:
        # Separate the feature columns from the target column.
        X = df.drop('target', axis=1)
        y = df['target']

        # Hold out 20% of the rows; the fixed seed keeps the split
        # identical across Streamlit reruns.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Standardize the features. The scaler is fitted on the training
        # split only and then applied to the test split.
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        def calculate_importance():
            """Fit the three models and return their per-feature importances.

            Reads ``X_train``, ``X_train_scaled`` and ``y_train`` from the
            enclosing script scope.

            Returns:
                A ``(lr_importance, cart_importance, rf_importance)`` tuple:
                the absolute linear-regression coefficients (fitted on the
                standardized training features), followed by the
                ``feature_importances_`` of the decision tree and of the
                random forest (both fitted on the unscaled training
                features).
            """
            # Linear Regression: coefficient magnitude is used as importance.
            lr = LinearRegression()
            lr.fit(X_train_scaled, y_train)
            lr_importance = np.abs(lr.coef_)

            # CART (single decision tree).
            cart = DecisionTreeClassifier(random_state=42)
            cart.fit(X_train, y_train)
            cart_importance = cart.feature_importances_

            # Random Forest.
            rf = RandomForestClassifier(n_estimators=100, random_state=42)
            rf.fit(X_train, y_train)
            rf_importance = rf.feature_importances_

            return lr_importance, cart_importance, rf_importance

        # Build the feature-importance DataFrame, one column per model.
        lr_importance, cart_importance, rf_importance = calculate_importance()
        feature_importance = pd.DataFrame({
            'Feature': X.columns,
            'Linear Regression': lr_importance,
            'CART': cart_importance,
            'Random Forest': rf_importance
        })

        # Sort rows by the Random Forest importance, largest first.
        feature_importance = feature_importance.sort_values('Random Forest', ascending=False)

        def plot_importance(model):
            """Render a bar chart of one model's feature importances.

            Args:
                model: Name of the ``feature_importance`` column to plot
                    ("Linear Regression", "CART" or "Random Forest").
            """
            # Draw on an explicit Figure instead of the global pyplot state
            # and hand that Figure to Streamlit.
            fig, ax = plt.subplots(figsize=(10, 6))
            ax.bar(feature_importance['Feature'], feature_importance[model])
            ax.set_title(f'{model} Feature Importance')
            ax.set_xlabel('Features')
            ax.set_ylabel('Importance')
            plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
            st.pyplot(fig)
            # pyplot keeps every figure it creates alive until it is closed;
            # close it so repeated script reruns do not accumulate figures.
            plt.close(fig)

        # Streamlit UI
        st.title("自定義CSV檔案分析 - 特徵重要性分析")
        st.write("選擇一個模型來查看其特徵重要性:")

        # Drop-down for choosing the model.
        model = st.selectbox("選擇模型", ["Linear Regression", "CART", "Random Forest"])

        # Show the chart.
        plot_importance(model)

        # Show the underlying data for the selected model.
        st.write(f"{model} 特徵重要性數據:")
        st.dataframe(feature_importance[['Feature', model]])

    else:
        st.error("上傳的檔案中找不到 'target' 欄位,請確認檔案格式。")
|