JERNGOC committed on
Commit
07c839f
1 Parent(s): 108ca19

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -69
app.py CHANGED
@@ -8,72 +8,81 @@ from sklearn.tree import DecisionTreeClassifier
8
  from sklearn.ensemble import RandomForestClassifier
9
  from sklearn.preprocessing import StandardScaler
10
 
11
- # 讀取數據
12
- df = pd.read_csv('heart.csv')
13
-
14
- # 準備特徵和目標變量
15
- X = df.drop('target', axis=1)
16
- y = df['target']
17
-
18
- # 分割數據
19
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
20
-
21
- # 標準化特徵
22
- scaler = StandardScaler()
23
- X_train_scaled = scaler.fit_transform(X_train)
24
- X_test_scaled = scaler.transform(X_test)
25
-
26
- # 計算特徵重要性
27
- def calculate_importance():
28
- # Linear Regression
29
- lr = LinearRegression()
30
- lr.fit(X_train_scaled, y_train)
31
- lr_importance = np.abs(lr.coef_)
32
-
33
- # CART
34
- cart = DecisionTreeClassifier(random_state=42)
35
- cart.fit(X_train, y_train)
36
- cart_importance = cart.feature_importances_
37
-
38
- # Random Forest
39
- rf = RandomForestClassifier(n_estimators=100, random_state=42)
40
- rf.fit(X_train, y_train)
41
- rf_importance = rf.feature_importances_
42
-
43
- return lr_importance, cart_importance, rf_importance
44
-
45
- # 創建特徵重要性 DataFrame
46
- lr_importance, cart_importance, rf_importance = calculate_importance()
47
- feature_importance = pd.DataFrame({
48
- 'Feature': X.columns,
49
- 'Linear Regression': lr_importance,
50
- 'CART': cart_importance,
51
- 'Random Forest': rf_importance
52
- })
53
-
54
- # 排序
55
- feature_importance = feature_importance.sort_values('Random Forest', ascending=False)
56
-
57
- # 繪製特徵重要性圖表
58
- def plot_importance(model):
59
- plt.figure(figsize=(10, 6))
60
- plt.bar(feature_importance['Feature'], feature_importance[model])
61
- plt.title(f'{model} Feature Importance')
62
- plt.xlabel('Features')
63
- plt.ylabel('Importance')
64
- plt.xticks(rotation=45, ha='right')
65
- st.pyplot(plt)
66
-
67
- # Streamlit UI
68
- st.title("心臟病預測模型特徵重要性分析")
69
- st.write("選擇一個模型來查看其特徵重要性:")
70
-
71
- # 下拉選擇模型
72
- model = st.selectbox("選擇模型", ["Linear Regression", "CART", "Random Forest"])
73
-
74
- # 顯示圖表
75
- plot_importance(model)
76
-
77
- # 顯示數據框
78
- st.write(f"{model} 特徵重要性數據:")
79
- st.dataframe(feature_importance[['Feature', model]])
 
 
 
 
 
 
 
 
 
 
8
  from sklearn.ensemble import RandomForestClassifier
9
  from sklearn.preprocessing import StandardScaler
10
 
11
+ # 讓使用者上傳 CSV 檔案
12
+ uploaded_file = st.file_uploader("上傳一個 CSV 檔案", type="csv")
13
+
14
+ if uploaded_file is not None:
15
+ # 讀取上傳的 CSV 檔案
16
+ df = pd.read_csv(uploaded_file)
17
+
18
+ # 確保數據裡有 "target" 欄位
19
+ if 'target' in df.columns:
20
+ # 準備特徵和目標變量
21
+ X = df.drop('target', axis=1)
22
+ y = df['target']
23
+
24
+ # 分割數據
25
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
26
+
27
+ # 標準化特徵
28
+ scaler = StandardScaler()
29
+ X_train_scaled = scaler.fit_transform(X_train)
30
+ X_test_scaled = scaler.transform(X_test)
31
+
32
+ # 計算特徵重要性
33
+ def calculate_importance():
34
+ # Linear Regression
35
+ lr = LinearRegression()
36
+ lr.fit(X_train_scaled, y_train)
37
+ lr_importance = np.abs(lr.coef_)
38
+
39
+ # CART
40
+ cart = DecisionTreeClassifier(random_state=42)
41
+ cart.fit(X_train, y_train)
42
+ cart_importance = cart.feature_importances_
43
+
44
+ # Random Forest
45
+ rf = RandomForestClassifier(n_estimators=100, random_state=42)
46
+ rf.fit(X_train, y_train)
47
+ rf_importance = rf.feature_importances_
48
+
49
+ return lr_importance, cart_importance, rf_importance
50
+
51
+ # 創建特徵重要性 DataFrame
52
+ lr_importance, cart_importance, rf_importance = calculate_importance()
53
+ feature_importance = pd.DataFrame({
54
+ 'Feature': X.columns,
55
+ 'Linear Regression': lr_importance,
56
+ 'CART': cart_importance,
57
+ 'Random Forest': rf_importance
58
+ })
59
+
60
+ # 排序
61
+ feature_importance = feature_importance.sort_values('Random Forest', ascending=False)
62
+
63
+ # 繪製特徵重要性圖表
64
+ def plot_importance(model):
65
+ plt.figure(figsize=(10, 6))
66
+ plt.bar(feature_importance['Feature'], feature_importance[model])
67
+ plt.title(f'{model} Feature Importance')
68
+ plt.xlabel('Features')
69
+ plt.ylabel('Importance')
70
+ plt.xticks(rotation=45, ha='right')
71
+ st.pyplot(plt)
72
+
73
+ # Streamlit UI
74
+ st.title("自定義CSV檔案分析 - 特徵重要性分析")
75
+ st.write("選擇一個模型來查看其特徵重要性:")
76
+
77
+ # 下拉選擇模型
78
+ model = st.selectbox("選擇模型", ["Linear Regression", "CART", "Random Forest"])
79
+
80
+ # 顯示圖表
81
+ plot_importance(model)
82
+
83
+ # 顯示數據框
84
+ st.write(f"{model} 特徵重要性數據:")
85
+ st.dataframe(feature_importance[['Feature', model]])
86
+
87
+ else:
88
+ st.error("上傳的檔案中找不到 'target' 欄位,請確認檔案格式。")