JERNGOC committed on
Commit
b4f6507
1 Parent(s): 7745710

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -0
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.linear_model import LinearRegression
7
+ from sklearn.tree import DecisionTreeClassifier
8
+ from sklearn.ensemble import RandomForestClassifier
9
+ from sklearn.preprocessing import StandardScaler
10
+
11
# Load the dataset.
df = pd.read_csv('heart.csv')

# Separate the target column from the feature matrix.
y = df['target']
X = df.drop(columns=['target'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features: the scaler is fitted on the training split only,
# then the same transform is applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
25
+
26
# Compute feature importances.
def calculate_importance():
    """Fit three models on the training split and return their importances.

    Reads the module-level ``X_train``, ``X_train_scaled`` and ``y_train``.

    Returns:
        A 3-tuple ``(lr_importance, cart_importance, rf_importance)``:
        the absolute coefficients of a linear regression fitted on the
        scaled features, then the ``feature_importances_`` of a decision
        tree and of a 100-tree random forest, both fitted on the unscaled
        features.
    """
    # Linear Regression: coefficient magnitude on standardized features.
    linear_model = LinearRegression().fit(X_train_scaled, y_train)

    # CART: a single decision tree with a fixed seed.
    tree_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

    # Random Forest: 100 trees with a fixed seed.
    forest_model = RandomForestClassifier(
        n_estimators=100,
        random_state=42,
    ).fit(X_train, y_train)

    return (
        np.abs(linear_model.coef_),
        tree_model.feature_importances_,
        forest_model.feature_importances_,
    )
44
+
45
# Build the feature-importance table: one row per feature, one column per
# model, ordered by Random Forest importance (largest first).
lr_importance, cart_importance, rf_importance = calculate_importance()
feature_importance = pd.DataFrame(
    {
        'Feature': X.columns,
        'Linear Regression': lr_importance,
        'CART': cart_importance,
        'Random Forest': rf_importance,
    }
).sort_values(by='Random Forest', ascending=False)
56
+
57
# Plot the feature-importance chart.
def plot_importance(model):
    """Render a bar chart of one model's feature importances in Streamlit.

    Args:
        model: Name of a column in the module-level ``feature_importance``
            DataFrame; it is also used in the chart title.

    The chart is drawn on its own ``Figure`` instead of the global pyplot
    state, and that figure is closed after rendering. Streamlit re-executes
    the script on every interaction, so an unclosed figure per run would
    accumulate in memory.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.bar(feature_importance['Feature'], feature_importance[model])
        ax.set_title(f'{model} Feature Importance')
        ax.set_xlabel('Features')
        ax.set_ylabel('Importance')
        # Rotate the feature names so long labels do not overlap.
        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
        st.pyplot(fig)
    finally:
        # Release the figure even if rendering fails.
        plt.close(fig)
66
+
67
# Streamlit UI: page title and prompt.
st.title("心臟病預測模型特徵重要性分析")
st.write("選擇一個模型來查看其特徵重要性:")

# Drop-down for choosing which model's importances to show.
model = st.selectbox(
    label="選擇模型",
    options=["Linear Regression", "CART", "Random Forest"],
)

# Show the chart for the selected model.
plot_importance(model)

# Show the underlying numbers: feature name plus the selected model's column.
st.write(f"{model} 特徵重要性數據:")
st.dataframe(feature_importance.loc[:, ['Feature', model]])