JERNGOC's picture
Update app.py
7eb6b34 verified
raw
history blame contribute delete
No virus
4.22 kB
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from io import BytesIO
# Streamlit feature-importance explorer.
#
# Flow: the user uploads a CSV that contains a "target" column; the script
# fits three models (linear regression, a decision tree, a random forest),
# collects one importance score per feature from each, shows a correlation
# heatmap and one bar chart per model, and offers the combined table as an
# Excel download.

# Let the user upload a CSV file.
uploaded_file = st.file_uploader("上傳一個 CSV 檔案", type="csv")

if uploaded_file is not None:
    # Parse the uploaded CSV into a DataFrame.
    df = pd.read_csv(uploaded_file)

    # The analysis needs a column literally named "target" to use as the label.
    if 'target' in df.columns:
        # Separate features from the label. Only numeric/bool feature columns
        # are kept: StandardScaler and the estimators below cannot consume
        # free-text columns and would otherwise abort with a raw traceback.
        features = df.drop('target', axis=1)
        X = features.select_dtypes(include=['number', 'bool'])
        y = df['target']

        skipped_columns = [col for col in features.columns if col not in X.columns]
        if skipped_columns:
            st.warning(
                "已略過下列非數值欄位(無法用於模型訓練):"
                + ", ".join(map(str, skipped_columns))
            )

        # Hold out 20% of the rows; the models are fitted on the remaining 80%.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )

        # Standardise features so the linear-regression coefficients are
        # comparable in magnitude across features.
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        # Not consumed by the importance analysis below; kept so the held-out
        # split is available in the same scaled space as the training data.
        X_test_scaled = scaler.transform(X_test)

        def calculate_importance():
            """Fit the three models on the training split and return their scores.

            Returns:
                A tuple ``(lr_importance, cart_importance, rf_importance)``,
                each holding one value per column of ``X``:
                absolute linear-regression coefficients (fitted on the scaled
                features), decision-tree ``feature_importances_`` and
                random-forest ``feature_importances_`` (both fitted on the
                unscaled features).
            """
            # Linear Regression: coefficient magnitude on standardised inputs.
            # NOTE: LinearRegression requires a numeric target.
            lr = LinearRegression()
            lr.fit(X_train_scaled, y_train)
            lr_importance = np.abs(lr.coef_)

            # CART (single decision tree).
            cart = DecisionTreeClassifier(random_state=42)
            cart.fit(X_train, y_train)
            cart_importance = cart.feature_importances_

            # Random Forest.
            rf = RandomForestClassifier(n_estimators=100, random_state=42)
            rf.fit(X_train, y_train)
            rf_importance = rf.feature_importances_

            return lr_importance, cart_importance, rf_importance

        # Assemble one row per feature with a column per model.
        lr_importance, cart_importance, rf_importance = calculate_importance()
        feature_importance = pd.DataFrame({
            'Feature': X.columns,
            'Linear Regression': lr_importance,
            'CART': cart_importance,
            'Random Forest': rf_importance,
        })

        # Rank features by their random-forest importance, highest first.
        feature_importance = feature_importance.sort_values(
            'Random Forest', ascending=False
        )

        # Correlation heatmap. numeric_only=True keeps this working when the
        # CSV also has non-numeric columns (pandas >= 2.0 raises otherwise).
        st.write("### 相關矩陣")
        corr_matrix = df.corr(numeric_only=True)
        corr_fig, corr_ax = plt.subplots(figsize=(10, 8))
        sns.heatmap(
            corr_matrix,
            annot=True,
            cmap='coolwarm',
            fmt='.2f',
            linewidths=0.5,
            ax=corr_ax,
        )
        # Hand Streamlit the explicit Figure (not the pyplot module) and close
        # it afterwards so figures do not accumulate across script reruns.
        st.pyplot(corr_fig)
        plt.close(corr_fig)

        def plot_individual_model(model_name):
            """Render a bar chart of one model's feature importances.

            Args:
                model_name: Name of a score column in ``feature_importance``
                    ('Linear Regression', 'CART' or 'Random Forest'); also
                    used in the chart title.
            """
            fig, ax = plt.subplots(figsize=(10, 6))
            ax.bar(feature_importance['Feature'], feature_importance[model_name])
            ax.set_title(f'{model_name} Feature Importance')
            ax.set_xlabel('Features')
            ax.set_ylabel('Importance')
            # Slant the feature names so long labels do not overlap.
            plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
            st.pyplot(fig)
            # Release the figure once Streamlit has rendered it.
            plt.close(fig)

        # Streamlit UI: one chart per model.
        st.write("### 特徵重要性分析")

        st.write("#### Linear Regression")
        plot_individual_model('Linear Regression')

        st.write("#### CART (Decision Tree)")
        plot_individual_model('CART')

        st.write("#### Random Forest")
        plot_individual_model('Random Forest')

        # Show the combined importance table.
        st.write("### 特徵重要性數據表")
        st.dataframe(feature_importance)

        def to_excel(df):
            """Serialise ``df`` to an in-memory .xlsx workbook.

            Args:
                df: DataFrame to write to a sheet named 'Feature Importance'
                    (without the index).

            Returns:
                The workbook contents as ``bytes``.
            """
            output = BytesIO()
            # The context manager finalises the workbook on exit and also
            # closes the writer if to_excel() raises.
            with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
                df.to_excel(writer, index=False, sheet_name='Feature Importance')
            return output.getvalue()

        # Offer the importance table as an Excel download.
        excel_data = to_excel(feature_importance)
        st.download_button(
            label='下載特徵重要性數據為 Excel 檔案',
            data=excel_data,
            file_name='feature_importance.xlsx',
            mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        )
    else:
        st.error("上傳的檔案中找不到 'target' 欄位,請確認檔案格式。")