import logging

import pandas as pd
import plotly.express as px
from sklearn.cluster import KMeans

logger = logging.getLogger(__name__)


def k_means(dataset, cols, drop_features, sample_data):
    """Build an elbow curve of K-Means inertia against the number of clusters.

    A ``KMeans`` model (``init='k-means++'``) is fitted on ``sample_data``
    for each cluster count from 1 up to 10, capped at the number of samples,
    and the resulting ``inertia_`` values are plotted as a line chart.

    Args:
        dataset: Unused by this function; kept for interface compatibility.
        cols: Unused by this function; kept for interface compatibility.
        drop_features: Unused by this function; kept for interface
            compatibility.
        sample_data: The data passed to ``KMeans.fit``. It must support
            ``len()``. NOTE(review): presumably an all-numeric DataFrame --
            confirm against the caller.

    Returns:
        The Plotly Express line figure (mode ``'lines+markers'``) with
        ``'Clusters'`` on the x axis and ``'Distortions'`` on the y axis.
        Cluster counts whose fit failed are omitted from the curve.
    """
    X = sample_data

    # KMeans rejects n_clusters > n_samples, so do not even attempt those.
    max_clusters = min(10, len(X))

    # Track the cluster count alongside each inertia value so both columns
    # always have the same length, even when some fits fail and are skipped.
    cluster_counts = []
    distortions = []
    for n_clusters in range(1, max_clusters + 1):
        try:
            model = KMeans(n_clusters=n_clusters, init='k-means++').fit(X)
        except Exception:
            # Best-effort sweep: one failed fit should not abort the whole
            # curve, but it must not be swallowed silently either.
            logger.warning(
                "KMeans fit failed for n_clusters=%d; skipping",
                n_clusters,
                exc_info=True,
            )
            continue
        cluster_counts.append(n_clusters)
        distortions.append(model.inertia_)

    df = pd.DataFrame({'Clusters': cluster_counts, 'Distortions': distortions})
    elbow_curve = px.line(df, x='Clusters', y='Distortions').update_traces(
        mode='lines+markers'
    )

    # TODO: a silhouette-score curve was previously sketched here (cluster
    # sizes 2-11, random_state=200, euclidean metric, sample_size=1000,
    # plotted with the 'seaborn' template) but was never enabled.
    return elbow_curve