Click here to Skip to main content
15,898,036 members
Please Sign up or sign in to vote.
0.00/5 (No votes)
See more:
We have been working on a project based on the k-means clustering algorithm. First of all, we read a dataset of 505 values and need to cluster it based on particular conditions. Then we need to do the following:
1) value = centroid[data1]
2) loop i = (2, 505)
3) centroid(dataset length) = final centroid
4) loop i = (1, 505)
Euclidean distance of the 505 values against the final centroid
ED[505]
Max(ED)


What I have tried:

Python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random,math
import os
random.seed(1234)

from sklearn.cluster import DBSCAN 
from sklearn.preprocessing import StandardScaler 
from sklearn.preprocessing import normalize 
from sklearn.decomposition import PCA 

# Load the raw samples; every column of the sheet is used as a feature.
# (An earlier variant of this script read a space-separated 'data.txt'.)
df = pd.read_csv('/content/sample_data/run dataset - Sheet1.csv')
df_arr = df.values

# Standardise the features so all attributes are on a comparable scale.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df_arr)

# Rescale every sample (row) to unit norm and wrap the result in a DataFrame.
# Note: this does not make the data Gaussian, it only normalises row length.
X_normalized = pd.DataFrame(normalize(X_scaled))

# Reduce to two principal components so the data can be clustered and
# plotted in 2-D; the components are exposed as columns 'P1' and 'P2'.
pca = PCA(n_components=2)
X_principal = pd.DataFrame(
    pca.fit_transform(X_normalized),
    columns=['P1', 'P2'],
)
print(X_principal.head())

# Coordinates of every sample in the 2-D PCA projection.
xtrain = X_principal['P1']
ytrain = X_principal['P2']

# Show the raw, unclustered data.  A single vectorised call replaces the
# former per-point loop, whose toggling flag attached the legend label to
# every other point instead of attaching it exactly once.
plt.scatter(xtrain, ytrain, s=20, c='r', marker='o', label='class1 train.txt')
plt.show()
# Number of clusters; the steps below are written out explicitly for two.
k = 2

# Seed the two centroids from fixed sample indices (these were originally
# random draws, e.g. random.randint(0, 2998)).
x1 = 1
x2 = 2
if x1 == x2:
    # The two seeds must be distinct samples.
    x2 = x1 + 1
centroid1x = xtrain[x1]
centroid1y = ytrain[x1]
centroid2x = xtrain[x2]
centroid2y = ytrain[x2]

# Initial assignment step: each sample goes to the nearer centroid by
# Euclidean distance; ties go to cluster 2.
dist1 = np.hypot(xtrain - centroid1x, ytrain - centroid1y)
dist2 = np.hypot(xtrain - centroid2x, ytrain - centroid2y)
in_cluster1 = dist1 < dist2
cluster1x = xtrain[in_cluster1].tolist()
cluster1y = ytrain[in_cluster1].tolist()
cluster2x = xtrain[~in_cluster1].tolist()
cluster2y = ytrain[~in_cluster1].tolist()
# The initial assignment is deliberately not plotted here: those markers
# would land on the same figure as the final clusters, so every point that
# later changes cluster would be shown with both markers.

# Remember the seeds so the refinement loop can detect convergence.
prevcentroid1x = centroid1x
prevcentroid1y = centroid1y
prevcentroid2x = centroid2x
prevcentroid2y = centroid2y
# Index of the iteration at which the centroids stopped moving; stays 0 if
# no convergence was reached within the iteration budget.
calculate = 0
for new in range(10):
    # Update step: move each centroid to the mean of its members.  An empty
    # cluster keeps its previous centroid, because np.mean([]) would yield
    # NaN and poison every later distance comparison.
    if cluster1x:
        centroid1x = np.mean(cluster1x)
        centroid1y = np.mean(cluster1y)
    if cluster2x:
        centroid2x = np.mean(cluster2x)
        centroid2y = np.mean(cluster2y)

    current = (centroid1x, centroid1y, centroid2x, centroid2y)
    previous = (prevcentroid1x, prevcentroid1y, prevcentroid2x, prevcentroid2y)
    if current == previous:
        # Centroids unchanged, so the assignments are stable: converged.
        calculate = new
        break

    # Assignment step: Euclidean distance (squared differences, not
    # doubled ones) to each centroid; ties go to cluster 2.
    dist1 = np.hypot(xtrain - centroid1x, ytrain - centroid1y)
    dist2 = np.hypot(xtrain - centroid2x, ytrain - centroid2y)
    in_cluster1 = dist1 < dist2
    cluster1x = xtrain[in_cluster1].tolist()
    cluster1y = ytrain[in_cluster1].tolist()
    cluster2x = xtrain[~in_cluster1].tolist()
    cluster2y = ytrain[~in_cluster1].tolist()

    prevcentroid1x, prevcentroid1y, prevcentroid2x, prevcentroid2y = current

print(calculate)

# Plot the final clustering.
plt.scatter(cluster1x, cluster1y, s=20, c='b', marker='x', label='Cluster 1')
plt.scatter(cluster2x, cluster2y, s=20, c='g', marker='o', label='Cluster 2')
plt.legend(loc='best')
plt.show()
Aa
Posted
Updated 17-Feb-21 10:38am
v2
Comments
[no name] 17-Feb-21 14:28pm    
What is the problem?
What you get and what you expect to get?
Patrice T 17-Feb-21 17:16pm    
What is the problem with this code ?

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)



CodeProject, 20 Bay Street, 11th Floor Toronto, Ontario, Canada M5J 2N8 +1 (416) 849-8900