# Error in code of KMeans

Hi,
I am working through Unit 4 of “Machine Learning for Robotics”. In the K-Means example code I found some errors. The code works when only 2 cluster centroids are used, but when I change `self.K` to a larger number, it fails. The original code in the course book is:

``````import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import numpy as np
import random

class Kmeans_dev:
    """
    K-means clustering class, quoted as printed in the course book.

    NOTE(review): start_centroid_pos() builds the centroid array with one
    centroid per row, shape (K, n), but fit() reads and writes it by column.
    See the BUG comments in fit().
    """

    def __init__(self,X,K):
        """
        Class constructor.

        X is the data set: X.shape[0] is stored as the sample count m and
        X.shape[1] sizes the empty centroid placeholder. K is the number of
        clusters.
        """
        self.X = X
        self.output = {}
        # Empty placeholder; overwritten by start_centroid_pos() at the start of fit().
        self.centroids = np.array([]).reshape(self.X.shape[1],0)
        self.K = K
        self.m = self.X.shape[0]

    def start_centroid_pos(self, X, K):
        """
        Random initialization of K centroids.

        Returns a (K, n) array whose rows are rows of X taken at random
        indices. The draws are independent, so the same row can be picked
        more than once.
        """
        m,n = X.shape[0], X.shape[1]
        centroids = np.zeros((K,n))

        for i in range(K):
            #for i in range(1,K+1,1):
            centroids[i] = X[np.random.randint(0,m),:]

        return centroids

    def fit(self,n_iter):
        """
        Method to train the data set (position of centroids).

        Re-initializes the centroids at random, then repeats n_iter times:
        assign every sample to the centroid with the smallest squared
        distance, then set each centroid to the mean of its samples.
        """
        #randomly Initialize the centroids (callstart_centroid_pos() )
        self.centroids=self.start_centroid_pos(self.X,self.K)

        #compute Euclidian distances and assign clusters
        for n in range(n_iter):
            # Starts with zero columns; one column of squared distances is
            # appended per centroid.
            EuclidianDistance=np.array([]).reshape(self.m,0)
            for k in range(self.K):
                # BUG: self.centroids has shape (K, n), one centroid per row,
                # but [:,k] selects column k, which has length K.
                tempDist=np.sum((self.X-self.centroids[:,k])**2,axis=1)
                EuclidianDistance=np.c_[EuclidianDistance,tempDist]
            # Cluster labels are 1-based so that they match the keys of Y.
            C=np.argmin(EuclidianDistance,axis=1)+1

            Y={}
            for k in range(self.K):
                # The literal 2 hard-codes the number of features per sample.
                Y[k+1]=np.array([]).reshape(2,0)
            for i in range(self.m):
                # Append sample i as a new column of its cluster's array.
                Y[C[i]]=np.c_[Y[C[i]],self.X[i]]

            for k in range(self.K):
                # Transpose so that each row is one sample.
                Y[k+1] = Y[k+1].T
            for k in range(self.K):
                # BUG: same column-wise indexing as in the distance loop; the
                # mean has one entry per feature but is written into column k.
                self.centroids[:,k] = np.mean(Y[k+1],axis = 0)

            self.output=Y

    def predict(self):
        """
        Return of data set adherence to certain cluster.

        Returns the tuple (self.output, self.centroids.T): the dict of
        samples per cluster label and the transposed centroid array.
        """

        return self.output,self.centroids.T

# data set
X = np.array([[0.3,8.3 ], [3, 8], [2, 9],[0.3, 8.9],[1.7, 9.7 ],
              [0.9, 10.5], [10.3, 2.1],[10, 2],[7, 7 ], [6.9, 6.5],
              [6, 6],[1, 2], [1.5, 1.8], [5, 8 ], [8, 8], [1, 0.6],
              [9,11],[12, 5], [4.5, 4.8], [4.5, 3 ], [2, 8], [9, 3], [9,7]])

"""
Run k-means algorithm on given data set. Printing the output (position of centrod) and adherence of point to
certain cluster
"""

K= 2 # number of cluster you would like to create

#creation of class object and training (fit)
# NOTE: this fitted model is not plotted; kmeans is rebound inside the loop below.
kmeans=Kmeans_dev(X,K)
kmeans.fit(50)

# Both lists have exactly two entries, so color[k] / labels[k] below only
# cover K <= 2.
color=['blue','green']
labels=['cluster1','cluster2']

# 5 x 2 grid of subplots, flattened so that axs[i] addresses the i-th panel.
fig, axs = plt.subplots(5,2, figsize=(14, 28), facecolor='w', edgecolor='k')

axs = axs.ravel()

#you print the test results iteration by iteration (in order to show how the centroids moves)

for i in range(10):
    # NOTE: a new model is created and fitted for a single iteration on every
    # pass, so each panel starts from its own random initialization.
    kmeans=Kmeans_dev(X,K)

    kmeans.fit(1)

    Output,Centroids=kmeans.predict()

    for k in range(K):
        # Output is keyed by the 1-based cluster label; columns 0 and 1 are
        # used as the plot coordinates.
        axs[i].scatter(Output[k+1][:,0],Output[k+1][:,1],c=color[k],label=labels[k])
    # Centroids is the transposed array returned by predict(); this indexing
    # reads column 0 and column 1 of it as the plot coordinates.
    axs[i].scatter(Centroids[:,0],Centroids[:,1],s=300,c='red',label='Centroids', marker='*')

    axs[i].set_title("Centroids movement. Iteration "+ str(1+i))

plt.show()
``````

The errors are in the following places:
1.
In `def fit(self,n_iter):`, in the second `for` loop,

``````            for k in range(self.K):
tempDist=np.sum((self.X-self.centroids[:,k])**2,axis=1)
EuclidianDistance=np.c_[EuclidianDistance,tempDist]
``````

it should be:

``````            for k in range(self.K):
tempDist=np.sum((self.X-self.centroids[k,:])**2,axis=1)
EuclidianDistance=np.c_[EuclidianDistance,tempDist]
``````

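The indices are swapped: `start_centroid_pos()` builds `self.centroids` with shape (K, n), so centroid `k` is row `k` (`self.centroids[k,:]`), not column `k`.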
2.
In the same function, in the 6th `for` loop:

``````            for k in range(self.K):
                self.centroids[:,k] = np.mean(Y[k+1],axis = 0)
``````

It has the same index problem. It should be:

``````            for k in range(self.K):
                self.centroids[k, :] = np.mean(Y[k+1],axis = 0)
``````

3.
In the plotting part, where the star marker is drawn on the final centroids:

``````    axs[i].scatter(Centroids[:,0],Centroids[:,1],s=300,c='red',label='Centroids', marker='*')
``````

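Once the centroids are stored one per row, `predict()` returns `self.centroids.T`, which has shape (n, K): row 0 holds the x coordinates and row 1 the y coordinates. So it should be: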

``````    axs[i].scatter(Centroids[0, :],Centroids[1, :],s=300,c='red',label='Centroids', marker='*')
``````

the whole rectified code is:

``````import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import numpy as np
import random

class Kmeans_dev:
    """
    Minimal K-means clustering (Lloyd's algorithm) on a NumPy data set.

    Attributes:
        X: data set as a float array of shape (m, n), m samples with n
            features each.
        K: number of clusters.
        m: number of samples.
        centroids: centroid positions, shape (K, n), one centroid per row
            (zero rows until fit() is called).
        output: dict mapping the 1-based cluster label (1..K) to the
            (count, n) array of samples assigned to that cluster in the last
            iteration; empty until fit() has run at least one iteration.
    """

    def __init__(self, X, K):
        """
        Store the data set and the requested number of clusters.

        Args:
            X: array-like of shape (m, n).
            K: number of clusters to create.
        """
        self.X = np.asarray(X, dtype=float)
        self.output = {}
        # Placeholder with the same one-centroid-per-row layout that fit() uses.
        self.centroids = np.empty((0, self.X.shape[1]))
        self.K = K
        self.m = self.X.shape[0]

    def start_centroid_pos(self, X, K):
        """
        Random initialization of K centroids.

        Args:
            X: data set of shape (m, n).
            K: number of centroids to draw.

        Returns:
            A (K, n) float array whose rows are randomly chosen rows of X.
        """
        X = np.asarray(X, dtype=float)
        m = X.shape[0]
        # Pick distinct rows whenever there are enough samples: two identical
        # starting centroids would leave one of the clusters empty.
        chosen = np.random.choice(m, size=K, replace=K > m)
        return X[chosen, :]

    def fit(self, n_iter):
        """
        Train on the data set, i.e. compute the position of the centroids.

        The centroids are re-initialized at random, then n_iter iterations
        of "assign samples to the nearest centroid, move every centroid to
        the mean of its samples" are run.

        Args:
            n_iter: number of iterations to run.
        """
        # Randomly initialize the centroids, one row per centroid.
        self.centroids = self.start_centroid_pos(self.X, self.K)

        for _ in range(n_iter):
            # Squared Euclidean distance from every sample to every centroid,
            # shape (m, K), computed by broadcasting (m, 1, n) - (1, K, n).
            diff = self.X[:, np.newaxis, :] - self.centroids[np.newaxis, :, :]
            sq_dist = np.sum(diff ** 2, axis=2)
            # Labels are 1-based so that they match the keys of self.output.
            C = np.argmin(sq_dist, axis=1) + 1

            # Samples of each cluster, in their original order, for any
            # number of features.
            Y = {k + 1: self.X[C == k + 1] for k in range(self.K)}

            for k in range(self.K):
                # The mean of an empty cluster is NaN, so an empty cluster
                # keeps its previous centroid.
                if Y[k + 1].shape[0] > 0:
                    self.centroids[k, :] = np.mean(Y[k + 1], axis=0)

            self.output = Y

    def predict(self):
        """
        Return the cluster membership of the data set and the centroids.

        Returns:
            A tuple (output, centroids_t): output is the dict described on
            the class, and centroids_t is the transposed centroid array of
            shape (n, K), so row 0 holds the first coordinate of every
            centroid, row 1 the second, and so on.
        """
        return self.output, self.centroids.T

# data set
X = np.array([[0.3, 8.3], [3, 8], [2, 9], [0.3, 8.9], [1.7, 9.7],
              [0.9, 10.5], [10.3, 2.1], [10, 2], [7, 7], [6.9, 6.5],
              [6, 6], [1, 2], [1.5, 1.8], [5, 8], [8, 8], [1, 0.6],
              [9, 11], [12, 5], [4.5, 4.8], [4.5, 3], [2, 8], [9, 3], [9, 7]])

# Run the k-means algorithm on the given data set and plot the position of the
# centroids together with the cluster each point was assigned to.

K = 2  # number of clusters you would like to create

# creation of class object and training (fit)
kmeans = Kmeans_dev(X, K)
kmeans.fit(50)

# One colour and one label per cluster, derived from K so that changing K does
# not raise IndexError. Red is left out because it marks the centroids.
palette = ['blue', 'green', 'orange', 'purple', 'cyan',
           'magenta', 'brown', 'olive', 'gray', 'pink']
color = [palette[k % len(palette)] for k in range(K)]
labels = ['cluster' + str(k + 1) for k in range(K)]

# 5 x 2 grid of subplots, flattened so that axs[i] addresses the i-th panel.
fig, axs = plt.subplots(5, 2, figsize=(14, 28), facecolor='w', edgecolor='k')
axs = axs.ravel()

# Plot the result iteration by iteration to show how the centroids move.
# fit() always starts from a new random initialization, so the global NumPy
# random generator is re-seeded before every fit: each panel then starts from
# the same centroids and panel i shows the state after i + 1 iterations.
seed = 0
for i in range(10):
    np.random.seed(seed)
    kmeans = Kmeans_dev(X, K)
    kmeans.fit(i + 1)

    Output, Centroids = kmeans.predict()

    for k in range(K):
        # Output is keyed by the 1-based cluster label.
        axs[i].scatter(Output[k + 1][:, 0], Output[k + 1][:, 1],
                       c=color[k], label=labels[k])
    # predict() returns the transposed centroids, shape (n, K): row 0 holds
    # the x coordinates and row 1 the y coordinates.
    axs[i].scatter(Centroids[0, :], Centroids[1, :], s=300, c='red',
                   label='Centroids', marker='*')

    axs[i].set_title("Centroids movement. Iteration " + str(1 + i))

plt.show()
``````

If you change K from 2 to 3 or 4 in the original code, the error will show up.

Hello @MeineLiebeAxt ,

Thanks for the feedback. We will analyze this in the following days and update the course accordingly.

This topic was automatically closed after 7 days. New replies are no longer allowed.