# Golshayan
############ Question Number 3 ################

library(mvtnorm)

# ---- Helpers ---------------------------------------------------------------

# Squared Euclidean distance from every row of X to every row of center.
#   X      : numeric matrix, one observation per row.
#   center : numeric matrix, one centre per row, same number of columns as X.
# Returns an nrow(X) x nrow(center) matrix of squared distances.
squared_distances = function(X, center) {
  d = vapply(
    seq_len(nrow(center)),
    function(j) rowSums(sweep(X, 2, center[j, ])^2),
    numeric(nrow(X))
  )
  # vapply() yields a plain vector when X has a single row; restore the shape.
  matrix(d, nrow = nrow(X))
}

# Index of the closest centre for every row of X.
# which.min() returns exactly one index per row (the first one on a tie),
# unlike which(ds == min(ds)), which can return several.
nearest_center = function(X, center) {
  apply(squared_distances(X, center), 1, which.min)
}

# ---- Simulation : ----------------------------------------------------------

# Four bivariate normal groups with diagonal covariance matrices.
x1 = rmvnorm(25, mean = c(5, 10),  sigma = diag(c(4, 1)))
l1 = rep(1, 25)
x2 = rmvnorm(40, mean = c(10, 5),  sigma = diag(c(5, 3)))
l2 = rep(2, 40)
x3 = rmvnorm(55, mean = c(20, 15), sigma = diag(c(9, 2)))
l3 = rep(3, 55)
x4 = rmvnorm(80, mean = c(12, 6),  sigma = diag(c(2, 7)))
l4 = rep(4, 80)

X = rbind(x1, x2, x3, x4) ; X
La = c(l1, l2, l3, l4) ; as.matrix(La)
simu = data.frame(X, La) ; simu

# Initial guesses for the four centres, stored row by row as (x, y) pairs.
ideal_mean_vector = c(12,5 , 7,13 , 23,2 , 6,4)
MM = matrix(ideal_mean_vector, 4, 2, byrow = TRUE) ; MM

# First assignment of every point to its closest initial centre.
D0 = squared_distances(X, MM)
ds = D0[nrow(X), ]            # squared distances of the last observation
Lb = apply(D0, 1, which.min)
ds
Lb

# ---- Making Function : -----------------------------------------------------

X
center = matrix(ideal_mean_vector, 4, 2, byrow = TRUE) ; center
k = 10

# k-means (Lloyd's algorithm) run for a fixed number of iterations.
#
# Arguments:
#   X      : numeric matrix or data frame, one observation per row.
#   k      : number of update iterations to run (NOT the number of clusters).
#   center : matrix of starting centres, one per row; the number of clusters
#            is nrow(center) and ncol(center) must equal ncol(X).
#
# Returns an unnamed list of two elements:
#   [[1]] integer vector with the final cluster label of every row of X;
#   [[2]] k x (nrow(center) * ncol(X)) matrix; row `iter` holds the centres
#         after iteration `iter`, flattened centre by centre, so that
#         matrix(row, nrow(center), ncol(X), byrow = TRUE) recovers them.
#
# A cluster that loses all of its points keeps its previous centre instead of
# turning into NaN.
k_means_clustering = function(X, k, center) {
  X = as.matrix(X)
  center = as.matrix(center)
  stopifnot(
    is.numeric(X), is.numeric(center),
    nrow(X) >= 1, nrow(center) >= 1,
    ncol(center) == ncol(X),
    !anyNA(X), !anyNA(center),
    length(k) == 1, k >= 1
  )

  n_clusters = nrow(center)
  n_dims = ncol(X)
  centMat = matrix(NA_real_, k, n_clusters * n_dims)

  # Assignment step against the caller-supplied starting centres.
  Lb = nearest_center(X, center)

  for (iter in seq_len(k)) {
    # Update step: each centre becomes the column-wise mean of its members.
    for (j in seq_len(n_clusters)) {
      members = which(Lb == j)
      if (length(members) > 0) {
        center[j, ] = colMeans(X[members, , drop = FALSE])
      }
    }
    # t() makes the flattening row-major: (c1x, c1y, c2x, c2y, ...).
    centMat[iter, ] = as.vector(t(center))

    # Assignment step against the updated centres.
    Lb = nearest_center(X, center)
  }

  return(list(Lb, centMat))
}

fit = k_means_clustering(X, k, center)
fit

# ---- Analysis : ------------------------------------------------------------

# Check convergence instead of assuming it: the centres have converged when
# the last two iterations produced the same centres.
converged = k > 1 && isTRUE(all.equal(fit[[2]][k, ], fit[[2]][k - 1, ]))
converged

# Final centres, reshaped back to one (x, y) pair per row.
Main_cent = fit[[2]][k, ]
Main_cent_Mat = matrix(Main_cent, nrow(center), ncol(X), byrow = TRUE) ; Main_cent_Mat

# Final assignment of every point to its closest final centre.
D1 = squared_distances(X, Main_cent_Mat)
ds = D1[nrow(X), ]            # squared distances of the last observation
Lb = apply(D1, 1, which.min)
ds
Lb