# Cluster analysis of the cars data set: agglomerative hierarchical
# clustering (single, average, complete and Ward linkage) followed by
# PAM (partitioning around medoids) with 3 and 5 groups.

library(cluster)

# Data ----

# The first column of the CSV is used as the row names (row.names = 1).
D <- read.csv("cars2.csv", header = TRUE, row.names = 1)
dim(D)
head(D)

# Column-wise means and variances. vapply() works on the data frame
# directly (no matrix coercion) and guarantees one numeric per column.
vapply(D, mean, numeric(1))
vapply(D, var, numeric(1))

# Centre and scale every column before computing distances.
D.sd <- scale(D)
head(D.sd)

# AHC ----

# The distance matrix is the same for all three linkages: compute it once.
d.sd <- dist(D.sd)

hc.sing <- hclust(d.sd, method = "single")
plot(hc.sing, cex = 0.5)

hc.ave <- hclust(d.sd, method = "average")
plot(hc.ave, cex = 0.5)

hc.com <- hclust(d.sd, method = "complete")
plot(hc.com, cex = 0.5)

# Ward linkage via cluster::agnes(), drawn as a dendrogram.
hc.ward <- agnes(D.sd, method = "ward")
pltree(hc.ward, cex = 0.5, hang = -1)

# Cut the Ward tree into 3 groups and list the members of each group.
cl3 <- cutree(hc.ward, 3)
# cl5 <- cutree(hc.ward, 5)
table(cl3)

labels <- row.names(D)
labels[cl3 == 1]
labels[cl3 == 2]
labels[cl3 == 3]

# PAM ----

# PAM with 3 groups: average silhouette width and the medoids.
pam.out3 <- pam(D.sd, 3)
pam.out3$silinfo$avg.width
pam.out3$medoids
# D[pam.out3$id.med, ]

# PAM with 5 groups.
pam.out5 <- pam(D.sd, 5)
pam.out5$silinfo$avg.width
# D[pam.out5$id.med, ]

# Remember the graphical parameters so they can be restored at the end.
old.par <- par(no.readonly = TRUE)

# Plots 1 and 2 of each PAM fit, side by side. The argument is spelled
# out as `which.plots` instead of relying on partial matching of `which`.
par(mfrow = c(1, 2))
plot(pam.out3, which.plots = 1, main = " ")
plot(pam.out3, which.plots = 2, main = " ")

par(mfrow = c(1, 2))
plot(pam.out5, which.plots = 1, main = " ")
plot(pam.out5, which.plots = 2, main = " ")

# Cluster profiles ----

# Attach the 3-group PAM assignment to the original (unscaled) data and
# plot four of its columns against the cluster factor.
D$Group <- as.factor(pam.out3$clustering)

par(mfrow = c(2, 2), mar = c(2, 2, 2, 2))
plot(D$Group, D$pw, col = 2:4, main = "Power", xlab = "cluster")
plot(D$Group, D$speed, col = 2:4, main = "Max. Speed", xlab = "cluster")
plot(D$Group, D$acc, col = 2:4, main = "Acceleration", xlab = "cluster")
plot(D$Group, D$cons, col = 2:4, main = "Fuel Consumption", xlab = "cluster")

# Restore the graphical parameters that were in effect before plotting.
par(old.par)