# Cluster analysis of the cars data set: agglomerative hierarchical
# clustering (AHC) with four linkage methods, followed by PAM with
# K = 3 and K = 5 and a per-cluster look at four of the variables.

library(cluster)

# Data ----

# The first column of the CSV file supplies the row names.
D <- read.csv("cars.csv", header = TRUE, row.names = 1)
dim(D)
head(D)

# Column-wise means and variances on the original scale.
# colMeans()/vapply() work on the columns directly instead of coercing the
# whole data frame to a matrix as apply() does.
colMeans(D)
vapply(D, var, numeric(1))

# Center and scale every column before computing distances.
D.sd <- scale(D)
head(D.sd)

# AHC ----

# dist() uses the Euclidean metric by default.
eu.dist <- dist(D.sd)

# Single linkage
hc.sing <- agnes(eu.dist, method = "single")
pltree(hc.sing, cex = 0.8, hang = -1, main = "SL")

# Average linkage
hc.ave <- agnes(eu.dist, method = "average")
pltree(hc.ave, cex = 0.8, hang = -1, main = "AL")

# Complete linkage
hc.com <- agnes(eu.dist, method = "complete")
pltree(hc.com, cex = 0.8, hang = -1, main = "CL")

# Ward's method
hc.ward <- agnes(eu.dist, method = "ward")
pltree(hc.ward, cex = 0.8, hang = -1, main = "Ward")

# Cut the Ward tree into 3 groups and list the members of each group.
cl3 <- cutree(hc.ward, 3)
# cl5 <- cutree(hc.ward, 5)
table(cl3)

labels <- row.names(D)
labels[cl3 == 1]
labels[cl3 == 2]
labels[cl3 == 3]

# PAM ----

# PAM with K = 3 groups
pam.out3 <- pam(D.sd, 3)
pam.out3$silinfo$avg.width
pam.out3$medoids
# D[pam.out3$id.med, ]

# PAM with K = 5 groups
pam.out5 <- pam(D.sd, 5)
pam.out5$silinfo$avg.width
pam.out5$medoids
# D[pam.out5$id.med, ]

# Remember the current graphical settings so they can be restored once the
# multi-panel figures below have been drawn.
old.par <- par(no.readonly = TRUE)

# Silhouette plots (which.plots = 2) for the two PAM solutions, side by side.
par(mfrow = c(1, 2))
plot(pam.out3, which.plots = 2, main = " ")
plot(pam.out5, which.plots = 2, main = " ")

# Cluster profiles ----

# Attach the K = 3 PAM assignment to the original (unscaled) data.
D$Group <- as.factor(pam.out3$clustering)

# Plotting a numeric variable against a factor draws one box plot per cluster.
par(mfrow = c(2, 2), mar = c(2, 2, 2, 2))
plot(D$Group, D$pw, col = 2:4, main = "Power", xlab = "cluster")
plot(D$Group, D$speed, col = 2:4, main = "Max. Speed", xlab = "cluster")
plot(D$Group, D$acc, col = 2:4, main = "Acceleration", xlab = "cluster")
plot(D$Group, D$cons, col = 2:4, main = "Fuel Consumption", xlab = "cluster")

par(old.par)