# Load the data set; the first CSV column is used as row names.
D <- read.csv("cars.csv", header = TRUE, row.names = 1)
dim(D)
head(D)

# Everything below (summaries, scale(), dist()) needs numeric columns.
# Fail early with a clear message instead of an obscure error inside scale().
is_num_col <- vapply(D, function(col) is.numeric(col) || is.logical(col),
                     logical(1))
if (!all(is_num_col)) {
  stop("non-numeric column(s) in cars.csv: ",
       paste(names(D)[!is_num_col], collapse = ", "),
       call. = FALSE)
}

# Column-wise means and variances. vapply() iterates over the data-frame
# columns directly, avoiding the matrix coercion that apply() performs.
vapply(D, mean, numeric(1))
vapply(D, var, numeric(1))

# Standardise every variable (centre to mean 0, scale to sd 1) so that
# variables with large variances do not dominate the distances.
D.sd <- scale(D)
head(D.sd)

## Agglomerative hierarchical clustering (AHC)
library(cluster)
# Pairwise distances between the standardised observations
# (dist() uses the Euclidean metric by default).
eu.dist <- dist(D.sd)

# Fit one agglomerative tree per linkage criterion.
hc.sing <- agnes(eu.dist, method = "single")
hc.ave  <- agnes(eu.dist, method = "average")
hc.com  <- agnes(eu.dist, method = "complete")
hc.ward <- agnes(eu.dist, method = "ward")

# Draw the dendrograms in the same order as the fits;
# hang = -1 aligns all leaf labels at the bottom of the plot.
pltree(hc.sing, main = "SL",   cex = 0.8, hang = -1)
pltree(hc.ave,  main = "AL",   cex = 0.8, hang = -1)
pltree(hc.com,  main = "CL",   cex = 0.8, hang = -1)
pltree(hc.ward, main = "Ward", cex = 0.8, hang = -1)

# Observation names, used to list the members of each cluster.
labels <- rownames(D)

# Cut the Ward tree into three clusters (a five-cluster cut is kept disabled).
cl3 <- cutree(hc.ward, k = 3)
#cl5<-cutree(hc.ward, 5)

# Cluster sizes.
table(cl3)

# Members of cluster 1, 2 and 3 respectively.
labels[which(cl3 == 1)]
labels[which(cl3 == 2)]
labels[which(cl3 == 3)]

# PAM (partitioning around medoids) with K = 3 groups
pam.out3 <- pam(D.sd, k = 3)
pam.out3$silinfo$avg.width  # average silhouette width of the partition
pam.out3$medoids            # medoid observations, on the standardised scale
#D[pam.out3$id.med, ]

# PAM with K = 5 groups
pam.out5 <- pam(D.sd, k = 5)
pam.out5$silinfo$avg.width
pam.out5$medoids
#D[pam.out5$id.med, ]

# Silhouette plots for K = 3 and K = 5 side by side.
# The argument is spelled out as `which.plots` (plot 2 = silhouette plot)
# rather than relying on partial matching of `which`; the previous graphics
# layout is restored afterwards.
old_par <- par(mfrow = c(1, 2))
plot(pam.out3, which.plots = 2, main = " ")
plot(pam.out5, which.plots = 2, main = " ")
par(old_par)

# Attach the K = 3 PAM cluster assignment to the data as a factor.
D$Group <- as.factor(pam.out3$clustering)

# One fill colour per cluster, starting at palette colour 2
# (this yields 2:4 for the three PAM clusters).
group_cols <- seq_len(nlevels(D$Group)) + 1L

# 2 x 2 grid of per-cluster boxplots (plot() of a factor against a numeric
# vector draws boxplots); the previous layout and margins are restored after.
old_par <- par(mfrow = c(2, 2), mar = c(2, 2, 2, 2))
plot(D$Group, D$pw, col = group_cols, main = "Power", xlab = "cluster")
plot(D$Group, D$speed, col = group_cols, main = "Max. Speed", xlab = "cluster")
plot(D$Group, D$acc, col = group_cols, main = "Acceleration", xlab = "cluster")
plot(D$Group, D$cons, col = group_cols, main = "Fuel Consumption",
     xlab = "cluster")
par(old_par)