###########################################################################################
# 2 whiskey
###########################################################################################
# Cluster analysis of the whiskey data set:
#   1. standardize the numeric variables and draw an elbow plot for k = 1..6;
#   2. fit k-means with 2 and 4 clusters and attach the labels to the data;
#   3. run a PCA, cluster on the first three principal components, and draw
#      the PC1/PC2 score plot coloured by type and labelled by cluster.

# Seed shared by every k-means fit so that results are reproducible and the
# final fits agree with the runs summarised in the elbow plot.
kmeans_seed <- 7

# Interactively choose the tab-separated input file (first row = header).
originale <- read.table(file.choose(), header = TRUE, sep = "\t")

# Drop the first two columns and keep the rest as the numeric matrix to
# analyse. NOTE(review): presumably columns 1-2 are the `sample` and `type`
# identifiers read below -- confirm against the input file.
dataset <- as.matrix(originale[, -c(1:2)])
Type <- originale$type
Sample <- originale$sample

#-- Normalization: centre each variable and scale it to unit variance
dataNorm <- scale(dataset, center = TRUE, scale = TRUE)

#-- K-means: choose the number of clusters (elbow plot)
# Total within-cluster sum of squares for k = 1..6; the vector is preallocated
# instead of being grown with c() at every iteration.
k_values <- 1:6
Tot <- numeric(length(k_values))
for (i in k_values) {
  set.seed(kmeans_seed)
  km <- kmeans(dataNorm, centers = i, iter.max = 50)
  Tot[i] <- km$tot.withinss
}
plot(Tot,
  xlab = "K",
  ylab = "Somma dei quadrati delle distanze intra-cluster",
  pch = 16, type = "o", main = "Elbow plot"
)

#-- 2 or 4 clusters
# kmeans() picks its starting centres at random, so the seed is set before
# each fit; without it the cluster labels change from run to run.
set.seed(kmeans_seed)
KM2 <- kmeans(dataNorm, centers = 2, iter.max = 50)
set.seed(kmeans_seed)
KM4 <- kmeans(dataNorm, centers = 4, iter.max = 50)

# Attach the cluster labels to the table they were computed from. The previous
# code referenced `Newdata`, which is not defined anywhere in this section.
Newdata2 <- data.frame(originale, KM2 = KM2$cluster, KM4 = KM4$cluster)
View(Newdata2)

############################
#-- Reduce the number of variables with PCA
# `scale.` is spelled out in full rather than relying on partial matching.
PCA <- prcomp(dataset, center = TRUE, scale. = TRUE)

#-- Cluster on the scores of the first 3 principal components
datasetPCA <- PCA$x[, 1:3]

#-- 2 clusters
set.seed(kmeans_seed)
KM2PCA <- kmeans(datasetPCA, centers = 2, iter.max = 50)

#-- Score plot
#-- in colour: one colour per level of Type. Up to two levels keep the
# red/blue scheme; with more levels a rainbow palette is used instead of
# failing on the level reassignment.
type_factor <- as.factor(Type)
n_types <- nlevels(type_factor)
type_colors <- if (n_types <= 2) {
  c("red", "blue")[seq_len(n_types)]
} else {
  grDevices::rainbow(n_types)
}
Color <- type_colors[as.integer(type_factor)]

# Empty frame first, then points coloured by type and, above each point, the
# cluster assigned by the k-means fit on the principal components.
plot(PCA$x[, 1], PCA$x[, 2], type = "n", xlab = "PC1", ylab = "PC2")
points(PCA$x[, 1], PCA$x[, 2], col = Color, cex = 1.2, pch = 16)
text(PCA$x[, 1], PCA$x[, 2], labels = KM2PCA$cluster, pos = 3)
abline(h = 0, col = "gray")
abline(v = 0, col = "gray")