###########################################################################################
# Foods ("Alimenti"): PCA for dimensionality reduction, then k-means clustering
# on the first three principal components.
#
# Input : a tab-separated file with a header row, chosen interactively.
#         The columns `Alimento` and `Categoria` are read by name and the first
#         two columns are dropped before the analysis
#         (NOTE(review): presumably those are the same two columns - confirm).
# Output: diagnostic plots, plus the data frames `Newdata` / `Newdata2`
#         (original data with the cluster id prepended; `Newdata2` is sorted
#         by cluster).
###########################################################################################

originale <- read.table(file.choose(), header = TRUE, sep = "\t")

# Everything except the first two columns goes into the PCA.
dataset <- as.matrix(originale[, -c(1:2)])
Alimento <- originale$Alimento
Categoria <- originale$Categoria

# Explicit print() so the categories are also shown when the script is source()d.
print(levels(as.factor(Categoria)))

############################
#-- Reduce the number of variables with PCA
# `scale.` is spelled out in full: `scale = TRUE` only worked through partial
# argument matching.
PCA <- prcomp(dataset, center = TRUE, scale. = TRUE)

varianze <- PCA$sdev^2
varianzecum <- cumsum(varianze / sum(varianze) * 100)

#-- Scree plot (reference line at variance = 1)
plot(varianze, pch = 16, type = "o")
abline(h = 1, col = "gray")

#-- Cumulative variance, in percent (reference lines at 50% and 75%)
plot(varianzecum, pch = 16, type = "o")
abline(h = c(50, 75), col = "gray")

#-- Loadings plot, PC1 vs. PC2
plot(PCA$rotation[, 1], PCA$rotation[, 2], pch = 16)
text(PCA$rotation[, 1], PCA$rotation[, 2],
     labels = colnames(dataset), cex = 0.8, pos = 3)
abline(h = 0, v = 0, col = "gray")

#-- Scores plot, PC1 vs. PC2, labelled by food name
plot(PCA$x[, 1], PCA$x[, 2], pch = 16)
text(PCA$x[, 1], PCA$x[, 2], labels = Alimento, cex = 0.6, pos = 3)
abline(h = 0, v = 0, col = "gray")

#-- Scores plot, PC1 vs. PC2, category labels drawn in place of points
plot(PCA$x[, 1], PCA$x[, 2], type = "n")
text(PCA$x[, 1], PCA$x[, 2], labels = Categoria, cex = 0.8)
abline(h = 0, v = 0, col = "gray")

#-- Clustering uses the scores on the first 3 PCs
datasetPCA <- PCA$x[, c(1:3)]

#-- K-means: choose the number of clusters (elbow plot)
# kmeans() refuses more centers than distinct rows, so cap the range at that.
k_max <- min(15L, nrow(unique(datasetPCA)))
Tot <- vapply(
  seq_len(k_max),
  function(k) {
    # Re-seed before every fit so each k starts from the same RNG state.
    set.seed(7)
    kmeans(datasetPCA, centers = k, iter.max = 50)$tot.withinss
  },
  numeric(1)
)
plot(Tot, xlab = "K", ylab = "Somma dei quadrati delle distanze intra-cluster",
     pch = 16, type = "o", main = "Elbow plot")

#-- 5 clusters
set.seed(7)
KM5PCA <- kmeans(datasetPCA, centers = 5, iter.max = 50)

#-- One colour per cluster id (1..5); reused by both coloured score plots
cluster_palette <- c("red", "blue", "green", "purple", "orange")
Color <- cluster_palette[KM5PCA$cluster]

#-- Scores plot in colour, PC1 vs. PC2
plot(PCA$x[, 1], PCA$x[, 2], type = "n")
points(PCA$x[, 1], PCA$x[, 2], col = Color, cex = 1.2, pch = 16)
text(PCA$x[, 1], PCA$x[, 2], labels = Categoria, pos = 3, cex = 0.5)
abline(h = 0, v = 0, col = "gray")

##### Second vs. third component

#-- Loadings plot, PC2 vs. PC3
plot(PCA$rotation[, 2], PCA$rotation[, 3], pch = 16)
text(PCA$rotation[, 2], PCA$rotation[, 3],
     labels = colnames(dataset), cex = 0.8, pos = 3)
abline(h = 0, v = 0, col = "gray")

#-- Scores plot in colour, PC2 vs. PC3 (same cluster colours as above)
plot(PCA$x[, 2], PCA$x[, 3], type = "n")
points(PCA$x[, 2], PCA$x[, 3], col = Color, cex = 1.2, pch = 16)
text(PCA$x[, 2], PCA$x[, 3], labels = Categoria, pos = 3, cex = 0.5)
abline(h = 0, v = 0, col = "gray")

### Original data with the cluster assignment prepended
# data.frame() names the first column `KM5PCA.cluster`; the sort below relies
# on that name.
Newdata <- data.frame(KM5PCA$cluster, originale)
Newdata2 <- Newdata[order(Newdata$KM5PCA.cluster), ]

# View() needs an interactive session with a data viewer.
if (interactive()) {
  View(Newdata)
  View(Newdata2)
}