# %%%%%%%%%%%%%%%%%%%%%% # ANALISI DATI SUICIDIO # %%%%%%%%%%%%%%%%%%%%%% # Regressione logistica # VD = variabili suicidarie, PENSIERO SUICIDARIO-PIANIFICAZIONE SUICIDARIA-TENTATO SUICIDIO # Predittori di interesse: dipendenza da sostanze per ciascuna sostanza # 2008 # IRSEX = SEX (1-M; 2-F) # MHSUITHK = PENSIERO SUICIDARIO # MHSUIPLN = PIANIFICAZIONE SUICIDARIA # MHSUITRY = Tentamen # NDSSDNSP = Nicotine Dependence Syndrome Scale (NDSS) # DEPNDALC = ALCOHOL # DEPNDCOC = COCAINE # DEPNDHER = HEROINE # DEPNDANL = PAIN RELIEVERS # DEPNDSED = SEDATIVES # DEPNDSTM = STIMULANTS # DEPNDHAL = HALLUCINOGENS # DEPNDINH = INHALANTS # DEPNDMRJ = MARIJUANA # DEPNDTRN = TRANQUILIZERS ## Carichiamo il dataset load("dataset08.RData") # %%%%%%%%%%%%%%%%%%%%%% # Vecchi Preamboli... ## Convertiamo IRSEX in fattore: 1-Maschio; 2-Femmina dataset08_reduced$IRSEX<-factor(dataset08_reduced$IRSEX,labels=names(attributes(dataset08_reduced$IRSEX)$labels)) ## ATTENZIONE: Convertiamo "MISSING = -9" in SPSS --> NA in R dataset08_reduced$MHSUITHK<-ifelse(dataset08_reduced$MHSUITHK=="-9",NA,dataset08_reduced$MHSUITHK) dataset08_reduced$MHSUIPLN<-ifelse(dataset08_reduced$MHSUIPLN=="-9",NA,dataset08_reduced$MHSUIPLN) dataset08_reduced$MHSUITRY<-ifelse(dataset08_reduced$MHSUITRY=="-9",NA,dataset08_reduced$MHSUITRY) ## ATTENZIONE: Rimuoviamo i missing data nelle variabili di "SUICIDAL OUTCOME" [MHSUITHK, MHSUIPLN, MHSUITRY] dataset08_reduced=na.omit(dataset08_reduced) summary(dataset08_reduced) # %%%%%%%%%%%%%%%%%%%%%% ## Regressione logistica: PENSIERO SUICIDARIO Mod.PS_08<-glm(MHSUITHK ~ IRSEX + NDSSDNSP + DEPNDALC + DEPNDCOC + DEPNDHER + DEPNDANL + DEPNDSED + DEPNDSTM + DEPNDHAL + DEPNDINH + DEPNDMRJ + DEPNDTRN, data = dataset08_reduced, family=binomial) summary(Mod.PS_08) ## Regressione logistica: PIANIFICAZIONE SUICIDARIA Mod.PianS_08<-glm(MHSUIPLN ~ IRSEX + NDSSDNSP + DEPNDALC + DEPNDCOC + DEPNDHER + DEPNDANL + DEPNDSED + DEPNDSTM + DEPNDHAL + DEPNDINH + DEPNDMRJ + DEPNDTRN, data = dataset08_reduced, family=binomial) summary(Mod.PianS_08) ## Regressione logistica: TENTATO SUICIDIO Mod.Tent_08<-glm(MHSUITRY ~ IRSEX + NDSSDNSP + DEPNDALC + DEPNDCOC + DEPNDHER + DEPNDANL + DEPNDSED + DEPNDSTM + DEPNDHAL + DEPNDINH + DEPNDMRJ + DEPNDTRN, data = dataset08_reduced, family=binomial) summary(Mod.Tent_08) ## probabilità previste dai 3 modelli prob_PS = predict(Mod.PS_08, dataset08_reduced,type="response") prob_PianS = predict(Mod.PianS_08, dataset08_reduced,type="response") prob_Tent = predict(Mod.Tent_08, dataset08_reduced,type="response") # %%%%%%%%%%%%%%%%%%%%%%%%%%% # Curve ROC per modelli Logit # con pacchetto pROC: library(pROC) # %%%%%%%%%%%%%%%%%%%%%%%%%%% ROC_PS <-roc(dataset08_reduced$MHSUITHK,prob_PS,smooth=FALSE,ci=TRUE) ROC_PianS<-roc(dataset08_reduced$MHSUIPLN,prob_PianS,smooth=FALSE,ci=TRUE) ROC_Tent<-roc(dataset08_reduced$MHSUITRY,prob_Tent,smooth=FALSE,ci=TRUE) ## Il locus delle coppie (H,F) generate dai tre modelli e basate su diversi valori k ## sarà solo approssimativamente una curva ROC, che chiameremo curva ROC empirica: par(pty = "s") # area di disegno "quadrata" plot(x=1-ROC_PS$specificities,y= ROC_PS$sensitivities,col="red",type="l",lwd=2) abline(a=0,b=1,lty="dashed") points(x=1-ROC_PianS$specificities,y= ROC_PianS$sensitivities,col="blue",type="l",lwd=2) points(x=1-ROC_Tent$specificities,y= ROC_Tent$sensitivities,col="orange",type="l",lwd=2) ## d prime medio per i tre modelli dprime_PS<-qnorm(ROC_PS$sensitivities)-qnorm(1-ROC_PS$specificities) mean_dprime_PS<-mean(dprime_PS[!is.na(dprime_PS) & !is.infinite(dprime_PS)]) mean_dprime_PS ## dprime_PianS<-qnorm(ROC_PianS$sensitivities)-qnorm(1-ROC_PianS$specificities) mean_dprime_PianS<-mean(dprime_PianS[!is.na(dprime_PianS) & !is.infinite(dprime_PianS)]) mean_dprime_PianS ## dprime_Tent<-qnorm(ROC_Tent$sensitivities)-qnorm(1-ROC_Tent$specificities) mean_dprime_Tent<-mean(dprime_Tent[!is.na(dprime_Tent) & !is.infinite(dprime_Tent)]) mean_dprime_Tent ## sovrapponiamo le cruve ROC per tre modelli "ridotti", contenenti solo "Alcool" Mod.PS_08_RID<-glm(MHSUITHK ~ IRSEX + DEPNDALC, data = dataset08_reduced, family=binomial) prob_PS_RID = predict(Mod.PS_08_RID, dataset08_reduced,type="response") ROC_PS_RID<-roc(dataset08_reduced$MHSUITHK,prob_PS_RID,smooth=FALSE,ci=TRUE) points(x=1-ROC_PS_RID$specificities,y=ROC_PS_RID$sensitivities,col="red",type="l",lwd=1,lty="dotdash") ## Mod.PianS_08_RID<-glm(MHSUIPLN ~ IRSEX + DEPNDALC, data = dataset08_reduced, family=binomial) prob_PianS_RID = predict(Mod.PianS_08_RID, dataset08_reduced,type="response") ROC_PianS_RID<-roc(dataset08_reduced$MHSUIPLN,prob_PianS_RID,smooth=FALSE,ci=TRUE) points(x=1-ROC_PianS_RID$specificities,y=ROC_PianS_RID$sensitivities,col="blue",type="l",lwd=1,lty="dotdash") ## Mod.Tent_08_RID<-glm(MHSUITRY ~ IRSEX + DEPNDALC, data = dataset08_reduced, family=binomial) prob_Tent_RID = predict(Mod.Tent_08_RID, dataset08_reduced,type="response") ROC_Tent_RID<-roc(dataset08_reduced$MHSUITRY,prob_Tent_RID,smooth=FALSE,ci=TRUE) points(x=1-ROC_Tent_RID$specificities,y=ROC_Tent_RID$sensitivities,col="blue",type="l",lwd=1,lty="dotdash") ## Test per confrontare i valori AUC nei modelli "nested", completo e ridotto: roc.test(ROC_PS, ROC_PS_RID,paired=TRUE) ## roc.test(ROC_PianS, ROC_PianS_RID,paired=TRUE) ## roc.test(ROC_Tent, ROC_Tent_RID,paired=TRUE) ## Test per confrontare i valori AUC nei tre modelli indipendenti (y diverso) roc.test(ROC_PS, ROC_PianS,paired=FALSE) roc.test(ROC_PS, ROC_Tent,paired=FALSE) roc.test(ROC_PianS, ROC_Tent,paired=FALSE) ## roc.test(ROC_PS_RID, ROC_PianS_RID,paired=FALSE) roc.test(ROC_PS_RID, ROC_Tent_RID,paired=FALSE) roc.test(ROC_PianS_RID, ROC_Tent_RID,paired=FALSE) ## Per casa... ## Scegliere mediante le curve ROC e il test di DeLong il set migliore di predittori. install.packages("rsq") library("rsq") data(hcrabs) summary(hcrabs) attach(hcrabs) y <- ifelse(num.satellites>0,1,0) logit.crbs <- glm(y~color+spine+width+weight,family=binomial(link="logit")) summary(logit.crbs) prob_crbs = predict(logit.crbs, hcrabs,type="response") ROC_crbs <-roc(y, prob_crbs,smooth=FALSE,ci=TRUE) par(pty = "s") # area di disegno "quadrata" plot(x=1-ROC_crbs$specificities,y= ROC_crbs$sensitivities,col="red",type="l",lwd=2)