# Solutions to the data-management exercises on the hotel bookings data.
# Every pipeline below prints its result so it can be compared with the
# reference output recorded in the comments.

library(tidyverse)
library(skimr)

# Data import ----
dir_path <- "../../slides/datamanaging/data/"
list.files(dir_path)
file_path <- paste0(dir_path, "hotels.csv")
hotels <- read_csv(file_path)

# EXERCISE 1 ----
# Overview of the whole data frame. The object is `hotels`; `hotel` is only
# one of its columns and does not exist as a standalone object.
skim(hotels)

# EXERCISE 2 ----
# Bookings from outside the USA with a lead time below 1.
last_minute <- hotels %>%
  filter(
    country != "USA",
    lead_time < 1
  )

# The comma could be replaced by `&` (logical AND).
hotels %>%
  filter(country != "USA" & lead_time < 1)
# last_minute: 6,174 rows

# EXERCISE 3 ----
# Bookings with at least one child or at least one baby.
no0_little_one <- hotels %>%
  filter(
    children >= 1 | babies >= 1
  )
# no0_little_one: 9,332 rows

# EXERCISE 4 ----
# Number of those bookings per hotel type.
no0_little_one_grouped <- no0_little_one %>%
  group_by(hotel) %>%
  summarize(n = n())
# A tibble: 2 × 2
#   hotel            n
# 1 City Hotel    5403
# 2 Resort Hotel  3929

# Check with an alternative approach (used by most of you).
nrow(hotels %>% filter(children >= 1 | babies >= 1, hotel == "City Hotel"))
nrow(hotels %>% filter(children >= 1 | babies >= 1, hotel != "City Hotel"))

# EXERCISE 5 ----
# Frequency of each value of `adults`, most frequent first.
hotels %>%
  count(adults, sort = TRUE)

# or
hotels %>%
  count(adults) %>%
  arrange(desc(n))

# EXERCISE 6 ----
# Same frequency table, split by cancellation status.
hotels %>%
  count(is_canceled, adults) %>%
  arrange(is_canceled, desc(n))

# or
hotels %>%
  group_by(is_canceled) %>%
  count(adults) %>%
  arrange(is_canceled, desc(n))

# An alternative approach used by some of you.
hotels %>%
  filter(is_canceled == 0) %>%
  count(adults, sort = TRUE)
hotels %>%
  filter(is_canceled == 1) %>%
  count(adults, sort = TRUE)

# EXERCISE 7 ----
# Minimum, mean, median and maximum of `adr` for each hotel type.
hotels %>%
  group_by(hotel) %>%
  summarise(
    min_adr = min(adr),
    mean_adr = mean(adr),
    median_adr = median(adr),
    max_adr = max(adr)
  )
# A tibble: 2 × 5
#   hotel        min_adr mean_adr median_adr max_adr
# 1 City Hotel      0       105.        99.9    5400
# 2 Resort Hotel   -6.38     95.0       75       508

# EXERCISE 8 ----
# Rows holding the two extreme `adr` values found in exercise 7. The values
# are hard-coded here; the slice_min()/slice_max() calls further down pick
# the extremes directly from the data instead.
anomalous_adr <- hotels %>%
  filter(adr %in% c(-6.38, 5400)) %>%
  select(adr, everything())

anomalous_adr %>%
  select(
    adr, hotel, adults, children, babies,
    starts_with("arrival")
  ) %>%
  select(-contains(c("week", "day")))

hotels %>%
  slice_min(adr) %>%
  select(adr, everything())
hotels %>%
  slice_max(adr) %>%
  select(adr, everything())

hotels %>%
  slice_min(adr) %>%
  select(
    adr, hotel, adults, children, babies,
    starts_with("arrival")
  )
hotels %>%
  slice_max(adr) %>%
  select(
    adr, hotel, adults, children, babies,
    starts_with("arrival")
  )