## ----child = "../setup.Rmd"------------------------------------

## ----setup, include=FALSE--------------------------------------
# Global R options: compact tibble printing and a 65-character console.
options(
  htmltools.dir.version = FALSE,
  dplyr.print_min = 6,
  dplyr.print_max = 6,
  tibble.width = 65,
  width = 65
)

# Default knitr chunk options: figure size, aspect ratio, alignment, dpi.
knitr::opts_chunk$set(
  echo = TRUE,
  fig.width = 8,
  fig.asp = 0.618,
  out.width = "60%",
  fig.align = "center",
  dpi = 300,
  message = FALSE
)

# ggplot2: default theme with a larger base font.
ggplot2::theme_set(ggplot2::theme_gray(base_size = 16))

# Fixed seed for reproducible output.
set.seed(1234)

# Font Awesome HTML dependency.
htmltools::tagList(rmarkdown::html_dependency_font_awesome())

# magick: wrap dev.off() so that its return value is not printed.
dev.off <- function() {
  invisible(grDevices::dev.off())
}

# conflicted: resolve the filter() name clash in favour of dplyr.
library(conflicted)
conflict_prefer("filter", "dplyr")

# xaringanExtra: enable panelsets.
library(xaringanExtra)
xaringanExtra::use_panelset()

# Output hook that limits the number of printed lines.
#
# Reads the chunk option `output.lines`:
#   * not set         -> the output is passed to the default hook unchanged;
#   * a single number -> at most that many leading lines are kept, and "..."
#                        is appended when something was cut;
#   * anything else   -> used as an index into the output lines; the selected
#                        lines are wrapped in "..." before and after.
hook_output <- knitr::knit_hooks$get("output")
knitr::knit_hooks$set(output = function(x, options) {
  keep <- options$output.lines

  # No line limit requested: defer straight to the default hook.
  if (is.null(keep)) {
    return(hook_output(x, options))
  }

  ellipsis <- "..."
  out <- unlist(strsplit(x, "\n"))

  if (length(keep) != 1) {
    # A vector of line numbers: show only those lines.
    out <- c(ellipsis, out[keep], ellipsis)
  } else if (length(out) > keep) {
    # A single count: keep the first `keep` lines and mark the truncation.
    out <- c(head(out, keep), ellipsis)
  }

  # Re-assemble the lines (with a trailing newline) for the default hook.
  hook_output(paste(c(out, ""), collapse = "\n"), options)
})

## ----packages, echo=FALSE, message=FALSE, warning=FALSE--------
library(tidyverse)

## ----message=FALSE---------------------------------------------
hotels <- read_csv("data/hotels.csv")

## --------------------------------------------------------------
hotels %>%
  select(hotel, lead_time) #<<

## ----output.lines=18-------------------------------------------
hotels %>%
  select(-agent) #<<

## --------------------------------------------------------------
hotels %>%
  select(hotel:arrival_date_month) #<<

## --------------------------------------------------------------
hotels %>%
  select(starts_with("arrival")) #<<

## --------------------------------------------------------------
hotels %>%
  select(ends_with("type")) #<<

## ----results='hide'--------------------------------------------
hotels %>%
  pull(lead_time) #<<

## --------------------------------------------------------------
hotels %>%
  pull(lead_time) %>%
  class() #<<

## --------------------------------------------------------------
hotels %>%
  select(adults, children, babies) %>%
  arrange(babies) #<<

## --------------------------------------------------------------
hotels %>%
  select(adults, children, babies) %>%
  arrange(desc(babies)) #<<

## ----output.lines=17-------------------------------------------
# first five
hotels %>%
  slice(1:5) #<<

## ----output.lines=10-------------------------------------------
hotels %>%
  # slice the first five rows  # this line is a comment
  # select(hotel) %>%          # this one doesn't run
  slice(1:5)                   # this line runs

## ----output.lines=17-------------------------------------------
# bookings in City Hotels
hotels %>%
  filter(hotel == "City Hotel") #<<

## --------------------------------------------------------------
hotels %>%
  filter(
    adults == 0,     #<<
    children >= 1    #<<
  ) %>%
  select(adults, babies, children)
## --------------------------------------------------------------
# bookings with no adults and some children or babies in the room
hotels %>%
  filter(
    adults == 0,
    children >= 1 | babies >= 1     # | means or  #<<
  ) %>%
  select(adults, babies, children)

## ----include=FALSE---------------------------------------------
# Allow up to 20 rows to print from here on.
options(dplyr.print_max = 20)

## --------------------------------------------------------------
hotels %>%
  distinct(market_segment) %>% #<<
  arrange(market_segment)

## ----output.lines=13-------------------------------------------
hotels %>%
  distinct(hotel, market_segment) %>% #<<
  arrange(hotel, market_segment)

## --------------------------------------------------------------
# alphabetical order by default
hotels %>%
  count(market_segment) #<<

## --------------------------------------------------------------
# descending frequency order
hotels %>%
  count(market_segment, sort = TRUE) #<<

## --------------------------------------------------------------
# ascending frequency order
hotels %>%
  count(market_segment) %>%
  arrange(n) #<<

## --------------------------------------------------------------
# descending frequency order
# just like adding sort = TRUE
hotels %>%
  count(market_segment) %>%
  arrange(desc(n)) #<<

## --------------------------------------------------------------
hotels %>%
  count(hotel, market_segment) #<<

## --------------------------------------------------------------
# hotel type first
hotels %>%
  count(hotel, market_segment) #<<

## --------------------------------------------------------------
# market segment first
hotels %>%
  count(market_segment, hotel) #<<

## --------------------------------------------------------------
hotels %>%
  mutate(little_ones = children + babies) %>% #<<
  select(children, babies, little_ones) %>%
  arrange(desc(little_ones))

## --------------------------------------------------------------
# Resort Hotel
hotels %>%
  mutate(little_ones = children + babies) %>%
  filter(
    little_ones >= 1,
    hotel == "Resort Hotel"
  ) %>%
  select(hotel, little_ones)

## --------------------------------------------------------------
# City Hotel
hotels %>%
  mutate(little_ones = children + babies) %>%
  filter(
    little_ones >= 1,
    hotel == "City Hotel"
  ) %>%
  select(hotel, little_ones)

## --------------------------------------------------------------
hotels %>%
  mutate(little_ones = children + babies) %>%
  count(hotel, little_ones) %>%
  mutate(prop = n / sum(n))

## --------------------------------------------------------------
# mean average daily rate for all bookings
# (calculated by dividing the sum of all lodging transactions
# by the total number of staying nights)
hotels %>%
  summarise(mean_adr = mean(adr)) #<<

## --------------------------------------------------------------
hotels %>%
  summarise(mean(adr))

## --------------------------------------------------------------
hotels %>%
  summarise(mean_adr = mean(adr))

## --------------------------------------------------------------
# mean average daily rate for all bookings at city and resort hotels
hotels %>%
  group_by(hotel) %>% #<<
  summarise(mean_adr = mean(adr))

## --------------------------------------------------------------
hotels %>%
  group_by(hotel) %>%
  summarise(n = n())

## --------------------------------------------------------------
hotels %>%
  count(hotel)

## --------------------------------------------------------------
hotels %>%
  summarise(
    min_adr = min(adr),
    mean_adr = mean(adr),
    median_adr = median(adr),
    max_adr = max(adr)
  )