.Rhistory

# Demonstrate ymd_hms() on a vector of differently formatted date-times.
# c() mixes one number with strings, so every element is coerced to character.
# NOTE(review): ymd_hms(), now(), day() etc. are presumably lubridate functions
# attached earlier in the session -- no library() call is visible here.
tt <- c(
20100101120101,
"2009-01-02 12-01-02",
"2009.01.03 12:01:03",
"2009-1-4 12-1-4",
"2009-1, 5 12:1, 5",
"200901-08 1201-08",
"2009 arbitrary 1 non-decimal 6 chars 12 in between 1 !!! 6",
"OR collapsed formats: 20090107 120107 (as long as prefixed with zeros)",
"Automatic wday, Thu, detection, 10-01-10 10:01:10 and p format: AM",
"Created on 10-01-11 at 10:01:11 PM")
ymd_hms(tt)
# pull specific time components
now() %>% day
now() %>% hour
now() %>% second
# change the day of an assigned time vector
dd <- now()
day(dd) <- 31
dd
# setting a day greater than the last day of the chosen month will automatically roll over to the following month
dd <- now()
day(dd) <- 32
dd
# Take a small sample of the `time_hour` column of the nycflights13 `flights`
# dataset and parse it with different component orders.
require(nycflights13)
flights %>% str # stored dataset from nycflights13 package
flights$time_hour %>% str # time component
ft <- flights$time_hour
# get a smaller sample
set.seed(13)
ft <- ft[sample(ft %>% length,20,replace = T)]
require(nycflights13)
# NOTE(review): this second draw samples (with replacement) from the 20 values
# kept above, not from the full column, and no set.seed() precedes it.
ft <- ft[sample(ft %>% length,20,replace = T)]
ymd_hms(ft) # return full date and time
ydm_hms(ft) # returns some NAs b/c month > 12
hm("00:01") %>% as.numeric # returns 60 seconds after 00:00:00
hm("23:59") %>% as.numeric # returns seconds elapsed since 00:00:00 as a number
# Print 20 randomly chosen time zone names.
OlsonNames() %>% sample(20)
# OlsonNames() for complete list
# roll back to the last day of the previous month or, with roll_to_first = T as
# here, to the first day of the current month; preserve_hms = T keeps the time of day
now() %>% rollback(preserve_hms = T,roll_to_first = T)
2012.75 %>% date_decimal() # decimal dates, e.g. 3/4 into 2012
today() # today's date
# Convert the sampled date-times to character (ftc) and try several parsers on
# the character and factor versions. Many lines are repeated console retries.
ft
ft %>% as.character()
ft %>% as.character() %>% class
ft %>% as.character()
ftc <- ft %>% as.character()
# NOTE(review): lubridate::hms() is given full date-time text here; the later
# attempts switch to ymd_hms(), presumably because this one did not parse.
ftc %>% as.factor %>% lubridate::hms() %>% period_to_seconds() %>% hms::as_hms()
ftc
ftc %>% as.factor
ftc %>% as.factor %>% lubridate::hms()
# period_to_seconds() is applied directly to a factor here (no parsing step)
ftc %>% as.factor %>% period_to_seconds()
ftc %>% as.factor %>% lubridate::ymd_hms()
# NOTE(review): ymd_hms() output is piped into period_to_seconds(); whether a
# date-time is an acceptable input for it is not shown here -- confirm.
ftc %>% as.factor %>% lubridate::ymd_hms() %>% period_to_seconds() %>% hms::as_hms()
ftc %>% as.factor %>% lubridate::ymd_hms() %>% period_to_seconds()
ftc %>% as.factor %>% lubridate::ymd_hms()
ftc %>% as.factor %>% lubridate::ymd_hms() %>% class
ftc %>% as.factor %>% lubridate::ymd_hms()
ftc %>% lubridate::ymd_hms()
ftc <- ft %>% as.character()
ftc %>% lubridate::ymd_hms()
ftc %>% lubridate::ymd_hms() %>% class
ftc
ftc[1]
ftc[1:5]
# str_replace() changes only the first ":" in each string to "."
ftc[1:5] %>% stringr::str_replace(":",".")
ftc[1:5] <- ftc[1:5] %>% stringr::str_replace(":",".") # create erroneous data
ftc
ftc %>% as.factor %>% lubridate::ymd_hms()
ftc %>% as.factor %>% lubridate::ymd_hms() %>% class
ftc
ftc %>% as.factor
ftc %>% as.factor %>% lubridate::ymd_hms()
ftc %>% as.factor %>% lubridate::ymd_hms() %>% period_to_seconds() %>% hms::as_hms()
ftc %>% lubridate::ymd_hms()
ftc
ftc %>% hms::as_hms()
ftc
# Final version of the experiment: rebuild ftc, corrupt the first five
# entries, then parse.
require(stringr) # for generating character error
ftc <- ft %>% as.character()
ftc[1:5] <- ftc[1:5] %>% stringr::str_replace(":",".") # create erroneous data
ftc %>% lubridate::ymd_hms()
# Global output settings for the knitted document.
options(width = 100) # console output width, in characters
knitr::opts_chunk$set(
  # TRUE is spelled out: `T` is an ordinary variable that can be reassigned
  eval = TRUE, # run all code
  echo = TRUE, # show code chunks in output
  tidy = TRUE, # tidy the displayed code
  message = FALSE, # mask all messages
  warning = FALSE, # mask all warnings
  comment = "", # no prefix in front of printed output lines
  tidy.opts = list(width.cutoff = 100), # set width of code chunks in output
  size = "small" # set code chunk size
)
# Load the string/scraping packages, scrape the text of one web page and run
# basic stringr pattern queries against it.
# install.packages("pacman")
pacman::p_load(stringr,stringi,dplyr,reprex,xml2,rvest)
# reprex = for rendering text string in HTML
# install.packages("pacman") # uncomment and install this first
pacman::p_load(stringr,stringi,dplyr,reprex,xml2,rvest)
require(xml2)# read html data
require(rvest) # select html elements
url <- "https://r4ds.had.co.nz/strings.html"
txt <- url %>% read_html %>% html_text() # scrape web text from url
txt %>% str
txt %>% str_length() # number of characters in each element of the vector
pat <- "strings" # string pattern to search for
txt %>% str_detect(pat) # returns logical if vector contains that pattern
txt %>% str_which(pat) # indices of the elements that contain the pattern
txt %>% str_locate(pat) # show character positions of the first instance of pattern
txt %>% str_locate_all(pat) # show all positions
txt
# NOTE(review): this indexes elements, not characters; if txt has length 1
# (presumably the case for html_text() on a whole page) elements 2..100 are NA.
txt[1:100]
rmarkdown::render_site()
# Global output settings for the knitted document.
options(width = 100) # console output width, in characters
knitr::opts_chunk$set(
  # TRUE is spelled out: `T` is an ordinary variable that can be reassigned
  eval = TRUE, # run all code
  echo = TRUE, # show code chunks in output
  tidy = TRUE, # tidy the displayed code
  message = FALSE, # mask all messages
  warning = FALSE, # mask all warnings
  comment = "", # no prefix in front of printed output lines
  tidy.opts = list(width.cutoff = 100), # set width of code chunks in output
  size = "small" # set code chunk size
)
# Install any missing packages, then attach all of them.
packages <- c("ggplot2","ggthemes","dplyr","tidyverse","zoo","RColorBrewer","viridis","plyr")
# require() quotes its argument, so require(packages) looked for a package
# literally called "packages", returned FALSE and skipped the install branch;
# the condition was also inverted (install when loading succeeds).
# Check each name with requireNamespace() and install only what is missing.
missing_pkgs <- packages[!vapply(packages, requireNamespace, logical(1), quietly = TRUE)]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs, dependencies = TRUE)
}
# load tvthemes (installed from GitHub, only when not already present)
if (!requireNamespace("tvthemes", quietly = TRUE)) {
  devtools::install_github("Ryo-N7/tvthemes")
}
# NOTE(review): plyr is attached after dplyr, so same-named plyr functions
# mask dplyr's -- confirm this order is intended.
lapply(packages, library, character.only = TRUE)
# Build a Wikipedia URL from a base address and a page name, then try several
# CSS selectors to reach the page's infobox.
# NOTE(review): `:contains(...)` presumably matches on an element's text, not on
# its class; a plain class selector would be ".infobox" -- confirm which was meant.
fh <- "Melbourne"
url <- "https://en.wikipedia.org/wiki/"
fh <- "Melbourne"
require(rvest)
url <- "https://en.wikipedia.org/wiki/"
fh <- "Melbourne"
paste0(url,fh) %>%
read_html() %>% # read webpage
html_nodes(":contains('infobox')")
paste0(url,fh) %>%
read_html() %>% # read webpage
html_nodes(":contains(infobox)")
# compare the quoted and unquoted forms of the selector argument
# (each side downloads and parses the page separately)
identical(paste0(url,fh) %>%
read_html() %>% # read webpage
html_nodes(":contains(infobox)"),
paste0(url,fh) %>%
read_html() %>% # read webpage
html_nodes(":contains('infobox')")
)
paste0(url,fh) %>%
read_html() %>% # read webpage
html_nodes(":contains(.infobox)")
paste0(url,fh) %>%
read_html() %>% # read webpage
html_nodes(":contains('.infobox')")
# keep only the 6th matched node
paste0(url,fh) %>%
read_html() %>% # read webpage
html_nodes(":contains('.infobox')") %>% .[6]
# text content of the 6th matched node
paste0(url,fh) %>%
read_html() %>% # read webpage
html_nodes(":contains('.infobox')") %>% .[6] %>%
html_text
# tag structure of the 6th matched node
paste0(url,fh) %>%
read_html() %>% # read webpage
html_nodes(":contains('.infobox')") %>% .[6] %>%
html_structure()
# Debugging demo: call rowMeans() on a character vector and step in with browser().
# rowMeans() needs an array of two or more dimensions (or a data frame), so the
# calls below on a plain vector of letters raise an error.
xx <- LETTERS[1:10]
xx %>% unique
xx %>% rowMeans()
xx <- LETTERS[1:10]
browser()
1
2
xx %>% rowMeans()
xx <- LETTERS[1:10]
xx %>% rowMeans()
browser()
remove(xx)
# NOTE(review): `clear` is a bare name with no definition visible in this
# file -- presumably typed by mistake.
clear
# Global output settings for the knitted document; here chunks are shown but not run.
options(width=100)
knitr::opts_chunk$set(
eval = FALSE, # do not run code chunks
echo = TRUE, # show code chunks in output
tidy = TRUE, # tidy the displayed code
message = FALSE,  # mask all messages
warning = FALSE, # mask all warnings
comment = "",
tidy.opts=list(width.cutoff=100), # set width of code chunks in output
size="small" # set code chunk size
)
# First look at the NYC Airbnb listings.
# NOTE(review): `nyc` is used before the read_csv() call below and `my_theme`
# is not defined anywhere in this view -- presumably both came from earlier
# in the session.
glimpse(nyc)
length(nyc$id) # print length of 'id' column
# scatter of price against neighborhood_overview, coloured by the same column
ggplot(data = nyc) +
geom_point(mapping = aes(x = neighborhood_overview, y = price, colour =
neighborhood_overview), shape = 21, stroke = 1) +
my_theme
glimpse(nyc)
url <- "http://data.insideairbnb.com/united-states/ny/new-york-city/2021-04-07/data/listings.csv.gz"
nyc <-  read_csv(url)
mpg
# Install any missing packages, then attach all of them.
packages <- c("ggplot2","ggthemes","dplyr","tidyverse","zoo","RColorBrewer","viridis","plyr")
# require() quotes its argument, so require(packages) looked for a package
# literally called "packages", returned FALSE and skipped the install branch;
# the condition was also inverted (install when loading succeeds).
# Check each name with requireNamespace() and install only what is missing.
missing_pkgs <- packages[!vapply(packages, requireNamespace, logical(1), quietly = TRUE)]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs, dependencies = TRUE)
}
# load tvthemes (installed from GitHub, only when not already present)
if (!requireNamespace("tvthemes", quietly = TRUE)) {
  devtools::install_github("Ryo-N7/tvthemes")
}
# NOTE(review): plyr is attached after dplyr, so same-named plyr functions
# mask dplyr's -- confirm this order is intended.
lapply(packages, library, character.only = TRUE)
# Install/attach tidyverse and inspect the `mpg` data frame.
install.packages("tidyverse")  # install package
library(tidyverse) # load the package library
# require() also attaches the package, but returns FALSE with a warning
# instead of raising an error when the package is missing
require(tidyverse)
install.packages("tidyverse")
str(mpg) # structure of data
glimpse(mpg) # preview of data
summary(mpg) # basic summary stats
table(mpg$manufacturer) # number of rows for each manufacturer
head(mpg) # visualise first 6 rows of data
tail(mpg,10) # visualise last 10 (or N) rows of data
names(mpg) # get column names
class(mpg) # class of data frame
class(mpg$manufacturer) # class of data column
mpg$displ # print a column
mpg$hwy # print a column
# Install any missing packages, then attach all of them.
packages <- c("ggplot2","ggthemes","dplyr","tidyverse","zoo","RColorBrewer","viridis","plyr", "rlang")
# require() quotes its argument, so require(packages) looked for a package
# literally called "packages", returned FALSE and skipped the install branch;
# the condition was also inverted (install when loading succeeds).
# Check each name with requireNamespace() and install only what is missing.
missing_pkgs <- packages[!vapply(packages, requireNamespace, logical(1), quietly = TRUE)]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs, dependencies = TRUE)
}
# load tvthemes
# devtools::install_github("Ryo-N7/tvthemes")
# NOTE(review): plyr is attached after dplyr, so same-named plyr functions
# mask dplyr's -- confirm this order is intended.
lapply(packages, library, character.only = TRUE)
# Download the NYC Airbnb listings, keep a subset by id, then filter to the
# rows used for plotting. The id cut-off is changed between retries.
install.packages("tidyverse")  # install package
install.packages("tidyverse")
glimpse(nyc)
url <- "http://data.insideairbnb.com/united-states/ny/new-york-city/2021-04-07/data/listings.csv.gz"
nyc <-  read_csv(url)
nyc <- nyc[nyc$id < 1000000,] # get smaller subset of data
length(nyc$id) # print length of 'id' column
glimpse(nyc)
nyc <-  read_csv(url)
nyc <- nyc[nyc$id < 10000,] # get smaller subset of data
length(nyc$id) # print length of 'id' column
glimpse(nyc)
nyc <-  read_csv(url)
nyc <- nyc[nyc$id < 10000,] # get smaller subset of data
length(nyc$id) # print length of 'id' column
str(nyc)
# smaller csv file (16 cols)
pacman::p_load(readr,dplyr)
url <- "http://data.insideairbnb.com/united-states/ny/new-york-city/2021-04-07/data/listings.csv.gz"
nyc <-  read_csv(url)
nyc <- nyc[nyc$id < 1000000,] # get smaller subset of data
# keep listings under 200 in price, outside Staten Island, with a minimum stay
# strictly between 5 and 15 nights
# NOTE(review): if `price` is stored as text (e.g. "$150.00"), `price < 200`
# compares strings rather than amounts -- confirm the column type.
nyc_trunc <- nyc %>% filter(price < 200,
neighbourhood_group_cleansed != "Staten Island",
minimum_nights > 5 & minimum_nights < 15)
# smaller csv file (16 cols)
pacman::p_load(readr,dplyr)
url <- "http://data.insideairbnb.com/united-states/ny/new-york-city/2021-04-07/data/listings.csv.gz"
nyc <-  read_csv(url)
nyc <- nyc[nyc$id < 20000,] # get smaller subset of data
nyc_trunc <- nyc %>% filter(price < 200,
neighbourhood_group_cleansed != "Staten Island",
minimum_nights > 5 & minimum_nights < 15)
nyc_trunc %>% glimpse
# ---------------------------------------
#
# the printout of your plotting code here
#
# ---------------------------------------
# Faceted price histogram of nyc_trunc, one panel per borough; the label
# variables are set first and passed to labs(). Repeated blocks are retries.
suppressWarnings(require(ggplot2))
ttl <- "Properties less than US$200 available between 5 and 15 nights"
subttl <- "across NYC boroughs except Staten Island"
# note: the variables named xlab and ylab share their names with ggplot2's
# xlab()/ylab() functions
xlab = "Price (US)"
ylab <- "Number of properties"
caption <- "Source: NYC Airbnb data"
legend_ttl <- "Boroughs"
ggplot(nyc_trunc,aes(price)) +
geom_histogram(binwidth = 20, aes(fill = neighbourhood_group_cleansed)) +
facet_grid(. ~ neighbourhood_group_cleansed) +
theme_classic() +
labs(title = ttl, subtitle = subttl, x = xlab, y = ylab, caption = caption, fill = legend_ttl)
suppressWarnings(require(ggplot2))
ttl <- "Properties less than US$200 available between 5 and 15 nights"
subttl <- "across NYC boroughs except Staten Island"
xlab = "Price (US)"
ylab <- "Number of properties"
caption <- "Source: NYC Airbnb data"
legend_ttl <- "Boroughs"
nyc_trunc %>% str
ggplot(nyc_trunc,aes(price)) +
geom_histogram(binwidth = 20, aes(fill = neighbourhood_group_cleansed)) +
facet_grid(. ~ neighbourhood_group_cleansed) +
theme_classic() +
labs(title = ttl, subtitle = subttl, x = xlab, y = ylab, caption = caption, fill = legend_ttl)
nyc_trunc %>% str
# same histogram without facets, theme or labels
ggplot(nyc_trunc,aes(price)) +
geom_histogram(binwidth = 20, aes(fill = neighbourhood_group_cleansed))
nyc_trunc$neighbourhood_group_cleansed %>% class
# same plot without the fill aesthetic
ggplot(nyc_trunc,aes(price)) +
geom_histogram(binwidth = 20
) +
facet_grid(. ~ neighbourhood_group_cleansed) +
theme_classic() +
labs(title = ttl, subtitle = subttl, x = xlab, y = ylab, caption = caption, fill = legend_ttl)
# NOTE(review): stat = "count" is presumably a workaround for `price` not being
# numeric; whether binwidth still has an effect with this stat is unconfirmed.
ggplot(nyc_trunc,aes(price)) +
geom_histogram(binwidth = 20, aes(fill = neighbourhood_group_cleansed), stat = "count") +
facet_grid(. ~ neighbourhood_group_cleansed) +
theme_classic() +
labs(title = ttl, subtitle = subttl, x = xlab, y = ylab, caption = caption, fill = legend_ttl)
# Re-subset with a different id cut-off, rebuild nyc_trunc and redraw the
# faceted histogram; then repeat after downloading the data again.
nyc <- nyc[nyc$id < 50000,] # get smaller subset of data
nyc_trunc <- nyc %>% filter(price < 200,
neighbourhood_group_cleansed != "Staten Island",
minimum_nights > 5 & minimum_nights < 15)
suppressWarnings(require(ggplot2))
ttl <- "Properties less than US$200 available between 5 and 15 nights"
subttl <- "across NYC boroughs except Staten Island"
xlab = "Price (US)"
ylab <- "Number of properties"
caption <- "Source: NYC Airbnb data"
legend_ttl <- "Boroughs"
ggplot(nyc_trunc,aes(price)) +
geom_histogram(binwidth = 20, aes(fill = neighbourhood_group_cleansed), stat = "count") +
facet_grid(. ~ neighbourhood_group_cleansed) +
theme_classic() +
labs(title = ttl, subtitle = subttl, x = xlab, y = ylab, caption = caption, fill = legend_ttl)
url <- "http://data.insideairbnb.com/united-states/ny/new-york-city/2021-04-07/data/listings.csv.gz"
nyc <-  read_csv(url)
nyc <- nyc[nyc$id < 100000,] # get smaller subset of data
nyc_trunc <- nyc %>% filter(price < 200,
neighbourhood_group_cleansed != "Staten Island",
minimum_nights > 5 & minimum_nights < 15)
suppressWarnings(require(ggplot2))
ttl <- "Properties less than US$200 available between 5 and 15 nights"
subttl <- "across NYC boroughs except Staten Island"
xlab = "Price (US)"
ylab <- "Number of properties"
caption <- "Source: NYC Airbnb data"
legend_ttl <- "Boroughs"
ggplot(nyc_trunc,aes(price)) +
geom_histogram(binwidth = 20, aes(fill = neighbourhood_group_cleansed), stat = "count") +
facet_grid(. ~ neighbourhood_group_cleansed) +
theme_classic() +
labs(title = ttl, subtitle = subttl, x = xlab, y = ylab, caption = caption, fill = legend_ttl)
# Histogram variations with the data and aesthetics supplied to the layer
# instead of to ggplot().
# NOTE(review): two aes() calls are passed to the layer below; the second is
# unnamed, so it is matched positionally to some other argument rather than
# merged with the first -- presumably unintended.
ggplot() +
geom_histogram(data = nyc_trunc,aes(price),
binwidth = 20, aes(fill = neighbourhood_group_cleansed)) +
facet_grid(. ~ neighbourhood_group_cleansed) +
theme_classic() +
labs(title = ttl, subtitle = subttl, x = xlab, y = ylab, caption = caption, fill = legend_ttl)
# single aes() with both x and fill; no binwidth given
ggplot() +
geom_histogram(data = nyc_trunc,
aes(price, fill = neighbourhood_group_cleansed)) +
facet_grid(. ~ neighbourhood_group_cleansed)
ggplot() +
geom_histogram(data = nyc_trunc,
aes(price,fill = neighbourhood_group_cleansed),
binwidth = 20)
?geom_histogram
# NOTE(review): the leading `?` applies the help operator to the call, so no
# histogram layer is added here -- presumably a leftover from the help lookup.
ggplot() +
?geom_histogram(data = nyc_trunc,
aes(price,fill = neighbourhood_group_cleansed),
stat = "bin",
binwidth = 20)
ggplot() +
geom_histogram(data = nyc_trunc,
aes(price,fill = neighbourhood_group_cleansed),
stat = "bin",
binwidth = 20)
# add a binned x scale and facet by borough
ggplot() +
geom_histogram(data = nyc_trunc,
aes(price,fill = neighbourhood_group_cleansed),
stat = "bin",
binwidth = 20) +
scale_x_binned() +
facet_grid(. ~ neighbourhood_group_cleansed)
# binwidth removed; note the trailing comma left after stat = "bin"
ggplot() +
geom_histogram(data = nyc_trunc,
aes(price,fill = neighbourhood_group_cleansed),
stat = "bin",
) +
facet_grid(. ~ neighbourhood_group_cleansed)
ggplot() +
geom_histogram(data = nyc_trunc,
aes(price,fill = neighbourhood_group_cleansed),
# stat = "bin",
binwidth = 20) +
facet_grid(. ~ neighbourhood_group_cleansed)
# density layer drawn with the binning stat
ggplot() +
geom_density(data = nyc_trunc,
aes(price,fill = neighbourhood_group_cleansed),
stat = "bin",
binwidth = 20) +
facet_grid(. ~ neighbourhood_group_cleansed)
# Density plots of price filled by borough, trying different binwidth values,
# followed by two histograms with binwidth = 50.
# NOTE(review): binwidth is passed to geom_density() while stat = "bin" is
# commented out; whether the default density stat uses binwidth is unconfirmed.
ggplot() +
geom_density(data = nyc_trunc,
aes(price,fill = neighbourhood_group_cleansed),
# stat = "bin",
# binwidth = 20
)
ggplot() +
geom_density(data = nyc_trunc,
aes(price,fill = neighbourhood_group_cleansed),
# stat = "bin",
binwidth = 30
)
ggplot() +
geom_density(data = nyc_trunc,
aes(price,fill = neighbourhood_group_cleansed),
# stat = "bin",
binwidth = 100
)
ggplot() +
geom_density(data = nyc_trunc,
aes(price,fill = neighbourhood_group_cleansed),
# stat = "bin",
binwidth = 100
) +
facet_grid(. ~ neighbourhood_group_cleansed)
ggplot() +
geom_histogram(data = nyc_trunc,
aes(price,fill = neighbourhood_group_cleansed),
stat = "bin",
binwidth = 50)
ggplot() +
geom_histogram(data = nyc_trunc,
aes(price,fill = neighbourhood_group_cleansed),
# stat = "bin",
binwidth = 50)
# Inspect nyc_trunc and retry the histogram.
nyc_trunc %>% str
# NOTE(review): `data = nyc_trunc %>% str` hands the layer the value returned by
# str(), which prints the structure and returns NULL, not the data frame itself.
ggplot() +
geom_histogram(data = nyc_trunc %>% str,
aes(price),
stat = "bin",
binwidth = 50) +
facet_grid(. ~ neighbourhood_group_cleansed) +
theme_classic()
ggplot() +
geom_histogram(data = nyc_trunc %>% str,
aes(price),
stat = "bin",
binwidth = 50)
# list the column names and print the neighbourhood and price columns
nyc_trunc %>% names
nyc_trunc$neighbourhood
nyc_trunc$neighbourhood_cleansed
nyc_trunc$neighbourhood_group_cleansed
nyc_trunc$price
# Convert the price columns to numeric amounts, then draw the histogram.
#
# The earlier attempts used as.numeric(as.factor(x)). That returns the integer
# codes of the factor levels (1, 2, 3, ...), not the numbers shown in the
# labels, so the stored "prices" were meaningless ranks. Parse the text instead.
# Assumes price is text such as "$1,250.00" -- TODO confirm with
# str(nyc$price); an already-numeric column passes through unchanged.
parse_price <- function(x) {
  # drop currency symbols and thousands separators before converting
  as.numeric(gsub("[$,]", "", as.character(x)))
}
nyc$price <- parse_price(nyc$price)
nyc_trunc$price <- parse_price(nyc_trunc$price)
nyc_trunc$price
# Pass the data frame itself: `nyc_trunc %>% str` evaluates to NULL because
# str() only prints, so the layer never received any data.
ggplot() +
  geom_histogram(
    data = nyc_trunc,
    aes(price),
    stat = "bin",
    binwidth = 50
  )
# Reload the listings, rebuild nyc_trunc and compare ways of coercing price.
pacman::p_load(readr,dplyr)
url <- "http://data.insideairbnb.com/united-states/ny/new-york-city/2021-04-07/data/listings.csv.gz"
nyc <-  read_csv(url)
nyc <- nyc[nyc$id < 100000,] # get smaller subset of data
nyc_trunc <- nyc %>% filter(price < 200,
neighbourhood_group_cleansed != "Staten Island",
minimum_nights > 5 & minimum_nights < 15)
nyc_trunc$price
nyc_trunc$price %>% as.factor
# converting a factor with as.numeric()/as.integer() yields the level codes,
# not the values shown in the level labels
nyc_trunc$price %>% as.factor %>%  as.numeric
nyc_trunc$price %>% as.factor
nyc_trunc$price %>% as.factor %>% as.numeric
nyc_trunc$price %>% as.factor %>% as.integer()
nyc_trunc$price %>% as.integer()
# Scraping experiments: read tables from one page, then work out which package
# provides read_html() for a second page.
url <- "https://www.imdb.com/title/tt0094625/?ref_=fn_al_tt_1"
url %>% read_html %>% html_table(trim = T)  # get just tables
url <- "https://r4ds.had.co.nz/data-visualisation.html" # url to scrape
url %>% read_html # scrape HTML text and data
require(dplyr)
url %>% read_html # scrape HTML text and data
require(rvest)
require(xml2)
require(dplyr)
url %>% read_html # scrape HTML text and data
# NOTE(review): read_html is presumably not exported by readr; the following
# lines switch to rvest::read_html.
url %>% readr::read_html # scrape HTML text and data
url %>% rvest::read_html # scrape HTML text and data
url %>% rvest::read_html() # scrape HTML text and data
url <- "https://r4ds.had.co.nz/data-visualisation.html" # url to scrape
url %>% rvest::read_html() # scrape HTML text and data
require(rvest)
# NOTE(review): in this history, node() is called here before the definition
# that follows.
node("main") %>%
html_nodes("a") %>%
.[[3]] %>% # get the 3rd scraped term
html_text()
# Scrape a page and return the nodes that match a CSS selector.
#
# n    : selector string passed on to html_nodes() (user-defined node).
# page : address of the page to read; defaults to the global variable `url`,
#        which is what the function always used before.
# Returns whatever html_nodes() returns for the selector.
# The page is downloaded and parsed again on every call.
node <- function(n, page = url) {
  # With no `url` variable assigned, the name resolves to the base function
  # url(); fail with a clear message instead of an obscure read_html() error.
  if (is.function(page)) {
    stop("`url` is not set: assign the page address to `url` or pass `page`.",
         call. = FALSE)
  }
  page %>%
    read_html() %>%
    html_nodes(n)
}
# Use node() to pull link text and image sources from the page stored in `url`.
node("main") %>%
html_nodes("a") %>%
.[[3]] %>% # get the 3rd scraped term
html_text()
rmarkdown::render_site()
# NOTE(review): extract2() and image_read() are not defined in this view --
# presumably from magrittr and magick; confirm both are attached.
node("img") %>% html_attr("src") %>% extract2(3) %>% image_read()
node("img") %>% html_attr("src")
# switch the page being scraped, then list its image sources
url <- "https://www.imdb.com/title/tt0094625/?ref_=fn_al_tt_1"
node("img") %>% html_attr("src")
node("img") %>% html_attr("src") %>% extract2(3)
rmarkdown::render_site()