-
Notifications
You must be signed in to change notification settings - Fork 0
/
TripAdvisorScraping.R
50 lines (26 loc) · 1.07 KB
/
TripAdvisorScraping.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# Load libraries ----
# Attach every package the script needs; a package that is not installed yet
# is installed first (together with its dependencies) and then attached.
# NOTE(review): "plyr" is attached after "dplyr", so plyr's same-named
# functions (e.g. summarise, mutate, rename) mask dplyr's. The order is kept
# unchanged because the sourced project scripts may rely on it -- confirm
# before reordering.
packages <- c(
  "dplyr", "XML", "rvest", "stringr", "plyr",
  "xml2", "pryr", "lubridate", "readxl"
)
# requireNamespace() only probes for availability (it does not attach and does
# not warn when the package is missing); library() then attaches the package
# and stops with an error if it still cannot be loaded after installation.
# package.check keeps library()'s return value for each package.
package.check <- lapply(packages, FUN = function(x) {
  if (!requireNamespace(x, quietly = TRUE)) {
    install.packages(x, dependencies = TRUE)
  }
  library(x, character.only = TRUE)
})
# Source the two project scripts, in this order. They are evaluated at top
# level, so whatever they define becomes available to the rest of this script
# (presumably including scrap() and gather() used below -- confirm).
source("AuxiliaryDownloadFunctions.R")
source("mainFunctionForScrapping.R")
# Session-wide options: keep strings as character vectors instead of factors.
# NOTE(review): "silent" is not a documented base R option; presumably it is
# read by the sourced scripts via getOption("silent") -- confirm.
options(stringsAsFactors = FALSE, silent=TRUE)
# Hotel input data ----
# See Hoteli1.csv for the data the script needs. Just read in your own files
# here and leave everything else alone.
ratingTable <- read_xlsx("RatingTable.xlsx")
datah <- read.csv(
  file = "Hoteli1.csv",
  header = TRUE,
  sep = ";",
  stringsAsFactors = FALSE
)

# Hotel identifier ----
# One id per row: Region, Location and Hotel glued together with every space
# removed. Note that paste0() turns a missing value into the text "NA".
hotelid <- gsub(
  " ", "",
  paste0(datah$Region, datah$Location, datah$Hotel),
  fixed = TRUE
)
# Append the ids to the hotel table as the column "hotelid".
datah <- cbind(datah, hotelid)
# Toggle: should the member id be downloaded as well?
memberid <- FALSE

# Download step ----
# Arguments are passed positionally, exactly as before. The meaning of the two
# 1s is defined by scrap() -- NOTE(review): confirm against its definition.
scrap(hotelid, 1, 1, "./data/", memberid)

# Gather step ----
# Per the original note, this collects the downloaded data from the given
# subdirectory into a "|"-delimited text file. The note spells the file name
# TAOUPUT.txt -- NOTE(review): possibly a typo for TAOUTPUT.txt; confirm in
# gather().
gather("./data/")