-
Notifications
You must be signed in to change notification settings - Fork 1
/
ReadXls.R
121 lines (107 loc) · 4.55 KB
/
ReadXls.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
########################################################################################
# #
# Code to read in Gatekeeper spreadsheets, parse into a data frame and write to csv #
# For this to run, you need a perl executable saved locally, to match the location #
# given just below and saved as value 'perl'. You also need the gdata and tidyr #
# packages installed. #
# This code assumes you have farm spreadsheets saved in the subfolder 'Farm_data' # #
# #
########################################################################################
rm(list=ls())
perl <- 'C:/Strawberry/perl/bin/perl5.26.1.exe'
## First, load up the libraries and functions
#install.packages('gdata')
#install.packages('tidyr')
library('gdata')
library('tidyr')
source('./ReadDetailed.R')
source('./ParseDetails.R')
source('./ReadCondensed.R')
source('./ReadAnalysis.R')
source('./ReadAppData.R')
source('./RemoveList.R')
source('./OutputRepro.R')
source('./LookupTable.R')
## Set the file path
datapath <- file.path('.','Farm_data')
## Discover all the files in the data folder
FileList <- list.files(file.path(datapath),recursive=T)
FileList <- FileList[grepl('xls(x)?$|csv$',FileList)]
## Set the output
DataOut <- c()
## Create column headers for the table
ColHeaders <- c('Farm','Field','Crop','Variety',
'MapSheet','NGNumber','Centroid',
'Product','ProductID',
'Harvest Interval','Active Ingredients','Manufacturer','Expires',
'Area','Area Units','Rate','Rate Units','Quantity','Quantity Units',
'Year','Start Date','End Date','Start Time','End Time',
'Weather','Temp','Wind speed/direction','Soil','Implement',
'Reference','Advisor','Operator','Issued By','Source')
DataOut <- data.frame()
for(i in ColHeaders){
DataOut <- data.frame(DataOut,character())
}
colnames(DataOut) <- ColHeaders
for(i in FileList){
FileType <- ''
## Read in the file
if(grepl('xls(x)?$',i)){
mydf <- read.xls(file.path(datapath,i),
sheet = 'Sheet1',
perl = perl,
header = F)
} else if(grepl('csv$',i)){
mydf <- read.csv(file.path(datapath,i))
}
##Find the parent folder for the file
FarmFolder <- substr(i,1,regexpr('\\/',i)[1]-1)
## Replace awkward NA's with ''
mydf[is.na(mydf)] <- ''
## Check whether file is detailed or condensed, then call the relevant function
if(grepl('\\/Detailed.*xls(x)?$',i)){
TmpDF <- read.detailed(mydf,ColHeaders,FarmFolder)
FileType <- 'detailed'
} else if(grepl('\\/Condensed.*xls(x)?$',i)){
TmpDF <- read.condensed(mydf,ColHeaders,FarmFolder)
FileType <- 'condensed'
} else if(grepl('\\/Analysis.*csv$',i)){
TmpDF <- read.analysis(mydf,ColHeaders,FarmFolder)
FileType <- 'analysis'
} else if(grepl('\\/ApplicationData_',i)){
TmpDF <- read.appdata(mydf,ColHeaders,FarmFolder,i)
FileType <- 'applicationdata'
}
if(FileType!=''){
DataOut <- rbind(DataOut,TmpDF)
print(paste0('Finished reading (',FileType,') file ',
match(i,FileList),' of ',length(FileList),':'))
} else {
print(paste0('Did not read file (',FileType,') file ',
match(i,FileList),' of ',length(FileList),':'))
}
print(paste(' ',(i)))
}
## Check if we have the output folder, and if not, create one for the output file
if(!dir.exists(file.path('.','Output'))){
dir.create(file.path('.','Output'))
}
## Make a lookup table to populate the data frame with additional data regarding
## product details (manufacturer, active ingredients, product ID), where possible
LookupList <- lookup.table(DataOut)
DataOut <- LookupList$df
LookupTable <- LookupList$lookup
## Do a bit of error checking, then, if all good, output the data frame to a csv
if(!dir.exists(datapath)){
stop('Cannot find folder Farm_data')
} else if(is.na(FileList[1])){
stop('Unable to find files in Farm_data folder')
} else {
write.csv(DataOut,file.path('.','Output','DataOut.csv'),row.names = F)
print(paste('File saved to:',file.path('.','Output','DataOut.csv')))
}
## Create list for summary and run summary function
SummaryCols <- c('Farm','Field','Crop','Variety','Product')
ReturnList <- repro.field.summary(DataOut,SummaryCols,
'ProductSummary.csv',writetofile=T)
print(ReturnList$msg)