-
Notifications
You must be signed in to change notification settings - Fork 5
/
createTestData.R
34 lines (26 loc) · 898 Bytes
/
createTestData.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Restore the objects stored in the sample file. The rest of the script reads
# `test.sample`, presumably provided by this file (TODO confirm).
load("test.sample.RData")
library(doParallel)
library(tm)
# Leave one core free for the rest of the system, but never request fewer than
# one worker: detectCores() can return 1 (giving 0 here) or NA when the core
# count is unknown, and makeCluster() refuses both.
nodes <- max(1L, detectCores() - 1L, na.rm = TRUE)
cl <- makeCluster(nodes)
# Make the cluster the backend used by foreach's %dopar%.
registerDoParallel(cl)
# Split a phrase into a test pattern: everything before the last word is the
# input, and the last word (with punctuation stripped) is the target.
#
# phrase: character string whose words are separated by single spaces.
#
# Returns a character vector of length 2, c(input, target). A one-word phrase
# yields an empty input (""); a phrase with no words yields c("", "") so that
# callers combining results row- or column-wise always get a fixed shape.
phraseToTestPattern <- function(phrase) {
  words <- unlist(strsplit(phrase, " "))
  n_words <- length(words)
  # strsplit("") produces no words at all; without this guard the target
  # would be character(0) and the result would have length 1 instead of 2.
  if (n_words == 0L) {
    return(c("", ""))
  }
  target <- removePunctuation(words[n_words])
  # seq_len(n - 1) is empty for a single word. The previous
  # `1:length(words)-1` parsed as `(1:n) - 1` and only worked because R
  # silently drops the 0 index.
  input <- paste(words[seq_len(n_words - 1L)], collapse = " ")
  c(input, target)
}
# For each item: everything but the last word is the input, and the last word is the target.
# Build the (input, target) rows for every element of `test.sample` in
# parallel with foreach/%dopar%, and time the run.
#
# Takes no arguments; reads the global `test.sample` and calls the global
# `phraseToTestPattern`.
#
# Returns the system.time() measurement. The combined rows are assigned to
# `outForeach` inside this function's frame only, so they are not visible to
# the caller once the function returns.
parallelFunction <- function() {
  system.time(
    outForeach <- foreach(
      # seq_along() yields no iterations for empty input, whereas
      # 1:length(x) would iterate over c(1, 0).
      i = seq_along(test.sample),
      .combine = rbind,
      .inorder = FALSE,
      .multicombine = TRUE,
      # foreach only auto-exports variables found in the calling (function)
      # environment, so the globals used by the loop body are named here.
      .export = c("phraseToTestPattern", "test.sample"),
      # removePunctuation() comes from tm; load the package on each worker
      # rather than exporting the function object.
      .packages = "tm"
    ) %dopar% {
      phraseToTestPattern(test.sample[i])
    }
  )
}
# Sequential apply version, the winner!
# vapply() is used instead of sapply() so the result is always a 2 x n
# character matrix (even for empty input), and a phrase that does not produce
# exactly c(input, target) fails loudly instead of silently yielding a list.
system.time({
  outSapply <- vapply(test.sample, phraseToTestPattern, character(2),
                      USE.NAMES = FALSE)
  # One row per phrase: column 1 is the input, column 2 is the target.
  outSapply <- t(outSapply)
})
stopCluster(cl)
save(outSapply, file = "outSapply.RData")
# `outForeach` is not created anywhere at the top level of this script, so an
# unconditional save() would abort with "object 'outForeach' not found".
# Only write it when it actually exists.
if (exists("outForeach")) {
  save(outForeach, file = "outForeach.RData")
} else {
  message("outForeach not found; skipping outForeach.RData")
}