-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_analysis.R
49 lines (40 loc) · 1.92 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Reorder the columns of a data frame so that the requested ones lead.
#   data: a data frame (or other object that `[` can index by name)
#   move: character vector of column names to put first, in the given order
# Returns `data` with the `move` columns in front, followed by every other
# column in its original relative order.
movetofirst <- function(data, move) {
  remaining <- setdiff(names(data), move)
  new_order <- c(move, remaining)
  data[new_order]
}
# Install dplyr when it is missing, then attach it.
# requireNamespace() tests for this one package directly; installed.packages()
# builds a table of every installed package and its own documentation advises
# against using it for an "is this package installed?" check.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
# Load the raw text files and stack each test/train pair, test rows first.
# Every table is read with stringsAsFactors = FALSE so text stays character.

# Lookup tables: activity_labels is used further down to translate the codes
# in y, features supplies the column names for the measurement table.
activity_labels <- read.table(
  file = "data/activity_labels.txt",
  stringsAsFactors = FALSE
)
features <- read.table(
  file = "data/features.txt",
  stringsAsFactors = FALSE
)

# Subject column (one row per observation).
subject_test <- read.table(
  file = "data/test/subject_test.txt",
  stringsAsFactors = FALSE
)
subject_train <- read.table(
  file = "data/train/subject_train.txt",
  stringsAsFactors = FALSE
)
subject <- rbind(subject_test, subject_train)

# Measurement table.
X_test <- read.table(
  file = "data/test/X_test.txt",
  stringsAsFactors = FALSE
)
X_train <- read.table(
  file = "data/train/X_train.txt",
  stringsAsFactors = FALSE
)
data <- rbind(X_test, X_train)

# Activity codes.
y_test <- read.table(
  file = "data/test/y_test.txt",
  stringsAsFactors = FALSE
)
y_train <- read.table(
  file = "data/train/y_train.txt",
  stringsAsFactors = FALSE
)
y <- rbind(y_test, y_train)
# Name the measurement columns after the second column of features.txt.
names(data) <- features[[2]]
# Drop every column whose name repeats an earlier one; drop = FALSE keeps the
# result a data frame even if only one column survives.
data <- data[, !duplicated(names(data)), drop = FALSE]
# Keep only the mean and std variables. contains() matches case-insensitively
# by default, so names containing "Mean" are retained as well.
data <- select(data, contains("mean"), contains("std"))
# Add the subject column.
data$subject <- subject[[1]]
# Translate activity codes into labels by matching each code against the id
# column of activity_labels (first column) and taking the label from the
# second column. This is vectorised, avoids apply() coercing the data frame to
# a matrix, and does not depend on the rows of activity_labels being stored in
# id order. Codes without a label become NA, as before.
activity_row <- match(y[[1]], activity_labels[[1]])
data$activity <- as.factor(activity_labels[[2]][activity_row])
# Put the identifier columns first: subject, then activity.
data <- movetofirst(data, c("subject", "activity"))
# Save the intermediate (not yet aggregated) data set.
write.table(data, file = "dirty.txt", row.names = FALSE)
#clean up the workspace (currently disabled: the remove() calls below are commented out)
# remove(subject_test, subject_train, X_test, X_train, y_test, y_train)
# remove(y, subject, features, activity_labels)
# Generate the final tidy data set: one row per subject/activity pair holding
# the mean of every retained "mean"/"std" column.
# summarise(across(...)) replaces the deprecated summarize_each()/funs() pair;
# with a single function, across() keeps the original column names.
# .groups = "drop" returns an ungrouped result; the written file is unaffected.
tidy <- data %>%
  group_by(subject, activity) %>%
  summarise(
    across(c(contains("mean"), contains("std")), mean),
    .groups = "drop"
  )
write.table(tidy, file = "tidy.txt", row.names = FALSE)