diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5b6a065 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata diff --git a/Class_7_Instructions.html b/Class_7_Instructions.html new file mode 100644 index 0000000..9d5a255 --- /dev/null +++ b/Class_7_Instructions.html @@ -0,0 +1,250 @@ + + + + +
+ + + + + + + + + + +We will use two packages: tidyr and dplyr
+#Install packages
+install.packages("tidyr")
+install.packages("dplyr")
+#Load packages
+#library() attaches one package per call: its second argument is `help`, so library(tidyr, dplyr) does not attach dplyr
+library(tidyr)
+library(dplyr)
+#Read the instructor activity file (comma separated, column names taken from the first row)
+data_wide <- read.table(
+  file = "~/Career/TC/Courses/Core Methods in EDM/class7/instructor_activity_wide.csv",
+  header = TRUE,
+  sep = ","
+)
+
+#Open the loaded data in the viewer and note its structure: each variable is a date and each row is a type of measure.
+View(data_wide)
+
+#read.table() makes column names syntactically valid, so names made up only of digits are prefixed with the letter "X". The digits are dates in the format year-month-day.
+The gather command requires the following input arguments:
+#Collapse the columns into key/value pairs: the old column names land in "date", the cell values in "variables"
+data_long <- gather(data = data_wide, key = date, value = variables)
+#Give the columns unambiguous names so it is clear what each one holds
+colnames(data_long) <- c("variables", "date", "measure")
+#Inspect the reshaped data in the viewer
+View(data_long)
+The spread function requires the following input:
+#Spread the long instructor data out again: one column per value of "variables", filled from "measure"
+instructor_data <- spread(data = data_long, key = variables, value = measure)
+#Read the student activity file (comma separated, column names taken from the first row)
+student_activity <- read.table(
+  file = "~/Career/TC/Courses/Core Methods in EDM/class7/student_activity.csv",
+  header = TRUE,
+  sep = ","
+)
+#Spread the student data the same way: one column per value of "variable", filled from "measure"
+student_data <- spread(data = student_activity, key = variable, value = measure)
+To do this we will use the dplyr package. (We will call its functions with the dplyr:: prefix because dplyr uses function names that also exist in other packages, where they perform different operations.)
+Notice that the way we subset is with a logical rule, in this case date == 20160204. In R, when we want to say that something “equals” something else we need to use a double equals sign “==”. (A single equals sign means the same as <-).
+#Keep only the rows whose date equals 20160204
+student_data_2 <- student_data %>%
+  dplyr::filter(date == 20160204)
+Now subset the student_activity data frame to create a data frame that only includes students who have sat at table 4. Write your code in the following chunk:
+#Keep only the rows where the table column equals 4
+student_data_3 <- student_data %>%
+  dplyr::filter(table == 4)
+It is useful to be able to make new variables for analysis. We can either append a new variable to our data frame or we can replace some variables with a new variable. Below we will use the “mutate” function to create a new variable “total_sleep” from the light and deep sleep variables in the instructor data.
+#Add total_sleep as the sum of the deep and light sleep columns
+instructor_data <- instructor_data %>%
+  dplyr::mutate(total_sleep = s_deep + s_light)
+Now, referring to the cheat sheet, create a data frame called “instructor_sleep” that contains ONLY the total_sleep variable. Write your code in the following code chunk:
+#Keep only the total_sleep column
+instructor_sleep <- instructor_data %>%
+  dplyr::select(total_sleep)
+Now, we can combine several commands together to create a new variable that contains a grouping. The following code creates a weekly grouping variable called “week” in the instructor data set:
+#ntile() ranks the rows by date and splits them into 3 buckets; the bucket number is stored as "week"
+instructor_data <- instructor_data %>%
+  dplyr::mutate(week = dplyr::ntile(date, 3))
+Create the same variable for the student data frame. Write your code in the code chunk below:
+#Same 3-bucket "week" grouping, this time for the student data
+student_data <- student_data %>%
+  dplyr::mutate(week = dplyr::ntile(date, 3))
+Next we will summarize the student data. First we can simply take an average of one of our student variables such as motivation:
+#Overall mean of motivation across every row of the student data
+student_data %>%
+  dplyr::summarise(mean(motivation))
+
+#That isn't super interesting, so let's break it down by date:
+
+student_data %>%
+  dplyr::group_by(date) %>%
+  dplyr::summarise(mean(motivation))
+Create two new data sets using this method: one that summarizes average motivation for students for each week (student_week) and another that summarizes “m_active_time” for the instructor per week (instructor_week). Write your code in the following chunk:
+#Mean student motivation within each week
+student_week <- student_data %>%
+  dplyr::group_by(week) %>%
+  dplyr::summarise(mean(motivation))
+#Mean instructor m_active_time within each week
+instructor_week <- instructor_data %>%
+  dplyr::group_by(week) %>%
+  dplyr::summarise(mean(m_active_time))
+Now we will merge these two data frames using dplyr.
+#Full join on "week": the instructor summary column comes first, followed by the student summary column
+merge <- dplyr::full_join(x = instructor_week, y = student_week, by = "week")
+Visualize the relationship between these two variables (mean motivation and mean instructor activity) with the “plot” command and then run a Pearson correlation test (hint: cor.test()). Write the code for these commands below:
+#full_join(instructor_week, student_week) puts the instructor mean before the student mean, so the names must follow that order
+names(merge) <- c("week", "avg_instructor", "avg_student")
+#Scatter plot with mean student motivation on the x axis and mean instructor activity on the y axis
+plot(merge$avg_student, merge$avg_instructor)
+#Correlation test between the two weekly means (cor.test() defaults to method = "pearson")
+cor.test(merge$avg_student, merge$avg_instructor)
+Finally, save your markdown document and your plot to this folder, then commit, push and pull your repo to submit.
+