-
Notifications
You must be signed in to change notification settings - Fork 0
/
class2.1.Rmd
97 lines (73 loc) · 1.18 KB
/
class2.1.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#Data Transformation
#package dplyr
```{r}
# Print each warning as soon as it is raised instead of deferring it.
options(warn = 1)
# Attach the packages used by the chunks below (order kept as-is).
library(nycflights13)
library(dplyr)
# List the column names of the flights table.
names(flights)
```
#filter
```{r}
# Keep only the rows where month is 1 and day is 1, then print them.
jan1 <- flights %>%
  filter(month == 1, day == 1)
jan1
```
```{r}
# Keep only the rows whose month is 11 or 12, then print them.
nov_dec <- flights %>%
  filter(month %in% c(11, 12))
nov_dec
```
#missing values
```{r}
# Store a missing value and test for it with is.na().
x <- NA
is.na(x)
```
```{r}
# One-column data frame with a missing value in the third row.
df <- data.frame(x = c(1, 2, NA, 4, 5))
# filter() keeps only rows where the condition is TRUE, so the NA row
# is dropped along with the rows that fail x > 1.
df %>%
  filter(x > 1)
```
```{r}
# Keep rows where x is greater than 1 and also the rows where x is missing.
df %>%
  filter(x > 1 | is.na(x))
```
```{r}
# Keep only the rows where x is missing.
df %>%
  filter(is.na(x))
```
#Arrange
#similar to SQL's ORDER BY
```{r}
# Sort rows by year, then month, then day (ascending).
flights %>%
  arrange(year, month, day)
```
```{r}
# Sort rows by year, month and day in descending order.
# desc() accepts a single vector, so each sort key needs its own desc()
# call; desc(year, month, day) fails with an "unused arguments" error.
arrange(flights, desc(year), desc(month), desc(day))
```
```{r}
# Keep just the year and day columns.
flights %>%
  select(year, day)
```
#select keyword
```{r}
# Keep every column from year through day (inclusive range).
flights %>%
  select(year:day)
```
```{r}
# Drop every column from year through day and keep the rest.
flights %>%
  select(-(year:day))
```
#rename and mutate (calculated field)
```{r}
# Rename the tailnum column to tail_num (rename() takes new_name = old_name).
# The result is stored, not printed.
renamed_df <- rename(flights, tail_num = tailnum)
```
```{r}
# Add a calculated column day_month (day + month), then keep only the new
# column and its two inputs so the result is easy to inspect.
mutated_df <- flights %>%
  mutate(day_month = day + month) %>%
  select(day_month, day, month)
mutated_df
```
#summarize
```{r}
# Collapse the whole table to one row: the mean of dep_delay.
# na.rm = TRUE skips missing delays; otherwise the mean would be NA.
flights %>%
  summarise(delay = mean(dep_delay, na.rm = TRUE))
```
```{r}
# Group the rows by calendar date ...
by_day <- flights %>%
  group_by(year, month, day)
# ... then compute the mean dep_delay within each group (missing values skipped).
by_day %>%
  summarise(delay = mean(dep_delay, na.rm = TRUE))
```
#pipe
```{r}
# Same per-day mean of dep_delay, written as a single pipe chain.
# summarise() only peels off the last grouping level, so ungroup() is added
# to make sure the stored result carries no leftover grouping into later steps.
flight_agg <- flights %>%
  group_by(year, month, day) %>%
  summarise(delay = mean(dep_delay, na.rm = TRUE)) %>%
  ungroup()
flight_agg
```