-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdiversity_stats.R
148 lines (117 loc) · 4.6 KB
/
diversity_stats.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
library(lme4)
library(lmerTest)
library(bbmle)
library(reshape)
# this needs data.nosing.rar from data_wrangling.R
head(data.nosing.rar[,1:10])
#calculating richness, shannons, and evenness
richness<-fisher.alpha(data.nosing.rar[,-c(1:5)],1)
head(richness)
shannons<-diversity(data.nosing.rar[,-c(1:5)])
evenness<-shannons/log(richness)
hist(richness)
div_stats<-data.frame(data.nosing.rar[,1:5],richness,shannons,evenness)
head(div_stats)
#looking at data distribution
#ggplot(div_stats)+geom_histogram(aes(shannons^2.7))
#ggplot(div_stats)+geom_histogram(aes(richness^2.7))
#ggplot(div_stats)+geom_histogram(aes(evenness^2.7))
#richness is skewed heavily to right
ggplot(div_stats)+geom_histogram(aes(richness))
#keeping log transformation helps normalize data
#Testing main effects of date, crop, soil frac on diversity measures
summary(test<-aov(richness~Date+Crop+SoilFrac, data=div_stats))
TukeyHSD(test)
#Looking with block effect, no diff
summary(test<-aov(richness~Date+Crop+SoilFrac+(1|Block), data=div_stats))
#Looking with nested agg:
test2<-lmer(richness~Date*Crop*SoilFrac+(1|Block)+(1|Date/Crop/SoilFrac), data=div_stats)
test3<-lmer(richness~Date+Crop+SoilFrac+Date:Crop+(1|Block)+(1|Date/Crop/SoilFrac), data=div_stats)
summary(test3)
test4<-lmer(richness~Date*Crop*SoilFrac+(1|Block)+(1|Date/Crop/SoilFrac), data=div_stats)
summary(test2)
TukeyHSD(test)
#SoilFrac P=0.001, micro > LM, WS
#Looking at distribution among crop
Rich<-ddply(div_stats, .(Crop), summarise,.progress="text",
mean=mean(richness),
high95=boot.high(richness),
low95=boot.low(richness)
)
Rich
ggplot(Rich, aes(Crop, mean))+geom_pointrange(aes(ymax=high95, ymin=low95))
summary(test<-aov(evenness~Date+Crop+SoilFrac, data=div_stats))
TukeyHSD(test)
#no effects of any main factor
summary(test<-aov(shannons~Date+Crop+SoilFrac, data=div_stats))
TukeyHSD(test)
#SoilFrac P=0.02, micro>LM
?diversity
ggplot(div_stats)+geom_boxplot(aes(x=SoilFrac, y=richness))
head(data.nosing.rar[,1:10])
data_melt<-melt(data.nosing.rar, id=c("SampleName","Date","Block","Crop","SoilFrac"))
taxonomy<-read.csv(file.choose())
head(taxonomy)
head(data_melt)
data_taxa<-merge(data_melt,taxonomy,by.x="variable",by.y="X.OTU.ID")
head(data_taxa)
data_taxa2<-data_taxa[ which(data_taxa$value>0),]
head(data_taxa2)
#Calculating percent of unique OTUs micros contribute
SoilFrac.OTU<-ddply(data_taxa, .(SoilFrac~variable), summarise, .progress="text", total=sum(value))
library(compare)
micros<-subset(data_taxa, data_taxa$SoilFrac=="Micro"))
micros2<-droplevels(micros[which(micros$value>0),])
micro.OTU<-data.frame(num=1:1337,OTU=levels(micros2$variable))
str(micro.OTU)
Others<-data.frame(data_taxa[data_taxa$SoilFrac %in% c("LM","MM","SM","WS"),])
Others2<-droplevels(Others[which(Others$value>0),])
Others.OTU<-data.frame(num=1:1945, OTU=levels(Others2$variable))
str(Others.OTU)
Common.OTU<-merge(micro.OTU,Others.OTU, by="OTU")
head(Common.OTU[1:10,])
dim(Common.OTU)
#Bootstrap functions from R. Williams
boot.high<-function(XX){
boot.mean<-numeric(1000)
for (i in 1:1000){
boot.mean[i]<-mean(sample(XX,replace=T))
}
return(quantile(boot.mean,(0.975)))
}
boot.low<-function(XX){
boot.mean<-numeric(1000)
for (i in 1:1000){
boot.mean[i]<-mean(sample(XX,replace=T))
}
return(quantile(boot.mean,(0.025)))
}
library(plyr)
Phyla.data<-ddply(data_taxa2, .(SoilFrac, Phylum), summarise,.progress="text",
mean=mean(value),
high95=boot.high(value),
low95=boot.low(value)
)
head(Phyla.data)
?geom_pointrange
ggplot(Phyla.data)+geom_pointrange(aes(x=Phylum,y=mean,ymax=high95,ymin=low95, color=SoilFrac),position=position_dodge(width=1))+coord_flip()+scale_y_log10()
stats$SoilFrac<-factor(stats$SoilFrac, levels=c("Micro","SM","MM","LM","WS"))
ggplot(stats)+geom_pointrange(aes(x=Phylum,y=mean,ymax=high95,ymin=low95,colour=SoilFrac),position=position_dodge(width=.5))+coord_flip()+scale_y_log10()
#SampleID to look for crop
Phyla.datac<-ddply(data_taxa2, .(Crop, Phylum), summarise,.progress="text",
mean=mean(value),
high95=boot.high(value),
low95=boot.low(value)
)
head(Phyla.datac)
ggplot(Phyla.datac)+geom_pointrange(aes(x=Phylum,y=mean,ymax=high95,ymin=low95, color=Crop),position=position_dodge(width=1))+coord_flip()+scale_y_log10()
#Some interesting differences to pursue here
#SampleID to look for date
Phyla.datad<-ddply(data_taxa2, .(Date, Phylum), summarise,.progress="text",
mean=mean(value),
high95=boot.high(value),
low95=boot.low(value)
)
head(Phyla.datad)
ggplot(Phyla.datad)+geom_pointrange(aes(x=Phylum,y=mean,ymax=high95,ymin=low95, color=Date),position=position_dodge(width=1))+coord_flip()+scale_y_log10()
#Asco and Zygo are higher in Oct, may not be significant. All others essentiall the same.