-
Notifications
You must be signed in to change notification settings - Fork 1
/
mammal_zdx_assemble.R
67 lines (62 loc) · 2.28 KB
/
mammal_zdx_assemble.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# Non-carnivore, non-primate mammals ----

# Columns carried through to the combined host/parasite table.
keep.col <- c(
  "Spp", "Order", "Zoonosis", "Label", "parasiteGMPD",
  "Citation", "ParasiteCorrectedNameGMPD", "Note", "multiple.countries"
)

df1 <- read.csv("animal-dx-parasites - animal-dx-parasites.csv")

# Handle both parasite-name columns as plain character vectors.
df1$parasiteGMPD <- as.character(df1$parasiteGMPD)
df1$ParasiteCorrectedNameGMPD <- as.character(df1$ParasiteCorrectedNameGMPD)

# Wherever a corrected name was entered, it replaces the original name.
corrected_inds <- which(df1$ParasiteCorrectedNameGMPD != "")
df1$parasiteGMPD <- replace(
  df1$parasiteGMPD,
  corrected_inds,
  df1$ParasiteCorrectedNameGMPD[corrected_inds]
)
names(df1)

# The corrections now live in parasiteGMPD, so the corrected-name column is
# blanked before the table is trimmed to the shared column set.
df1$ParasiteCorrectedNameGMPD <- ""
df1 <- df1[, keep.col, drop = FALSE]

# Rows that name a parasite (comparisons that yield NA are excluded).
test <- df1[!is.na(df1$parasiteGMPD) & df1$parasiteGMPD != "", , drop = FALSE]
dim(test)
print(dim(df1))
# Carnivores ----
# Read the carnivore table, give it the same columns as df1 (keep.col), and
# stack it under df1 to start the combined table `df`.
df2 <- read.csv("carnivore-zdx-parasites.csv")
print(dim(df2))

# Rename the parasite column (if it is spelled "ParasiteGMPD") to match df1.
names(df2)[names(df2) == "ParasiteGMPD"] <- "parasiteGMPD"

# These columns are set to empty strings for every carnivore row
# (created if absent, overwritten if present).
df2$Citation <- ""
df2$Note <- ""
df2$multiple.countries <- ""
df2$ParasiteCorrectedNameGMPD <- ""

# setdiff(names(df1),names(df2))
# setdiff(names(df2),names(df1))

# Fail with a readable message if an expected column is absent, then keep only
# the shared columns. Without this, any extra column in the carnivore file
# makes rbind() stop because the column names of df1 and df2 differ.
if (!all(keep.col %in% names(df2))) {
  stop(
    "carnivore-zdx-parasites.csv is missing column(s): ",
    paste(setdiff(keep.col, names(df2)), collapse = ", "),
    call. = FALSE
  )
}
df2 <- df2[, keep.col, drop = FALSE]

df <- rbind(df1, df2)
# Primates ----
# In the primate data, ParasiteCorrectedNameGMPD holds the parasite for some
# rows for which parasiteGMPD is empty. For these rows, the corrected name is
# copied into parasiteGMPD.
df3 <- read.csv("prim-zdx-parasites.xls - prim-zdx-parasites.csv")
print(dim(df3))
names(df3)[names(df3) == "ParasiteGMPD"] <- "parasiteGMPD"

df3$parasiteGMPD <- as.character(df3$parasiteGMPD)
df3$ParasiteCorrectedNameGMPD <- as.character(df3$ParasiteCorrectedNameGMPD)

# Rows without a parasite name; a missing value (NA) counts as empty.
inds_empty <- which(is.na(df3$parasiteGMPD) | df3$parasiteGMPD == "")

# Fill only from rows that actually carry a corrected name, so that a missing
# (NA) correction can never turn an empty name into NA.
corrected <- df3$ParasiteCorrectedNameGMPD[inds_empty]
inds_fill <- inds_empty[!is.na(corrected) & corrected != ""]
df3$parasiteGMPD[inds_fill] <- df3$ParasiteCorrectedNameGMPD[inds_fill]

# Align the remaining column names with the combined table.
names(df3)[names(df3) == "Zoonoses"] <- "Zoonosis"
df3$multiple.countries <- ""

# Column-name differences between the combined table and the primate table,
# in both directions (shown when run at top level).
setdiff(names(df), names(df3))
setdiff(names(df3), names(df))

df3 <- df3[, keep.col, drop = FALSE]
# Put together all three tables ----
df <- rbind(df, df3)
dim(df)
# df_host_zdx_parasite= df
# save(df_host_zdx_parasite,
# file = "df_host_zdx_parasite.Rdata")

# Split the rows by whether a parasite name is recorded. One NA-safe mask is
# used for both halves so that every row ends up in exactly one of the two
# saved tables; a row whose parasiteGMPD is NA is treated as having no parasite.
has_parasite <- !is.na(df$parasiteGMPD) & df$parasiteGMPD != ""

df_parasite <- df[has_parasite, , drop = FALSE]
save(df_parasite, file = "df_parasite.Rdata")

df_no_parasite <- df[!has_parasite, , drop = FALSE]
save(df_no_parasite, file = "df_no_parasite.Rdata")

# Write the parasite names of the rows that have one.
out <- df_parasite$parasiteGMPD
write.csv(out, file = "parasiteGMPD.csv", row.names = FALSE)
# write.csv(out, file = "parasiteGMPD.csv")

# Add a running row number to the full table, then save it.
# seq_len() also copes with an empty table, where seq(1, 0) would count down.
df$row <- seq_len(nrow(df))
df_all <- df
save(df_all, file = "df_all.Rdata")