-
Notifications
You must be signed in to change notification settings - Fork 0
/
makeGraph_moreDetails.R
executable file
·130 lines (95 loc) · 4.72 KB
/
makeGraph_moreDetails.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
library(RCy3)
library(igraph)
# Take data from dataPreparation.R script
# function takes the OTU table of choosing - selects the rows where at least one of the samples passes the
# threshold set up in the input to the function - if saveNodesAndEdges = T tables with nodes and edges are
# going to be exported to csv
# nodes table columns:
# - "Nodes" - name of the node = organism or ID of the kid
# - "type" - type of the node = is it organism node or ID node
# - "typeNode" - contains information about site also = 3 categories : UVA, Banglades, Organism
# - "sizeLog2Median"
# - "sizeLog2Mean"
# - "sizelog2Max"
# -> size of the ID nodes is fixed, size of the organism ID is base on the median / mean/ max
# abundance across the row
# edges:
# only edges higher than threshold are created
# edges are normalized - (row /(max(row))) *5 -> the 5 is there only to make the edges overall thicker
# then I append the metadata to the edges for potential coloring and weighting
constructGraph = function(dataToPlot,
threshold = 0.001,
saveNodesAndEdges = F){
# select the rows where at least one of the organisms reached the threshold - drop the others
data = dataToPlot[apply(dataToPlot[,-c(1,2)], 1, function(x) !all(x < threshold)),-1]
rownamesData = data$Organism
data = data[, -1]
rownames(data) = rownamesData
## create vertices (nodes)
numSamples = ncol(data) #number of samples
numOTU = nrow(data) #number of identified (bacterial) taxa
numNodes = numSamples+numOTU #number of nodes
nodes = c(colnames(data), rownames(data)) #vector of nodes = their label
rownames(metadata) = metadata$ID
# type of node- for node coloring in cytoscape - for kids do Banglades vs UVA / Organisms - just do Organism
typeNode = c(as.character(metadata[colnames(data), "site"]), rep("Organism",numOTU))
type = c(rep(0, numSamples), rep(1,numOTU))
# get the medians / means / max across rows - median of the viral abundance - do log2 transformation and shift
# up to eliminate negative numbers -> the lowest abundance = 1
medSize = log2(apply(data, 1, median) * 100)
medSize = ceiling(medSize + abs(min(medSize)) + 10)
meanSize = log2(apply(data, 1, mean) * 100)
meanSize = ceiling(meanSize + abs(min(meanSize)) + 10)
maxSize = log2(apply(data, 1, max) * 100)
maxSize = ceiling(maxSize + abs(min(maxSize)) + 10)
# set the kid nodes as the biggest and all the same size
typeMedian = c(rep(max(medSize + 5), numSamples), medSize)
typeMean = c(rep(max(meanSize + 5), numSamples), meanSize)
typeMax = c(rep(max(maxSize + 5), numSamples), maxSize)
tableNodes = data.frame(nodes, type, typeNode, typeMedian, typeMean, typeMax)
names(tableNodes) = c("Nodes", "type","typeNode", "sizeLog2Median", "sizeLog2Mean", "sizelog2Max")
if(saveNodesAndEdges){
write.table(tableNodes,"nodes.txt", quote = FALSE, row.names = FALSE)
write.csv(tableNodes,"nodes.csv", quote = FALSE, row.names = FALSE)
}
## create edges
edgeSource = c()
edgeTarget = c()
edgeWeight = c()
for(i in 1:numOTU){ #for every taxon
for(j in 1:numSamples){ #for every sample
if(data[i,j]>threshold){ #if abundance of i-th taxon in j-th sample is higher that threshold
edgeSource = c(edgeSource, i+numSamples) #source node (taxon - virus)
edgeTarget = c(edgeTarget, j) #target node (sample)
edgeWeight = c(edgeWeight, ceiling(5*(data[i,j]/max(data[i,])))) #edge weighting
}
}
}
tableEdges = data.frame(edgeSource, edgeTarget, edgeWeight)
names(tableEdges) = c("Source","Target","Weight")
if(saveNodesAndEdges){
write.table(tableEdges,"graph/edges.txt", quote = FALSE, row.names = FALSE)
write.csv(tableEdges,"graph/edges.csv", quote = FALSE, row.names = FALSE)
}
graphList = list(tableNodes = tableNodes, tableEdges=tableEdges)
return(graphList)
}
# run the function
graphList = constructGraph(as.data.frame(finalBestHit))
# transport to cytoscape
tableNodes = graphList$tableNodes
tableEdges = graphList$tableEdges
cytNodes = tableNodes
cytEdges = tableEdges
# name the nodes
cytEdges$Source = tableNodes$Nodes[tableEdges$Source]
cytEdges$Target = tableNodes$Nodes[tableEdges$Target]
#rename edgets
colnames(cytEdges) = c("from", "to", "weight")
#asign te metadata to edges - extract the site - to color the edges
cytEdges$to = as.character(cytEdges$to)
colorVec = left_join(cytEdges, metadata, by = c("to" = "ID"))
cytEdges$site = colorVec$site
# create graph and export to cytoscape
ig = graph_from_data_frame(cytEdges, directed=F, vertices= cytNodes)
createNetworkFromIgraph(ig,"finalBestHit0_001")