forked from se-sic/coronet
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Move setting of the 'bins' attribute on networks from 'split.network.time.based' and 'split.networks.time.based' into 'split.network.by.bins'. This makes calculating the 'bins' attribute in 'split.networks.time.based' obsolete. Introduce 'get.bin.dates.from.ranges' wrapper function to aid in converting from ranges to bins where needed. Simplify conversion from bins to ranges and back in 'split.network.time.based'. Replace unneccessary mapply in 'split.network.time.based.by.ranges' by simpler lapply. Signed-off-by: Maximilian Löffler <[email protected]>
- Loading branch information
1 parent
5d3dd57
commit a18a932
Showing
1 changed file
with
29 additions
and
29 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -470,7 +470,6 @@ split.data.time.based.by.ranges = function(project.data, ranges) { | |
return(data.split) | ||
} | ||
|
||
|
||
## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / | ||
## Split networks ---------------------------------------------------------- | ||
|
||
|
@@ -532,28 +531,20 @@ split.network.time.based = function(network, time.period = "3 months", bins = NU | |
if (sliding.window) { | ||
ranges = construct.overlapping.ranges(start = min(bins.date), end = max(bins.date), | ||
time.period = time.period, overlap = 0.5, raw = FALSE, | ||
include.end.date = FALSE) # bins have already been prepared correctly | ||
bins.info = construct.overlapping.ranges(start = min(bins.date), end = max(bins.date), | ||
time.period = time.period, overlap = 0.5, raw = TRUE, | ||
include.end.date = FALSE) # bins have already been prepared correctly | ||
bins.date = sort(unname(unique(get.date.from.unix.timestamp(unlist(bins.info))))) | ||
|
||
logging::loginfo("Splitting network into time ranges [%s].", | ||
include.end.date = FALSE) | ||
logging::loginfo("Splitting network into overlapping time ranges [%s].", | ||
paste(ranges, collapse = ", ")) | ||
nets = split.network.time.based.by.ranges(network, ranges, remove.isolates) | ||
} else { | ||
logging::loginfo("Splitting network into bins [%s].", | ||
paste(bins.date, collapse = ", ")) | ||
nets = split.network.by.bins(network, bins, bins.vector, remove.isolates) | ||
revs = get.date.string(bins.date) | ||
ranges = construct.ranges(revs, sliding.window = FALSE) | ||
logging::loginfo("Splitting network into non-overlapping time ranges [%s].", | ||
paste(ranges, collapse = ", ")) | ||
nets = split.network.by.bins(network, bins, bins.vector, bins.date, remove.isolates) | ||
} | ||
|
||
## set bin attribute | ||
attr(nets, "bins") = bins.date | ||
|
||
## set ranges as names | ||
revs = get.date.string(bins.date) | ||
names(nets) = construct.ranges(revs, sliding.window = sliding.window) | ||
|
||
names(nets) = ranges | ||
return(nets) | ||
} | ||
|
||
|
@@ -613,10 +604,6 @@ split.networks.time.based = function(networks, time.period = "3 months", bins = | |
ranges = construct.overlapping.ranges(start = min(dates), end = max(dates), | ||
time.period = time.period, overlap = 0.5, raw = FALSE, | ||
include.end.date = TRUE) | ||
bins.info = construct.overlapping.ranges(start = min(dates), end = max(dates), | ||
time.period = time.period, overlap = 0.5, raw = TRUE, | ||
include.end.date = TRUE) | ||
bins.date = sort(unname(unique(get.date.from.unix.timestamp(unlist(bins.info))))) | ||
} else { | ||
bins.info = split.get.bins.time.based(dates, time.period, number.windows) | ||
bins.date = get.date.from.string(bins.info[["bins"]]) | ||
|
@@ -634,7 +621,6 @@ split.networks.time.based = function(networks, time.period = "3 months", bins = | |
if (sliding.window) { | ||
nets = split.network.time.based.by.ranges(network = net, ranges = ranges, | ||
remove.isolates = remove.isolates) | ||
attr(nets, "bins") = bins.date | ||
} else { | ||
nets = split.network.time.based(network = net, bins = bins.date, sliding.window = sliding.window, | ||
remove.isolates = remove.isolates) | ||
|
@@ -715,7 +701,7 @@ split.network.activity.based = function(network, number.edges = 5000, number.win | |
bins.vector = bins.vector[ with(df, order(my.unique.id)) ] # re-order to get igraph ordering | ||
bins = sort(unique(bins.vector)) | ||
## split network by bins | ||
networks = split.network.by.bins(network, bins, bins.vector, remove.isolates) | ||
networks = split.network.by.bins(network, bins, bins.vector, remove.isolates = remove.isolates) | ||
|
||
if (number.edges >= edge.count) { | ||
logging::logwarn("Sliding-window approach does not apply: not enough edges (%s) for number of edges %s", | ||
|
@@ -816,11 +802,9 @@ split.network.time.based.by.ranges = function(network, ranges, remove.isolates = | |
ranges.bounds = lapply(ranges, get.range.bounds) | ||
|
||
## loop over all ranges and split the network accordingly: | ||
nets.split = mapply( | ||
ranges, ranges.bounds, SIMPLIFY = FALSE, | ||
FUN = function(range, start.end) { | ||
nets.split = lapply(ranges.bounds, function(bounds) { | ||
## 1) split the network to the current range | ||
range.net = split.network.time.based(network, bins = start.end, sliding.window = FALSE, | ||
range.net = split.network.time.based(network, bins = bounds, sliding.window = FALSE, | ||
remove.isolates = remove.isolates)[[1]] | ||
|
||
## 2) return the network | ||
|
@@ -853,10 +837,12 @@ split.dataframe.by.bins = function(df, bins) { | |
#' @param network a network | ||
#' @param bins a vector with the unique bin identifiers, describing the order in which the bins are created | ||
#' @param bins.vector a vector of length 'ecount(network)' assigning a bin for each edge of 'network' | ||
#' @param bins.date a vector of dates representing the start of each bin. If present, then the dates will be set | ||
#' as an attribute on the returned networks [default: NULL] | ||
#' @param remove.isolates whether to remove isolates in the resulting split networks [default: TRUE] | ||
#' | ||
#' @return a list of networks, with the length of 'unique(bins.vector)' | ||
split.network.by.bins = function(network, bins, bins.vector, remove.isolates = TRUE) { | ||
split.network.by.bins = function(network, bins, bins.vector, bins.date = NULL, remove.isolates = TRUE) { | ||
logging::logdebug("split.network.by.bins: starting.") | ||
## create a network for each bin of edges | ||
nets = parallel::mclapply(bins, function(bin) { | ||
|
@@ -867,6 +853,10 @@ split.network.by.bins = function(network, bins, bins.vector, remove.isolates = T | |
g = igraph::subgraph.edges(network, edges, delete.vertices = remove.isolates) | ||
return(g) | ||
}) | ||
## set 'bins' attribute, if specified | ||
if (!is.null(bins.date)) { | ||
attr(nets, "bins") = bins.date | ||
} | ||
logging::logdebug("split.network.by.bins: finished.") | ||
return(nets) | ||
} | ||
|
@@ -1046,7 +1036,7 @@ split.data.by.time.or.bins = function(project.data, splitting.length, bins, spli | |
bins.info = construct.overlapping.ranges(start = min(bins.date), end = max(bins.date), | ||
time.period = splitting.length, overlap = 0.5, raw = TRUE, | ||
include.end.date = FALSE) # bins have already been prepared correctly | ||
bins.date = sort(unname(unique(get.date.from.unix.timestamp(unlist(bins.info))))) | ||
bins.date = get.bin.dates.from.ranges(bins.info) | ||
bins = get.date.string(bins.date) | ||
|
||
logging::loginfo("Splitting data '%s' into time ranges using sliding windows [%s].", | ||
|
@@ -1092,6 +1082,16 @@ split.data.by.time.or.bins = function(project.data, splitting.length, bins, spli | |
return(cf.data) | ||
} | ||
|
||
#' Obtain the start and end dates from given ranges. | ||
#' | ||
#' @param ranges the ranges to get the dates from | ||
#' | ||
#' @return a sorted vector of all the start the end dates of the given ranges | ||
get.bin.dates.from.ranges = function(ranges) { | ||
This comment has been minimized.
Sorry, something went wrong.
This comment has been minimized.
Sorry, something went wrong.
maxloeffler
Author
Owner
|
||
dates = sort(unname(unique(get.date.from.unix.timestamp(unlist(ranges))))) | ||
return(dates) | ||
} | ||
|
||
|
||
## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / | ||
## Unification of range names ---------------------------------------------- | ||
|
I am not sure, but maybe it would make sense to move this function to
util-misc.R
where we have all the range-related helper functions, since this function could potentially also be used in other cases that are not related to splitting.