Skip to content

Commit

Permalink
Simplify network splitting
Browse files Browse the repository at this point in the history
Move setting of the 'bins' attribute on networks from
'split.network.time.based' and 'split.networks.time.based' into
'split.network.by.bins'. This makes calculating the 'bins' attribute
in 'split.networks.time.based' obsolete.

Introduce 'get.bin.dates.from.ranges' wrapper function to aid in converting
from ranges to bins where needed.

Simplify conversion from bins to ranges and back in 'split.network.time.based'.

Replace unnecessary mapply in 'split.network.time.based.by.ranges' by
simpler lapply.

Signed-off-by: Maximilian Löffler <[email protected]>
  • Loading branch information
maxloeffler committed Dec 2, 2023
1 parent 5d3dd57 commit a18a932
Showing 1 changed file with 29 additions and 29 deletions.
58 changes: 29 additions & 29 deletions util-split.R
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,6 @@ split.data.time.based.by.ranges = function(project.data, ranges) {
return(data.split)
}


## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / /
## Split networks ----------------------------------------------------------

Expand Down Expand Up @@ -532,28 +531,20 @@ split.network.time.based = function(network, time.period = "3 months", bins = NU
if (sliding.window) {
ranges = construct.overlapping.ranges(start = min(bins.date), end = max(bins.date),
time.period = time.period, overlap = 0.5, raw = FALSE,
include.end.date = FALSE) # bins have already been prepared correctly
bins.info = construct.overlapping.ranges(start = min(bins.date), end = max(bins.date),
time.period = time.period, overlap = 0.5, raw = TRUE,
include.end.date = FALSE) # bins have already been prepared correctly
bins.date = sort(unname(unique(get.date.from.unix.timestamp(unlist(bins.info)))))

logging::loginfo("Splitting network into time ranges [%s].",
include.end.date = FALSE)
logging::loginfo("Splitting network into overlapping time ranges [%s].",
paste(ranges, collapse = ", "))
nets = split.network.time.based.by.ranges(network, ranges, remove.isolates)
} else {
logging::loginfo("Splitting network into bins [%s].",
paste(bins.date, collapse = ", "))
nets = split.network.by.bins(network, bins, bins.vector, remove.isolates)
revs = get.date.string(bins.date)
ranges = construct.ranges(revs, sliding.window = FALSE)
logging::loginfo("Splitting network into non-overlapping time ranges [%s].",
paste(ranges, collapse = ", "))
nets = split.network.by.bins(network, bins, bins.vector, bins.date, remove.isolates)
}

## set bin attribute
attr(nets, "bins") = bins.date

## set ranges as names
revs = get.date.string(bins.date)
names(nets) = construct.ranges(revs, sliding.window = sliding.window)

names(nets) = ranges
return(nets)
}

Expand Down Expand Up @@ -613,10 +604,6 @@ split.networks.time.based = function(networks, time.period = "3 months", bins =
ranges = construct.overlapping.ranges(start = min(dates), end = max(dates),
time.period = time.period, overlap = 0.5, raw = FALSE,
include.end.date = TRUE)
bins.info = construct.overlapping.ranges(start = min(dates), end = max(dates),
time.period = time.period, overlap = 0.5, raw = TRUE,
include.end.date = TRUE)
bins.date = sort(unname(unique(get.date.from.unix.timestamp(unlist(bins.info)))))
} else {
bins.info = split.get.bins.time.based(dates, time.period, number.windows)
bins.date = get.date.from.string(bins.info[["bins"]])
Expand All @@ -634,7 +621,6 @@ split.networks.time.based = function(networks, time.period = "3 months", bins =
if (sliding.window) {
nets = split.network.time.based.by.ranges(network = net, ranges = ranges,
remove.isolates = remove.isolates)
attr(nets, "bins") = bins.date
} else {
nets = split.network.time.based(network = net, bins = bins.date, sliding.window = sliding.window,
remove.isolates = remove.isolates)
Expand Down Expand Up @@ -715,7 +701,7 @@ split.network.activity.based = function(network, number.edges = 5000, number.win
bins.vector = bins.vector[ with(df, order(my.unique.id)) ] # re-order to get igraph ordering
bins = sort(unique(bins.vector))
## split network by bins
networks = split.network.by.bins(network, bins, bins.vector, remove.isolates)
networks = split.network.by.bins(network, bins, bins.vector, remove.isolates = remove.isolates)

if (number.edges >= edge.count) {
logging::logwarn("Sliding-window approach does not apply: not enough edges (%s) for number of edges %s",
Expand Down Expand Up @@ -816,11 +802,9 @@ split.network.time.based.by.ranges = function(network, ranges, remove.isolates =
ranges.bounds = lapply(ranges, get.range.bounds)

## loop over all ranges and split the network accordingly:
nets.split = mapply(
ranges, ranges.bounds, SIMPLIFY = FALSE,
FUN = function(range, start.end) {
nets.split = lapply(ranges.bounds, function(bounds) {
## 1) split the network to the current range
range.net = split.network.time.based(network, bins = start.end, sliding.window = FALSE,
range.net = split.network.time.based(network, bins = bounds, sliding.window = FALSE,
remove.isolates = remove.isolates)[[1]]

## 2) return the network
Expand Down Expand Up @@ -853,10 +837,12 @@ split.dataframe.by.bins = function(df, bins) {
#' @param network a network
#' @param bins a vector with the unique bin identifiers, describing the order in which the bins are created
#' @param bins.vector a vector of length 'ecount(network)' assigning a bin for each edge of 'network'
#' @param bins.date a vector of dates representing the start of each bin. If present, then the dates will be set
#' as an attribute on the returned networks [default: NULL]
#' @param remove.isolates whether to remove isolates in the resulting split networks [default: TRUE]
#'
#' @return a list of networks, with the length of 'unique(bins.vector)'
split.network.by.bins = function(network, bins, bins.vector, remove.isolates = TRUE) {
split.network.by.bins = function(network, bins, bins.vector, bins.date = NULL, remove.isolates = TRUE) {
logging::logdebug("split.network.by.bins: starting.")
## create a network for each bin of edges
nets = parallel::mclapply(bins, function(bin) {
Expand All @@ -867,6 +853,10 @@ split.network.by.bins = function(network, bins, bins.vector, remove.isolates = T
g = igraph::subgraph.edges(network, edges, delete.vertices = remove.isolates)
return(g)
})
## set 'bins' attribute, if specified
if (!is.null(bins.date)) {
attr(nets, "bins") = bins.date
}
logging::logdebug("split.network.by.bins: finished.")
return(nets)
}
Expand Down Expand Up @@ -1046,7 +1036,7 @@ split.data.by.time.or.bins = function(project.data, splitting.length, bins, spli
bins.info = construct.overlapping.ranges(start = min(bins.date), end = max(bins.date),
time.period = splitting.length, overlap = 0.5, raw = TRUE,
include.end.date = FALSE) # bins have already been prepared correctly
bins.date = sort(unname(unique(get.date.from.unix.timestamp(unlist(bins.info)))))
bins.date = get.bin.dates.from.ranges(bins.info)
bins = get.date.string(bins.date)

logging::loginfo("Splitting data '%s' into time ranges using sliding windows [%s].",
Expand Down Expand Up @@ -1092,6 +1082,16 @@ split.data.by.time.or.bins = function(project.data, splitting.length, bins, spli
return(cf.data)
}

#' Obtain the start and end dates from given ranges.
#'
#' @param ranges the ranges to get the dates from
#'
#' @return a sorted vector of all the start and end dates of the given ranges
get.bin.dates.from.ranges = function(ranges) {

This comment has been minimized.

Copy link
@bockthom

bockthom Dec 6, 2023

I am not sure, but maybe it would make sense to move this function to util-misc.R where we have all the range-related helper functions, since this function could potentially also be used in other cases that are not related to splitting.

This comment has been minimized.

Copy link
@maxloeffler

maxloeffler Dec 6, 2023

Author Owner

Oh yeah, I haven't checked util misc out and just added it where I was coding ^^.

dates = sort(unname(unique(get.date.from.unix.timestamp(unlist(ranges)))))
return(dates)
}


## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / /
## Unification of range names ----------------------------------------------
Expand Down

0 comments on commit a18a932

Please sign in to comment.