MAINT: Refactor Network.nsi_betweenness()
Changes:
- Cythonize outer loop over nodes
- remove repeated array allocations
- enable parallel outer loop using `multiprocess` package
  (naive, no shared memory)
- assert precondition on array sizes related to node degrees

Context:
- related: #142
- original Weave -> Cython port: 20ad40d
ntfrgl committed Oct 19, 2023
1 parent ed8b158 commit 9cbc0bc
Showing 5 changed files with 131 additions and 131 deletions.
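
As a quick orientation, here is a minimal usage sketch of the refactored method (illustration only, not part of the commit; it assumes pyunicorn's bundled toy graph `Network.SmallTestNetwork()`, which this commit's doctests also use):

    # Sketch: exercising the refactored nsi_betweenness(); not part of this commit.
    import numpy as np
    from pyunicorn import Network

    net = Network.SmallTestNetwork()

    # Serial path: a single Cython call iterates over all target nodes.
    betw_serial = net.nsi_betweenness()

    # Parallel path: targets are split into one batch per CPU core and
    # processed by separate `multiprocess` workers (naive, no shared memory).
    betw_parallel = net.nsi_betweenness(parallelize=True)

    # Both code paths should agree.
    assert np.allclose(betw_serial, betw_parallel)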
2 changes: 1 addition & 1 deletion .travis.yml
@@ -38,7 +38,7 @@ install:
   - eval "$(conda shell.bash hook)"
   - conda activate test-env
   - travis_retry conda install -c conda-forge python=$TRAVIS_PYTHON_VERSION
-  - travis_retry conda install -c conda-forge numpy scipy python-igraph h5netcdf tqdm
+  - travis_retry conda install -c conda-forge numpy scipy python-igraph h5netcdf multiprocess tqdm
   - travis_retry conda update -c conda-forge --all
 
   # testing dependencies
1 change: 1 addition & 0 deletions setup.cfg
@@ -47,6 +47,7 @@ install_requires =
     scipy >= 1.10
     igraph >= 0.10
     h5netcdf >= 1.1
+    multiprocess >= 0.70
     tqdm >= 4.66
 python_requires = >=3.8
 packages = find:
158 changes: 87 additions & 71 deletions src/pyunicorn/core/_ext/numerics.pyx
@@ -411,85 +411,101 @@ def _local_cliquishness_5thorder(
 
 def _nsi_betweenness(
         int N, ndarray[DWEIGHT_t, ndim=1] w,
-        ndarray[DEGREE_t, ndim=1] k, int j,
-        ndarray[DFIELD_t, ndim=1] betweenness_to_j,
-        ndarray[DFIELD_t, ndim=1] excess_to_j,
-        ndarray[NODE_t, ndim=1] offsets,
+        ndarray[DEGREE_t, ndim=1] k,
+        ndarray[NODE_t, ndim=1] targets,
         ndarray[NODE_t, ndim=1] flat_neighbors,
-        ndarray[MASK_t, ndim=1] is_source,
-        ndarray[NODE_t, ndim=1] flat_predecessors):
+        ndarray[MASK_t, ndim=1] is_source):
     """
     Performs Newman's algorithm. [Newman2001]_
     """
 
     cdef:
-        int qi, oi, queue_len, l_index, ql
+        long int E = len(flat_neighbors)
+        int j, qi, oi, queue_len, l_index, ql
         NODE_t l, i, next_d, dl, ol, fi
         DFIELD_t base_factor
-        ndarray[NODE_t, ndim=1] distances_to_j =\
-            2 * N * np.ones(N, dtype=NODE)
+        ndarray[NODE_t, ndim=1] offsets = np.zeros(N, dtype=NODE)
+        ndarray[NODE_t, ndim=1] distances_to_j = np.ones(N, dtype=NODE)
         ndarray[NODE_t, ndim=1] n_predecessors = np.zeros(N, dtype=NODE)
+        ndarray[NODE_t, ndim=1] flat_predecessors = np.zeros(E, dtype=NODE)
         ndarray[NODE_t, ndim=1] queue = np.zeros(N, dtype=NODE)
         ndarray[DFIELD_t, ndim=1] multiplicity_to_j = np.zeros(N, dtype=DFIELD)
+        ndarray[DFIELD_t, ndim=1] betweenness_to_j = np.zeros(N, dtype=DFIELD)
+        ndarray[DFIELD_t, ndim=1] excess_to_j = np.zeros(N, dtype=DFIELD)
+        ndarray[DFIELD_t, ndim=1] betweenness_times_w = np.zeros(N, dtype=DFIELD)
 
-    # init distances to j and queue of nodes by distance from j
-    for l in range(N):
-        # distances_to_j[l] = 2 * N
-        # n_predecessors[l] = 0
-        # multiplicity_to_j[l] = 0.0
-        # initialize contribution of paths ending in j to the betweenness of l
-        excess_to_j[l] = betweenness_to_j[l] = is_source[l] * w[l]
-
-    distances_to_j[j] = 0
-    queue[0] = j
-    queue_len = 1
-    multiplicity_to_j[j] = w[j]
-
-    # process the queue forward and grow it on the way: (this is the standard
-    # breadth-first search giving all the shortest paths to j)
-    qi = 0
-    while qi < queue_len:
-        #for qi in range(queue_len):
-        i = queue[qi]
-        if i == -1:
-            # this should never happen ...
-            print("Opps: %d,%d,%d\n" % qi, queue_len, i)
-            break
-        next_d = distances_to_j[i] + 1
-        #iterate through all neighbors l of i
-        oi = offsets[i]
-        for l_index in range(oi, oi+k[i]):
-            # if on a shortes j-l-path, register i as predecessor of l
-            l = flat_neighbors[l_index]
-            dl = distances_to_j[l]
-            if dl >= next_d:
-                fi = offsets[l] + n_predecessors[l]
-                n_predecessors[l] += 1
-                flat_predecessors[fi] = i
-                multiplicity_to_j[l] += w[l] * multiplicity_to_j[i]
-                if dl > next_d:
-                    distances_to_j[l] = next_d
-                    queue[queue_len] = l
-                    queue_len += 1
-        qi += 1
-
-    # process the queue again backward: (this is Newman's 2nd part where
-    # the contribution of paths ending in j to the betweenness of all nodes
-    # is computed recursively by traversing the shortest paths backwards)
-    for ql in range(queue_len-1, -1, -1):
-        l = queue[ql]
-        if l == -1:
-            print("Opps: %d,%d,%d\n" % ql, queue_len, l)
-            break
-        if l == j:
-            # set betweenness and excess to zero
-            betweenness_to_j[l] = excess_to_j[l] = 0
-        else:
-            # otherwise, iterate through all predecessors i of l:
-            base_factor = w[l] / multiplicity_to_j[l]
-            ol = offsets[l]
-            for fi in range(ol, ol+n_predecessors[l]):
-                # add betweenness to predecessor
-                i = flat_predecessors[fi]
-                betweenness_to_j[i] += betweenness_to_j[l] * base_factor * \
-                    multiplicity_to_j[i]
+    # init node offsets
+    # NOTE: We don't use k.cumsum() since that uses too much memory!
+    for i in range(1, N):
+        offsets[i] = offsets[i-1] + k[i-1]
+
+    for j in targets:
+        # init distances to j and queue of nodes by distance from j
+        distances_to_j.fill(2 * N)
+        n_predecessors.fill(0)
+        flat_predecessors.fill(0)
+        queue.fill(0)
+        multiplicity_to_j.fill(0)
+
+        # init contribution of paths ending in j to the betweenness of l
+        for l in range(N):
+            excess_to_j[l] = betweenness_to_j[l] = is_source[l] * w[l]
+
+        distances_to_j[j] = 0
+        queue[0] = j
+        queue_len = 1
+        multiplicity_to_j[j] = w[j]
+
+        # process the queue forward and grow it on the way: (this is the
+        # standard breadth-first search giving all the shortest paths to j)
+        qi = 0
+        while qi < queue_len:
+            i = queue[qi]
+            if i == -1:
+                # this should never happen ...
+                print("Oops: %d,%d,%d\n" % (qi, queue_len, i))
+                break
+            next_d = distances_to_j[i] + 1
+            # iterate through all neighbors l of i
+            oi = offsets[i]
+            for l_index in range(oi, oi+k[i]):
+                # if on a shortest j-l-path, register i as predecessor of l
+                l = flat_neighbors[l_index]
+                dl = distances_to_j[l]
+                if dl >= next_d:
+                    fi = offsets[l] + n_predecessors[l]
+                    n_predecessors[l] += 1
+                    flat_predecessors[fi] = i
+                    multiplicity_to_j[l] += w[l] * multiplicity_to_j[i]
+                    if dl > next_d:
+                        distances_to_j[l] = next_d
+                        queue[queue_len] = l
+                        queue_len += 1
+            qi += 1
+
+        # process the queue again backward: (this is Newman's 2nd part where the
+        # contribution of paths ending in j to the betweenness of all nodes is
+        # computed recursively by traversing the shortest paths backwards)
+        for ql in range(queue_len-1, -1, -1):
+            l = queue[ql]
+            if l == -1:
+                print("Oops: %d,%d,%d\n" % (ql, queue_len, l))
+                break
+            if l == j:
+                # set betweenness and excess to zero
+                betweenness_to_j[l] = excess_to_j[l] = 0
+            else:
+                # otherwise, iterate through all predecessors i of l:
+                base_factor = w[l] / multiplicity_to_j[l]
+                ol = offsets[l]
+                for fi in range(ol, ol+n_predecessors[l]):
+                    # add betweenness to predecessor
+                    i = flat_predecessors[fi]
+                    betweenness_to_j[i] += betweenness_to_j[l] * base_factor * \
+                        multiplicity_to_j[i]
+
+        betweenness_times_w += w[j] * (betweenness_to_j - excess_to_j)
+    return betweenness_times_w
 
 
 def _mpi_newman_betweenness(
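
For reference, the kernel's flat adjacency layout (the `offsets`/`flat_neighbors` pair, now built inside the Cython function) works like a CSR index. A small NumPy sketch, using a hypothetical 3-node triangle graph, not taken from the commit:

    import numpy as np

    # Triangle graph 0-1, 0-2, 1-2; each undirected link is stored twice.
    k = np.array([2, 2, 2])                        # node degrees
    flat_neighbors = np.array([1, 2, 0, 2, 0, 1])  # concatenated neighbour lists

    # Same loop as in the kernel; k.cumsum() is avoided to save memory.
    offsets = np.zeros(len(k), dtype=int)
    for i in range(1, len(k)):
        offsets[i] = offsets[i-1] + k[i-1]

    # Neighbours of node i live at flat_neighbors[offsets[i]:offsets[i]+k[i]].
    assert list(flat_neighbors[offsets[1]:offsets[1]+k[1]]) == [0, 2]

    # This is the precondition asserted in network.py before calling the kernel:
    assert k.sum() == len(flat_neighbors)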
77 changes: 34 additions & 43 deletions src/pyunicorn/core/network.py
@@ -45,6 +45,7 @@
 
 import igraph # high performance graph theory tools
 
+from multiprocess import Pool, cpu_count
 from ..utils import mpi # parallelized computations
 
 from ._ext.types import \
@@ -261,8 +262,8 @@ def __init__(self, adjacency=None, n_nodes=None, edge_list=None,
         elif edge_list is not None:
             self.set_edge_list(edge_list, n_nodes)
         else:
-            raise NetworkError("An adjacency matrix or edge list has to be \
-                given to initialize an instance of Network.")
+            raise NetworkError("An adjacency matrix or edge list has to be "
+                               "given to initialize an instance of Network.")
 
         self._set_node_weights(node_weights)
         self.degree()
@@ -854,7 +855,8 @@ def ErdosRenyi(n_nodes=100, link_probability=None, n_links=None,
             graph = igraph.Graph.Erdos_Renyi(n=n_nodes, m=n_links)
 
         else:
-            return None
+            raise ValueError("`ErdosRenyi()` requires either a "
+                             "`link_probability` or `n_links` argument.")
 
         # return adjacency matrix
         return np.array(graph.get_adjacency(type=2).data)
@@ -2681,7 +2683,7 @@ def assortativity(self):
         >>> r(Network.SmallTestNetwork().assortativity())
         -0.4737
 
-        :rtype: float between 0 and 1
+        :rtype: float
         """
         degrees = self.graph.degree()
         degrees_sq = [deg**2 for deg in degrees]
@@ -3067,7 +3069,7 @@ def link_betweenness(self):
         [ 0.   2.   0.   0.   3.   0. ] [ 3.5  3.5  0.   0.   0.   0. ]
         [ 5.5  2.5  3.   0.   0.   0. ] [ 5.   0.   0.   0.   0.   0. ]]
 
-        :rtype: square numpy array [node,node] of floats between 0 and 1
+        :rtype: square numpy array [node,node] of floats
         :return: Entry [i,j] is the betweenness of the link between i and j,
             or 0 if i is not linked to j.
         """
@@ -3106,7 +3108,7 @@ def edge_betweenness(self):
         [ 0.   2.   0.   0.   3.   0. ] [ 3.5  3.5  0.   0.   0.   0. ]
         [ 5.5  2.5  3.   0.   0.   0. ] [ 5.   0.   0.   0.   0.   0. ]]
 
-        :rtype: square numpy array [node,node] of floats between 0 and 1
+        :rtype: square numpy array [node,node] of floats
         :return: Entry [i,j] is the betweenness of the link between i and j,
             or 0 if i is not linked to j.
         """
@@ -3172,7 +3174,7 @@ def interregional_betweenness(self, sources=None, targets=None):
         :type targets: 1d numpy array or list of ints from 0 to n_nodes-1
         :arg targets: Set of target node indices.
 
-        :rtype: 1d numpy array [node] of floats between 0 and 1
+        :rtype: 1d numpy array [node] of floats
         """
         return self.nsi_betweenness(sources=sources, targets=targets,
                                     aw=0, silent=1)
@@ -3201,13 +3203,13 @@ def nsi_interregional_betweenness(self, sources, targets):
         Calculating interregional betweenness...
         array([ 1.,  1.,  0.,  0.,  1.,  0.])
 
-        :rtype: 1d numpy array [node] of floats between 0 and 1
+        :rtype: 1d numpy array [node] of floats
         """
         return self.nsi_betweenness(sources=sources, targets=targets, silent=1)
 
-    def nsi_betweenness(self, **kwargs):
+    def nsi_betweenness(self, parallelize: bool = False, **kwargs):
         """
-        For each node, return its n.s.i. betweenness.
+        For each node, return its n.s.i. betweenness. [Newman2001]_
 
         This measures roughly how many shortest paths pass through the node,
         taking node weights into account.
@@ -3232,7 +3234,7 @@ def nsi_betweenness(self, **kwargs):
         Calculating node betweenness...
         array([ 8.5,  1.5,  0. ,  1.5,  4.5,  0. ,  0. ])
 
-        :rtype: 1d numpy array [node] of floats between 0 and 1
+        :rtype: 1d numpy array [node] of floats
         """
         if self.silence_level <= 1:
             if "silent" not in kwargs:
@@ -3242,50 +3244,39 @@ def nsi_betweenness(self, **kwargs):
         if "aw" in kwargs:
             if kwargs["aw"] == 0:
                 w = np.ones_like(w)
 
         N, k = self.N, self.degree()
-        rN = range(0, N)
-        betweenness_times_w = np.zeros(N, dtype=DFIELD)
 
-        # initialize node lists:
+        # initialize node lists
         is_source = np.zeros(N, dtype=MASK)
         if "sources" in kwargs and kwargs["sources"] is not None:
             is_source[kwargs["sources"]] = 1
         else:
-            is_source[rN] = 1
+            is_source[range(0, N)] = 1
         if "targets" in kwargs and kwargs["targets"] is not None:
-            targets = kwargs["targets"]
+            targets = np.array(list(map(int, kwargs["targets"])))
         else:
-            targets = rN
+            targets = np.arange(0, N)
 
-        # node offsets for flat arrays:
-        # NOTE: We don't use k.cumsum() since that uses too much memory!
-        offsets = np.zeros(N, dtype=NODE)
-        for i in range(1, N):
-            offsets[i] = offsets[i-1] + k[i-1]
-
-        # sort links by node indices (contains each link twice!):
+        # sort links by node indices (contains each link twice!)
         links = nz_coords(self.sp_A)
 
-        # neighbours of each node:
+        # neighbours of each node
        flat_neighbors = to_cy(np.array(links)[:, 1], NODE)
-        E = len(flat_neighbors)
 
-        # this main loop might be parallelized:
-        for j0 in targets:
-            j = int(j0)
-
-            betweenness_to_j = to_cy(w, DFIELD)
-            excess_to_j = to_cy(w, DFIELD)
-            flat_predecessors = np.zeros(E, dtype=NODE)
-            _nsi_betweenness(
-                N, to_cy(w, DWEIGHT), to_cy(k, DEGREE), j,
-                betweenness_to_j, excess_to_j, offsets, flat_neighbors,
-                is_source, flat_predecessors)
-            del flat_predecessors
-            betweenness_times_w += w[j] * (betweenness_to_j - excess_to_j)
-
-        return betweenness_times_w / w
+        assert k.sum() == len(flat_neighbors) == 2 * self.n_links
+        w, k = to_cy(w, DWEIGHT), to_cy(k, DEGREE)
+
+        def worker(batch):
+            return _nsi_betweenness(N, w, k, batch, flat_neighbors, is_source)
+
+        if parallelize:
+            # (naively) parallelize loop over nodes
+            n_workers = cpu_count()
+            batches = np.array_split(to_cy(targets, NODE), n_workers)
+            pool = Pool()
+            betw_w = np.sum(pool.map(worker, batches), axis=0)
+        else:
+            betw_w = worker(to_cy(targets, NODE))
+        return betw_w / w
 
     def _eigenvector_centrality_slow(self, link_attribute=None):
         """
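
The parallel branch above follows a plain map-reduce pattern: each worker receives its own copy of all inputs (no shared memory), computes a full-length contribution vector for its batch of targets, and the partial results are summed. A standalone sketch with a hypothetical stand-in for the Cython kernel, not taken from the commit:

    import numpy as np
    from multiprocess import Pool, cpu_count

    N = 10  # number of nodes

    def worker(batch):
        # Stand-in for _nsi_betweenness(): returns a full-length (N,) vector
        # holding only this batch's contributions, so partials can be summed.
        out = np.zeros(N)
        out[batch] = 1.0
        return out

    targets = np.arange(N)
    batches = np.array_split(targets, cpu_count())
    with Pool() as pool:
        total = np.sum(pool.map(worker, batches), axis=0)
    assert np.allclose(total, np.ones(N))

Note that `multiprocess` (rather than the standard library's `multiprocessing`) is used because its dill-based serialization can ship closures like `worker` to the child processes.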
