MAINT: fully cythonize funcnet.CouplingAnalysis

- port remaining C functions to cython and delete C file - resolve multiple `index out of bounds` errors in ported cython code which had previously led to segfaults - all tests passing again, issue pik-copan#187 resolved
fkuehlein · Sep 22, 2023 · e9df484 · e9df484
1 parent ddca65f
commit e9df484
Show file tree

Hide file tree

Showing 2 changed files with 130 additions and 247 deletions.
diff --git a/src/pyunicorn/funcnet/_ext/numerics.pyx b/src/pyunicorn/funcnet/_ext/numerics.pyx
@@ -15,32 +15,32 @@
 cimport cython
 
 import numpy as np
-cimport numpy as cnp
 from numpy cimport ndarray
 
-from ...core._ext.types import LAG, FIELD, INT32TYPE
-from ...core._ext.types cimport LAG_t, FIELD_t, INT32TYPE_t
-
-cdef extern from "src_numerics.c":
-    void _symmetrize_by_absmax_fast(float *similarity_matrix,
-            signed char *lag_matrix, int N)
-    void _cross_correlation_max_fast(float *array, float *similarity_matrix,
-            signed char *lag_matrix, int N, int tau_max, int corr_range)
-    void _cross_correlation_all_fast(float *array, float *lagfuncs, int N,
-            int tau_max, int corr_range)
-    void _get_nearest_neighbors_fast(float *array, int T, int dim_x, int dim_y,
-            int k, int dim, int *k_xy, int *k_yz, int *k_z)
-
+from ...core._ext.types import LAG, FIELD, DFIELD, INT32TYPE
+from ...core._ext.types cimport LAG_t, FIELD_t, DFIELD_t, INT32TYPE_t
 
 # coupling_analysis ===========================================================
 
 def _symmetrize_by_absmax(
     ndarray[FIELD_t, ndim=2, mode='c'] similarity_matrix not None,
     ndarray[LAG_t, ndim=2, mode='c'] lag_matrix not None, int N):
 
-    _symmetrize_by_absmax_fast(
-        <FIELD_t*> cnp.PyArray_DATA(similarity_matrix),
-        <LAG_t*> cnp.PyArray_DATA(lag_matrix), N)
+    cdef:
+        int i, j
+
+    # loop over all node pairs
+    for i in range(N):
+        for j in range(N):
+            # calculate max and argmax by comparing to
+            # previous value and storing max
+            if abs(similarity_matrix[i,j]) > abs(similarity_matrix[j,i]):
+                similarity_matrix[j,i] = similarity_matrix[i,j]
+                lag_matrix[j,i] = -lag_matrix[i,j]
+
+            else:
+                similarity_matrix[i,j] = similarity_matrix[j,i]
+                lag_matrix[i,j] = -lag_matrix[j,i]
 
     return similarity_matrix, lag_matrix
 
@@ -55,7 +55,7 @@ def _cross_correlation_max(
         ndarray[LAG_t, ndim=2, mode='c'] lag_matrix = np.zeros(
             (N, N), dtype=LAG)
         double crossij, max
-        int i,j,tau,k, argmax
+        int i, j, tau, k, argmax
 
     # loop over all node pairs, NOT symmetric due to time shifts!
     for i in range(N):
@@ -116,16 +116,119 @@ def _get_nearest_neighbors(
 
     # Initialize
     cdef:
+        int i, j, index=0, t, m, n, d, kxz, kyz, kz
+        ndarray[INT32TYPE_t, ndim=1, mode='c'] indexfound = np.zeros(
+            T, dtype=INT32TYPE)
+        double dz=0., dxyz=0., dx=0., dy=0., eps, epsmax
+        ndarray[DFIELD_t, ndim=1, mode='c'] dist = np.zeros(
+            (T*dim), dtype=DFIELD)
+        ndarray[DFIELD_t, ndim=1, mode='c'] dxyzarray = np.zeros(
+            (k+1), dtype=DFIELD)
         ndarray[INT32TYPE_t, ndim=1, mode='c'] k_xz = np.zeros(
-            (T), dtype=INT32TYPE)
+            T, dtype=INT32TYPE)
         ndarray[INT32TYPE_t, ndim=1, mode='c'] k_yz = np.zeros(
-            (T), dtype=INT32TYPE)
+            T, dtype=INT32TYPE)
         ndarray[INT32TYPE_t, ndim=1, mode='c'] k_z = np.zeros(
-            (T), dtype=INT32TYPE)
-
-    _get_nearest_neighbors_fast(
-        <FIELD_t*> cnp.PyArray_DATA(array), T, dim_x, dim_y, k, dim,
-        <int*> cnp.PyArray_DATA(k_xz), <int*> cnp.PyArray_DATA(k_yz),
-        <int*> cnp.PyArray_DATA(k_z))
-
+            T, dtype=INT32TYPE)
+
+    # Loop over time
+    for i in range(T):
+        # Growing cube algorithm: Test if n = #(points in epsilon-
+        # environment of reference point i) > k
+        # Start with epsilon for which 95% of points are inside the cube
+        # for a multivariate Gaussian
+        # eps increased by 2 later, also the initial eps
+        eps = (k/T)**(1./dim)
+
+        # n counts the number of neighbors
+        n = 0
+        while n <= k:
+            # Increase cube size
+            eps *= 2.
+            # Start with zero again
+            n = 0
+            # Loop through all points
+            for t in range(T):
+                d = 0
+                while (d < dim) and (abs(array[d*T + i]
+                                     - array[d*T + t]) < eps):
+                    d += 1
+
+                # If all distances are within eps, the point t lies
+                # within eps and n is incremented
+                if d == dim:
+                    indexfound[n] = t
+                    n += 1
+
+        # Calculate distance to points only within epsilon environment
+        # according to maximum metric
+        for j in range(n):
+            index = indexfound[j]
+
+            # calculate maximum metric distance to point
+            dxyz = 0.
+            for d in range(dim):
+                dist[d*T + j] = abs(array[d*T + i] - array[d*T + index])
+                dxyz = max(dist[d*T + j], dxyz)
+
+            # insertion-sort current distance into 'dxyzarray'
+            # if it is among the currently smallest k+1 distances 
+            if j == 0:
+                dxyzarray[j] = dxyz
+            else:
+                m = min(k, j-1)
+                # go through previously sorted smallest distances and
+                # if it is smaller than any, find slot for current distance 
+                while (m >= 0) and (dxyz < dxyzarray[m]):
+                    # if it's not in the last slot already,
+                    # move previously found distance to the right
+                    if not m == k:
+                        dxyzarray[m+1] = dxyzarray[m]
+                    m -= 1
+
+                # sort in, if a slot was found
+                if not m == k:
+                    dxyzarray[m+1] = dxyz
+
+        # Epsilon of k-th nearest neighbor in joint space
+        epsmax = dxyzarray[k]
+
+        # Count neighbors within epsmax in subspaces, since the reference
+        # point is included, all neighbors are at least 1
+        kz = 0
+        kxz = 0
+        kyz = 0
+        for j in range(T):
+
+            # X-subspace
+            dx = abs(array[0*T + i] - array[0*T + j])
+            for d in range(1, dim_x):
+                dist[d*T + j] = abs(array[d*T + i] - array[d*T + j])
+                dx = max(dist[d*T + j], dx)
+
+            # Y-subspace
+            dy = abs(array[dim_x*T + i] - array[dim_x*T + j])
+            for d in range(dim_x, dim_y):
+                dist[d*T + j] = abs(array[d*T + i] - array[d*T + j])
+                dy = max(dist[d*T + j], dy)
+
+            # Z-subspace, if empty, dz stays 0
+            dz = 0.
+            for d in range(dim_x+dim_y, dim):
+                dist[d*T + j] = abs(array[d*T + i] - array[d*T + j])
+                dz = max(dist[d*T + j], dz)
+
+            # For no conditions, kz is counted up to T
+            if dz < epsmax:
+                kz += 1
+                if dx < epsmax:
+                    kxz += 1
+                if dy < epsmax:
+                    kyz += 1
+
+        # Write to numpy arrays
+        k_xz[i] = kxz
+        k_yz[i] = kyz
+        k_z[i] = kz
+
     return k_xz, k_yz, k_z