From 213bdbf0c0488befcbed508456f189683fb051c7 Mon Sep 17 00:00:00 2001 From: Pol Febrer Date: Thu, 19 Oct 2023 13:32:03 +0200 Subject: [PATCH 01/17] enh: linear and efficient neighbour finder --- src/sisl/CMakeLists.txt | 1 + src/sisl/geom/CMakeLists.txt | 1 + src/sisl/geom/__init__.py | 1 + src/sisl/geom/neighs/CMakeLists.txt | 19 + src/sisl/geom/neighs/__init__.py | 1 + src/sisl/geom/neighs/cneighs.pyx | 531 ++++++++++++++++++++++++ src/sisl/geom/neighs/neighfinder.py | 599 ++++++++++++++++++++++++++++ 7 files changed, 1153 insertions(+) create mode 100644 src/sisl/geom/CMakeLists.txt create mode 100644 src/sisl/geom/neighs/CMakeLists.txt create mode 100644 src/sisl/geom/neighs/__init__.py create mode 100644 src/sisl/geom/neighs/cneighs.pyx create mode 100644 src/sisl/geom/neighs/neighfinder.py diff --git a/src/sisl/CMakeLists.txt b/src/sisl/CMakeLists.txt index 76089d1579..9a77620e57 100644 --- a/src/sisl/CMakeLists.txt +++ b/src/sisl/CMakeLists.txt @@ -12,3 +12,4 @@ endforeach() add_subdirectory("_core") add_subdirectory("io") add_subdirectory("physics") +add_subdirectory("geom") diff --git a/src/sisl/geom/CMakeLists.txt b/src/sisl/geom/CMakeLists.txt new file mode 100644 index 0000000000..51d35ada33 --- /dev/null +++ b/src/sisl/geom/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory("neighs") \ No newline at end of file diff --git a/src/sisl/geom/__init__.py b/src/sisl/geom/__init__.py index 2dd2becd94..1694c9344d 100644 --- a/src/sisl/geom/__init__.py +++ b/src/sisl/geom/__init__.py @@ -51,3 +51,4 @@ from .nanotube import * from .special import * from .surfaces import * +from .neighs import * diff --git a/src/sisl/geom/neighs/CMakeLists.txt b/src/sisl/geom/neighs/CMakeLists.txt new file mode 100644 index 0000000000..d0a8fbffa7 --- /dev/null +++ b/src/sisl/geom/neighs/CMakeLists.txt @@ -0,0 +1,19 @@ +# In this directory we have a set of libraries +# We will need to link to the Numpy includes +set_property(DIRECTORY + APPEND + PROPERTY INCLUDE_DIRECTORIES + ${CMAKE_CURRENT_SOURCE_DIR}/.. + ) + +foreach(source + cneighs + ) + add_cython_library( + SOURCE ${source}.pyx + LIBRARY ${source} + OUTPUT ${source}_C + ) + install(TARGETS ${source} LIBRARY + DESTINATION ${SKBUILD_PROJECT_NAME}/geom/neighs) +endforeach() \ No newline at end of file diff --git a/src/sisl/geom/neighs/__init__.py b/src/sisl/geom/neighs/__init__.py new file mode 100644 index 0000000000..3784cc3801 --- /dev/null +++ b/src/sisl/geom/neighs/__init__.py @@ -0,0 +1 @@ +from .neighfinder import NeighFinder diff --git a/src/sisl/geom/neighs/cneighs.pyx b/src/sisl/geom/neighs/cneighs.pyx new file mode 100644 index 0000000000..f0d3604f9a --- /dev/null +++ b/src/sisl/geom/neighs/cneighs.pyx @@ -0,0 +1,531 @@ +cimport cython +from libc.math cimport sqrt + +import numpy as np +# This enables Cython enhanced compatibilities +cimport numpy as np +@cython.boundscheck(False) +@cython.wraparound(False) +def build_table(np.int64_t nbins, np.int64_t[:] bin_indices): + """Builds the table where the location of all atoms is encoded + + Additionally, it also builds an array with the number of atoms at + each bin. + + Parameters + ---------- + nbins: int + Total number of bins + bin_indices: array of int + An array containing the bin index of each atom. + + Returns + ---------- + list: + contains the list of atoms, modified to encode all bin + locations. Each item in the list contains the index of + the next atom that we can find in the same bin. + If an item is -1, it means that there are no more atoms + in the same bin. + heads: + For each bin, the index of `list` where we can find the + first atom that is contained in it. + counts: + For each bin, the number of atoms that it contains. + """ + cdef: + np.int64_t at, bin_index + + np.int64_t Nat = bin_indices.shape[0] + + # Initialize heads and counts arrays. We don't need to initialize the list array + # since we are going to modify all its positions. + np.int64_t[:] list_array = np.zeros(Nat, dtype=np.int64) + np.int64_t[:] counts = np.zeros(nbins, dtype=np.int64) + np.int64_t[:] heads = np.ones(nbins, dtype=np.int64) * -1 + + # Loop through all atoms + for at in range(Nat): + # Get the index of the bin where this atom is located. + bin_index = bin_indices[at] + + # Replace the head of this bin by the current atom index. + # Before replacing, store the previous head in this atoms' + # position in the list. + list_array[at] = heads[bin_index] + heads[bin_index] = at + + # Update the count of this bin (increment it by 1). + counts[bin_index] = counts[bin_index] + 1 + + # Return the memory views as numpy arrays + return np.asarray(list_array), np.asarray(heads), np.asarray(counts) + +@cython.boundscheck(False) +@cython.wraparound(False) +def get_pairs(np.int64_t[:] at_indices, np.int64_t[:, :] indices, np.int64_t[:, :, :] iscs, + np.int64_t[:] heads, np.int64_t[:] list_array, np.int64_t max_npairs, + bint self_interaction, np.float64_t[:, :] xyz, np.float64_t[:, :] cell, + np.npy_bool[:] pbc, np.float64_t[:] thresholds, bint sphere_overlap): + """Gets (possibly duplicated) pairs of neighbour atoms. + + Parameters + --------- + at_indices: + The indices of the atoms that we want to get potential + neighbours for. + indices: + For each atom index (first dimension), the indices of the + 8 bins that contain potential neighbours. + iscs: + For each bin, the supercell index. + heads: + For each bin, the index of `list` where we can find the + first atom that is contained in it. + This array is constructed by `build_table`. + list_array: + contains the list of atoms, modified to encode all bin + locations. Each item in the list contains the index of + the next atom that we can find in the same bin. + If an item is -1, it means that there are no more + atoms in the same bin. + This array is constructed by `build_table`. + max_npairs: + The number of maximum pairs that can be found. + It is used to allocate the `neighs` array. This is computed + in python with the help of the `counts` array constructed + by `build_table`. + self_interaction: bool, optional + whether to consider an atom a neighbour of itself. + xyz: + the cartesian coordinates of all atoms in the geometry. + cell: + the lattice vectors of the geometry, where cell(i, :) is the + ith lattice vector. + pbc: + for each direction, whether periodic boundary conditions should + be considered. + thresholds: + the threshold radius for each atom in the geometry. + sphere_overlap: + If true, two atoms are considered neighbours if their spheres + overlap. + If false, two atoms are considered neighbours if the second atom + is within the sphere of the first atom. Note that this implies that + atom `i` might be atom `j`'s neighbour while the opposite is not true. + + Returns + -------- + neighs: + contains the atom indices of all pairs of neighbor atoms. + split_indices: + array containing the breakpoints in `neighs`. These + breakpoints delimit the position where one can find pairs + for each 8 bin search. It can be used later to quickly + split `neighs` on the multiple individual searches. + """ + cdef: + int N_ind = at_indices.shape[0] + + int i_pair = 0 + int search_index, at, j + bint not_unit_cell, should_not_check + np.int64_t neigh_at, bin_index + np.float64_t threshold, dist + + np.float64_t[:] ref_xyz = np.zeros(3, dtype=np.float64) + np.int64_t[:] neigh_isc = np.zeros(3, dtype=np.int64) + + np.int64_t[:,:] neighs = np.zeros((max_npairs, 5), dtype=np.int64) + np.int64_t[:] split_indices = np.zeros(N_ind, dtype=np.int64) + + for search_index in range(N_ind): + at = at_indices[search_index] + + for j in range(8): + # Find the bin index. + bin_index = indices[search_index, j] + + # Get the first atom index in this bin + neigh_at = heads[bin_index] + + # If there are no atoms in this bin, do not even bother + # checking supercell indices. + if neigh_at == -1: + continue + + # Find the supercell indices for this bin + neigh_isc[:] = iscs[search_index, j, :] + + # And check if this bin corresponds to the unit cell + not_unit_cell = neigh_isc[0] != 0 or neigh_isc[1] != 0 or neigh_isc[2] != 0 + + ref_xyz[:] = xyz[at, :] + if not_unit_cell: + # If we are looking at a neighbouring cell in a direction + # where there are no periodic boundary conditions, go to + # next bin. + should_not_check = False + for i in range(3): + if not pbc[i] and neigh_isc[i] != 0: + should_not_check = True + if should_not_check: + continue + # Otherwise, move the atom to the neighbor cell. We do this + # instead of moving potential neighbors to the unit cell + # because in this way we reduce the number of operations. + for i in range(3): + ref_xyz[i] = ref_xyz[i] - cell[0, i] * neigh_isc[0] - cell[1, i] * neigh_isc[1] - cell[2, i] * neigh_isc[2] + + # Loop through all atoms that are in this bin. + # If neigh_at == -1, this means no more atoms are in this bin. + while neigh_at >= 0: + # If this is a self interaction and the user didn't want them, + # go to next atom. + if not self_interaction and at == neigh_at: + neigh_at = list_array[neigh_at] + continue + + # Calculate the distance between the atom and the potential + # neighbor. + dist = 0.0 + for i in range(3): + dist += (xyz[neigh_at, i] - ref_xyz[i])**2 + dist = sqrt(dist) + + # Get the threshold for this pair of atoms + threshold = thresholds[at] + if sphere_overlap: + # If the user wants to check for sphere overlaps, we have + # to sum the radius of the neighbor to the threshold + threshold = threshold + thresholds[neigh_at] + + if dist < threshold: + # Store the pair of neighbours. + neighs[i_pair, 0] = at + neighs[i_pair, 1] = neigh_at + neighs[i_pair, 2] = neigh_isc[0] + neighs[i_pair, 3] = neigh_isc[1] + neighs[i_pair, 4] = neigh_isc[2] + + # Increment the pair index + i_pair = i_pair + 1 + + # Get the next atom in this bin. Sum 1 to get fortran index. + neigh_at = list_array[neigh_at] + + # We have finished this search, store the breakpoint. + split_indices[search_index] = i_pair + + return np.asarray(neighs), np.asarray(split_indices) + +@cython.boundscheck(False) +@cython.wraparound(False) +def get_all_unique_pairs(np.int64_t[:, :] indices, np.int64_t[:, :, :] iscs, + np.int64_t[:] heads, np.int64_t[:] list_array, np.int64_t max_npairs, + bint self_interaction, np.float64_t[:, :] xyz, np.float64_t[:, :] cell, + np.npy_bool[:] pbc, np.float64_t[:] thresholds, bint sphere_overlap): + """Gets all unique pairs of atoms that are neighbours. + + Parameters + --------- + indices: + For each atom index (first dimension), the indices of the + 8 bins that contain potential neighbours. + iscs: + For each bin, the supercell index. + heads: + For each bin, the index of `list` where we can find the + first atom that is contained in it. + This array is constructed by `build_table`. + list_array: + contains the list of atoms, modified to encode all bin + locations. Each item in the list contains the index of + the next atom that we can find in the same bin. + If an item is -1, it means that there are no more + atoms in the same bin. + This array is constructed by `build_table`. + max_npairs: + The number of maximum pairs that can be found. + It is used to allocate the `neighs` array. This is computed + in python with the help of the `counts` array constructed + by `build_table`. + self_interaction: bool, optional + whether to consider an atom a neighbour of itself. + xyz: + the cartesian coordinates of all atoms in the geometry. + cell: + the lattice vectors of the geometry, where cell(i, :) is the + ith lattice vector. + pbc: + for each direction, whether periodic boundary conditions should + be considered. + thresholds: + the threshold radius for each atom in the geometry. + sphere_overlap: + If true, two atoms are considered neighbours if their spheres + overlap. + If false, two atoms are considered neighbours if the second atom + is within the sphere of the first atom. Note that this implies that + atom `i` might be atom `j`'s neighbour while the opposite is not true. + + Returns + -------- + neighs: + contains the atom indices of all unique neighbor pairs + First column of atoms is sorted in increasing order. + Columns 3 to 5 contain the supercell indices of the neighbour + atom (the one in column 2, column 1 atoms are always in the + unit cell). + npairs: + The number of unique pairs that have been found, which + will be less than `max_npairs`. This should presumably + be used to slice the `neighs` array once in python. + """ + cdef: + int N_ats = list_array.shape[0] + + int i_pair = 0 + int at, j + bint not_unit_cell, should_not_check + np.int64_t neigh_at, bin_index + np.float64_t threshold, dist + + np.float64_t[:] ref_xyz = np.zeros(3, dtype=np.float64) + np.int64_t[:] neigh_isc = np.zeros(3, dtype=np.int64) + + np.int64_t[:,:] neighs = np.zeros((max_npairs, 5), dtype=np.int64) + + for at in range(N_ats): + for j in range(8): + # Find the bin index. + bin_index = indices[at, j] + + # Get the first atom index in this bin + neigh_at = heads[bin_index] + + # If there are no atoms in this bin, do not even bother + # checking supercell indices. + if neigh_at == -1: + continue + + # Find the supercell indices for this bin + neigh_isc[:] = iscs[at, j, :] + + # And check if this bin corresponds to the unit cell + not_unit_cell = neigh_isc[0] != 0 or neigh_isc[1] != 0 or neigh_isc[2] != 0 + + ref_xyz[:] = xyz[at, :] + if not_unit_cell: + # If we are looking at a neighbouring cell in a direction + # where there are no periodic boundary conditions, go to + # next bin. + should_not_check = False + for i in range(3): + if not pbc[i] and neigh_isc[i] != 0: + should_not_check = True + if should_not_check: + continue + # Otherwise, move the atom to the neighbor cell. We do this + # instead of moving potential neighbors to the unit cell + # because in this way we reduce the number of operations. + for i in range(3): + ref_xyz[i] = ref_xyz[i] - cell[0, i] * neigh_isc[0] - cell[1, i] * neigh_isc[1] - cell[2, i] * neigh_isc[2] + + # Loop through all atoms that are in this bin. + # If neigh_at == -1, this means no more atoms are in this bin. + while neigh_at >= 0: + # If neigh_at is smaller than at, we already stored + # this pair when performing the search for neigh_at. + # The following atoms will have even lower indices + # So we can just move to the next bin. However, if + # we are checking a neighboring cell, this connection + # will always be unique. + if (not not_unit_cell and neigh_at <= at): + break + + # Calculate the distance between the atom and the potential + # neighbor. + dist = 0.0 + for i in range(3): + dist += (xyz[neigh_at, i] - ref_xyz[i])**2 + dist = sqrt(dist) + + # Get the threshold for this pair of atoms + threshold = thresholds[at] + if sphere_overlap: + # If the user wants to check for sphere overlaps, we have + # to sum the radius of the neighbor to the threshold + threshold = threshold + thresholds[neigh_at] + + if dist < threshold: + # Store the pair of neighbours. + neighs[i_pair, 0] = at + neighs[i_pair, 1] = neigh_at + neighs[i_pair, 2] = neigh_isc[0] + neighs[i_pair, 3] = neigh_isc[1] + neighs[i_pair, 4] = neigh_isc[2] + + # Increment the pair index + i_pair = i_pair + 1 + + # Get the next atom in this bin. Sum 1 to get fortran index. + neigh_at = list_array[neigh_at] + + if j == 1 and self_interaction: + # Add the self interaction + neighs[i_pair, 0] = at + neighs[i_pair, 1] = neigh_at + neighs[i_pair, 2:] = 0 + + # Increment the pair index + i_pair = i_pair + 1 + + # Return the array of neighbours, but only the filled part + return np.asarray(neighs[:i_pair + 1]), i_pair + +@cython.boundscheck(False) +@cython.wraparound(False) +def get_close(np.float64_t[:, :] search_xyz, np.int64_t[:, :] indices, np.int64_t[:, :, :] iscs, + np.int64_t[:] heads, np.int64_t[:] list_array, np.int64_t max_npairs, + np.float64_t[:, :] xyz, np.float64_t[:, :] cell, + np.npy_bool[:] pbc, np.float64_t[:] thresholds): + """Gets the atoms that are close to given positions + + Parameters + --------- + search_xyz: + The coordinates for which we want to look for atoms that + are close. + indices: + For each point (first dimension), the indices of the + 8 bins that contain potential neighbours. + iscs: + For each bin, the supercell index. + heads: + For each bin, the index of `list` where we can find the + first atom that is contained in it. + This array is constructed by `build_table`. + list_array: + contains the list of atoms, modified to encode all bin + locations. Each item in the list contains the index of + the next atom that we can find in the same bin. + If an item is -1, it means that there are no more + atoms in the same bin. + This array is constructed by `build_table`. + max_npairs: + The number of maximum pairs that can be found. + It is used to allocate the `neighs` array. This is computed + in python with the help of the `counts` array constructed + by `build_table`. + self_interaction: bool, optional + whether to consider an atom a neighbour of itself. + xyz: + the cartesian coordinates of all atoms in the geometry. + cell: + the lattice vectors of the geometry, where cell(i, :) is the + ith lattice vector. + pbc: + for each direction, whether periodic boundary conditions should + be considered. + thresholds: + the threshold radius for each atom in the geometry. + sphere_overlap: + If true, two atoms are considered neighbours if their spheres + overlap. + If false, two atoms are considered neighbours if the second atom + is within the sphere of the first atom. Note that this implies that + atom `i` might be atom `j`'s neighbour while the opposite is not true. + + Returns + -------- + neighs: + contains the atom indices of all pairs of neighbor atoms. + split_indices: + array containing the breakpoints in `neighs`. These + breakpoints delimit the position where one can find pairs + for each 8 bin search. It can be used later to quickly + split `neighs` on the multiple individual searches. + """ + cdef: + int N_ind = search_xyz.shape[0] + + int i_pair = 0 + int search_index, j + bint not_unit_cell, should_not_check + np.int64_t neigh_at, bin_index + np.float64_t threshold, dist + + np.float64_t[:] ref_xyz = np.zeros(3, dtype=np.float64) + np.int64_t[:] neigh_isc = np.zeros(3, dtype=np.int64) + + np.int64_t[:,:] neighs = np.zeros((max_npairs, 5), dtype=np.int64) + np.int64_t[:] split_indices = np.zeros(N_ind, dtype=np.int64) + + for search_index in range(N_ind): + for j in range(8): + # Find the bin index. + bin_index = indices[search_index, j] + + # Get the first atom index in this bin + neigh_at = heads[bin_index] + + # If there are no atoms in this bin, do not even bother + # checking supercell indices. + if neigh_at == -1: + continue + + # Find the supercell indices for this bin + neigh_isc[:] = iscs[search_index, j, :] + + # And check if this bin corresponds to the unit cell + not_unit_cell = neigh_isc[0] != 0 or neigh_isc[1] != 0 or neigh_isc[2] != 0 + + ref_xyz[:] = search_xyz[search_index, :] + if not_unit_cell: + # If we are looking at a neighbouring cell in a direction + # where there are no periodic boundary conditions, go to + # next bin. + should_not_check = False + for i in range(3): + if not pbc[i] and neigh_isc[i] != 0: + should_not_check = True + if should_not_check: + continue + # Otherwise, move the atom to the neighbor cell. We do this + # instead of moving potential neighbors to the unit cell + # because in this way we reduce the number of operations. + for i in range(3): + ref_xyz[i] = ref_xyz[i] - cell[0, i] * neigh_isc[0] - cell[1, i] * neigh_isc[1] - cell[2, i] * neigh_isc[2] + + # Loop through all atoms that are in this bin. + # If neigh_at == -1, this means no more atoms are in this bin. + while neigh_at >= 0: + # Calculate the distance between the atom and the potential + # neighbor. + dist = 0.0 + for i in range(3): + dist += (xyz[neigh_at, i] - ref_xyz[i])**2 + dist = sqrt(dist) + + # Get the threshold for the potential neighbour + threshold = thresholds[neigh_at] + + if dist < threshold: + # Store the pair of neighbours. + neighs[i_pair, 0] = search_index + neighs[i_pair, 1] = neigh_at + neighs[i_pair, 2] = neigh_isc[0] + neighs[i_pair, 3] = neigh_isc[1] + neighs[i_pair, 4] = neigh_isc[2] + + # Increment the pair index + i_pair = i_pair + 1 + + # Get the next atom in this bin. Sum 1 to get fortran index. + neigh_at = list_array[neigh_at] + + # We have finished this search, store the breakpoint. + split_indices[search_index] = i_pair + + return np.asarray(neighs), np.asarray(split_indices) diff --git a/src/sisl/geom/neighs/neighfinder.py b/src/sisl/geom/neighs/neighfinder.py new file mode 100644 index 0000000000..b60163e715 --- /dev/null +++ b/src/sisl/geom/neighs/neighfinder.py @@ -0,0 +1,599 @@ +import numpy as np +from numbers import Real + +from sisl.utils.mathematics import fnorm +from . import _fneighs +from . import cneighs + + +class NeighFinder: + """Efficient linear scaling finding of neighbours. + + Once this class is instantiated, a table is build. Then, + the neighbour finder can be queried as many times as wished + as long as the geometry doesn't change. + + Note that the radius (`R`) is used to build the table. + Therefore, if one wants to look for neighbours using a different + R, one needs to create another finder or call `setup_finder`. + + Parameters + ---------- + geometry: sisl.Geometry + the geometry to find neighbours in + R: float or array-like of shape (geometry.na), optional + The radius to consider two atoms neighbours. + If it is a single float, the same radius is used for all atoms. + If it is an array, it should contain the radius for each atom. + + If not provided, an array is constructed, where the radius for + each atom is its maxR. + sphere_overlap: bool, optional + If `True`, two atoms are considered neighbours if their spheres + overlap. + If `False`, two atoms are considered neighbours if the second atom + is within the sphere of the first atom. Note that this implies that + atom `i` might be atom `j`'s neighbour while the opposite is not true. + + If not provided, it will be `True` if `R` is an array and `False` if + it is a single float. + """ + def __init__(self, geometry, R=None, sphere_overlap=None, fortran=True): + if fortran: + self._fneighs = _fneighs + else: + self._fneighs = cneighs + + self.setup_finder(geometry, R=R, sphere_overlap=sphere_overlap) + + def setup_finder(self, geometry=None, R=None, sphere_overlap=None): + """Prepares everything for neighbour finding. + + Parameters + ---------- + geometry: sisl.Geometry, optional + the geometry to find neighbours in. + + If not provided, the current geometry is kept. + R: float or array-like of shape (geometry.na), optional + The radius to consider two atoms neighbours. + If it is a single float, the same radius is used for all atoms. + If it is an array, it should contain the radius for each atom. + + If not provided, an array is constructed, where the radius for + each atom is its maxR. + + Note that negative R values are allowed + sphere_overlap: bool, optional + If `True`, two atoms are considered neighbours if their spheres + overlap. + If `False`, two atoms are considered neighbours if the second atom + is within the sphere of the first atom. Note that this implies that + atom `i` might be atom `j`'s neighbour while the opposite is not true. + + If not provided, it will be `True` if `R` is an array and `False` if + it is a single float. + """ + # Set the geometry. Copy it because we may need to modify the supercell size. + if geometry is not None: + self.geometry = geometry.copy() + + # If R is not a single float, make sure it is a numpy array + R_is_float = isinstance(R, Real) + if not R_is_float: + R = np.array(R) + # If R was not provided, build an array of Rs + if R is None: + R = geometry.atoms.maxR(all=True) + + # Set the radius + self._R = R + + # If sphere overlap was not provided, set it to False if R is a single float + # and True otherwise. + if sphere_overlap is None: + sphere_overlap = not R_is_float + self._sphere_overlap = sphere_overlap + + # Determine the bin_size as the maximum DIAMETER to ensure that we ALWAYS + # only need to look one bin away for neighbors. + bin_size = np.max(self._R) * 2 + # Check that the bin size is positive. + if bin_size <= 0: + raise ValueError("All R values are 0 or less. Please provide some positive values") + if sphere_overlap: + # In the case when we want to check for sphere overlap, the size should + # be twice as big. + bin_size *= 2 + + # We add a small amount to bin_size to avoid ambiguities when + # a position is exactly at the center of a bin. + bin_size += 0.01 + + # Get the number of bins along each cell direction. + nbins_float = fnorm(geometry.cell, axis=-1) / bin_size + self.nbins = np.floor(nbins_float).astype(int) + self.total_nbins = np.prod(self.nbins) + + if self.total_nbins == 0: + # This means that nsc must be at least 5. + + # We round 1/nbins (i.e. the amount of cells needed in each direction) + # to the closest next odd number. + nsc = np.ceil(1/nbins_float) // 2 * 2 + 1 + # And then set it as the number of supercells. + self.geometry.set_nsc(nsc.astype(int)) + raise ValueError( + "The diameter occupies more space than the whole unit cell," + "which means that we need to look for neighbours more than one unit cell away." + f"This is not yet supported by {self.__class__.__name__}" + ) + + # Get the scalar bin indices of all atoms + scalar_bin_indices = self._get_bin_indices(self.geometry.fxyz) + + # Build the tables that will allow us to look for neighbours in an efficient + # and linear scaling manner. + self._build_table(scalar_bin_indices) + + def _build_table(self, bin_indices): + """Builds the tables that will allow efficient linear scaling neighbour search. + + Parameters + ---------- + bin_indices: array-like of shape (self.total_nbins, ) + Array containing the scalar bin index for each atom. + """ + # Call the fortran routine that builds the table + self._list, self._heads, self._counts = self._fneighs.build_table(self.total_nbins, bin_indices) + + def _get_search_atom_counts(self, scalar_bin_indices): + """Computes the number of atoms that will be explored for each search + + Parameters + ----------- + scalar_bin_indices: np.ndarray of shape ([n_searches], 8) + Array containing the bin indices for each search. + Bin indices should be within the unit cell! + + Returns + ----------- + np.ndarray of shape (n_searches, ): + For each search, the number of atoms that will be involved. + """ + return self._counts[scalar_bin_indices.ravel()].reshape(-1, 8).sum(axis=1) + + def _get_bin_indices(self, fxyz, cartesian=False, floor=True): + """Gets the bin indices for a given fractional coordinate. + + Parameters + ----------- + fxyz: np.ndarray of shape (N, 3) + the fractional coordinates for which we want to get the bin indices. + cartesian: bool, optional + whether the indices should be returned as cartesian. + If `False`, scalar indices are returned. + floor: bool, optional + whether to floor the indices or not. + + If asking for scalar indices (i.e. `cartesian=False`), the indices will + ALWAYS be floored regardless of this argument. + + Returns + -------- + np.ndarray: + The bin indices. If `cartesian=True`, the shape of the array is (N, 3), + otherwise it is (N,). + """ + bin_indices = ((fxyz) % 1) * self.nbins + # Atoms that are exactly at the limit of the cell might have fxyz = 1 + # which would result in a bin index outside of range. + # We just bring it back to the unit cell. + bin_indices = bin_indices % self.nbins + + if floor or not cartesian: + bin_indices = np.floor(bin_indices).astype(int) + + if not cartesian: + bin_indices = self._cartesian_to_scalar_index(bin_indices) + + return bin_indices + + def _get_search_indices(self, fxyz, cartesian=False): + """Gets the bin indices to explore for a given fractional coordinate. + + Given a fractional coordinate, we will need to look for neighbours + in its own bin, and one bin away in each direction. That is, $2^3=8$ bins. + + Parameters + ----------- + fxyz: np.ndarray of shape (N, 3) + the fractional coordinates for which we want to get the search indices. + cartesian: bool, optional + whether the indices should be returned as cartesian. + If `False`, scalar indices are returned. + + Returns + -------- + np.ndarray: + The bin indices where we need to perform the search for each + fractional coordinate. These indices are all inside the unit cell. + If `cartesian=True`, cartesian indices are returned and the array + has shape (N, 8, 3). + If `cartesian=False`, scalar indices are returned and the array + has shape (N, 8). + np.ndarray of shape (N, 8, 3): + The supercell indices of each bin index in the search. + """ + # Get the bin indices for the positions that are requested. + # Note that we don't floor the indices so that we can know to which + # border of the bin are we closer in each direction. + bin_indices = self._get_bin_indices(fxyz, floor=False, cartesian=True) + bin_indices = np.atleast_2d(bin_indices) + + # Determine which is the neighbouring cell that we need to look for + # along each direction. + signs = np.ones(bin_indices.shape, dtype=int) + signs[(bin_indices % 1) < 0.5] = -1 + + # Build and arrays with all the indices that we need to look for. Since + # we have to move one bin away in each direction, we have to look for + # neighbors along a total of 8 bins (2**3) + search_indices = np.tile(bin_indices.astype(int), 8).reshape(-1, 8, 3) + + search_indices[:, 1::2, 0] += signs[:, 0].reshape(-1, 1) + search_indices[:, [2,3,6,7], 1] += signs[:, 1].reshape(-1, 1) + search_indices[:, 4:, 2] += signs[:, 2].reshape(-1, 1) + + # Convert search indices to unit cell indices, but keep the supercell indices. + isc, search_indices = np.divmod(search_indices, self.nbins) + + if not cartesian: + search_indices = self._cartesian_to_scalar_index(search_indices) + + return search_indices, isc + + def _cartesian_to_scalar_index(self, index): + """Converts cartesian indices to scalar indices""" + if not np.issubdtype(index.dtype, int): + raise ValueError("Decimal scalar indices do not make sense, please floor your cartesian indices.") + return index.dot([1, self.nbins[0], self.nbins[0] * self.nbins[1]]) + + def _scalar_to_cartesian_index(self, index): + """Converts cartesian indices to scalar indices""" + if np.min(index) < 0 or np.max(index) > self.total_nbins: + raise ValueError("Some scalar indices are not within the unit cell. We cannot uniquely convert to cartesian") + + third, index = np.divmod(index, self.nbins[0]*self.nbins[1]) + second, first = np.divmod(index, self.nbins[0]) + return np.array([first, second, third]).T + + def find_neighbours(self, atoms=None, as_pairs=False, self_interaction=False, pbc=(True, True, True)): + """Find neighbours as specified in the finder. + + This routine only executes the action of finding neighbours, + the parameters of the search (`geometry`, `R`, `sphere_overlap`...) + are defined when the finder is initialized or by calling `setup_finder`. + + Parameters + ---------- + atoms: optional + the atoms for which neighbours are desired. Anything that can + be sanitized by `sisl.Geometry` is a valid value. + + If not provided, neighbours for all atoms are searched. + as_pairs: bool, optional + If `True` pairs of atoms are returned. Otherwise a list containing + the neighbours for each atom is returned. + self_interaction: bool, optional + whether to consider an atom a neighbour of itself. + pbc: bool or array-like of shape (3, ) + whether periodic conditions should be considered. + If a single bool is passed, all directions use that value. + + Returns + ---------- + np.ndarray or list: + If `as_pairs=True`: + A `np.ndarray` of shape (n_pairs, 5) is returned. + Each pair `ij` means that `j` is a neighbour of `i`. + The three extra columns are the supercell indices of atom `j`. + If `as_pairs=False`: + A list containing a numpy array of shape (n_neighs, 4) for each atom. + """ + # Sanitize atoms + atoms = self.geometry._sanitize_atoms(atoms) + + # Cast R and pbc into arrays of appropiate shape and type. + thresholds = np.full(self.geometry.na, self._R, dtype=np.float64) + pbc = np.full(3, pbc, dtype=np.bool) + + # Get search indices + search_indices, isc = self._get_search_indices(self.geometry.fxyz[atoms], cartesian=False) + + # Get atom counts + at_counts = self._get_search_atom_counts(search_indices) + + max_pairs = at_counts.sum() + if not self_interaction: + max_pairs -= search_indices.shape[0] + + # Find the neighbour pairs + neighbour_pairs, split_ind = self._fneighs.get_pairs( + atoms, search_indices, isc, self._heads, self._list, max_pairs, self_interaction, + self.geometry.xyz, self.geometry.cell, pbc, thresholds, self._sphere_overlap + ) + + if as_pairs: + # Just return the neighbour pairs + return neighbour_pairs[:split_ind[-1]] + else: + # Split to get the neighbours of each atom + return np.split(neighbour_pairs[:, 1:], split_ind, axis=0)[:-1] + + def find_all_unique_pairs(self, self_interaction=False, pbc=(True, True, True)): + """Find all unique neighbour pairs within the geometry. + + A pair of atoms is considered unique if atoms have the same index + and correspond to the same unit cell. As an example, the connection + atom 0 (unit cell) to atom 5 (1, 0, 0) is not the same as the + connection atom 5 (unit cell) to atom 0 (-1, 0, 0). + + Note that this routine can not be called if `sphere_overlap` is + set to `False` and the radius is not a single float. In that case, + there is no way of defining "uniqueness" since pair `ij` can have + a different threshold radius than pair `ji`. + + Parameters + ---------- + atoms: optional + the atoms for which neighbours are desired. Anything that can + be sanitized by `sisl.Geometry` is a valid value. + + If not provided, neighbours for all atoms are searched. + as_pairs: bool, optional + If `True` pairs of atoms are returned. Otherwise a list containing + the neighbours for each atom is returned. + self_interaction: bool, optional + whether to consider an atom a neighbour of itself. + pbc: bool or array-like of shape (3, ) + whether periodic conditions should be considered. + If a single bool is passed, all directions use that value. + + Returns + ---------- + np.ndarray of shape (n_pairs, 5): + Each pair `ij` means that `j` is a neighbour of `i`. + The three extra columns are the supercell indices of atom `j`. + """ + if not self._sphere_overlap and not isinstance(self._R, Real): + raise ValueError("Unique atom pairs do not make sense if we are not looking for sphere overlaps." + " Please setup the finder again setting `sphere_overlap` to `True` if you wish so.") + + # Cast R and pbc into arrays of appropiate shape and type. + thresholds = np.full(self.geometry.na, self._R, dtype=np.float64) + pbc = np.full(3, pbc, dtype=np.bool) + + # Get search indices + search_indices, isc = self._get_search_indices(self.geometry.fxyz, cartesian=False) + + # Get atom counts + at_counts = self._get_search_atom_counts(search_indices) + + max_pairs = at_counts.sum() + if not self_interaction: + max_pairs -= search_indices.shape[0] + + # Find the candidate pairs + candidate_pairs, n_pairs = self._fneighs.get_all_unique_pairs( + search_indices, isc, self._heads, self._list, max_pairs, self_interaction, + self.geometry.xyz, self.geometry.cell, pbc, thresholds, self._sphere_overlap + ) + + return candidate_pairs[:n_pairs] + + def find_close(self, xyz, as_pairs=False, pbc=(True, True, True)): + """Find all atoms that are close to some coordinates in space. + + This routine only executes the action of finding neighbours, + the parameters of the search (`geometry`, `R`, `sphere_overlap`...) + are defined when the finder is initialized or by calling `setup_finder`. + + Parameters + ---------- + xyz: array-like of shape ([npoints], 3) + the coordinates for which neighbours are desired. + as_pairs: bool, optional + If `True` pairs are returned, where the first item is the index + of the point and the second one is the atom index. + Otherwise a list containing the neighbours for each point is returned. + pbc: bool or array-like of shape (3, ) + whether periodic conditions should be considered. + If a single bool is passed, all directions use that value. + + Returns + ---------- + np.ndarray or list: + If `as_pairs=True`: + A `np.ndarray` of shape (n_pairs, 5) is returned. + Each pair `ij` means that `j` is a neighbour of `i`. + The three extra columns are the supercell indices of atom `j`. + If `as_pairs=False`: + A list containing a numpy array of shape (n_neighs, 4) for each atom. + """ + # Cast R and pbc into arrays of appropiate shape and type. + thresholds = np.full(self.geometry.na, self._R, dtype=np.float64) + pbc = np.full(3, pbc, dtype=np.bool) + + xyz = np.atleast_2d(xyz) + # Get search indices + search_indices, isc = self._get_search_indices(xyz.dot(self.geometry.icell.T) % 1, cartesian=False) + + # Get atom counts + at_counts = self._get_search_atom_counts(search_indices) + + max_pairs = at_counts.sum() + + # Find the neighbour pairs + neighbour_pairs, split_ind = self._fneighs.get_close( + xyz, search_indices, isc, self._heads, self._list, max_pairs, + self.geometry.xyz, self.geometry.cell, pbc, thresholds + ) + + if as_pairs: + # Just return the neighbour pairs + return neighbour_pairs[:split_ind[-1]] + else: + # Split to get the neighbours of each position + return np.split(neighbour_pairs[:, 1:], split_ind, axis=0)[:-1] + return + + def find_neighbours_old(self, atoms=None, as_pairs=False, self_interaction=False): + """Find neighbours as specified in the finder. + + This routine only executes the action of finding neighbours, + the parameters of the search (`geometry`, `R`, `sphere_overlap`...) + are defined when the finder is initialized or by calling `setup_finder`. + + Parameters + ---------- + atoms: optional + the atoms for which neighbours are desired. Anything that can + be sanitized by `sisl.Geometry` is a valid value. + + If not provided, neighbours for all atoms are searched. + as_pairs: bool, optional + If `True` pairs of atoms are returned. Otherwise a list containing + the neighbours for each atom is returned. + self_interaction: bool, optional + whether to consider an atom a neighbour of itself. + + Returns + ---------- + np.ndarray or list: + If `as_pairs=True`: + A `np.ndarray` of shape (n_pairs, 2) is returned. Each pair `ij` + means that `j` is a neighbour of `i`. + If `as_pairs=False`: + A list containing a numpy array of shape (n_neighs) for each atom. + """ + # Sanitize atoms + atoms = self.geometry._sanitize_atoms(atoms) + + # Get search indices + search_indices, isc = self._get_search_indices(self.geometry.fxyz[atoms], cartesian=False) + + # Get atom counts + at_counts = self._get_search_atom_counts(search_indices) + + total_pairs = at_counts.sum() + if not self_interaction: + total_pairs -= search_indices.shape[0] + + # Find the candidate pairs + candidate_pairs, split_ind = self._fneighs.get_pairs_old(atoms, search_indices, self._heads, self._list, total_pairs, self_interaction) + + if as_pairs: + # Just returned the filtered pairs + return self._filter_pairs(candidate_pairs) + else: + # Get the mask to filter + mask = self._filter_pairs(candidate_pairs, return_mask=True) + + # Split both the neighbours and mask and then filter each array + candidate_pairs[:, 0] = mask + return [at_pairs[:, 1][at_pairs[:,0].astype(bool)] for at_pairs in np.split(candidate_pairs, split_ind[:-1])] + + def find_all_unique_pairs_old(self, self_interaction=False): + """Find all unique neighbour pairs within the geometry. + + Note that this routine can not be called if `sphere_overlap` is + set to `False` and the radius is not a single float. In that case, + there is no way of defining "uniqueness" since pair `ij` can have + a different threshold radius than pair `ji`. + + Parameters + ---------- + atoms: optional + the atoms for which neighbours are desired. Anything that can + be sanitized by `sisl.Geometry` is a valid value. + + If not provided, neighbours for all atoms are searched. + as_pairs: bool, optional + If `True` pairs of atoms are returned. Otherwise a list containing + the neighbours for each atom is returned. + self_interaction: bool, optional + whether to consider an atom a neighbour of itself. + + Returns + ---------- + np.ndarray or list: + If `as_pairs=True`: + A `np.ndarray` of shape (n_pairs, 2) is returned. Each pair `ij` + means that `j` is a neighbour of `i`. + If `as_pairs=False`: + A list containing a numpy array of shape (n_neighs) for each atom. + """ + if not self._sphere_overlap and not isinstance(self._R, Real): + raise ValueError("Unique atom pairs do not make sense if we are not looking for sphere overlaps." + " Please setup the finder again setting `sphere_overlap` to `True` if you wish so.") + + # Get search indices + search_indices, isc = self._get_search_indices(self.geometry.fxyz, cartesian=False) + + # Get atom counts + at_counts = self._get_search_atom_counts(search_indices) + + max_pairs = at_counts.sum() + if not self_interaction: + max_pairs -= search_indices.shape[0] + + # Find the candidate pairs + candidate_pairs, n_pairs = self._fneighs.get_all_unique_pairs_old(search_indices, self._heads, self._list, max_pairs, self_interaction) + candidate_pairs = candidate_pairs[:n_pairs] + + # Filter them and return them. + return self._filter_pairs(candidate_pairs) + + def _filter_pairs(self, candidate_pairs, return_mask=False): + """Filters candidate neighbour pairs. + + It does so according to the parameters (`geometry`, `R`, `sphere_overlap`...) + with which the finder was setup. + + Parameters + ---------- + candidate_pairs: np.ndarray of shape (n_pairs, 2) + The candidate atom pairs. + return_mask: bool, optional + Whether to return the filtering mask. + If `True`, the filtering is not performed. This function will just + return the mask. + + Returns + ---------- + np.ndarray: + If `return_mask=True`: + The filtering mask, a boolean array of shape (n_pairs,). + If `return_mask=False`: + The filtered neighbour pairs. An integer array of shape (n_filtered, 2) + """ + + if isinstance(self._R, Real): + thresh = self._R + if self._sphere_overlap: + thresh *= 2 + else: + thresh = self._R[candidate_pairs[:, 0]] + if self._sphere_overlap: + thresh += self._R[candidate_pairs[:, 1]] + + dists = fnorm( + self.geometry.xyz[candidate_pairs[:, 0]] - self.geometry.xyz[candidate_pairs[:, 1]], + axis=-1) + + mask = dists < thresh + + if return_mask: + return mask + return candidate_pairs[mask] \ No newline at end of file From 90580701e9a81642b18ecf1a443c0d08bbdb66fe Mon Sep 17 00:00:00 2001 From: Pol Febrer Date: Thu, 19 Oct 2023 21:36:20 +0200 Subject: [PATCH 02/17] neighs in python syntax and added tests --- src/sisl/geom/neighs/CMakeLists.txt | 6 +- .../{cneighs.pyx => _neigh_operations.py} | 171 ++++++------ src/sisl/geom/neighs/neighfinder.py | 244 ++++++------------ .../geom/neighs/tests/test_neighfinder.py | 236 +++++++++++++++++ 4 files changed, 394 insertions(+), 263 deletions(-) rename src/sisl/geom/neighs/{cneighs.pyx => _neigh_operations.py} (82%) create mode 100644 src/sisl/geom/neighs/tests/test_neighfinder.py diff --git a/src/sisl/geom/neighs/CMakeLists.txt b/src/sisl/geom/neighs/CMakeLists.txt index d0a8fbffa7..0c2a94aebf 100644 --- a/src/sisl/geom/neighs/CMakeLists.txt +++ b/src/sisl/geom/neighs/CMakeLists.txt @@ -6,11 +6,9 @@ set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/.. ) -foreach(source - cneighs - ) +foreach(source _neigh_operations) add_cython_library( - SOURCE ${source}.pyx + SOURCE ${source}.py LIBRARY ${source} OUTPUT ${source}_C ) diff --git a/src/sisl/geom/neighs/cneighs.pyx b/src/sisl/geom/neighs/_neigh_operations.py similarity index 82% rename from src/sisl/geom/neighs/cneighs.pyx rename to src/sisl/geom/neighs/_neigh_operations.py index f0d3604f9a..bdefca80f0 100644 --- a/src/sisl/geom/neighs/cneighs.pyx +++ b/src/sisl/geom/neighs/_neigh_operations.py @@ -1,12 +1,25 @@ -cimport cython -from libc.math cimport sqrt +"""File meant to be compiled with Cython so that operations are much faster.""" +from __future__ import annotations + +try: + import cython + + from cython.cimports.libc.math import sqrt as SQRT + # This little trick is because cython does not allow you + # to have an statement that sets an imported variable. + # If we directly import sqrt, then it fails because of the + # statement in the except clause (weird). + sqrt = SQRT + # This enables Cython enhanced compatibilities + import cython.cimports.numpy as cnp +except ImportError: + sqrt = lambda x: x**0.5 import numpy as np -# This enables Cython enhanced compatibilities -cimport numpy as np + @cython.boundscheck(False) @cython.wraparound(False) -def build_table(np.int64_t nbins, np.int64_t[:] bin_indices): +def build_table(nbins: cnp.int64_t, bin_indices: cnp.int64_t[:]): """Builds the table where the location of all atoms is encoded Additionally, it also builds an array with the number of atoms at @@ -33,19 +46,14 @@ def build_table(np.int64_t nbins, np.int64_t[:] bin_indices): counts: For each bin, the number of atoms that it contains. """ - cdef: - np.int64_t at, bin_index - - np.int64_t Nat = bin_indices.shape[0] - - # Initialize heads and counts arrays. We don't need to initialize the list array - # since we are going to modify all its positions. - np.int64_t[:] list_array = np.zeros(Nat, dtype=np.int64) - np.int64_t[:] counts = np.zeros(nbins, dtype=np.int64) - np.int64_t[:] heads = np.ones(nbins, dtype=np.int64) * -1 + n_atoms = bin_indices.shape[0] + + list_array: cnp.int64_t[:] = np.zeros(n_atoms, dtype=np.int64) + counts: cnp.int64_t[:] = np.zeros(nbins, dtype=np.int64) + heads: cnp.int64_t[:] = np.full(nbins, -1, dtype=np.int64) # Loop through all atoms - for at in range(Nat): + for at in range(n_atoms): # Get the index of the bin where this atom is located. bin_index = bin_indices[at] @@ -63,10 +71,10 @@ def build_table(np.int64_t nbins, np.int64_t[:] bin_indices): @cython.boundscheck(False) @cython.wraparound(False) -def get_pairs(np.int64_t[:] at_indices, np.int64_t[:, :] indices, np.int64_t[:, :, :] iscs, - np.int64_t[:] heads, np.int64_t[:] list_array, np.int64_t max_npairs, - bint self_interaction, np.float64_t[:, :] xyz, np.float64_t[:, :] cell, - np.npy_bool[:] pbc, np.float64_t[:] thresholds, bint sphere_overlap): +def get_pairs(at_indices: cnp.int64_t[:], indices: cnp.int64_t[:, :], iscs: cnp.int64_t[:, :, :], + heads: cnp.int64_t[:], list_array: cnp.int64_t[:], max_npairs: cnp.int64_t, + self_interaction: cython.bint, xyz: cnp.float64_t[:, :], cell: cnp.float64_t[:, :], + pbc: cnp.npy_bool[:], thresholds: cnp.float64_t[:], sphere_overlap: cython.bint): """Gets (possibly duplicated) pairs of neighbour atoms. Parameters @@ -124,20 +132,15 @@ def get_pairs(np.int64_t[:] at_indices, np.int64_t[:, :] indices, np.int64_t[:, for each 8 bin search. It can be used later to quickly split `neighs` on the multiple individual searches. """ - cdef: - int N_ind = at_indices.shape[0] - - int i_pair = 0 - int search_index, at, j - bint not_unit_cell, should_not_check - np.int64_t neigh_at, bin_index - np.float64_t threshold, dist + N_ind = at_indices.shape[0] + + i_pair: cython.int = 0 - np.float64_t[:] ref_xyz = np.zeros(3, dtype=np.float64) - np.int64_t[:] neigh_isc = np.zeros(3, dtype=np.int64) - - np.int64_t[:,:] neighs = np.zeros((max_npairs, 5), dtype=np.int64) - np.int64_t[:] split_indices = np.zeros(N_ind, dtype=np.int64) + ref_xyz: cnp.float64_t[:] = np.zeros(3, dtype=np.float64) + neigh_isc: cnp.int64_t[:] = np.zeros(3, dtype=np.int64) + + neighs: cnp.int64_t[:,:] = np.zeros((max_npairs, 5), dtype=np.int64) + split_indices: cnp.int64_t[:] = np.zeros(N_ind, dtype=np.int64) for search_index in range(N_ind): at = at_indices[search_index] @@ -147,7 +150,7 @@ def get_pairs(np.int64_t[:] at_indices, np.int64_t[:, :] indices, np.int64_t[:, bin_index = indices[search_index, j] # Get the first atom index in this bin - neigh_at = heads[bin_index] + neigh_at: cnp.int64_t = heads[bin_index] # If there are no atoms in this bin, do not even bother # checking supercell indices. @@ -158,7 +161,7 @@ def get_pairs(np.int64_t[:] at_indices, np.int64_t[:, :] indices, np.int64_t[:, neigh_isc[:] = iscs[search_index, j, :] # And check if this bin corresponds to the unit cell - not_unit_cell = neigh_isc[0] != 0 or neigh_isc[1] != 0 or neigh_isc[2] != 0 + not_unit_cell: cython.bint = neigh_isc[0] != 0 or neigh_isc[1] != 0 or neigh_isc[2] != 0 ref_xyz[:] = xyz[at, :] if not_unit_cell: @@ -188,13 +191,13 @@ def get_pairs(np.int64_t[:] at_indices, np.int64_t[:, :] indices, np.int64_t[:, # Calculate the distance between the atom and the potential # neighbor. - dist = 0.0 + dist: float = 0.0 for i in range(3): dist += (xyz[neigh_at, i] - ref_xyz[i])**2 dist = sqrt(dist) # Get the threshold for this pair of atoms - threshold = thresholds[at] + threshold: float = thresholds[at] if sphere_overlap: # If the user wants to check for sphere overlaps, we have # to sum the radius of the neighbor to the threshold @@ -221,10 +224,10 @@ def get_pairs(np.int64_t[:] at_indices, np.int64_t[:, :] indices, np.int64_t[:, @cython.boundscheck(False) @cython.wraparound(False) -def get_all_unique_pairs(np.int64_t[:, :] indices, np.int64_t[:, :, :] iscs, - np.int64_t[:] heads, np.int64_t[:] list_array, np.int64_t max_npairs, - bint self_interaction, np.float64_t[:, :] xyz, np.float64_t[:, :] cell, - np.npy_bool[:] pbc, np.float64_t[:] thresholds, bint sphere_overlap): +def get_all_unique_pairs(indices: cnp.int64_t[:, :], iscs: cnp.int64_t[:, :, :], + heads: cnp.int64_t[:], list_array: cnp.int64_t[:], max_npairs: cnp.int64_t, + self_interaction: cython.bint, xyz: cnp.float64_t[:, :], cell: cnp.float64_t[:, :], + pbc: cnp.npy_bool[:], thresholds: cnp.float64_t[:], sphere_overlap: cython.bint): """Gets all unique pairs of atoms that are neighbours. Parameters @@ -282,27 +285,33 @@ def get_all_unique_pairs(np.int64_t[:, :] indices, np.int64_t[:, :, :] iscs, will be less than `max_npairs`. This should presumably be used to slice the `neighs` array once in python. """ - cdef: - int N_ats = list_array.shape[0] - - int i_pair = 0 - int at, j - bint not_unit_cell, should_not_check - np.int64_t neigh_at, bin_index - np.float64_t threshold, dist - - np.float64_t[:] ref_xyz = np.zeros(3, dtype=np.float64) - np.int64_t[:] neigh_isc = np.zeros(3, dtype=np.int64) - np.int64_t[:,:] neighs = np.zeros((max_npairs, 5), dtype=np.int64) + N_ats = list_array.shape[0] + + i_pair: cython.int = 0 + + ref_xyz: cnp.float64_t[:] = np.zeros(3, dtype=np.float64) + neigh_isc: cnp.int64_t[:] = np.zeros(3, dtype=np.int64) + + neighs: cnp.int64_t[:,:] = np.zeros((max_npairs, 5), dtype=np.int64) for at in range(N_ats): + + if self_interaction: + # Add the self interaction + neighs[i_pair, 0] = at + neighs[i_pair, 1] = at + neighs[i_pair, 2:] = 0 + + # Increment the pair index + i_pair = i_pair + 1 + for j in range(8): # Find the bin index. bin_index = indices[at, j] # Get the first atom index in this bin - neigh_at = heads[bin_index] + neigh_at: cnp.int64_t = heads[bin_index] # If there are no atoms in this bin, do not even bother # checking supercell indices. @@ -313,7 +322,7 @@ def get_all_unique_pairs(np.int64_t[:, :] indices, np.int64_t[:, :, :] iscs, neigh_isc[:] = iscs[at, j, :] # And check if this bin corresponds to the unit cell - not_unit_cell = neigh_isc[0] != 0 or neigh_isc[1] != 0 or neigh_isc[2] != 0 + not_unit_cell: cython.bint = neigh_isc[0] != 0 or neigh_isc[1] != 0 or neigh_isc[2] != 0 ref_xyz[:] = xyz[at, :] if not_unit_cell: @@ -346,13 +355,13 @@ def get_all_unique_pairs(np.int64_t[:, :] indices, np.int64_t[:, :, :] iscs, # Calculate the distance between the atom and the potential # neighbor. - dist = 0.0 + dist: float = 0.0 for i in range(3): dist += (xyz[neigh_at, i] - ref_xyz[i])**2 dist = sqrt(dist) # Get the threshold for this pair of atoms - threshold = thresholds[at] + threshold: float = thresholds[at] if sphere_overlap: # If the user wants to check for sphere overlaps, we have # to sum the radius of the neighbor to the threshold @@ -371,25 +380,17 @@ def get_all_unique_pairs(np.int64_t[:, :] indices, np.int64_t[:, :, :] iscs, # Get the next atom in this bin. Sum 1 to get fortran index. neigh_at = list_array[neigh_at] - - if j == 1 and self_interaction: - # Add the self interaction - neighs[i_pair, 0] = at - neighs[i_pair, 1] = neigh_at - neighs[i_pair, 2:] = 0 - - # Increment the pair index - i_pair = i_pair + 1 + print(np.asarray(neighs[:i_pair + 1]), i_pair) # Return the array of neighbours, but only the filled part return np.asarray(neighs[:i_pair + 1]), i_pair @cython.boundscheck(False) @cython.wraparound(False) -def get_close(np.float64_t[:, :] search_xyz, np.int64_t[:, :] indices, np.int64_t[:, :, :] iscs, - np.int64_t[:] heads, np.int64_t[:] list_array, np.int64_t max_npairs, - np.float64_t[:, :] xyz, np.float64_t[:, :] cell, - np.npy_bool[:] pbc, np.float64_t[:] thresholds): +def get_close(search_xyz: cnp.float64_t[:, :], indices: cnp.int64_t[:, :], iscs: cnp.int64_t[:, :, :], + heads: cnp.int64_t[:], list_array: cnp.int64_t[:], max_npairs: cnp.int64_t, + xyz: cnp.float64_t[:, :], cell: cnp.float64_t[:, :], + pbc: cnp.npy_bool[:], thresholds: cnp.float64_t[:]): """Gets the atoms that are close to given positions Parameters @@ -447,20 +448,16 @@ def get_close(np.float64_t[:, :] search_xyz, np.int64_t[:, :] indices, np.int64_ for each 8 bin search. It can be used later to quickly split `neighs` on the multiple individual searches. """ - cdef: - int N_ind = search_xyz.shape[0] - - int i_pair = 0 - int search_index, j - bint not_unit_cell, should_not_check - np.int64_t neigh_at, bin_index - np.float64_t threshold, dist - - np.float64_t[:] ref_xyz = np.zeros(3, dtype=np.float64) - np.int64_t[:] neigh_isc = np.zeros(3, dtype=np.int64) - np.int64_t[:,:] neighs = np.zeros((max_npairs, 5), dtype=np.int64) - np.int64_t[:] split_indices = np.zeros(N_ind, dtype=np.int64) + N_ind = search_xyz.shape[0] + + i_pair: cython.int = 0 + + ref_xyz: cnp.float64_t[:] = np.zeros(3, dtype=np.float64) + neigh_isc: cnp.int64_t[:] = np.zeros(3, dtype=np.int64) + + neighs: cnp.int64_t[:,:] = np.zeros((max_npairs, 5), dtype=np.int64) + split_indices: cnp.int64_t[:] = np.zeros(N_ind, dtype=np.int64) for search_index in range(N_ind): for j in range(8): @@ -468,7 +465,7 @@ def get_close(np.float64_t[:, :] search_xyz, np.int64_t[:, :] indices, np.int64_ bin_index = indices[search_index, j] # Get the first atom index in this bin - neigh_at = heads[bin_index] + neigh_at: cnp.int64_t = heads[bin_index] # If there are no atoms in this bin, do not even bother # checking supercell indices. @@ -479,7 +476,7 @@ def get_close(np.float64_t[:, :] search_xyz, np.int64_t[:, :] indices, np.int64_ neigh_isc[:] = iscs[search_index, j, :] # And check if this bin corresponds to the unit cell - not_unit_cell = neigh_isc[0] != 0 or neigh_isc[1] != 0 or neigh_isc[2] != 0 + not_unit_cell: cython.bint = neigh_isc[0] != 0 or neigh_isc[1] != 0 or neigh_isc[2] != 0 ref_xyz[:] = search_xyz[search_index, :] if not_unit_cell: @@ -503,13 +500,13 @@ def get_close(np.float64_t[:, :] search_xyz, np.int64_t[:, :] indices, np.int64_ while neigh_at >= 0: # Calculate the distance between the atom and the potential # neighbor. - dist = 0.0 + dist: float = 0.0 for i in range(3): dist += (xyz[neigh_at, i] - ref_xyz[i])**2 dist = sqrt(dist) # Get the threshold for the potential neighbour - threshold = thresholds[neigh_at] + threshold: float = thresholds[neigh_at] if dist < threshold: # Store the pair of neighbours. diff --git a/src/sisl/geom/neighs/neighfinder.py b/src/sisl/geom/neighs/neighfinder.py index b60163e715..ce9451ecbe 100644 --- a/src/sisl/geom/neighs/neighfinder.py +++ b/src/sisl/geom/neighs/neighfinder.py @@ -1,9 +1,12 @@ +from typing import Optional, Sequence, Tuple, Union + import numpy as np from numbers import Real +from sisl.typing import AtomsArgument +from sisl import Geometry from sisl.utils.mathematics import fnorm -from . import _fneighs -from . import cneighs +from . import _neigh_operations class NeighFinder: @@ -38,15 +41,32 @@ class NeighFinder: If not provided, it will be `True` if `R` is an array and `False` if it is a single float. """ - def __init__(self, geometry, R=None, sphere_overlap=None, fortran=True): - if fortran: - self._fneighs = _fneighs - else: - self._fneighs = cneighs + geometry: Geometry + + nbins: Tuple[int, int, int] + total_nbins: int + + _R: Union[float, np.ndarray] + _sphere_overlap: bool + + # Data structure + _list: np.ndarray # (natoms, ) + _heads: np.ndarray # (total_nbins, ) + _counts: np.ndarray # (total_nbins, ) + + def __init__(self, + geometry: Geometry, + R: Union[float, np.ndarray, None] = None, + sphere_overlap: Optional[bool] = None + ): self.setup_finder(geometry, R=R, sphere_overlap=sphere_overlap) - def setup_finder(self, geometry=None, R=None, sphere_overlap=None): + def setup_finder(self, + geometry: Optional[Geometry] = None, + R: Union[float, np.ndarray, None] = None, + sphere_overlap: Optional[bool] = None + ): """Prepares everything for neighbour finding. Parameters @@ -84,7 +104,7 @@ def setup_finder(self, geometry=None, R=None, sphere_overlap=None): R = np.array(R) # If R was not provided, build an array of Rs if R is None: - R = geometry.atoms.maxR(all=True) + R = self.geometry.atoms.maxR(all=True) # Set the radius self._R = R @@ -111,8 +131,8 @@ def setup_finder(self, geometry=None, R=None, sphere_overlap=None): bin_size += 0.01 # Get the number of bins along each cell direction. - nbins_float = fnorm(geometry.cell, axis=-1) / bin_size - self.nbins = np.floor(nbins_float).astype(int) + nbins_float = fnorm(self.geometry.cell, axis=-1) / bin_size + self.nbins = tuple(np.floor(nbins_float).astype(int)) self.total_nbins = np.prod(self.nbins) if self.total_nbins == 0: @@ -145,7 +165,26 @@ def _build_table(self, bin_indices): Array containing the scalar bin index for each atom. """ # Call the fortran routine that builds the table - self._list, self._heads, self._counts = self._fneighs.build_table(self.total_nbins, bin_indices) + self._list, self._heads, self._counts = _neigh_operations.build_table(self.total_nbins, bin_indices) + + def assert_consistency(self): + """Asserts that the data structure (self._list, self._heads, self._counts) is consistent. + + It also stores that the shape is consistent with the stored geometry and the store total_nbins. + """ + # Check shapes + assert self._list.shape == (self.geometry.na, ) + assert self._counts.shape == self._heads.shape == (self.total_nbins, ) + + # Check values + for i_bin, bin_count in enumerate(self._counts): + count = 0 + head = self._heads[i_bin] + while head != -1: + count += 1 + head = self._list[head] + + assert count == bin_count, f"Bin {i_bin} has {bin_count} atoms but we found {count} atoms" def _get_search_atom_counts(self, scalar_bin_indices): """Computes the number of atoms that will be explored for each search @@ -268,7 +307,12 @@ def _scalar_to_cartesian_index(self, index): second, first = np.divmod(index, self.nbins[0]) return np.array([first, second, third]).T - def find_neighbours(self, atoms=None, as_pairs=False, self_interaction=False, pbc=(True, True, True)): + def find_neighbours(self, + atoms: AtomsArgument = None, + as_pairs: bool = False, + self_interaction: bool = False, + pbc: Union[bool, Tuple[bool, bool, bool]] = (True, True, True) + ): """Find neighbours as specified in the finder. This routine only executes the action of finding neighbours, @@ -306,7 +350,7 @@ def find_neighbours(self, atoms=None, as_pairs=False, self_interaction=False, pb # Cast R and pbc into arrays of appropiate shape and type. thresholds = np.full(self.geometry.na, self._R, dtype=np.float64) - pbc = np.full(3, pbc, dtype=np.bool) + pbc = np.full(3, pbc, dtype=bool) # Get search indices search_indices, isc = self._get_search_indices(self.geometry.fxyz[atoms], cartesian=False) @@ -319,7 +363,7 @@ def find_neighbours(self, atoms=None, as_pairs=False, self_interaction=False, pb max_pairs -= search_indices.shape[0] # Find the neighbour pairs - neighbour_pairs, split_ind = self._fneighs.get_pairs( + neighbour_pairs, split_ind = _neigh_operations.get_pairs( atoms, search_indices, isc, self._heads, self._list, max_pairs, self_interaction, self.geometry.xyz, self.geometry.cell, pbc, thresholds, self._sphere_overlap ) @@ -331,7 +375,10 @@ def find_neighbours(self, atoms=None, as_pairs=False, self_interaction=False, pb # Split to get the neighbours of each atom return np.split(neighbour_pairs[:, 1:], split_ind, axis=0)[:-1] - def find_all_unique_pairs(self, self_interaction=False, pbc=(True, True, True)): + def find_all_unique_pairs(self, + self_interaction: bool = False, + pbc: Union[bool, Tuple[bool, bool, bool]] = (True, True, True) + ): """Find all unique neighbour pairs within the geometry. A pair of atoms is considered unique if atoms have the same index @@ -372,7 +419,7 @@ def find_all_unique_pairs(self, self_interaction=False, pbc=(True, True, True)): # Cast R and pbc into arrays of appropiate shape and type. thresholds = np.full(self.geometry.na, self._R, dtype=np.float64) - pbc = np.full(3, pbc, dtype=np.bool) + pbc = np.full(3, pbc, dtype=bool) # Get search indices search_indices, isc = self._get_search_indices(self.geometry.fxyz, cartesian=False) @@ -385,14 +432,18 @@ def find_all_unique_pairs(self, self_interaction=False, pbc=(True, True, True)): max_pairs -= search_indices.shape[0] # Find the candidate pairs - candidate_pairs, n_pairs = self._fneighs.get_all_unique_pairs( + candidate_pairs, n_pairs = _neigh_operations.get_all_unique_pairs( search_indices, isc, self._heads, self._list, max_pairs, self_interaction, self.geometry.xyz, self.geometry.cell, pbc, thresholds, self._sphere_overlap ) return candidate_pairs[:n_pairs] - def find_close(self, xyz, as_pairs=False, pbc=(True, True, True)): + def find_close(self, + xyz: Sequence, + as_pairs: bool = False, + pbc: Union[bool, Tuple[bool, bool, bool]] = (True, True, True) + ): """Find all atoms that are close to some coordinates in space. This routine only executes the action of finding neighbours, @@ -423,7 +474,7 @@ def find_close(self, xyz, as_pairs=False, pbc=(True, True, True)): """ # Cast R and pbc into arrays of appropiate shape and type. thresholds = np.full(self.geometry.na, self._R, dtype=np.float64) - pbc = np.full(3, pbc, dtype=np.bool) + pbc = np.full(3, pbc, dtype=bool) xyz = np.atleast_2d(xyz) # Get search indices @@ -435,7 +486,7 @@ def find_close(self, xyz, as_pairs=False, pbc=(True, True, True)): max_pairs = at_counts.sum() # Find the neighbour pairs - neighbour_pairs, split_ind = self._fneighs.get_close( + neighbour_pairs, split_ind = _neigh_operations.get_close( xyz, search_indices, isc, self._heads, self._list, max_pairs, self.geometry.xyz, self.geometry.cell, pbc, thresholds ) @@ -446,154 +497,3 @@ def find_close(self, xyz, as_pairs=False, pbc=(True, True, True)): else: # Split to get the neighbours of each position return np.split(neighbour_pairs[:, 1:], split_ind, axis=0)[:-1] - return - - def find_neighbours_old(self, atoms=None, as_pairs=False, self_interaction=False): - """Find neighbours as specified in the finder. - - This routine only executes the action of finding neighbours, - the parameters of the search (`geometry`, `R`, `sphere_overlap`...) - are defined when the finder is initialized or by calling `setup_finder`. - - Parameters - ---------- - atoms: optional - the atoms for which neighbours are desired. Anything that can - be sanitized by `sisl.Geometry` is a valid value. - - If not provided, neighbours for all atoms are searched. - as_pairs: bool, optional - If `True` pairs of atoms are returned. Otherwise a list containing - the neighbours for each atom is returned. - self_interaction: bool, optional - whether to consider an atom a neighbour of itself. - - Returns - ---------- - np.ndarray or list: - If `as_pairs=True`: - A `np.ndarray` of shape (n_pairs, 2) is returned. Each pair `ij` - means that `j` is a neighbour of `i`. - If `as_pairs=False`: - A list containing a numpy array of shape (n_neighs) for each atom. - """ - # Sanitize atoms - atoms = self.geometry._sanitize_atoms(atoms) - - # Get search indices - search_indices, isc = self._get_search_indices(self.geometry.fxyz[atoms], cartesian=False) - - # Get atom counts - at_counts = self._get_search_atom_counts(search_indices) - - total_pairs = at_counts.sum() - if not self_interaction: - total_pairs -= search_indices.shape[0] - - # Find the candidate pairs - candidate_pairs, split_ind = self._fneighs.get_pairs_old(atoms, search_indices, self._heads, self._list, total_pairs, self_interaction) - - if as_pairs: - # Just returned the filtered pairs - return self._filter_pairs(candidate_pairs) - else: - # Get the mask to filter - mask = self._filter_pairs(candidate_pairs, return_mask=True) - - # Split both the neighbours and mask and then filter each array - candidate_pairs[:, 0] = mask - return [at_pairs[:, 1][at_pairs[:,0].astype(bool)] for at_pairs in np.split(candidate_pairs, split_ind[:-1])] - - def find_all_unique_pairs_old(self, self_interaction=False): - """Find all unique neighbour pairs within the geometry. - - Note that this routine can not be called if `sphere_overlap` is - set to `False` and the radius is not a single float. In that case, - there is no way of defining "uniqueness" since pair `ij` can have - a different threshold radius than pair `ji`. - - Parameters - ---------- - atoms: optional - the atoms for which neighbours are desired. Anything that can - be sanitized by `sisl.Geometry` is a valid value. - - If not provided, neighbours for all atoms are searched. - as_pairs: bool, optional - If `True` pairs of atoms are returned. Otherwise a list containing - the neighbours for each atom is returned. - self_interaction: bool, optional - whether to consider an atom a neighbour of itself. - - Returns - ---------- - np.ndarray or list: - If `as_pairs=True`: - A `np.ndarray` of shape (n_pairs, 2) is returned. Each pair `ij` - means that `j` is a neighbour of `i`. - If `as_pairs=False`: - A list containing a numpy array of shape (n_neighs) for each atom. - """ - if not self._sphere_overlap and not isinstance(self._R, Real): - raise ValueError("Unique atom pairs do not make sense if we are not looking for sphere overlaps." - " Please setup the finder again setting `sphere_overlap` to `True` if you wish so.") - - # Get search indices - search_indices, isc = self._get_search_indices(self.geometry.fxyz, cartesian=False) - - # Get atom counts - at_counts = self._get_search_atom_counts(search_indices) - - max_pairs = at_counts.sum() - if not self_interaction: - max_pairs -= search_indices.shape[0] - - # Find the candidate pairs - candidate_pairs, n_pairs = self._fneighs.get_all_unique_pairs_old(search_indices, self._heads, self._list, max_pairs, self_interaction) - candidate_pairs = candidate_pairs[:n_pairs] - - # Filter them and return them. - return self._filter_pairs(candidate_pairs) - - def _filter_pairs(self, candidate_pairs, return_mask=False): - """Filters candidate neighbour pairs. - - It does so according to the parameters (`geometry`, `R`, `sphere_overlap`...) - with which the finder was setup. - - Parameters - ---------- - candidate_pairs: np.ndarray of shape (n_pairs, 2) - The candidate atom pairs. - return_mask: bool, optional - Whether to return the filtering mask. - If `True`, the filtering is not performed. This function will just - return the mask. - - Returns - ---------- - np.ndarray: - If `return_mask=True`: - The filtering mask, a boolean array of shape (n_pairs,). - If `return_mask=False`: - The filtered neighbour pairs. An integer array of shape (n_filtered, 2) - """ - - if isinstance(self._R, Real): - thresh = self._R - if self._sphere_overlap: - thresh *= 2 - else: - thresh = self._R[candidate_pairs[:, 0]] - if self._sphere_overlap: - thresh += self._R[candidate_pairs[:, 1]] - - dists = fnorm( - self.geometry.xyz[candidate_pairs[:, 0]] - self.geometry.xyz[candidate_pairs[:, 1]], - axis=-1) - - mask = dists < thresh - - if return_mask: - return mask - return candidate_pairs[mask] \ No newline at end of file diff --git a/src/sisl/geom/neighs/tests/test_neighfinder.py b/src/sisl/geom/neighs/tests/test_neighfinder.py new file mode 100644 index 0000000000..9ae6b9246f --- /dev/null +++ b/src/sisl/geom/neighs/tests/test_neighfinder.py @@ -0,0 +1,236 @@ +import pytest + +import numpy as np + +from sisl import Geometry +from sisl.geom import NeighFinder + +@pytest.fixture(scope="module", params=[True, False]) +def sphere_overlap(request): + return request.param + +@pytest.fixture(scope="module", params=[True, False]) +def multiR(request): + return request.param + +@pytest.fixture(scope="module", params=[True, False]) +def self_interaction(request): + return request.param + +@pytest.fixture(scope="module", params=[False, True]) +def post_setup(request): + return request.param + +@pytest.fixture(scope="module", params=[False, True]) +def pbc(request): + return request.param + +@pytest.fixture(scope="module") +def neighfinder(sphere_overlap, multiR): + geom = Geometry([[0,0,0], [1.2, 0, 0], [9, 0, 0]], lattice=np.diag([10, 10, 7])) + + R = np.array([1.1, 1.5, 1.2]) if multiR else 1.5 + + neighfinder = NeighFinder(geom, R=R, sphere_overlap=sphere_overlap) + + neighfinder.assert_consistency() + + return neighfinder + +@pytest.fixture(scope="module") +def expected_neighs(sphere_overlap, multiR, self_interaction, pbc): + + first_at_neighs = [] + if not(multiR and not sphere_overlap): + first_at_neighs.append([0, 1, 0, 0, 0]) + if self_interaction: + first_at_neighs.append([0, 0, 0, 0, 0]) + if pbc: + first_at_neighs.append([0, 2, -1, 0, 0]) + + second_at_neighs = [[1, 0, 0, 0, 0]] + if self_interaction: + second_at_neighs.insert(0, [1, 1, 0, 0, 0]) + if pbc and sphere_overlap: + second_at_neighs.append([1, 2, -1, 0, 0]) + + third_at_neighs = [] + if self_interaction: + third_at_neighs.append([2, 2, 0, 0, 0]) + if pbc: + if sphere_overlap: + third_at_neighs.append([2, 1, 1, 0, 0]) + third_at_neighs.append([2, 0, 1, 0, 0]) + + return np.array(first_at_neighs), np.array(second_at_neighs), np.array(third_at_neighs) + +def test_neighfinder_setup(sphere_overlap, multiR, post_setup): + + geom = Geometry([[0,0,0], [1, 0, 0]], lattice=np.diag([10, 10, 7])) + + R = np.array([0.9, 1.5]) if multiR else 1.5 + + if post_setup: + # We are going to create a data structure with the wrong parameters, + # and then update it. + finder = NeighFinder(geom, R=R - 0.7, sphere_overlap=not sphere_overlap) + finder.setup_finder(R=R, sphere_overlap=sphere_overlap) + else: + # Initialize finder with the right parameters. + finder = NeighFinder(geom, R=R, sphere_overlap=sphere_overlap) + + # Check that R is properly set when its a scalar and an array. + if multiR: + assert isinstance(finder._R, np.ndarray) + assert np.all(finder._R == R) + else: + assert isinstance(finder._R, float) + assert finder._R == R + + # Assert that we have stored a copy of the geometry + assert isinstance(finder.geometry, Geometry) + assert finder.geometry == geom + assert finder.geometry is not geom + + # Check that the total number of bins is correct. If sphere_overlap is + # True, bins are much bigger. + nbins = (1, 1, 1) if sphere_overlap else (3, 3, 2) + assert finder.nbins == nbins + + total_bins = 1 if sphere_overlap else 18 + assert finder.total_nbins == total_bins + + # Assert that the data structure is generated. + for k in ("_list", "_counts", "_heads"): + assert hasattr(finder, k), k + assert isinstance(finder._list, np.ndarray), k + + finder.assert_consistency() + + # Check that all bins are empty except one, which contains the two atoms. + assert (finder._counts == 0).sum() == finder.total_nbins - 1 + assert finder._counts.sum() == 2 + +def test_neighbour_pairs(neighfinder, self_interaction, pbc, expected_neighs): + + neighs = neighfinder.find_neighbours(as_pairs=True, self_interaction=self_interaction, pbc=pbc) + + assert isinstance(neighs, np.ndarray) + + first_at_neighs, second_at_neighs, third_at_neighs = expected_neighs + + n_neighs = len(first_at_neighs) + len(second_at_neighs) + len(third_at_neighs) + + assert neighs.shape == (n_neighs, 5) + + assert np.all(neighs == [*first_at_neighs, *second_at_neighs, *third_at_neighs]) + +def test_neighbours_lists(neighfinder, self_interaction, pbc, expected_neighs): + + neighs = neighfinder.find_neighbours(as_pairs=False, self_interaction=self_interaction, pbc=pbc) + + assert isinstance(neighs, list) + assert len(neighs) == 3 + + assert all(isinstance(n, np.ndarray) for n in neighs) + + first_at_neighs, second_at_neighs, third_at_neighs = expected_neighs + + # Check shapes + for i, i_at_neighs in enumerate([first_at_neighs, second_at_neighs, third_at_neighs]): + assert neighs[i].shape == (len(i_at_neighs), 4), f"Wrong shape for neighbours of atom {i}" + + # Check values + for i, i_at_neighs in enumerate([first_at_neighs, second_at_neighs, third_at_neighs]): + if len(neighs[i]) == 0: + continue + + assert np.all(neighs[i] == i_at_neighs[:, 1:]), f"Wrong values for neighbours of atom {i}" + +def test_all_unique_pairs(neighfinder, self_interaction, pbc, expected_neighs): + + if isinstance(neighfinder._R, np.ndarray) and not neighfinder._sphere_overlap: + with pytest.raises(ValueError): + neighfinder.find_all_unique_pairs(self_interaction=self_interaction, pbc=pbc) + return + + neighs = neighfinder.find_all_unique_pairs(self_interaction=self_interaction, pbc=pbc) + + first_at_neighs, second_at_neighs, third_at_neighs = expected_neighs + + all_expected_neighs = np.array([*first_at_neighs, *second_at_neighs, *third_at_neighs]) + + unique_neighs = [] + for neigh_pair in all_expected_neighs: + if not np.all(neigh_pair[2:] == 0): + unique_neighs.append(neigh_pair) + else: + for others in unique_neighs: + if np.all(others == [neigh_pair[1], neigh_pair[0], *neigh_pair[2:]]): + break + else: + unique_neighs.append(neigh_pair) + + assert neighs.shape == (len(unique_neighs), 5) + return + + # Check that supercell indices are all 0 + assert np.all(neighs[:, 2:] == 0) + + # Check that the atom indices are correct + expected_neighs = [[0, 1]] if not self_interaction else [[0, 0], [0, 1], [1, 1]] + + assert np.all(neighs[:, :2] == expected_neighs) + +def test_close(neighfinder, pbc): + neighs = neighfinder.find_close([0.3, 0, 0], as_pairs=True, pbc=pbc) + + expected_neighs = [[0, 1, 0, 0, 0], [0, 0, 0, 0, 0]] + if pbc and isinstance(neighfinder._R, float): + expected_neighs.append([0, 2, -1, 0, 0]) + + assert neighs.shape == (len(expected_neighs), 5) + assert np.all(neighs == expected_neighs) + +def test_no_neighbours(pbc): + """Test the case where there are no neighbours, to see that it doesn't crash.""" + + geom = Geometry([[0,0,0]]) + + finder = NeighFinder(geom, R=1.5) + + neighs = finder.find_neighbours(as_pairs=True, pbc=pbc) + + assert isinstance(neighs, np.ndarray) + assert neighs.shape == (0, 5) + + neighs = finder.find_neighbours(as_pairs=False, pbc=pbc) + + assert isinstance(neighs, list) + assert len(neighs) == 1 + + assert isinstance(neighs[0], np.ndarray) + assert neighs[0].shape == (0, 4) + + neighs = finder.find_all_unique_pairs(pbc=pbc) + + assert isinstance(neighs, np.ndarray) + assert neighs.shape == (0, 5) + +def test_R_too_big(pbc): + """Test the case when R is so big that it needs a bigger bin + than the unit cell.""" + + geom = Geometry([[0,0,0], [1, 0, 0]], lattice=np.diag([2, 10, 10])) + + neighfinder = NeighFinder(geom, R=1.5) + + neighs = neighfinder.find_all_unique_pairs(pbc=pbc) + + expected_neighs = [[0, 1, 0, 0, 0]] + if pbc: + expected_neighs.append([1, 0, 1, 0, 0]) + expected_neighs.append([0, 1, -1, 0, 0]) + + assert neighs.shape == (len(expected_neighs), 5) + assert np.all(neighs == expected_neighs) From 18ffc6d29ba9a11b4f3529b0b8b220f716b03993 Mon Sep 17 00:00:00 2001 From: Pol Febrer Date: Thu, 19 Oct 2023 21:53:49 +0200 Subject: [PATCH 03/17] remove print statement --- src/sisl/geom/neighs/_neigh_operations.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/sisl/geom/neighs/_neigh_operations.py b/src/sisl/geom/neighs/_neigh_operations.py index bdefca80f0..340b941dbe 100644 --- a/src/sisl/geom/neighs/_neigh_operations.py +++ b/src/sisl/geom/neighs/_neigh_operations.py @@ -381,7 +381,6 @@ def get_all_unique_pairs(indices: cnp.int64_t[:, :], iscs: cnp.int64_t[:, :, :], # Get the next atom in this bin. Sum 1 to get fortran index. neigh_at = list_array[neigh_at] - print(np.asarray(neighs[:i_pair + 1]), i_pair) # Return the array of neighbours, but only the filled part return np.asarray(neighs[:i_pair + 1]), i_pair From 360e0850f65da25c3a9ceb7326d54f490addce03 Mon Sep 17 00:00:00 2001 From: Pol Febrer Date: Thu, 19 Oct 2023 22:19:00 +0200 Subject: [PATCH 04/17] cython was using the python sqrt --- src/sisl/geom/neighs/_neigh_operations.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/src/sisl/geom/neighs/_neigh_operations.py b/src/sisl/geom/neighs/_neigh_operations.py index 340b941dbe..3a7adaa96a 100644 --- a/src/sisl/geom/neighs/_neigh_operations.py +++ b/src/sisl/geom/neighs/_neigh_operations.py @@ -1,19 +1,10 @@ """File meant to be compiled with Cython so that operations are much faster.""" from __future__ import annotations -try: - import cython - - from cython.cimports.libc.math import sqrt as SQRT - # This little trick is because cython does not allow you - # to have an statement that sets an imported variable. - # If we directly import sqrt, then it fails because of the - # statement in the except clause (weird). - sqrt = SQRT - # This enables Cython enhanced compatibilities - import cython.cimports.numpy as cnp -except ImportError: - sqrt = lambda x: x**0.5 +import cython + +from cython.cimports.libc.math import sqrt +import cython.cimports.numpy as cnp import numpy as np From ebd2980ce51ae0e7069acdb165511b25c4db6087 Mon Sep 17 00:00:00 2001 From: Pol Febrer Date: Fri, 20 Oct 2023 13:55:50 +0200 Subject: [PATCH 05/17] make R larger than cell work --- src/sisl/geom/neighs/neighfinder.py | 146 ++++++++++++++---- .../geom/neighs/tests/test_neighfinder.py | 36 +++-- 2 files changed, 134 insertions(+), 48 deletions(-) diff --git a/src/sisl/geom/neighs/neighfinder.py b/src/sisl/geom/neighs/neighfinder.py index ce9451ecbe..7ed91eb764 100644 --- a/src/sisl/geom/neighs/neighfinder.py +++ b/src/sisl/geom/neighs/neighfinder.py @@ -1,5 +1,7 @@ from typing import Optional, Sequence, Tuple, Union +import itertools + import numpy as np from numbers import Real @@ -8,7 +10,6 @@ from sisl.utils.mathematics import fnorm from . import _neigh_operations - class NeighFinder: """Efficient linear scaling finding of neighbours. @@ -43,11 +44,15 @@ class NeighFinder: """ geometry: Geometry + # Geometry actually used for binning. Can be the provided geometry + # or a tiled geometry if the search radius is too big (compared to the lattice size). + _bins_geometry: Geometry nbins: Tuple[int, int, int] total_nbins: int - _R: Union[float, np.ndarray] + R: Union[float, np.ndarray] + _aux_R: Union[float, np.ndarray] # If the geometry has been tiled, R is also tiled here _sphere_overlap: bool # Data structure @@ -107,7 +112,8 @@ def setup_finder(self, R = self.geometry.atoms.maxR(all=True) # Set the radius - self._R = R + self.R = R + self._aux_R = R # If sphere overlap was not provided, set it to False if R is a single float # and True otherwise. @@ -117,7 +123,7 @@ def setup_finder(self, # Determine the bin_size as the maximum DIAMETER to ensure that we ALWAYS # only need to look one bin away for neighbors. - bin_size = np.max(self._R) * 2 + bin_size = np.max(self.R) * 2 # Check that the bin size is positive. if bin_size <= 0: raise ValueError("All R values are 0 or less. Please provide some positive values") @@ -129,28 +135,43 @@ def setup_finder(self, # We add a small amount to bin_size to avoid ambiguities when # a position is exactly at the center of a bin. bin_size += 0.01 - - # Get the number of bins along each cell direction. - nbins_float = fnorm(self.geometry.cell, axis=-1) / bin_size - self.nbins = tuple(np.floor(nbins_float).astype(int)) - self.total_nbins = np.prod(self.nbins) - if self.total_nbins == 0: + lattice_sizes = fnorm(self.geometry.cell, axis=-1) + + if bin_size > np.min(lattice_sizes): + self._R_too_big = True # This means that nsc must be at least 5. - # We round 1/nbins (i.e. the amount of cells needed in each direction) + # We round the amount of cells needed in each direction # to the closest next odd number. - nsc = np.ceil(1/nbins_float) // 2 * 2 + 1 + nsc = np.ceil(bin_size / lattice_sizes) // 2 * 2 + 1 # And then set it as the number of supercells. self.geometry.set_nsc(nsc.astype(int)) - raise ValueError( - "The diameter occupies more space than the whole unit cell," - "which means that we need to look for neighbours more than one unit cell away." - f"This is not yet supported by {self.__class__.__name__}" - ) + if isinstance(self._aux_R, np.ndarray): + self._aux_R = np.tile(self._aux_R, self.geometry.n_s) + + all_xyz = [] + for isc in self.geometry.sc_off: + ats_xyz = self.geometry.axyz(isc=isc) + all_xyz.append(ats_xyz) + + self._bins_geometry = Geometry(np.concatenate(all_xyz), atoms=self.geometry.atoms) + + # Recompute lattice sizes + lattice_sizes = fnorm(self._bins_geometry.cell, axis=-1) + + else: + # Nothing to modify, we can use the geometry and bin it as it is. + self._R_too_big = False + self._bins_geometry = self.geometry + + # Get the number of bins along each cell direction. + nbins_float = lattice_sizes / bin_size + self.nbins = tuple(np.floor(nbins_float).astype(int)) + self.total_nbins = np.prod(self.nbins) # Get the scalar bin indices of all atoms - scalar_bin_indices = self._get_bin_indices(self.geometry.fxyz) + scalar_bin_indices = self._get_bin_indices(self._bins_geometry.fxyz) # Build the tables that will allow us to look for neighbours in an efficient # and linear scaling manner. @@ -173,7 +194,7 @@ def assert_consistency(self): It also stores that the shape is consistent with the stored geometry and the store total_nbins. """ # Check shapes - assert self._list.shape == (self.geometry.na, ) + assert self._list.shape == (self._bins_geometry.na, ) assert self._counts.shape == self._heads.shape == (self.total_nbins, ) # Check values @@ -307,6 +328,40 @@ def _scalar_to_cartesian_index(self, index): second, first = np.divmod(index, self.nbins[0]) return np.array([first, second, third]).T + def _correct_pairs_R_too_big(self, + neighbour_pairs: np.ndarray, # (n_pairs, 5) + split_ind: Union[int, np.ndarray], # (n_queried_atoms, ) + pbc: np.ndarray # (3, ) + ): + """Correction to atom and supercell indices when the binning has been done on a tiled geometry""" + is_sc_neigh = neighbour_pairs[:, 1] >= self.geometry.na + + invalid = None + if not np.any(pbc): + invalid = is_sc_neigh + else: + pbc_neighs = neighbour_pairs.copy() + + sc_neigh, uc_neigh = np.divmod(neighbour_pairs[:, 1][is_sc_neigh], self.geometry.na) + isc_neigh = self.geometry.sc_off[sc_neigh] + + pbc_neighs[is_sc_neigh, 1] = uc_neigh + pbc_neighs[is_sc_neigh, 2:] = isc_neigh + + if not np.all(pbc): + invalid = pbc_neighs[:, 2:][:, ~ pbc].any(axis=1) + + neighbour_pairs = pbc_neighs + + if invalid is not None: + neighbour_pairs = neighbour_pairs[~ invalid] + if isinstance(split_ind, int): + split_ind = split_ind - invalid.sum() + else: + split_ind = split_ind - np.cumsum(invalid)[split_ind - 1] + + return neighbour_pairs, split_ind + def find_neighbours(self, atoms: AtomsArgument = None, as_pairs: bool = False, @@ -349,7 +404,7 @@ def find_neighbours(self, atoms = self.geometry._sanitize_atoms(atoms) # Cast R and pbc into arrays of appropiate shape and type. - thresholds = np.full(self.geometry.na, self._R, dtype=np.float64) + thresholds = np.full(self._bins_geometry.na, self._aux_R, dtype=np.float64) pbc = np.full(3, pbc, dtype=bool) # Get search indices @@ -363,10 +418,15 @@ def find_neighbours(self, max_pairs -= search_indices.shape[0] # Find the neighbour pairs - neighbour_pairs, split_ind = _neigh_operations.get_pairs( + neighbour_pairs, split_ind = _neigh_operations.get_pairs( atoms, search_indices, isc, self._heads, self._list, max_pairs, self_interaction, - self.geometry.xyz, self.geometry.cell, pbc, thresholds, self._sphere_overlap + self._bins_geometry.xyz, self._bins_geometry.cell, pbc, thresholds, self._sphere_overlap ) + + # Correct neighbour indices for the case where R was too big and + # we needed to create an auxiliary supercell. + if self._R_too_big: + neighbour_pairs, split_ind = self._correct_pairs_R_too_big(neighbour_pairs, split_ind, pbc) if as_pairs: # Just return the neighbour pairs @@ -413,12 +473,29 @@ def find_all_unique_pairs(self, Each pair `ij` means that `j` is a neighbour of `i`. The three extra columns are the supercell indices of atom `j`. """ - if not self._sphere_overlap and not isinstance(self._R, Real): + if not self._sphere_overlap and not isinstance(self._aux_R, Real): raise ValueError("Unique atom pairs do not make sense if we are not looking for sphere overlaps." " Please setup the finder again setting `sphere_overlap` to `True` if you wish so.") + # In the case where we tiled the geometry to do the binning, it is much better to + # just find all neighbours and then drop duplicate connections. Otherwise it is a bit of a mess. + if self._R_too_big: + # Find all neighbours + all_neighbours = self.find_neighbours(as_pairs=True, self_interaction=self_interaction, pbc=pbc) + + # Find out which of the pairs are uc connections + is_uc_neigh = ~ np.any(all_neighbours[:, 2:], axis=1) + + # Create an array with unit cell connections where duplicates are removed + unique_uc = np.unique(np.sort(all_neighbours[is_uc_neigh][:, :2]), axis=0) + uc_neighbours = np.zeros((len(unique_uc), 5), dtype=int) + uc_neighbours[:, :2] = unique_uc + + # Concatenate the uc connections with the rest of the connections. + return np.concatenate((uc_neighbours, all_neighbours[~is_uc_neigh])) + # Cast R and pbc into arrays of appropiate shape and type. - thresholds = np.full(self.geometry.na, self._R, dtype=np.float64) + thresholds = np.full(self.geometry.na, self.R, dtype=np.float64) pbc = np.full(3, pbc, dtype=bool) # Get search indices @@ -431,13 +508,13 @@ def find_all_unique_pairs(self, if not self_interaction: max_pairs -= search_indices.shape[0] - # Find the candidate pairs - candidate_pairs, n_pairs = _neigh_operations.get_all_unique_pairs( + # Find all unique neighbour pairs + neighbour_pairs, n_pairs = _neigh_operations.get_all_unique_pairs( search_indices, isc, self._heads, self._list, max_pairs, self_interaction, self.geometry.xyz, self.geometry.cell, pbc, thresholds, self._sphere_overlap ) - - return candidate_pairs[:n_pairs] + + return neighbour_pairs[:n_pairs] def find_close(self, xyz: Sequence, @@ -473,12 +550,12 @@ def find_close(self, A list containing a numpy array of shape (n_neighs, 4) for each atom. """ # Cast R and pbc into arrays of appropiate shape and type. - thresholds = np.full(self.geometry.na, self._R, dtype=np.float64) + thresholds = np.full(self._bins_geometry.na, self._aux_R, dtype=np.float64) pbc = np.full(3, pbc, dtype=bool) xyz = np.atleast_2d(xyz) # Get search indices - search_indices, isc = self._get_search_indices(xyz.dot(self.geometry.icell.T) % 1, cartesian=False) + search_indices, isc = self._get_search_indices(xyz.dot(self._bins_geometry.icell.T) % 1, cartesian=False) # Get atom counts at_counts = self._get_search_atom_counts(search_indices) @@ -488,9 +565,14 @@ def find_close(self, # Find the neighbour pairs neighbour_pairs, split_ind = _neigh_operations.get_close( xyz, search_indices, isc, self._heads, self._list, max_pairs, - self.geometry.xyz, self.geometry.cell, pbc, thresholds + self._bins_geometry.xyz, self._bins_geometry.cell, pbc, thresholds ) - + + # Correct neighbour indices for the case where R was too big and + # we needed to create an auxiliary supercell. + if self._R_too_big: + neighbour_pairs, split_ind = self._correct_pairs_R_too_big(neighbour_pairs, split_ind, pbc) + if as_pairs: # Just return the neighbour pairs return neighbour_pairs[:split_ind[-1]] diff --git a/src/sisl/geom/neighs/tests/test_neighfinder.py b/src/sisl/geom/neighs/tests/test_neighfinder.py index 9ae6b9246f..0004c0c502 100644 --- a/src/sisl/geom/neighs/tests/test_neighfinder.py +++ b/src/sisl/geom/neighs/tests/test_neighfinder.py @@ -81,11 +81,11 @@ def test_neighfinder_setup(sphere_overlap, multiR, post_setup): # Check that R is properly set when its a scalar and an array. if multiR: - assert isinstance(finder._R, np.ndarray) - assert np.all(finder._R == R) + assert isinstance(finder.R, np.ndarray) + assert np.all(finder.R == R) else: - assert isinstance(finder._R, float) - assert finder._R == R + assert isinstance(finder.R, float) + assert finder.R == R # Assert that we have stored a copy of the geometry assert isinstance(finder.geometry, Geometry) @@ -149,7 +149,7 @@ def test_neighbours_lists(neighfinder, self_interaction, pbc, expected_neighs): def test_all_unique_pairs(neighfinder, self_interaction, pbc, expected_neighs): - if isinstance(neighfinder._R, np.ndarray) and not neighfinder._sphere_overlap: + if isinstance(neighfinder.R, np.ndarray) and not neighfinder._sphere_overlap: with pytest.raises(ValueError): neighfinder.find_all_unique_pairs(self_interaction=self_interaction, pbc=pbc) return @@ -172,21 +172,12 @@ def test_all_unique_pairs(neighfinder, self_interaction, pbc, expected_neighs): unique_neighs.append(neigh_pair) assert neighs.shape == (len(unique_neighs), 5) - return - - # Check that supercell indices are all 0 - assert np.all(neighs[:, 2:] == 0) - - # Check that the atom indices are correct - expected_neighs = [[0, 1]] if not self_interaction else [[0, 0], [0, 1], [1, 1]] - - assert np.all(neighs[:, :2] == expected_neighs) def test_close(neighfinder, pbc): neighs = neighfinder.find_close([0.3, 0, 0], as_pairs=True, pbc=pbc) expected_neighs = [[0, 1, 0, 0, 0], [0, 0, 0, 0, 0]] - if pbc and isinstance(neighfinder._R, float): + if pbc and isinstance(neighfinder.R, float): expected_neighs.append([0, 2, -1, 0, 0]) assert neighs.shape == (len(expected_neighs), 5) @@ -229,8 +220,21 @@ def test_R_too_big(pbc): expected_neighs = [[0, 1, 0, 0, 0]] if pbc: - expected_neighs.append([1, 0, 1, 0, 0]) expected_neighs.append([0, 1, -1, 0, 0]) + expected_neighs.append([1, 0, 1, 0, 0]) + + assert neighs.shape == (len(expected_neighs), 5) + assert np.all(neighs == expected_neighs) + + neighfinder = NeighFinder(geom, R=[0.6, 2.2]) + + neighs = neighfinder.find_close([[0.5, 0, 0]], as_pairs=True, pbc=pbc) + + expected_neighs = [[0, 1, 0, 0, 0], [0, 0, 0, 0, 0]] + if pbc: + expected_neighs.insert(0, [0, 1, -1, 0, 0]) + + print(neighs, expected_neighs) assert neighs.shape == (len(expected_neighs), 5) assert np.all(neighs == expected_neighs) From 706c0df702f265c3a40c571bfd87936e54939974 Mon Sep 17 00:00:00 2001 From: Pol Febrer Date: Fri, 20 Oct 2023 14:23:31 +0200 Subject: [PATCH 06/17] remove itertools --- src/sisl/geom/neighs/neighfinder.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/sisl/geom/neighs/neighfinder.py b/src/sisl/geom/neighs/neighfinder.py index 7ed91eb764..bd6f91de6d 100644 --- a/src/sisl/geom/neighs/neighfinder.py +++ b/src/sisl/geom/neighs/neighfinder.py @@ -1,7 +1,5 @@ from typing import Optional, Sequence, Tuple, Union -import itertools - import numpy as np from numbers import Real From e8b9197995f161488fde8cdb69833ad27218da35 Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Fri, 24 Nov 2023 14:57:08 +0100 Subject: [PATCH 07/17] ran black and isort Signed-off-by: Nick Papior --- src/sisl/geom/__init__.py | 2 +- src/sisl/geom/neighs/_neigh_operations.py | 211 +++++++----- src/sisl/geom/neighs/neighfinder.py | 299 +++++++++++------- .../geom/neighs/tests/test_neighfinder.py | 81 +++-- 4 files changed, 369 insertions(+), 224 deletions(-) diff --git a/src/sisl/geom/__init__.py b/src/sisl/geom/__init__.py index 1694c9344d..910e7bf731 100644 --- a/src/sisl/geom/__init__.py +++ b/src/sisl/geom/__init__.py @@ -49,6 +49,6 @@ from .flat import * from .nanoribbon import * from .nanotube import * +from .neighs import * from .special import * from .surfaces import * -from .neighs import * diff --git a/src/sisl/geom/neighs/_neigh_operations.py b/src/sisl/geom/neighs/_neigh_operations.py index 3a7adaa96a..6dac016573 100644 --- a/src/sisl/geom/neighs/_neigh_operations.py +++ b/src/sisl/geom/neighs/_neigh_operations.py @@ -2,36 +2,35 @@ from __future__ import annotations import cython - -from cython.cimports.libc.math import sqrt import cython.cimports.numpy as cnp - import numpy as np +from cython.cimports.libc.math import sqrt + @cython.boundscheck(False) @cython.wraparound(False) def build_table(nbins: cnp.int64_t, bin_indices: cnp.int64_t[:]): """Builds the table where the location of all atoms is encoded - + Additionally, it also builds an array with the number of atoms at each bin. - + Parameters ---------- nbins: int Total number of bins bin_indices: array of int An array containing the bin index of each atom. - + Returns ---------- - list: + list: contains the list of atoms, modified to encode all bin locations. Each item in the list contains the index of - the next atom that we can find in the same bin. + the next atom that we can find in the same bin. If an item is -1, it means that there are no more atoms in the same bin. - heads: + heads: For each bin, the index of `list` where we can find the first atom that is contained in it. counts: @@ -42,12 +41,12 @@ def build_table(nbins: cnp.int64_t, bin_indices: cnp.int64_t[:]): list_array: cnp.int64_t[:] = np.zeros(n_atoms, dtype=np.int64) counts: cnp.int64_t[:] = np.zeros(nbins, dtype=np.int64) heads: cnp.int64_t[:] = np.full(nbins, -1, dtype=np.int64) - + # Loop through all atoms for at in range(n_atoms): # Get the index of the bin where this atom is located. bin_index = bin_indices[at] - + # Replace the head of this bin by the current atom index. # Before replacing, store the previous head in this atoms' # position in the list. @@ -56,18 +55,29 @@ def build_table(nbins: cnp.int64_t, bin_indices: cnp.int64_t[:]): # Update the count of this bin (increment it by 1). counts[bin_index] = counts[bin_index] + 1 - + # Return the memory views as numpy arrays return np.asarray(list_array), np.asarray(heads), np.asarray(counts) + @cython.boundscheck(False) @cython.wraparound(False) -def get_pairs(at_indices: cnp.int64_t[:], indices: cnp.int64_t[:, :], iscs: cnp.int64_t[:, :, :], - heads: cnp.int64_t[:], list_array: cnp.int64_t[:], max_npairs: cnp.int64_t, - self_interaction: cython.bint, xyz: cnp.float64_t[:, :], cell: cnp.float64_t[:, :], - pbc: cnp.npy_bool[:], thresholds: cnp.float64_t[:], sphere_overlap: cython.bint): +def get_pairs( + at_indices: cnp.int64_t[:], + indices: cnp.int64_t[:, :], + iscs: cnp.int64_t[:, :, :], + heads: cnp.int64_t[:], + list_array: cnp.int64_t[:], + max_npairs: cnp.int64_t, + self_interaction: cython.bint, + xyz: cnp.float64_t[:, :], + cell: cnp.float64_t[:, :], + pbc: cnp.npy_bool[:], + thresholds: cnp.float64_t[:], + sphere_overlap: cython.bint, +): """Gets (possibly duplicated) pairs of neighbour atoms. - + Parameters --------- at_indices: @@ -85,8 +95,8 @@ def get_pairs(at_indices: cnp.int64_t[:], indices: cnp.int64_t[:, :], iscs: cnp. list_array: contains the list of atoms, modified to encode all bin locations. Each item in the list contains the index of - the next atom that we can find in the same bin. - If an item is -1, it means that there are no more + the next atom that we can find in the same bin. + If an item is -1, it means that there are no more atoms in the same bin. This array is constructed by `build_table`. max_npairs: @@ -112,10 +122,10 @@ def get_pairs(at_indices: cnp.int64_t[:], indices: cnp.int64_t[:, :], iscs: cnp. If false, two atoms are considered neighbours if the second atom is within the sphere of the first atom. Note that this implies that atom `i` might be atom `j`'s neighbour while the opposite is not true. - + Returns -------- - neighs: + neighs: contains the atom indices of all pairs of neighbor atoms. split_indices: array containing the breakpoints in `neighs`. These @@ -126,11 +136,11 @@ def get_pairs(at_indices: cnp.int64_t[:], indices: cnp.int64_t[:, :], iscs: cnp. N_ind = at_indices.shape[0] i_pair: cython.int = 0 - - ref_xyz: cnp.float64_t[:] = np.zeros(3, dtype=np.float64) + + ref_xyz: cnp.float64_t[:] = np.zeros(3, dtype=np.float64) neigh_isc: cnp.int64_t[:] = np.zeros(3, dtype=np.int64) - neighs: cnp.int64_t[:,:] = np.zeros((max_npairs, 5), dtype=np.int64) + neighs: cnp.int64_t[:, :] = np.zeros((max_npairs, 5), dtype=np.int64) split_indices: cnp.int64_t[:] = np.zeros(N_ind, dtype=np.int64) for search_index in range(N_ind): @@ -150,9 +160,11 @@ def get_pairs(at_indices: cnp.int64_t[:], indices: cnp.int64_t[:, :], iscs: cnp. # Find the supercell indices for this bin neigh_isc[:] = iscs[search_index, j, :] - + # And check if this bin corresponds to the unit cell - not_unit_cell: cython.bint = neigh_isc[0] != 0 or neigh_isc[1] != 0 or neigh_isc[2] != 0 + not_unit_cell: cython.bint = ( + neigh_isc[0] != 0 or neigh_isc[1] != 0 or neigh_isc[2] != 0 + ) ref_xyz[:] = xyz[at, :] if not_unit_cell: @@ -165,13 +177,18 @@ def get_pairs(at_indices: cnp.int64_t[:], indices: cnp.int64_t[:, :], iscs: cnp. should_not_check = True if should_not_check: continue - # Otherwise, move the atom to the neighbor cell. We do this - # instead of moving potential neighbors to the unit cell + # Otherwise, move the atom to the neighbor cell. We do this + # instead of moving potential neighbors to the unit cell # because in this way we reduce the number of operations. for i in range(3): - ref_xyz[i] = ref_xyz[i] - cell[0, i] * neigh_isc[0] - cell[1, i] * neigh_isc[1] - cell[2, i] * neigh_isc[2] - - # Loop through all atoms that are in this bin. + ref_xyz[i] = ( + ref_xyz[i] + - cell[0, i] * neigh_isc[0] + - cell[1, i] * neigh_isc[1] + - cell[2, i] * neigh_isc[2] + ) + + # Loop through all atoms that are in this bin. # If neigh_at == -1, this means no more atoms are in this bin. while neigh_at >= 0: # If this is a self interaction and the user didn't want them, @@ -179,12 +196,12 @@ def get_pairs(at_indices: cnp.int64_t[:], indices: cnp.int64_t[:, :], iscs: cnp. if not self_interaction and at == neigh_at: neigh_at = list_array[neigh_at] continue - + # Calculate the distance between the atom and the potential # neighbor. dist: float = 0.0 for i in range(3): - dist += (xyz[neigh_at, i] - ref_xyz[i])**2 + dist += (xyz[neigh_at, i] - ref_xyz[i]) ** 2 dist = sqrt(dist) # Get the threshold for this pair of atoms @@ -193,7 +210,7 @@ def get_pairs(at_indices: cnp.int64_t[:], indices: cnp.int64_t[:, :], iscs: cnp. # If the user wants to check for sphere overlaps, we have # to sum the radius of the neighbor to the threshold threshold = threshold + thresholds[neigh_at] - + if dist < threshold: # Store the pair of neighbours. neighs[i_pair, 0] = at @@ -213,14 +230,24 @@ def get_pairs(at_indices: cnp.int64_t[:], indices: cnp.int64_t[:, :], iscs: cnp. return np.asarray(neighs), np.asarray(split_indices) + @cython.boundscheck(False) @cython.wraparound(False) -def get_all_unique_pairs(indices: cnp.int64_t[:, :], iscs: cnp.int64_t[:, :, :], - heads: cnp.int64_t[:], list_array: cnp.int64_t[:], max_npairs: cnp.int64_t, - self_interaction: cython.bint, xyz: cnp.float64_t[:, :], cell: cnp.float64_t[:, :], - pbc: cnp.npy_bool[:], thresholds: cnp.float64_t[:], sphere_overlap: cython.bint): +def get_all_unique_pairs( + indices: cnp.int64_t[:, :], + iscs: cnp.int64_t[:, :, :], + heads: cnp.int64_t[:], + list_array: cnp.int64_t[:], + max_npairs: cnp.int64_t, + self_interaction: cython.bint, + xyz: cnp.float64_t[:, :], + cell: cnp.float64_t[:, :], + pbc: cnp.npy_bool[:], + thresholds: cnp.float64_t[:], + sphere_overlap: cython.bint, +): """Gets all unique pairs of atoms that are neighbours. - + Parameters --------- indices: @@ -235,8 +262,8 @@ def get_all_unique_pairs(indices: cnp.int64_t[:, :], iscs: cnp.int64_t[:, :, :], list_array: contains the list of atoms, modified to encode all bin locations. Each item in the list contains the index of - the next atom that we can find in the same bin. - If an item is -1, it means that there are no more + the next atom that we can find in the same bin. + If an item is -1, it means that there are no more atoms in the same bin. This array is constructed by `build_table`. max_npairs: @@ -262,12 +289,12 @@ def get_all_unique_pairs(indices: cnp.int64_t[:, :], iscs: cnp.int64_t[:, :, :], If false, two atoms are considered neighbours if the second atom is within the sphere of the first atom. Note that this implies that atom `i` might be atom `j`'s neighbour while the opposite is not true. - + Returns -------- - neighs: + neighs: contains the atom indices of all unique neighbor pairs - First column of atoms is sorted in increasing order. + First column of atoms is sorted in increasing order. Columns 3 to 5 contain the supercell indices of the neighbour atom (the one in column 2, column 1 atoms are always in the unit cell). @@ -276,24 +303,23 @@ def get_all_unique_pairs(indices: cnp.int64_t[:, :], iscs: cnp.int64_t[:, :, :], will be less than `max_npairs`. This should presumably be used to slice the `neighs` array once in python. """ - + N_ats = list_array.shape[0] i_pair: cython.int = 0 - + ref_xyz: cnp.float64_t[:] = np.zeros(3, dtype=np.float64) neigh_isc: cnp.int64_t[:] = np.zeros(3, dtype=np.int64) - neighs: cnp.int64_t[:,:] = np.zeros((max_npairs, 5), dtype=np.int64) + neighs: cnp.int64_t[:, :] = np.zeros((max_npairs, 5), dtype=np.int64) for at in range(N_ats): - if self_interaction: # Add the self interaction neighs[i_pair, 0] = at neighs[i_pair, 1] = at neighs[i_pair, 2:] = 0 - + # Increment the pair index i_pair = i_pair + 1 @@ -311,9 +337,11 @@ def get_all_unique_pairs(indices: cnp.int64_t[:, :], iscs: cnp.int64_t[:, :, :], # Find the supercell indices for this bin neigh_isc[:] = iscs[at, j, :] - + # And check if this bin corresponds to the unit cell - not_unit_cell: cython.bint = neigh_isc[0] != 0 or neigh_isc[1] != 0 or neigh_isc[2] != 0 + not_unit_cell: cython.bint = ( + neigh_isc[0] != 0 or neigh_isc[1] != 0 or neigh_isc[2] != 0 + ) ref_xyz[:] = xyz[at, :] if not_unit_cell: @@ -326,13 +354,18 @@ def get_all_unique_pairs(indices: cnp.int64_t[:, :], iscs: cnp.int64_t[:, :, :], should_not_check = True if should_not_check: continue - # Otherwise, move the atom to the neighbor cell. We do this - # instead of moving potential neighbors to the unit cell + # Otherwise, move the atom to the neighbor cell. We do this + # instead of moving potential neighbors to the unit cell # because in this way we reduce the number of operations. for i in range(3): - ref_xyz[i] = ref_xyz[i] - cell[0, i] * neigh_isc[0] - cell[1, i] * neigh_isc[1] - cell[2, i] * neigh_isc[2] - - # Loop through all atoms that are in this bin. + ref_xyz[i] = ( + ref_xyz[i] + - cell[0, i] * neigh_isc[0] + - cell[1, i] * neigh_isc[1] + - cell[2, i] * neigh_isc[2] + ) + + # Loop through all atoms that are in this bin. # If neigh_at == -1, this means no more atoms are in this bin. while neigh_at >= 0: # If neigh_at is smaller than at, we already stored @@ -341,14 +374,14 @@ def get_all_unique_pairs(indices: cnp.int64_t[:, :], iscs: cnp.int64_t[:, :, :], # So we can just move to the next bin. However, if # we are checking a neighboring cell, this connection # will always be unique. - if (not not_unit_cell and neigh_at <= at): + if not not_unit_cell and neigh_at <= at: break # Calculate the distance between the atom and the potential # neighbor. dist: float = 0.0 for i in range(3): - dist += (xyz[neigh_at, i] - ref_xyz[i])**2 + dist += (xyz[neigh_at, i] - ref_xyz[i]) ** 2 dist = sqrt(dist) # Get the threshold for this pair of atoms @@ -357,7 +390,7 @@ def get_all_unique_pairs(indices: cnp.int64_t[:, :], iscs: cnp.int64_t[:, :, :], # If the user wants to check for sphere overlaps, we have # to sum the radius of the neighbor to the threshold threshold = threshold + thresholds[neigh_at] - + if dist < threshold: # Store the pair of neighbours. neighs[i_pair, 0] = at @@ -373,16 +406,25 @@ def get_all_unique_pairs(indices: cnp.int64_t[:, :], iscs: cnp.int64_t[:, :, :], neigh_at = list_array[neigh_at] # Return the array of neighbours, but only the filled part - return np.asarray(neighs[:i_pair + 1]), i_pair + return np.asarray(neighs[: i_pair + 1]), i_pair + @cython.boundscheck(False) @cython.wraparound(False) -def get_close(search_xyz: cnp.float64_t[:, :], indices: cnp.int64_t[:, :], iscs: cnp.int64_t[:, :, :], - heads: cnp.int64_t[:], list_array: cnp.int64_t[:], max_npairs: cnp.int64_t, - xyz: cnp.float64_t[:, :], cell: cnp.float64_t[:, :], - pbc: cnp.npy_bool[:], thresholds: cnp.float64_t[:]): +def get_close( + search_xyz: cnp.float64_t[:, :], + indices: cnp.int64_t[:, :], + iscs: cnp.int64_t[:, :, :], + heads: cnp.int64_t[:], + list_array: cnp.int64_t[:], + max_npairs: cnp.int64_t, + xyz: cnp.float64_t[:, :], + cell: cnp.float64_t[:, :], + pbc: cnp.npy_bool[:], + thresholds: cnp.float64_t[:], +): """Gets the atoms that are close to given positions - + Parameters --------- search_xyz: @@ -400,8 +442,8 @@ def get_close(search_xyz: cnp.float64_t[:, :], indices: cnp.int64_t[:, :], iscs: list_array: contains the list of atoms, modified to encode all bin locations. Each item in the list contains the index of - the next atom that we can find in the same bin. - If an item is -1, it means that there are no more + the next atom that we can find in the same bin. + If an item is -1, it means that there are no more atoms in the same bin. This array is constructed by `build_table`. max_npairs: @@ -427,10 +469,10 @@ def get_close(search_xyz: cnp.float64_t[:, :], indices: cnp.int64_t[:, :], iscs: If false, two atoms are considered neighbours if the second atom is within the sphere of the first atom. Note that this implies that atom `i` might be atom `j`'s neighbour while the opposite is not true. - + Returns -------- - neighs: + neighs: contains the atom indices of all pairs of neighbor atoms. split_indices: array containing the breakpoints in `neighs`. These @@ -438,15 +480,15 @@ def get_close(search_xyz: cnp.float64_t[:, :], indices: cnp.int64_t[:, :], iscs: for each 8 bin search. It can be used later to quickly split `neighs` on the multiple individual searches. """ - + N_ind = search_xyz.shape[0] i_pair: cython.int = 0 - + ref_xyz: cnp.float64_t[:] = np.zeros(3, dtype=np.float64) neigh_isc: cnp.int64_t[:] = np.zeros(3, dtype=np.int64) - neighs: cnp.int64_t[:,:] = np.zeros((max_npairs, 5), dtype=np.int64) + neighs: cnp.int64_t[:, :] = np.zeros((max_npairs, 5), dtype=np.int64) split_indices: cnp.int64_t[:] = np.zeros(N_ind, dtype=np.int64) for search_index in range(N_ind): @@ -464,9 +506,11 @@ def get_close(search_xyz: cnp.float64_t[:, :], indices: cnp.int64_t[:, :], iscs: # Find the supercell indices for this bin neigh_isc[:] = iscs[search_index, j, :] - + # And check if this bin corresponds to the unit cell - not_unit_cell: cython.bint = neigh_isc[0] != 0 or neigh_isc[1] != 0 or neigh_isc[2] != 0 + not_unit_cell: cython.bint = ( + neigh_isc[0] != 0 or neigh_isc[1] != 0 or neigh_isc[2] != 0 + ) ref_xyz[:] = search_xyz[search_index, :] if not_unit_cell: @@ -479,25 +523,30 @@ def get_close(search_xyz: cnp.float64_t[:, :], indices: cnp.int64_t[:, :], iscs: should_not_check = True if should_not_check: continue - # Otherwise, move the atom to the neighbor cell. We do this - # instead of moving potential neighbors to the unit cell + # Otherwise, move the atom to the neighbor cell. We do this + # instead of moving potential neighbors to the unit cell # because in this way we reduce the number of operations. for i in range(3): - ref_xyz[i] = ref_xyz[i] - cell[0, i] * neigh_isc[0] - cell[1, i] * neigh_isc[1] - cell[2, i] * neigh_isc[2] - - # Loop through all atoms that are in this bin. + ref_xyz[i] = ( + ref_xyz[i] + - cell[0, i] * neigh_isc[0] + - cell[1, i] * neigh_isc[1] + - cell[2, i] * neigh_isc[2] + ) + + # Loop through all atoms that are in this bin. # If neigh_at == -1, this means no more atoms are in this bin. while neigh_at >= 0: # Calculate the distance between the atom and the potential # neighbor. dist: float = 0.0 for i in range(3): - dist += (xyz[neigh_at, i] - ref_xyz[i])**2 + dist += (xyz[neigh_at, i] - ref_xyz[i]) ** 2 dist = sqrt(dist) # Get the threshold for the potential neighbour threshold: float = thresholds[neigh_at] - + if dist < threshold: # Store the pair of neighbours. neighs[i_pair, 0] = search_index diff --git a/src/sisl/geom/neighs/neighfinder.py b/src/sisl/geom/neighs/neighfinder.py index bd6f91de6d..457a609cdf 100644 --- a/src/sisl/geom/neighs/neighfinder.py +++ b/src/sisl/geom/neighs/neighfinder.py @@ -1,21 +1,23 @@ +from numbers import Real from typing import Optional, Sequence, Tuple, Union import numpy as np -from numbers import Real -from sisl.typing import AtomsArgument from sisl import Geometry +from sisl.typing import AtomsArgument from sisl.utils.mathematics import fnorm + from . import _neigh_operations + class NeighFinder: """Efficient linear scaling finding of neighbours. Once this class is instantiated, a table is build. Then, the neighbour finder can be queried as many times as wished - as long as the geometry doesn't change. - - Note that the radius (`R`) is used to build the table. + as long as the geometry doesn't change. + + Note that the radius (`R`) is used to build the table. Therefore, if one wants to look for neighbours using a different R, one needs to create another finder or call `setup_finder`. @@ -24,7 +26,7 @@ class NeighFinder: geometry: sisl.Geometry the geometry to find neighbours in R: float or array-like of shape (geometry.na), optional - The radius to consider two atoms neighbours. + The radius to consider two atoms neighbours. If it is a single float, the same radius is used for all atoms. If it is an array, it should contain the radius for each atom. @@ -50,36 +52,40 @@ class NeighFinder: total_nbins: int R: Union[float, np.ndarray] - _aux_R: Union[float, np.ndarray] # If the geometry has been tiled, R is also tiled here + _aux_R: Union[ + float, np.ndarray + ] # If the geometry has been tiled, R is also tiled here _sphere_overlap: bool # Data structure - _list: np.ndarray # (natoms, ) - _heads: np.ndarray # (total_nbins, ) - _counts: np.ndarray # (total_nbins, ) + _list: np.ndarray # (natoms, ) + _heads: np.ndarray # (total_nbins, ) + _counts: np.ndarray # (total_nbins, ) - def __init__(self, - geometry: Geometry, + def __init__( + self, + geometry: Geometry, R: Union[float, np.ndarray, None] = None, - sphere_overlap: Optional[bool] = None + sphere_overlap: Optional[bool] = None, ): self.setup_finder(geometry, R=R, sphere_overlap=sphere_overlap) - - def setup_finder(self, - geometry: Optional[Geometry] = None, - R: Union[float, np.ndarray, None] = None, - sphere_overlap: Optional[bool] = None + + def setup_finder( + self, + geometry: Optional[Geometry] = None, + R: Union[float, np.ndarray, None] = None, + sphere_overlap: Optional[bool] = None, ): """Prepares everything for neighbour finding. - + Parameters ---------- geometry: sisl.Geometry, optional the geometry to find neighbours in. - + If not provided, the current geometry is kept. R: float or array-like of shape (geometry.na), optional - The radius to consider two atoms neighbours. + The radius to consider two atoms neighbours. If it is a single float, the same radius is used for all atoms. If it is an array, it should contain the radius for each atom. @@ -100,7 +106,7 @@ def setup_finder(self, # Set the geometry. Copy it because we may need to modify the supercell size. if geometry is not None: self.geometry = geometry.copy() - + # If R is not a single float, make sure it is a numpy array R_is_float = isinstance(R, Real) if not R_is_float: @@ -108,11 +114,11 @@ def setup_finder(self, # If R was not provided, build an array of Rs if R is None: R = self.geometry.atoms.maxR(all=True) - + # Set the radius self.R = R self._aux_R = R - + # If sphere overlap was not provided, set it to False if R is a single float # and True otherwise. if sphere_overlap is None: @@ -124,18 +130,20 @@ def setup_finder(self, bin_size = np.max(self.R) * 2 # Check that the bin size is positive. if bin_size <= 0: - raise ValueError("All R values are 0 or less. Please provide some positive values") + raise ValueError( + "All R values are 0 or less. Please provide some positive values" + ) if sphere_overlap: # In the case when we want to check for sphere overlap, the size should # be twice as big. bin_size *= 2 - + # We add a small amount to bin_size to avoid ambiguities when # a position is exactly at the center of a bin. bin_size += 0.01 lattice_sizes = fnorm(self.geometry.cell, axis=-1) - + if bin_size > np.min(lattice_sizes): self._R_too_big = True # This means that nsc must be at least 5. @@ -153,7 +161,9 @@ def setup_finder(self, ats_xyz = self.geometry.axyz(isc=isc) all_xyz.append(ats_xyz) - self._bins_geometry = Geometry(np.concatenate(all_xyz), atoms=self.geometry.atoms) + self._bins_geometry = Geometry( + np.concatenate(all_xyz), atoms=self.geometry.atoms + ) # Recompute lattice sizes lattice_sizes = fnorm(self._bins_geometry.cell, axis=-1) @@ -162,38 +172,40 @@ def setup_finder(self, # Nothing to modify, we can use the geometry and bin it as it is. self._R_too_big = False self._bins_geometry = self.geometry - + # Get the number of bins along each cell direction. nbins_float = lattice_sizes / bin_size self.nbins = tuple(np.floor(nbins_float).astype(int)) self.total_nbins = np.prod(self.nbins) - + # Get the scalar bin indices of all atoms scalar_bin_indices = self._get_bin_indices(self._bins_geometry.fxyz) - + # Build the tables that will allow us to look for neighbours in an efficient # and linear scaling manner. self._build_table(scalar_bin_indices) - + def _build_table(self, bin_indices): """Builds the tables that will allow efficient linear scaling neighbour search. - + Parameters ---------- bin_indices: array-like of shape (self.total_nbins, ) Array containing the scalar bin index for each atom. """ # Call the fortran routine that builds the table - self._list, self._heads, self._counts = _neigh_operations.build_table(self.total_nbins, bin_indices) + self._list, self._heads, self._counts = _neigh_operations.build_table( + self.total_nbins, bin_indices + ) def assert_consistency(self): """Asserts that the data structure (self._list, self._heads, self._counts) is consistent. - + It also stores that the shape is consistent with the stored geometry and the store total_nbins. """ # Check shapes - assert self._list.shape == (self._bins_geometry.na, ) - assert self._counts.shape == self._heads.shape == (self.total_nbins, ) + assert self._list.shape == (self._bins_geometry.na,) + assert self._counts.shape == self._heads.shape == (self.total_nbins,) # Check values for i_bin, bin_count in enumerate(self._counts): @@ -202,23 +214,25 @@ def assert_consistency(self): while head != -1: count += 1 head = self._list[head] - - assert count == bin_count, f"Bin {i_bin} has {bin_count} atoms but we found {count} atoms" - + + assert ( + count == bin_count + ), f"Bin {i_bin} has {bin_count} atoms but we found {count} atoms" + def _get_search_atom_counts(self, scalar_bin_indices): """Computes the number of atoms that will be explored for each search - + Parameters ----------- scalar_bin_indices: np.ndarray of shape ([n_searches], 8) - Array containing the bin indices for each search. + Array containing the bin indices for each search. Bin indices should be within the unit cell! Returns ----------- np.ndarray of shape (n_searches, ): For each search, the number of atoms that will be involved. - """ + """ return self._counts[scalar_bin_indices.ravel()].reshape(-1, 8).sum(axis=1) def _get_bin_indices(self, fxyz, cartesian=False, floor=True): @@ -232,8 +246,8 @@ def _get_bin_indices(self, fxyz, cartesian=False, floor=True): whether the indices should be returned as cartesian. If `False`, scalar indices are returned. floor: bool, optional - whether to floor the indices or not. - + whether to floor the indices or not. + If asking for scalar indices (i.e. `cartesian=False`), the indices will ALWAYS be floored regardless of this argument. @@ -248,18 +262,18 @@ def _get_bin_indices(self, fxyz, cartesian=False, floor=True): # which would result in a bin index outside of range. # We just bring it back to the unit cell. bin_indices = bin_indices % self.nbins - + if floor or not cartesian: bin_indices = np.floor(bin_indices).astype(int) - + if not cartesian: bin_indices = self._cartesian_to_scalar_index(bin_indices) - + return bin_indices - + def _get_search_indices(self, fxyz, cartesian=False): """Gets the bin indices to explore for a given fractional coordinate. - + Given a fractional coordinate, we will need to look for neighbours in its own bin, and one bin away in each direction. That is, $2^3=8$ bins. @@ -288,48 +302,53 @@ def _get_search_indices(self, fxyz, cartesian=False): # border of the bin are we closer in each direction. bin_indices = self._get_bin_indices(fxyz, floor=False, cartesian=True) bin_indices = np.atleast_2d(bin_indices) - + # Determine which is the neighbouring cell that we need to look for # along each direction. signs = np.ones(bin_indices.shape, dtype=int) signs[(bin_indices % 1) < 0.5] = -1 - + # Build and arrays with all the indices that we need to look for. Since # we have to move one bin away in each direction, we have to look for # neighbors along a total of 8 bins (2**3) search_indices = np.tile(bin_indices.astype(int), 8).reshape(-1, 8, 3) - + search_indices[:, 1::2, 0] += signs[:, 0].reshape(-1, 1) - search_indices[:, [2,3,6,7], 1] += signs[:, 1].reshape(-1, 1) + search_indices[:, [2, 3, 6, 7], 1] += signs[:, 1].reshape(-1, 1) search_indices[:, 4:, 2] += signs[:, 2].reshape(-1, 1) - + # Convert search indices to unit cell indices, but keep the supercell indices. isc, search_indices = np.divmod(search_indices, self.nbins) if not cartesian: search_indices = self._cartesian_to_scalar_index(search_indices) - + return search_indices, isc - + def _cartesian_to_scalar_index(self, index): """Converts cartesian indices to scalar indices""" if not np.issubdtype(index.dtype, int): - raise ValueError("Decimal scalar indices do not make sense, please floor your cartesian indices.") + raise ValueError( + "Decimal scalar indices do not make sense, please floor your cartesian indices." + ) return index.dot([1, self.nbins[0], self.nbins[0] * self.nbins[1]]) - + def _scalar_to_cartesian_index(self, index): """Converts cartesian indices to scalar indices""" if np.min(index) < 0 or np.max(index) > self.total_nbins: - raise ValueError("Some scalar indices are not within the unit cell. We cannot uniquely convert to cartesian") - - third, index = np.divmod(index, self.nbins[0]*self.nbins[1]) + raise ValueError( + "Some scalar indices are not within the unit cell. We cannot uniquely convert to cartesian" + ) + + third, index = np.divmod(index, self.nbins[0] * self.nbins[1]) second, first = np.divmod(index, self.nbins[0]) return np.array([first, second, third]).T - def _correct_pairs_R_too_big(self, - neighbour_pairs: np.ndarray, # (n_pairs, 5) - split_ind: Union[int, np.ndarray], # (n_queried_atoms, ) - pbc: np.ndarray # (3, ) + def _correct_pairs_R_too_big( + self, + neighbour_pairs: np.ndarray, # (n_pairs, 5) + split_ind: Union[int, np.ndarray], # (n_queried_atoms, ) + pbc: np.ndarray, # (3, ) ): """Correction to atom and supercell indices when the binning has been done on a tiled geometry""" is_sc_neigh = neighbour_pairs[:, 1] >= self.geometry.na @@ -340,19 +359,21 @@ def _correct_pairs_R_too_big(self, else: pbc_neighs = neighbour_pairs.copy() - sc_neigh, uc_neigh = np.divmod(neighbour_pairs[:, 1][is_sc_neigh], self.geometry.na) + sc_neigh, uc_neigh = np.divmod( + neighbour_pairs[:, 1][is_sc_neigh], self.geometry.na + ) isc_neigh = self.geometry.sc_off[sc_neigh] - pbc_neighs[is_sc_neigh, 1] = uc_neigh + pbc_neighs[is_sc_neigh, 1] = uc_neigh pbc_neighs[is_sc_neigh, 2:] = isc_neigh if not np.all(pbc): - invalid = pbc_neighs[:, 2:][:, ~ pbc].any(axis=1) - + invalid = pbc_neighs[:, 2:][:, ~pbc].any(axis=1) + neighbour_pairs = pbc_neighs if invalid is not None: - neighbour_pairs = neighbour_pairs[~ invalid] + neighbour_pairs = neighbour_pairs[~invalid] if isinstance(split_ind, int): split_ind = split_ind - invalid.sum() else: @@ -360,14 +381,15 @@ def _correct_pairs_R_too_big(self, return neighbour_pairs, split_ind - def find_neighbours(self, - atoms: AtomsArgument = None, - as_pairs: bool = False, - self_interaction: bool = False, - pbc: Union[bool, Tuple[bool, bool, bool]] = (True, True, True) + def find_neighbours( + self, + atoms: AtomsArgument = None, + as_pairs: bool = False, + self_interaction: bool = False, + pbc: Union[bool, Tuple[bool, bool, bool]] = (True, True, True), ): """Find neighbours as specified in the finder. - + This routine only executes the action of finding neighbours, the parameters of the search (`geometry`, `R`, `sphere_overlap`...) are defined when the finder is initialized or by calling `setup_finder`. @@ -387,7 +409,7 @@ def find_neighbours(self, pbc: bool or array-like of shape (3, ) whether periodic conditions should be considered. If a single bool is passed, all directions use that value. - + Returns ---------- np.ndarray or list: @@ -404,38 +426,53 @@ def find_neighbours(self, # Cast R and pbc into arrays of appropiate shape and type. thresholds = np.full(self._bins_geometry.na, self._aux_R, dtype=np.float64) pbc = np.full(3, pbc, dtype=bool) - + # Get search indices - search_indices, isc = self._get_search_indices(self.geometry.fxyz[atoms], cartesian=False) - + search_indices, isc = self._get_search_indices( + self.geometry.fxyz[atoms], cartesian=False + ) + # Get atom counts at_counts = self._get_search_atom_counts(search_indices) max_pairs = at_counts.sum() if not self_interaction: max_pairs -= search_indices.shape[0] - + # Find the neighbour pairs neighbour_pairs, split_ind = _neigh_operations.get_pairs( - atoms, search_indices, isc, self._heads, self._list, max_pairs, self_interaction, - self._bins_geometry.xyz, self._bins_geometry.cell, pbc, thresholds, self._sphere_overlap + atoms, + search_indices, + isc, + self._heads, + self._list, + max_pairs, + self_interaction, + self._bins_geometry.xyz, + self._bins_geometry.cell, + pbc, + thresholds, + self._sphere_overlap, ) # Correct neighbour indices for the case where R was too big and # we needed to create an auxiliary supercell. if self._R_too_big: - neighbour_pairs, split_ind = self._correct_pairs_R_too_big(neighbour_pairs, split_ind, pbc) - + neighbour_pairs, split_ind = self._correct_pairs_R_too_big( + neighbour_pairs, split_ind, pbc + ) + if as_pairs: # Just return the neighbour pairs - return neighbour_pairs[:split_ind[-1]] + return neighbour_pairs[: split_ind[-1]] else: # Split to get the neighbours of each atom return np.split(neighbour_pairs[:, 1:], split_ind, axis=0)[:-1] - def find_all_unique_pairs(self, - self_interaction: bool = False, - pbc: Union[bool, Tuple[bool, bool, bool]] = (True, True, True) + def find_all_unique_pairs( + self, + self_interaction: bool = False, + pbc: Union[bool, Tuple[bool, bool, bool]] = (True, True, True), ): """Find all unique neighbour pairs within the geometry. @@ -443,7 +480,7 @@ def find_all_unique_pairs(self, and correspond to the same unit cell. As an example, the connection atom 0 (unit cell) to atom 5 (1, 0, 0) is not the same as the connection atom 5 (unit cell) to atom 0 (-1, 0, 0). - + Note that this routine can not be called if `sphere_overlap` is set to `False` and the radius is not a single float. In that case, there is no way of defining "uniqueness" since pair `ij` can have @@ -464,7 +501,7 @@ def find_all_unique_pairs(self, pbc: bool or array-like of shape (3, ) whether periodic conditions should be considered. If a single bool is passed, all directions use that value. - + Returns ---------- np.ndarray of shape (n_pairs, 5): @@ -472,17 +509,21 @@ def find_all_unique_pairs(self, The three extra columns are the supercell indices of atom `j`. """ if not self._sphere_overlap and not isinstance(self._aux_R, Real): - raise ValueError("Unique atom pairs do not make sense if we are not looking for sphere overlaps." - " Please setup the finder again setting `sphere_overlap` to `True` if you wish so.") - - # In the case where we tiled the geometry to do the binning, it is much better to + raise ValueError( + "Unique atom pairs do not make sense if we are not looking for sphere overlaps." + " Please setup the finder again setting `sphere_overlap` to `True` if you wish so." + ) + + # In the case where we tiled the geometry to do the binning, it is much better to # just find all neighbours and then drop duplicate connections. Otherwise it is a bit of a mess. if self._R_too_big: # Find all neighbours - all_neighbours = self.find_neighbours(as_pairs=True, self_interaction=self_interaction, pbc=pbc) + all_neighbours = self.find_neighbours( + as_pairs=True, self_interaction=self_interaction, pbc=pbc + ) # Find out which of the pairs are uc connections - is_uc_neigh = ~ np.any(all_neighbours[:, 2:], axis=1) + is_uc_neigh = ~np.any(all_neighbours[:, 2:], axis=1) # Create an array with unit cell connections where duplicates are removed unique_uc = np.unique(np.sort(all_neighbours[is_uc_neigh][:, :2]), axis=0) @@ -497,7 +538,9 @@ def find_all_unique_pairs(self, pbc = np.full(3, pbc, dtype=bool) # Get search indices - search_indices, isc = self._get_search_indices(self.geometry.fxyz, cartesian=False) + search_indices, isc = self._get_search_indices( + self.geometry.fxyz, cartesian=False + ) # Get atom counts at_counts = self._get_search_atom_counts(search_indices) @@ -508,19 +551,29 @@ def find_all_unique_pairs(self, # Find all unique neighbour pairs neighbour_pairs, n_pairs = _neigh_operations.get_all_unique_pairs( - search_indices, isc, self._heads, self._list, max_pairs, self_interaction, - self.geometry.xyz, self.geometry.cell, pbc, thresholds, self._sphere_overlap + search_indices, + isc, + self._heads, + self._list, + max_pairs, + self_interaction, + self.geometry.xyz, + self.geometry.cell, + pbc, + thresholds, + self._sphere_overlap, ) return neighbour_pairs[:n_pairs] - def find_close(self, - xyz: Sequence, - as_pairs: bool = False, - pbc: Union[bool, Tuple[bool, bool, bool]] = (True, True, True) + def find_close( + self, + xyz: Sequence, + as_pairs: bool = False, + pbc: Union[bool, Tuple[bool, bool, bool]] = (True, True, True), ): """Find all atoms that are close to some coordinates in space. - + This routine only executes the action of finding neighbours, the parameters of the search (`geometry`, `R`, `sphere_overlap`...) are defined when the finder is initialized or by calling `setup_finder`. @@ -531,12 +584,12 @@ def find_close(self, the coordinates for which neighbours are desired. as_pairs: bool, optional If `True` pairs are returned, where the first item is the index - of the point and the second one is the atom index. + of the point and the second one is the atom index. Otherwise a list containing the neighbours for each point is returned. pbc: bool or array-like of shape (3, ) whether periodic conditions should be considered. If a single bool is passed, all directions use that value. - + Returns ---------- np.ndarray or list: @@ -550,30 +603,42 @@ def find_close(self, # Cast R and pbc into arrays of appropiate shape and type. thresholds = np.full(self._bins_geometry.na, self._aux_R, dtype=np.float64) pbc = np.full(3, pbc, dtype=bool) - + xyz = np.atleast_2d(xyz) # Get search indices - search_indices, isc = self._get_search_indices(xyz.dot(self._bins_geometry.icell.T) % 1, cartesian=False) - + search_indices, isc = self._get_search_indices( + xyz.dot(self._bins_geometry.icell.T) % 1, cartesian=False + ) + # Get atom counts at_counts = self._get_search_atom_counts(search_indices) max_pairs = at_counts.sum() - + # Find the neighbour pairs - neighbour_pairs, split_ind = _neigh_operations.get_close( - xyz, search_indices, isc, self._heads, self._list, max_pairs, - self._bins_geometry.xyz, self._bins_geometry.cell, pbc, thresholds + neighbour_pairs, split_ind = _neigh_operations.get_close( + xyz, + search_indices, + isc, + self._heads, + self._list, + max_pairs, + self._bins_geometry.xyz, + self._bins_geometry.cell, + pbc, + thresholds, ) # Correct neighbour indices for the case where R was too big and # we needed to create an auxiliary supercell. if self._R_too_big: - neighbour_pairs, split_ind = self._correct_pairs_R_too_big(neighbour_pairs, split_ind, pbc) + neighbour_pairs, split_ind = self._correct_pairs_R_too_big( + neighbour_pairs, split_ind, pbc + ) if as_pairs: # Just return the neighbour pairs - return neighbour_pairs[:split_ind[-1]] + return neighbour_pairs[: split_ind[-1]] else: # Split to get the neighbours of each position return np.split(neighbour_pairs[:, 1:], split_ind, axis=0)[:-1] diff --git a/src/sisl/geom/neighs/tests/test_neighfinder.py b/src/sisl/geom/neighs/tests/test_neighfinder.py index 0004c0c502..5e1bfb656a 100644 --- a/src/sisl/geom/neighs/tests/test_neighfinder.py +++ b/src/sisl/geom/neighs/tests/test_neighfinder.py @@ -1,33 +1,38 @@ -import pytest - import numpy as np +import pytest from sisl import Geometry from sisl.geom import NeighFinder + @pytest.fixture(scope="module", params=[True, False]) def sphere_overlap(request): return request.param + @pytest.fixture(scope="module", params=[True, False]) def multiR(request): return request.param + @pytest.fixture(scope="module", params=[True, False]) def self_interaction(request): return request.param + @pytest.fixture(scope="module", params=[False, True]) def post_setup(request): return request.param + @pytest.fixture(scope="module", params=[False, True]) def pbc(request): return request.param + @pytest.fixture(scope="module") def neighfinder(sphere_overlap, multiR): - geom = Geometry([[0,0,0], [1.2, 0, 0], [9, 0, 0]], lattice=np.diag([10, 10, 7])) + geom = Geometry([[0, 0, 0], [1.2, 0, 0], [9, 0, 0]], lattice=np.diag([10, 10, 7])) R = np.array([1.1, 1.5, 1.2]) if multiR else 1.5 @@ -37,11 +42,11 @@ def neighfinder(sphere_overlap, multiR): return neighfinder + @pytest.fixture(scope="module") def expected_neighs(sphere_overlap, multiR, self_interaction, pbc): - first_at_neighs = [] - if not(multiR and not sphere_overlap): + if not (multiR and not sphere_overlap): first_at_neighs.append([0, 1, 0, 0, 0]) if self_interaction: first_at_neighs.append([0, 0, 0, 0, 0]) @@ -53,7 +58,7 @@ def expected_neighs(sphere_overlap, multiR, self_interaction, pbc): second_at_neighs.insert(0, [1, 1, 0, 0, 0]) if pbc and sphere_overlap: second_at_neighs.append([1, 2, -1, 0, 0]) - + third_at_neighs = [] if self_interaction: third_at_neighs.append([2, 2, 0, 0, 0]) @@ -62,11 +67,15 @@ def expected_neighs(sphere_overlap, multiR, self_interaction, pbc): third_at_neighs.append([2, 1, 1, 0, 0]) third_at_neighs.append([2, 0, 1, 0, 0]) - return np.array(first_at_neighs), np.array(second_at_neighs), np.array(third_at_neighs) + return ( + np.array(first_at_neighs), + np.array(second_at_neighs), + np.array(third_at_neighs), + ) -def test_neighfinder_setup(sphere_overlap, multiR, post_setup): - geom = Geometry([[0,0,0], [1, 0, 0]], lattice=np.diag([10, 10, 7])) +def test_neighfinder_setup(sphere_overlap, multiR, post_setup): + geom = Geometry([[0, 0, 0], [1, 0, 0]], lattice=np.diag([10, 10, 7])) R = np.array([0.9, 1.5]) if multiR else 1.5 @@ -111,9 +120,11 @@ def test_neighfinder_setup(sphere_overlap, multiR, post_setup): assert (finder._counts == 0).sum() == finder.total_nbins - 1 assert finder._counts.sum() == 2 + def test_neighbour_pairs(neighfinder, self_interaction, pbc, expected_neighs): - - neighs = neighfinder.find_neighbours(as_pairs=True, self_interaction=self_interaction, pbc=pbc) + neighs = neighfinder.find_neighbours( + as_pairs=True, self_interaction=self_interaction, pbc=pbc + ) assert isinstance(neighs, np.ndarray) @@ -125,9 +136,11 @@ def test_neighbour_pairs(neighfinder, self_interaction, pbc, expected_neighs): assert np.all(neighs == [*first_at_neighs, *second_at_neighs, *third_at_neighs]) -def test_neighbours_lists(neighfinder, self_interaction, pbc, expected_neighs): - neighs = neighfinder.find_neighbours(as_pairs=False, self_interaction=self_interaction, pbc=pbc) +def test_neighbours_lists(neighfinder, self_interaction, pbc, expected_neighs): + neighs = neighfinder.find_neighbours( + as_pairs=False, self_interaction=self_interaction, pbc=pbc + ) assert isinstance(neighs, list) assert len(neighs) == 3 @@ -137,28 +150,43 @@ def test_neighbours_lists(neighfinder, self_interaction, pbc, expected_neighs): first_at_neighs, second_at_neighs, third_at_neighs = expected_neighs # Check shapes - for i, i_at_neighs in enumerate([first_at_neighs, second_at_neighs, third_at_neighs]): - assert neighs[i].shape == (len(i_at_neighs), 4), f"Wrong shape for neighbours of atom {i}" + for i, i_at_neighs in enumerate( + [first_at_neighs, second_at_neighs, third_at_neighs] + ): + assert neighs[i].shape == ( + len(i_at_neighs), + 4, + ), f"Wrong shape for neighbours of atom {i}" # Check values - for i, i_at_neighs in enumerate([first_at_neighs, second_at_neighs, third_at_neighs]): + for i, i_at_neighs in enumerate( + [first_at_neighs, second_at_neighs, third_at_neighs] + ): if len(neighs[i]) == 0: continue - assert np.all(neighs[i] == i_at_neighs[:, 1:]), f"Wrong values for neighbours of atom {i}" + assert np.all( + neighs[i] == i_at_neighs[:, 1:] + ), f"Wrong values for neighbours of atom {i}" -def test_all_unique_pairs(neighfinder, self_interaction, pbc, expected_neighs): +def test_all_unique_pairs(neighfinder, self_interaction, pbc, expected_neighs): if isinstance(neighfinder.R, np.ndarray) and not neighfinder._sphere_overlap: with pytest.raises(ValueError): - neighfinder.find_all_unique_pairs(self_interaction=self_interaction, pbc=pbc) + neighfinder.find_all_unique_pairs( + self_interaction=self_interaction, pbc=pbc + ) return - - neighs = neighfinder.find_all_unique_pairs(self_interaction=self_interaction, pbc=pbc) + + neighs = neighfinder.find_all_unique_pairs( + self_interaction=self_interaction, pbc=pbc + ) first_at_neighs, second_at_neighs, third_at_neighs = expected_neighs - all_expected_neighs = np.array([*first_at_neighs, *second_at_neighs, *third_at_neighs]) + all_expected_neighs = np.array( + [*first_at_neighs, *second_at_neighs, *third_at_neighs] + ) unique_neighs = [] for neigh_pair in all_expected_neighs: @@ -173,6 +201,7 @@ def test_all_unique_pairs(neighfinder, self_interaction, pbc, expected_neighs): assert neighs.shape == (len(unique_neighs), 5) + def test_close(neighfinder, pbc): neighs = neighfinder.find_close([0.3, 0, 0], as_pairs=True, pbc=pbc) @@ -183,10 +212,11 @@ def test_close(neighfinder, pbc): assert neighs.shape == (len(expected_neighs), 5) assert np.all(neighs == expected_neighs) + def test_no_neighbours(pbc): """Test the case where there are no neighbours, to see that it doesn't crash.""" - geom = Geometry([[0,0,0]]) + geom = Geometry([[0, 0, 0]]) finder = NeighFinder(geom, R=1.5) @@ -208,11 +238,12 @@ def test_no_neighbours(pbc): assert isinstance(neighs, np.ndarray) assert neighs.shape == (0, 5) + def test_R_too_big(pbc): """Test the case when R is so big that it needs a bigger bin than the unit cell.""" - geom = Geometry([[0,0,0], [1, 0, 0]], lattice=np.diag([2, 10, 10])) + geom = Geometry([[0, 0, 0], [1, 0, 0]], lattice=np.diag([2, 10, 10])) neighfinder = NeighFinder(geom, R=1.5) @@ -221,7 +252,7 @@ def test_R_too_big(pbc): expected_neighs = [[0, 1, 0, 0, 0]] if pbc: expected_neighs.append([0, 1, -1, 0, 0]) - expected_neighs.append([1, 0, 1, 0, 0]) + expected_neighs.append([1, 0, 1, 0, 0]) assert neighs.shape == (len(expected_neighs), 5) assert np.all(neighs == expected_neighs) From c2a1b3b465146f975c06f029a2162a9d99c38a3e Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Thu, 21 Dec 2023 12:44:30 +0100 Subject: [PATCH 08/17] small clean-ups, and fixed requirements future.annotations requires Cython>=3 Signed-off-by: Nick Papior --- ci/doc.yml | 2 +- pyproject.toml | 5 +- src/sisl/geom/neighs/CMakeLists.txt | 8 +-- src/sisl/geom/neighs/_neigh_operations.py | 62 ++++++++++--------- .../geom/neighs/tests/test_neighfinder.py | 35 ++++------- 5 files changed, 50 insertions(+), 62 deletions(-) diff --git a/ci/doc.yml b/ci/doc.yml index 3cab8b1e59..33e2c30aa8 100644 --- a/ci/doc.yml +++ b/ci/doc.yml @@ -12,7 +12,7 @@ dependencies: - pyproject-metadata - pyparsing>=1.5.7 - numpy>=1.13 - - cython>=0.29.28 + - cython>=3.0.0 - scipy>=1.5.0 - netcdf4 - xarray>=0.10.0 diff --git a/pyproject.toml b/pyproject.toml index 44599e56b3..3c2d521a8a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ requires = [ "setuptools_scm[toml]>=6.2", "scikit-build-core[pyproject]", - "Cython>=0.29.28", + "Cython>=3", # see https://github.com/scipy/oldest-supported-numpy/ # should fix #310 "oldest-supported-numpy; sys_platform != 'win32'", @@ -49,7 +49,8 @@ dependencies = [ ] authors = [ - {name = "Nick Papior", email = "nickpapior@gmail.com"} + {name = "Nick Papior", email = "nickpapior@gmail.com"}, + {name = "Pol Febrer"} ] maintainers = [{name="sisl developers"}] diff --git a/src/sisl/geom/neighs/CMakeLists.txt b/src/sisl/geom/neighs/CMakeLists.txt index 0c2a94aebf..4e37c6334f 100644 --- a/src/sisl/geom/neighs/CMakeLists.txt +++ b/src/sisl/geom/neighs/CMakeLists.txt @@ -1,11 +1,5 @@ # In this directory we have a set of libraries # We will need to link to the Numpy includes -set_property(DIRECTORY - APPEND - PROPERTY INCLUDE_DIRECTORIES - ${CMAKE_CURRENT_SOURCE_DIR}/.. - ) - foreach(source _neigh_operations) add_cython_library( SOURCE ${source}.py @@ -14,4 +8,4 @@ foreach(source _neigh_operations) ) install(TARGETS ${source} LIBRARY DESTINATION ${SKBUILD_PROJECT_NAME}/geom/neighs) -endforeach() \ No newline at end of file +endforeach() diff --git a/src/sisl/geom/neighs/_neigh_operations.py b/src/sisl/geom/neighs/_neigh_operations.py index 6dac016573..ac65efcb11 100644 --- a/src/sisl/geom/neighs/_neigh_operations.py +++ b/src/sisl/geom/neighs/_neigh_operations.py @@ -38,9 +38,12 @@ def build_table(nbins: cnp.int64_t, bin_indices: cnp.int64_t[:]): """ n_atoms = bin_indices.shape[0] - list_array: cnp.int64_t[:] = np.zeros(n_atoms, dtype=np.int64) - counts: cnp.int64_t[:] = np.zeros(nbins, dtype=np.int64) - heads: cnp.int64_t[:] = np.full(nbins, -1, dtype=np.int64) + list_array_obj = np.zeros(n_atoms, dtype=np.int64) + list_array: cnp.int64_t[:] = list_array_obj + counts_obj = np.zeros(nbins, dtype=np.int64) + counts: cnp.int64_t[:] = counts_obj + heads_obj = np.full(nbins, -1, dtype=np.int64) + heads: cnp.int64_t[:] = heads_obj # Loop through all atoms for at in range(n_atoms): @@ -54,10 +57,10 @@ def build_table(nbins: cnp.int64_t, bin_indices: cnp.int64_t[:]): heads[bin_index] = at # Update the count of this bin (increment it by 1). - counts[bin_index] = counts[bin_index] + 1 + counts[bin_index] += 1 # Return the memory views as numpy arrays - return np.asarray(list_array), np.asarray(heads), np.asarray(counts) + return list_array_obj, heads_obj, counts_obj @cython.boundscheck(False) @@ -135,20 +138,22 @@ def get_pairs( """ N_ind = at_indices.shape[0] - i_pair: cython.int = 0 + i_pair: cython.size_t = 0 ref_xyz: cnp.float64_t[:] = np.zeros(3, dtype=np.float64) neigh_isc: cnp.int64_t[:] = np.zeros(3, dtype=np.int64) - neighs: cnp.int64_t[:, :] = np.zeros((max_npairs, 5), dtype=np.int64) - split_indices: cnp.int64_t[:] = np.zeros(N_ind, dtype=np.int64) + neighs_obj: cnp.ndarray = np.zeros([max_npairs, 5], dtype=np.int64) + neighs: cnp.int64_t[:, :] = neighs_obj + split_indices_obj: cnp.ndarray = np.zeros(N_ind, dtype=np.int64) + split_indices: cnp.int64_t[:] = split_indices_obj for search_index in range(N_ind): at = at_indices[search_index] for j in range(8): # Find the bin index. - bin_index = indices[search_index, j] + bin_index: cnp.int64_t = indices[search_index, j] # Get the first atom index in this bin neigh_at: cnp.int64_t = heads[bin_index] @@ -181,11 +186,10 @@ def get_pairs( # instead of moving potential neighbors to the unit cell # because in this way we reduce the number of operations. for i in range(3): - ref_xyz[i] = ( - ref_xyz[i] - - cell[0, i] * neigh_isc[0] - - cell[1, i] * neigh_isc[1] - - cell[2, i] * neigh_isc[2] + ref_xyz[i] -= ( + cell[0, i] * neigh_isc[0] + + cell[1, i] * neigh_isc[1] + + cell[2, i] * neigh_isc[2] ) # Loop through all atoms that are in this bin. @@ -228,7 +232,7 @@ def get_pairs( # We have finished this search, store the breakpoint. split_indices[search_index] = i_pair - return np.asarray(neighs), np.asarray(split_indices) + return neighs_obj, split_indices_obj @cython.boundscheck(False) @@ -306,12 +310,12 @@ def get_all_unique_pairs( N_ats = list_array.shape[0] - i_pair: cython.int = 0 + i_pair: cython.size_t = 0 ref_xyz: cnp.float64_t[:] = np.zeros(3, dtype=np.float64) neigh_isc: cnp.int64_t[:] = np.zeros(3, dtype=np.int64) - neighs: cnp.int64_t[:, :] = np.zeros((max_npairs, 5), dtype=np.int64) + neighs: cnp.int64_t[:, :] = np.zeros([max_npairs, 5], dtype=np.int64) for at in range(N_ats): if self_interaction: @@ -325,7 +329,7 @@ def get_all_unique_pairs( for j in range(8): # Find the bin index. - bin_index = indices[at, j] + bin_index: cnp.int64_t = indices[at, j] # Get the first atom index in this bin neigh_at: cnp.int64_t = heads[bin_index] @@ -358,11 +362,10 @@ def get_all_unique_pairs( # instead of moving potential neighbors to the unit cell # because in this way we reduce the number of operations. for i in range(3): - ref_xyz[i] = ( - ref_xyz[i] - - cell[0, i] * neigh_isc[0] - - cell[1, i] * neigh_isc[1] - - cell[2, i] * neigh_isc[2] + ref_xyz[i] -= ( + cell[0, i] * neigh_isc[0] + + cell[1, i] * neigh_isc[1] + + cell[2, i] * neigh_isc[2] ) # Loop through all atoms that are in this bin. @@ -483,7 +486,7 @@ def get_close( N_ind = search_xyz.shape[0] - i_pair: cython.int = 0 + i_pair: cython.size_t = 0 ref_xyz: cnp.float64_t[:] = np.zeros(3, dtype=np.float64) neigh_isc: cnp.int64_t[:] = np.zeros(3, dtype=np.int64) @@ -494,7 +497,7 @@ def get_close( for search_index in range(N_ind): for j in range(8): # Find the bin index. - bin_index = indices[search_index, j] + bin_index: cnp.int64_t = indices[search_index, j] # Get the first atom index in this bin neigh_at: cnp.int64_t = heads[bin_index] @@ -527,11 +530,10 @@ def get_close( # instead of moving potential neighbors to the unit cell # because in this way we reduce the number of operations. for i in range(3): - ref_xyz[i] = ( - ref_xyz[i] - - cell[0, i] * neigh_isc[0] - - cell[1, i] * neigh_isc[1] - - cell[2, i] * neigh_isc[2] + ref_xyz[i] -= ( + cell[0, i] * neigh_isc[0] + + cell[1, i] * neigh_isc[1] + + cell[2, i] * neigh_isc[2] ) # Loop through all atoms that are in this bin. diff --git a/src/sisl/geom/neighs/tests/test_neighfinder.py b/src/sisl/geom/neighs/tests/test_neighfinder.py index 5e1bfb656a..bc1cb4935a 100644 --- a/src/sisl/geom/neighs/tests/test_neighfinder.py +++ b/src/sisl/geom/neighs/tests/test_neighfinder.py @@ -1,38 +1,31 @@ +from functools import partial + import numpy as np import pytest from sisl import Geometry from sisl.geom import NeighFinder +pytestmark = [pytest.mark.neighbour] -@pytest.fixture(scope="module", params=[True, False]) -def sphere_overlap(request): - return request.param +tr_fixture = partial(pytest.fixture, scope="module", params=[True, False]) -@pytest.fixture(scope="module", params=[True, False]) -def multiR(request): - return request.param - -@pytest.fixture(scope="module", params=[True, False]) -def self_interaction(request): +def request_param(request): return request.param -@pytest.fixture(scope="module", params=[False, True]) -def post_setup(request): - return request.param - - -@pytest.fixture(scope="module", params=[False, True]) -def pbc(request): - return request.param +sphere_overlap = tr_fixture()(request_param) +multiR = tr_fixture()(request_param) +self_interaction = tr_fixture()(request_param) +post_setup = tr_fixture()(request_param) +pbc = tr_fixture()(request_param) @pytest.fixture(scope="module") def neighfinder(sphere_overlap, multiR): - geom = Geometry([[0, 0, 0], [1.2, 0, 0], [9, 0, 0]], lattice=np.diag([10, 10, 7])) + geom = Geometry([[0, 0, 0], [1.2, 0, 0], [9, 0, 0]], lattice=[10, 10, 7]) R = np.array([1.1, 1.5, 1.2]) if multiR else 1.5 @@ -75,7 +68,7 @@ def expected_neighs(sphere_overlap, multiR, self_interaction, pbc): def test_neighfinder_setup(sphere_overlap, multiR, post_setup): - geom = Geometry([[0, 0, 0], [1, 0, 0]], lattice=np.diag([10, 10, 7])) + geom = Geometry([[0, 0, 0], [1, 0, 0]], lattice=[10, 10, 7]) R = np.array([0.9, 1.5]) if multiR else 1.5 @@ -243,7 +236,7 @@ def test_R_too_big(pbc): """Test the case when R is so big that it needs a bigger bin than the unit cell.""" - geom = Geometry([[0, 0, 0], [1, 0, 0]], lattice=np.diag([2, 10, 10])) + geom = Geometry([[0, 0, 0], [1, 0, 0]], lattice=[2, 10, 10]) neighfinder = NeighFinder(geom, R=1.5) @@ -265,7 +258,5 @@ def test_R_too_big(pbc): if pbc: expected_neighs.insert(0, [0, 1, -1, 0, 0]) - print(neighs, expected_neighs) - assert neighs.shape == (len(expected_neighs), 5) assert np.all(neighs == expected_neighs) From e1cdfb3424fdaaa7dc7fa36e1616a7b0b1104a45 Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Fri, 5 Jan 2024 10:17:44 +0100 Subject: [PATCH 09/17] fixed names of neighbor finder changed class and argument names to make them simpler. Also changed default of overlap to False. The internal R is now an array, regardless, but it can have either ndim 0 or 1, depending on whether it is an array or not. Signed-off-by: Nick Papior --- src/sisl/conftest.py | 1 + src/sisl/geom/neighs/CMakeLists.txt | 2 +- src/sisl/geom/neighs/__init__.py | 2 +- .../neighs/{neighfinder.py => _finder.py} | 217 +++++++++--------- .../{_neigh_operations.py => _operations.py} | 0 .../{test_neighfinder.py => test_finder.py} | 42 ++-- src/sisl/utils/mathematics.py | 2 - 7 files changed, 127 insertions(+), 139 deletions(-) rename src/sisl/geom/neighs/{neighfinder.py => _finder.py} (75%) rename src/sisl/geom/neighs/{_neigh_operations.py => _operations.py} (100%) rename src/sisl/geom/neighs/tests/{test_neighfinder.py => test_finder.py} (85%) diff --git a/src/sisl/conftest.py b/src/sisl/conftest.py index 7cfd21df99..79a59563b9 100644 --- a/src/sisl/conftest.py +++ b/src/sisl/conftest.py @@ -227,6 +227,7 @@ def pytest_configure(config): "hamiltonian", "geometry", "geom", + "neighbor", "shape", "state", "electron", diff --git a/src/sisl/geom/neighs/CMakeLists.txt b/src/sisl/geom/neighs/CMakeLists.txt index 4e37c6334f..fa06fa99d8 100644 --- a/src/sisl/geom/neighs/CMakeLists.txt +++ b/src/sisl/geom/neighs/CMakeLists.txt @@ -1,6 +1,6 @@ # In this directory we have a set of libraries # We will need to link to the Numpy includes -foreach(source _neigh_operations) +foreach(source _operations) add_cython_library( SOURCE ${source}.py LIBRARY ${source} diff --git a/src/sisl/geom/neighs/__init__.py b/src/sisl/geom/neighs/__init__.py index 3784cc3801..e75df2478b 100644 --- a/src/sisl/geom/neighs/__init__.py +++ b/src/sisl/geom/neighs/__init__.py @@ -1 +1 @@ -from .neighfinder import NeighFinder +from ._finder import NeighborFinder diff --git a/src/sisl/geom/neighs/neighfinder.py b/src/sisl/geom/neighs/_finder.py similarity index 75% rename from src/sisl/geom/neighs/neighfinder.py rename to src/sisl/geom/neighs/_finder.py index 457a609cdf..17378cb2e8 100644 --- a/src/sisl/geom/neighs/neighfinder.py +++ b/src/sisl/geom/neighs/_finder.py @@ -1,43 +1,41 @@ -from numbers import Real from typing import Optional, Sequence, Tuple, Union import numpy as np from sisl import Geometry from sisl.typing import AtomsArgument -from sisl.utils.mathematics import fnorm -from . import _neigh_operations +from . import _operations -class NeighFinder: - """Efficient linear scaling finding of neighbours. +class NeighborFinder: + """Efficient linear scaling finding of neighbors. Once this class is instantiated, a table is build. Then, - the neighbour finder can be queried as many times as wished + the neighbor finder can be queried as many times as wished as long as the geometry doesn't change. Note that the radius (`R`) is used to build the table. - Therefore, if one wants to look for neighbours using a different - R, one needs to create another finder or call `setup_finder`. + Therefore, if one wants to look for neighbors using a different + R, one needs to create another finder or call `setup`. Parameters ---------- geometry: sisl.Geometry - the geometry to find neighbours in + the geometry to find neighbors in R: float or array-like of shape (geometry.na), optional - The radius to consider two atoms neighbours. + The radius to consider two atoms neighbors. If it is a single float, the same radius is used for all atoms. If it is an array, it should contain the radius for each atom. If not provided, an array is constructed, where the radius for each atom is its maxR. - sphere_overlap: bool, optional - If `True`, two atoms are considered neighbours if their spheres + overlap: bool, optional + If `True`, two atoms are considered neighbors if their spheres overlap. - If `False`, two atoms are considered neighbours if the second atom + If `False`, two atoms are considered neighbors if the second atom is within the sphere of the first atom. Note that this implies that - atom `i` might be atom `j`'s neighbour while the opposite is not true. + atom `i` might be atom `j`'s neighbor while the opposite is not true. If not provided, it will be `True` if `R` is an array and `False` if it is a single float. @@ -51,11 +49,9 @@ class NeighFinder: nbins: Tuple[int, int, int] total_nbins: int - R: Union[float, np.ndarray] - _aux_R: Union[ - float, np.ndarray - ] # If the geometry has been tiled, R is also tiled here - _sphere_overlap: bool + R: np.ndarray + _aux_R: np.ndarray + _overlap: bool # Data structure _list: np.ndarray # (natoms, ) @@ -65,27 +61,27 @@ class NeighFinder: def __init__( self, geometry: Geometry, - R: Union[float, np.ndarray, None] = None, - sphere_overlap: Optional[bool] = None, + R: Optional[Union[float, np.ndarray]] = None, + overlap: bool = False, ): - self.setup_finder(geometry, R=R, sphere_overlap=sphere_overlap) + self.setup(geometry, R=R, overlap=overlap) - def setup_finder( + def setup( self, geometry: Optional[Geometry] = None, - R: Union[float, np.ndarray, None] = None, - sphere_overlap: Optional[bool] = None, + R: Optional[Union[float, np.ndarray]] = None, + overlap: bool = None, ): - """Prepares everything for neighbour finding. + """Prepares everything for neighbor finding. Parameters ---------- geometry: sisl.Geometry, optional - the geometry to find neighbours in. + the geometry to find neighbors in. If not provided, the current geometry is kept. R: float or array-like of shape (geometry.na), optional - The radius to consider two atoms neighbours. + The radius to consider two atoms neighbors. If it is a single float, the same radius is used for all atoms. If it is an array, it should contain the radius for each atom. @@ -93,27 +89,22 @@ def setup_finder( each atom is its maxR. Note that negative R values are allowed - sphere_overlap: bool, optional - If `True`, two atoms are considered neighbours if their spheres + overlap: bool + If `True`, two atoms are considered neighbors if their spheres overlap. - If `False`, two atoms are considered neighbours if the second atom + If `False`, two atoms are considered neighbors if the second atom is within the sphere of the first atom. Note that this implies that - atom `i` might be atom `j`'s neighbour while the opposite is not true. - - If not provided, it will be `True` if `R` is an array and `False` if - it is a single float. + atom `i` might be atom `j`'s neighbor while the opposite is not true. """ # Set the geometry. Copy it because we may need to modify the supercell size. if geometry is not None: self.geometry = geometry.copy() - # If R is not a single float, make sure it is a numpy array - R_is_float = isinstance(R, Real) - if not R_is_float: - R = np.array(R) # If R was not provided, build an array of Rs if R is None: R = self.geometry.atoms.maxR(all=True) + else: + R = np.asarray(R) # Set the radius self.R = R @@ -121,9 +112,7 @@ def setup_finder( # If sphere overlap was not provided, set it to False if R is a single float # and True otherwise. - if sphere_overlap is None: - sphere_overlap = not R_is_float - self._sphere_overlap = sphere_overlap + self._overlap = overlap # Determine the bin_size as the maximum DIAMETER to ensure that we ALWAYS # only need to look one bin away for neighbors. @@ -133,7 +122,7 @@ def setup_finder( raise ValueError( "All R values are 0 or less. Please provide some positive values" ) - if sphere_overlap: + if overlap: # In the case when we want to check for sphere overlap, the size should # be twice as big. bin_size *= 2 @@ -142,7 +131,7 @@ def setup_finder( # a position is exactly at the center of a bin. bin_size += 0.01 - lattice_sizes = fnorm(self.geometry.cell, axis=-1) + lattice_sizes = self.geometry.length if bin_size > np.min(lattice_sizes): self._R_too_big = True @@ -153,7 +142,7 @@ def setup_finder( nsc = np.ceil(bin_size / lattice_sizes) // 2 * 2 + 1 # And then set it as the number of supercells. self.geometry.set_nsc(nsc.astype(int)) - if isinstance(self._aux_R, np.ndarray): + if self._aux_R.ndim == 1: self._aux_R = np.tile(self._aux_R, self.geometry.n_s) all_xyz = [] @@ -166,7 +155,7 @@ def setup_finder( ) # Recompute lattice sizes - lattice_sizes = fnorm(self._bins_geometry.cell, axis=-1) + lattice_sizes = self._bins_geometry.length else: # Nothing to modify, we can use the geometry and bin it as it is. @@ -181,12 +170,12 @@ def setup_finder( # Get the scalar bin indices of all atoms scalar_bin_indices = self._get_bin_indices(self._bins_geometry.fxyz) - # Build the tables that will allow us to look for neighbours in an efficient + # Build the tables that will allow us to look for neighbors in an efficient # and linear scaling manner. self._build_table(scalar_bin_indices) def _build_table(self, bin_indices): - """Builds the tables that will allow efficient linear scaling neighbour search. + """Builds the tables that will allow efficient linear scaling neighbor search. Parameters ---------- @@ -194,7 +183,7 @@ def _build_table(self, bin_indices): Array containing the scalar bin index for each atom. """ # Call the fortran routine that builds the table - self._list, self._heads, self._counts = _neigh_operations.build_table( + self._list, self._heads, self._counts = _operations.build_table( self.total_nbins, bin_indices ) @@ -274,7 +263,7 @@ def _get_bin_indices(self, fxyz, cartesian=False, floor=True): def _get_search_indices(self, fxyz, cartesian=False): """Gets the bin indices to explore for a given fractional coordinate. - Given a fractional coordinate, we will need to look for neighbours + Given a fractional coordinate, we will need to look for neighbors in its own bin, and one bin away in each direction. That is, $2^3=8$ bins. Parameters @@ -303,7 +292,7 @@ def _get_search_indices(self, fxyz, cartesian=False): bin_indices = self._get_bin_indices(fxyz, floor=False, cartesian=True) bin_indices = np.atleast_2d(bin_indices) - # Determine which is the neighbouring cell that we need to look for + # Determine which is the neighboring cell that we need to look for # along each direction. signs = np.ones(bin_indices.shape, dtype=int) signs[(bin_indices % 1) < 0.5] = -1 @@ -346,21 +335,21 @@ def _scalar_to_cartesian_index(self, index): def _correct_pairs_R_too_big( self, - neighbour_pairs: np.ndarray, # (n_pairs, 5) + neighbor_pairs: np.ndarray, # (n_pairs, 5) split_ind: Union[int, np.ndarray], # (n_queried_atoms, ) pbc: np.ndarray, # (3, ) ): """Correction to atom and supercell indices when the binning has been done on a tiled geometry""" - is_sc_neigh = neighbour_pairs[:, 1] >= self.geometry.na + is_sc_neigh = neighbor_pairs[:, 1] >= self.geometry.na invalid = None if not np.any(pbc): invalid = is_sc_neigh else: - pbc_neighs = neighbour_pairs.copy() + pbc_neighs = neighbor_pairs.copy() sc_neigh, uc_neigh = np.divmod( - neighbour_pairs[:, 1][is_sc_neigh], self.geometry.na + neighbor_pairs[:, 1][is_sc_neigh], self.geometry.na ) isc_neigh = self.geometry.sc_off[sc_neigh] @@ -370,42 +359,42 @@ def _correct_pairs_R_too_big( if not np.all(pbc): invalid = pbc_neighs[:, 2:][:, ~pbc].any(axis=1) - neighbour_pairs = pbc_neighs + neighbor_pairs = pbc_neighs if invalid is not None: - neighbour_pairs = neighbour_pairs[~invalid] + neighbor_pairs = neighbor_pairs[~invalid] if isinstance(split_ind, int): split_ind = split_ind - invalid.sum() else: split_ind = split_ind - np.cumsum(invalid)[split_ind - 1] - return neighbour_pairs, split_ind + return neighbor_pairs, split_ind - def find_neighbours( + def find_neighbors( self, atoms: AtomsArgument = None, as_pairs: bool = False, self_interaction: bool = False, pbc: Union[bool, Tuple[bool, bool, bool]] = (True, True, True), ): - """Find neighbours as specified in the finder. + """Find neighbors as specified in the finder. - This routine only executes the action of finding neighbours, - the parameters of the search (`geometry`, `R`, `sphere_overlap`...) - are defined when the finder is initialized or by calling `setup_finder`. + This routine only executes the action of finding neighbors, + the parameters of the search (`geometry`, `R`, `overlap`...) + are defined when the finder is initialized or by calling `setup`. Parameters ---------- atoms: optional - the atoms for which neighbours are desired. Anything that can + the atoms for which neighbors are desired. Anything that can be sanitized by `sisl.Geometry` is a valid value. - If not provided, neighbours for all atoms are searched. + If not provided, neighbors for all atoms are searched. as_pairs: bool, optional If `True` pairs of atoms are returned. Otherwise a list containing - the neighbours for each atom is returned. + the neighbors for each atom is returned. self_interaction: bool, optional - whether to consider an atom a neighbour of itself. + whether to consider an atom a neighbor of itself. pbc: bool or array-like of shape (3, ) whether periodic conditions should be considered. If a single bool is passed, all directions use that value. @@ -415,7 +404,7 @@ def find_neighbours( np.ndarray or list: If `as_pairs=True`: A `np.ndarray` of shape (n_pairs, 5) is returned. - Each pair `ij` means that `j` is a neighbour of `i`. + Each pair `ij` means that `j` is a neighbor of `i`. The three extra columns are the supercell indices of atom `j`. If `as_pairs=False`: A list containing a numpy array of shape (n_neighs, 4) for each atom. @@ -439,8 +428,8 @@ def find_neighbours( if not self_interaction: max_pairs -= search_indices.shape[0] - # Find the neighbour pairs - neighbour_pairs, split_ind = _neigh_operations.get_pairs( + # Find the neighbor pairs + neighbor_pairs, split_ind = _operations.get_pairs( atoms, search_indices, isc, @@ -452,36 +441,36 @@ def find_neighbours( self._bins_geometry.cell, pbc, thresholds, - self._sphere_overlap, + self._overlap, ) - # Correct neighbour indices for the case where R was too big and + # Correct neighbor indices for the case where R was too big and # we needed to create an auxiliary supercell. if self._R_too_big: - neighbour_pairs, split_ind = self._correct_pairs_R_too_big( - neighbour_pairs, split_ind, pbc + neighbor_pairs, split_ind = self._correct_pairs_R_too_big( + neighbor_pairs, split_ind, pbc ) if as_pairs: - # Just return the neighbour pairs - return neighbour_pairs[: split_ind[-1]] + # Just return the neighbor pairs + return neighbor_pairs[: split_ind[-1]] else: - # Split to get the neighbours of each atom - return np.split(neighbour_pairs[:, 1:], split_ind, axis=0)[:-1] + # Split to get the neighbors of each atom + return np.split(neighbor_pairs[:, 1:], split_ind, axis=0)[:-1] def find_all_unique_pairs( self, self_interaction: bool = False, pbc: Union[bool, Tuple[bool, bool, bool]] = (True, True, True), ): - """Find all unique neighbour pairs within the geometry. + """Find all unique neighbor pairs within the geometry. A pair of atoms is considered unique if atoms have the same index and correspond to the same unit cell. As an example, the connection atom 0 (unit cell) to atom 5 (1, 0, 0) is not the same as the connection atom 5 (unit cell) to atom 0 (-1, 0, 0). - Note that this routine can not be called if `sphere_overlap` is + Note that this routine can not be called if `overlap` is set to `False` and the radius is not a single float. In that case, there is no way of defining "uniqueness" since pair `ij` can have a different threshold radius than pair `ji`. @@ -489,15 +478,15 @@ def find_all_unique_pairs( Parameters ---------- atoms: optional - the atoms for which neighbours are desired. Anything that can + the atoms for which neighbors are desired. Anything that can be sanitized by `sisl.Geometry` is a valid value. - If not provided, neighbours for all atoms are searched. + If not provided, neighbors for all atoms are searched. as_pairs: bool, optional If `True` pairs of atoms are returned. Otherwise a list containing - the neighbours for each atom is returned. + the neighbors for each atom is returned. self_interaction: bool, optional - whether to consider an atom a neighbour of itself. + whether to consider an atom a neighbor of itself. pbc: bool or array-like of shape (3, ) whether periodic conditions should be considered. If a single bool is passed, all directions use that value. @@ -505,33 +494,33 @@ def find_all_unique_pairs( Returns ---------- np.ndarray of shape (n_pairs, 5): - Each pair `ij` means that `j` is a neighbour of `i`. + Each pair `ij` means that `j` is a neighbor of `i`. The three extra columns are the supercell indices of atom `j`. """ - if not self._sphere_overlap and not isinstance(self._aux_R, Real): + if not self._overlap and self._aux_R.ndim == 1: raise ValueError( "Unique atom pairs do not make sense if we are not looking for sphere overlaps." - " Please setup the finder again setting `sphere_overlap` to `True` if you wish so." + " Please setup the finder again setting `overlap` to `True` if you wish so." ) # In the case where we tiled the geometry to do the binning, it is much better to - # just find all neighbours and then drop duplicate connections. Otherwise it is a bit of a mess. + # just find all neighbors and then drop duplicate connections. Otherwise it is a bit of a mess. if self._R_too_big: - # Find all neighbours - all_neighbours = self.find_neighbours( + # Find all neighbors + all_neighbors = self.find_neighbors( as_pairs=True, self_interaction=self_interaction, pbc=pbc ) # Find out which of the pairs are uc connections - is_uc_neigh = ~np.any(all_neighbours[:, 2:], axis=1) + is_uc_neigh = ~np.any(all_neighbors[:, 2:], axis=1) # Create an array with unit cell connections where duplicates are removed - unique_uc = np.unique(np.sort(all_neighbours[is_uc_neigh][:, :2]), axis=0) - uc_neighbours = np.zeros((len(unique_uc), 5), dtype=int) - uc_neighbours[:, :2] = unique_uc + unique_uc = np.unique(np.sort(all_neighbors[is_uc_neigh][:, :2]), axis=0) + uc_neighbors = np.zeros((len(unique_uc), 5), dtype=int) + uc_neighbors[:, :2] = unique_uc # Concatenate the uc connections with the rest of the connections. - return np.concatenate((uc_neighbours, all_neighbours[~is_uc_neigh])) + return np.concatenate((uc_neighbors, all_neighbors[~is_uc_neigh])) # Cast R and pbc into arrays of appropiate shape and type. thresholds = np.full(self.geometry.na, self.R, dtype=np.float64) @@ -549,8 +538,8 @@ def find_all_unique_pairs( if not self_interaction: max_pairs -= search_indices.shape[0] - # Find all unique neighbour pairs - neighbour_pairs, n_pairs = _neigh_operations.get_all_unique_pairs( + # Find all unique neighbor pairs + neighbor_pairs, n_pairs = _operations.get_all_unique_pairs( search_indices, isc, self._heads, @@ -561,10 +550,10 @@ def find_all_unique_pairs( self.geometry.cell, pbc, thresholds, - self._sphere_overlap, + self._overlap, ) - return neighbour_pairs[:n_pairs] + return neighbor_pairs[:n_pairs] def find_close( self, @@ -574,18 +563,18 @@ def find_close( ): """Find all atoms that are close to some coordinates in space. - This routine only executes the action of finding neighbours, - the parameters of the search (`geometry`, `R`, `sphere_overlap`...) - are defined when the finder is initialized or by calling `setup_finder`. + This routine only executes the action of finding neighbors, + the parameters of the search (`geometry`, `R`, `overlap`...) + are defined when the finder is initialized or by calling `setup`. Parameters ---------- xyz: array-like of shape ([npoints], 3) - the coordinates for which neighbours are desired. + the coordinates for which neighbors are desired. as_pairs: bool, optional If `True` pairs are returned, where the first item is the index of the point and the second one is the atom index. - Otherwise a list containing the neighbours for each point is returned. + Otherwise a list containing the neighbors for each point is returned. pbc: bool or array-like of shape (3, ) whether periodic conditions should be considered. If a single bool is passed, all directions use that value. @@ -595,7 +584,7 @@ def find_close( np.ndarray or list: If `as_pairs=True`: A `np.ndarray` of shape (n_pairs, 5) is returned. - Each pair `ij` means that `j` is a neighbour of `i`. + Each pair `ij` means that `j` is a neighbor of `i`. The three extra columns are the supercell indices of atom `j`. If `as_pairs=False`: A list containing a numpy array of shape (n_neighs, 4) for each atom. @@ -615,8 +604,8 @@ def find_close( max_pairs = at_counts.sum() - # Find the neighbour pairs - neighbour_pairs, split_ind = _neigh_operations.get_close( + # Find the neighbor pairs + neighbor_pairs, split_ind = _operations.get_close( xyz, search_indices, isc, @@ -629,16 +618,16 @@ def find_close( thresholds, ) - # Correct neighbour indices for the case where R was too big and + # Correct neighbor indices for the case where R was too big and # we needed to create an auxiliary supercell. if self._R_too_big: - neighbour_pairs, split_ind = self._correct_pairs_R_too_big( - neighbour_pairs, split_ind, pbc + neighbor_pairs, split_ind = self._correct_pairs_R_too_big( + neighbor_pairs, split_ind, pbc ) if as_pairs: - # Just return the neighbour pairs - return neighbour_pairs[: split_ind[-1]] + # Just return the neighbor pairs + return neighbor_pairs[: split_ind[-1]] else: - # Split to get the neighbours of each position - return np.split(neighbour_pairs[:, 1:], split_ind, axis=0)[:-1] + # Split to get the neighbors of each position + return np.split(neighbor_pairs[:, 1:], split_ind, axis=0)[:-1] diff --git a/src/sisl/geom/neighs/_neigh_operations.py b/src/sisl/geom/neighs/_operations.py similarity index 100% rename from src/sisl/geom/neighs/_neigh_operations.py rename to src/sisl/geom/neighs/_operations.py diff --git a/src/sisl/geom/neighs/tests/test_neighfinder.py b/src/sisl/geom/neighs/tests/test_finder.py similarity index 85% rename from src/sisl/geom/neighs/tests/test_neighfinder.py rename to src/sisl/geom/neighs/tests/test_finder.py index bc1cb4935a..68801fafa2 100644 --- a/src/sisl/geom/neighs/tests/test_neighfinder.py +++ b/src/sisl/geom/neighs/tests/test_finder.py @@ -4,9 +4,9 @@ import pytest from sisl import Geometry -from sisl.geom import NeighFinder +from sisl.geom import NeighborFinder -pytestmark = [pytest.mark.neighbour] +pytestmark = [pytest.mark.neighbor] tr_fixture = partial(pytest.fixture, scope="module", params=[True, False]) @@ -29,7 +29,7 @@ def neighfinder(sphere_overlap, multiR): R = np.array([1.1, 1.5, 1.2]) if multiR else 1.5 - neighfinder = NeighFinder(geom, R=R, sphere_overlap=sphere_overlap) + neighfinder = NeighborFinder(geom, R=R, overlap=sphere_overlap) neighfinder.assert_consistency() @@ -75,18 +75,18 @@ def test_neighfinder_setup(sphere_overlap, multiR, post_setup): if post_setup: # We are going to create a data structure with the wrong parameters, # and then update it. - finder = NeighFinder(geom, R=R - 0.7, sphere_overlap=not sphere_overlap) - finder.setup_finder(R=R, sphere_overlap=sphere_overlap) + finder = NeighborFinder(geom, R=R - 0.7, overlap=not sphere_overlap) + finder.setup(R=R, overlap=sphere_overlap) else: # Initialize finder with the right parameters. - finder = NeighFinder(geom, R=R, sphere_overlap=sphere_overlap) + finder = NeighborFinder(geom, R=R, overlap=sphere_overlap) # Check that R is properly set when its a scalar and an array. if multiR: assert isinstance(finder.R, np.ndarray) assert np.all(finder.R == R) else: - assert isinstance(finder.R, float) + assert finder.R.ndim == 0 assert finder.R == R # Assert that we have stored a copy of the geometry @@ -115,7 +115,7 @@ def test_neighfinder_setup(sphere_overlap, multiR, post_setup): def test_neighbour_pairs(neighfinder, self_interaction, pbc, expected_neighs): - neighs = neighfinder.find_neighbours( + neighs = neighfinder.find_neighbors( as_pairs=True, self_interaction=self_interaction, pbc=pbc ) @@ -130,8 +130,8 @@ def test_neighbour_pairs(neighfinder, self_interaction, pbc, expected_neighs): assert np.all(neighs == [*first_at_neighs, *second_at_neighs, *third_at_neighs]) -def test_neighbours_lists(neighfinder, self_interaction, pbc, expected_neighs): - neighs = neighfinder.find_neighbours( +def test_neighbors_lists(neighfinder, self_interaction, pbc, expected_neighs): + neighs = neighfinder.find_neighbors( as_pairs=False, self_interaction=self_interaction, pbc=pbc ) @@ -149,7 +149,7 @@ def test_neighbours_lists(neighfinder, self_interaction, pbc, expected_neighs): assert neighs[i].shape == ( len(i_at_neighs), 4, - ), f"Wrong shape for neighbours of atom {i}" + ), f"Wrong shape for neighbors of atom {i}" # Check values for i, i_at_neighs in enumerate( @@ -160,11 +160,11 @@ def test_neighbours_lists(neighfinder, self_interaction, pbc, expected_neighs): assert np.all( neighs[i] == i_at_neighs[:, 1:] - ), f"Wrong values for neighbours of atom {i}" + ), f"Wrong values for neighbors of atom {i}" def test_all_unique_pairs(neighfinder, self_interaction, pbc, expected_neighs): - if isinstance(neighfinder.R, np.ndarray) and not neighfinder._sphere_overlap: + if neighfinder.R.ndim == 1 and not neighfinder._overlap: with pytest.raises(ValueError): neighfinder.find_all_unique_pairs( self_interaction=self_interaction, pbc=pbc @@ -199,26 +199,26 @@ def test_close(neighfinder, pbc): neighs = neighfinder.find_close([0.3, 0, 0], as_pairs=True, pbc=pbc) expected_neighs = [[0, 1, 0, 0, 0], [0, 0, 0, 0, 0]] - if pbc and isinstance(neighfinder.R, float): + if pbc and neighfinder.R.ndim == 0: expected_neighs.append([0, 2, -1, 0, 0]) assert neighs.shape == (len(expected_neighs), 5) assert np.all(neighs == expected_neighs) -def test_no_neighbours(pbc): - """Test the case where there are no neighbours, to see that it doesn't crash.""" +def test_no_neighbors(pbc): + """Test the case where there are no neighbors, to see that it doesn't crash.""" geom = Geometry([[0, 0, 0]]) - finder = NeighFinder(geom, R=1.5) + finder = NeighborFinder(geom, R=1.5) - neighs = finder.find_neighbours(as_pairs=True, pbc=pbc) + neighs = finder.find_neighbors(as_pairs=True, pbc=pbc) assert isinstance(neighs, np.ndarray) assert neighs.shape == (0, 5) - neighs = finder.find_neighbours(as_pairs=False, pbc=pbc) + neighs = finder.find_neighbors(as_pairs=False, pbc=pbc) assert isinstance(neighs, list) assert len(neighs) == 1 @@ -238,7 +238,7 @@ def test_R_too_big(pbc): geom = Geometry([[0, 0, 0], [1, 0, 0]], lattice=[2, 10, 10]) - neighfinder = NeighFinder(geom, R=1.5) + neighfinder = NeighborFinder(geom, R=1.5) neighs = neighfinder.find_all_unique_pairs(pbc=pbc) @@ -250,7 +250,7 @@ def test_R_too_big(pbc): assert neighs.shape == (len(expected_neighs), 5) assert np.all(neighs == expected_neighs) - neighfinder = NeighFinder(geom, R=[0.6, 2.2]) + neighfinder = NeighborFinder(geom, R=[0.6, 2.2], overlap=True) neighs = neighfinder.find_close([[0.5, 0, 0]], as_pairs=True, pbc=pbc) diff --git a/src/sisl/utils/mathematics.py b/src/sisl/utils/mathematics.py index 715fb47a61..6f820ce8ce 100644 --- a/src/sisl/utils/mathematics.py +++ b/src/sisl/utils/mathematics.py @@ -130,8 +130,6 @@ def spher2cart(r, theta, phi): theta = asarray(theta) phi = asarray(phi) shape = _a.broadcast_shapes(r.shape, theta.shape, phi.shape) - print(r.shape, theta.shape, phi.shape) - print(shape) R = _a.empty(shape + (3,), dtype=r.dtype) sphi = sin(phi) R[..., 0] = r * cos(theta) * sphi From 884414918a26de1ce32964af1f29a425278bd0a5 Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Fri, 5 Jan 2024 10:37:30 +0100 Subject: [PATCH 10/17] added Pol as author in Citation This is not to diminish other contributors. Pol has made tremendous contributions. This is a delicate decision, and is in no way a discouragement of other contributions made. Signed-off-by: Nick Papior --- CITATION.cff | 3 +++ pyproject.toml | 12 ++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 754eefddd6..ce93dd58e0 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -9,6 +9,9 @@ authors: given-names: Nick orcid: "https://orcid.org/0000-0003-3038-1855" alias: zerothi + - family-names: Febrer + given-names: Pol + alias: pfebrer title: sisl identifiers: - description: "Collection DOI for any sisl version." diff --git a/pyproject.toml b/pyproject.toml index 3c2d521a8a..374fcb80e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -137,9 +137,9 @@ analysis = [ ] viz = [ + "netCDF4", "dill >= 0.3.2", "pathos", - "netCDF4", "scikit-image", "plotly", @@ -148,34 +148,34 @@ viz = [ ] viz-plotly = [ + "netCDF4", "dill >= 0.3.2", "pathos", - "netCDF4", "scikit-image", "plotly", ] viz-matplotlib = [ + "netCDF4", "dill >= 0.3.2", "pathos", - "netCDF4", "scikit-image", "matplotlib", ] viz-blender = [ + "netCDF4", "dill >= 0.3.2", "pathos", - "netCDF4", "scikit-image", ] viz-ase = [ + "netCDF4", "dill >= 0.3.2", "pathos", - "netCDF4", "scikit-image", "ase", @@ -187,10 +187,10 @@ test = [ "pytest-env", "pytest-faulthandler", "coveralls", + "netCDF4", "tqdm", "dill >= 0.3.2", "pathos", - "netCDF4", "scikit-image", "matplotlib", "plotly", From 929242dd4683072d10e9d783c4350d83fb3484b5 Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Wed, 14 Feb 2024 20:56:15 +0100 Subject: [PATCH 11/17] moved neighbor code to new folder Signed-off-by: Nick Papior --- src/sisl/geom/__init__.py | 2 +- src/sisl/geom/{neighs => _neighbors}/CMakeLists.txt | 0 src/sisl/geom/{neighs => _neighbors}/__init__.py | 0 src/sisl/geom/{neighs => _neighbors}/_finder.py | 0 src/sisl/geom/{neighs => _neighbors}/_operations.py | 0 src/sisl/geom/{neighs => _neighbors}/tests/test_finder.py | 0 6 files changed, 1 insertion(+), 1 deletion(-) rename src/sisl/geom/{neighs => _neighbors}/CMakeLists.txt (100%) rename src/sisl/geom/{neighs => _neighbors}/__init__.py (100%) rename src/sisl/geom/{neighs => _neighbors}/_finder.py (100%) rename src/sisl/geom/{neighs => _neighbors}/_operations.py (100%) rename src/sisl/geom/{neighs => _neighbors}/tests/test_finder.py (100%) diff --git a/src/sisl/geom/__init__.py b/src/sisl/geom/__init__.py index 910e7bf731..d67b727d0d 100644 --- a/src/sisl/geom/__init__.py +++ b/src/sisl/geom/__init__.py @@ -49,6 +49,6 @@ from .flat import * from .nanoribbon import * from .nanotube import * -from .neighs import * +from ._neighbors import * from .special import * from .surfaces import * diff --git a/src/sisl/geom/neighs/CMakeLists.txt b/src/sisl/geom/_neighbors/CMakeLists.txt similarity index 100% rename from src/sisl/geom/neighs/CMakeLists.txt rename to src/sisl/geom/_neighbors/CMakeLists.txt diff --git a/src/sisl/geom/neighs/__init__.py b/src/sisl/geom/_neighbors/__init__.py similarity index 100% rename from src/sisl/geom/neighs/__init__.py rename to src/sisl/geom/_neighbors/__init__.py diff --git a/src/sisl/geom/neighs/_finder.py b/src/sisl/geom/_neighbors/_finder.py similarity index 100% rename from src/sisl/geom/neighs/_finder.py rename to src/sisl/geom/_neighbors/_finder.py diff --git a/src/sisl/geom/neighs/_operations.py b/src/sisl/geom/_neighbors/_operations.py similarity index 100% rename from src/sisl/geom/neighs/_operations.py rename to src/sisl/geom/_neighbors/_operations.py diff --git a/src/sisl/geom/neighs/tests/test_finder.py b/src/sisl/geom/_neighbors/tests/test_finder.py similarity index 100% rename from src/sisl/geom/neighs/tests/test_finder.py rename to src/sisl/geom/_neighbors/tests/test_finder.py From c6ab0c43d7b6bab1b4266ce4bff4e7013f1a9fe4 Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Thu, 15 Feb 2024 10:25:32 +0100 Subject: [PATCH 12/17] cleaned documentation and return value Signed-off-by: Nick Papior --- src/sisl/geom/_neighbors/_finder.py | 4 +-- src/sisl/geom/_neighbors/_operations.py | 34 ++++++++----------------- 2 files changed, 13 insertions(+), 25 deletions(-) diff --git a/src/sisl/geom/_neighbors/_finder.py b/src/sisl/geom/_neighbors/_finder.py index 17378cb2e8..2b52a3b17e 100644 --- a/src/sisl/geom/_neighbors/_finder.py +++ b/src/sisl/geom/_neighbors/_finder.py @@ -539,7 +539,7 @@ def find_all_unique_pairs( max_pairs -= search_indices.shape[0] # Find all unique neighbor pairs - neighbor_pairs, n_pairs = _operations.get_all_unique_pairs( + neighbor_pairs = _operations.get_all_unique_pairs( search_indices, isc, self._heads, @@ -553,7 +553,7 @@ def find_all_unique_pairs( self._overlap, ) - return neighbor_pairs[:n_pairs] + return neighbor_pairs def find_close( self, diff --git a/src/sisl/geom/_neighbors/_operations.py b/src/sisl/geom/_neighbors/_operations.py index ac65efcb11..d14a89ab8b 100644 --- a/src/sisl/geom/_neighbors/_operations.py +++ b/src/sisl/geom/_neighbors/_operations.py @@ -77,7 +77,7 @@ def get_pairs( cell: cnp.float64_t[:, :], pbc: cnp.npy_bool[:], thresholds: cnp.float64_t[:], - sphere_overlap: cython.bint, + overlap: cython.bint, ): """Gets (possibly duplicated) pairs of neighbour atoms. @@ -119,7 +119,7 @@ def get_pairs( be considered. thresholds: the threshold radius for each atom in the geometry. - sphere_overlap: + overlap: If true, two atoms are considered neighbours if their spheres overlap. If false, two atoms are considered neighbours if the second atom @@ -210,7 +210,7 @@ def get_pairs( # Get the threshold for this pair of atoms threshold: float = thresholds[at] - if sphere_overlap: + if overlap: # If the user wants to check for sphere overlaps, we have # to sum the radius of the neighbor to the threshold threshold = threshold + thresholds[neigh_at] @@ -248,7 +248,7 @@ def get_all_unique_pairs( cell: cnp.float64_t[:, :], pbc: cnp.npy_bool[:], thresholds: cnp.float64_t[:], - sphere_overlap: cython.bint, + overlap: cython.bint, ): """Gets all unique pairs of atoms that are neighbours. @@ -287,7 +287,7 @@ def get_all_unique_pairs( be considered. thresholds: the threshold radius for each atom in the geometry. - sphere_overlap: + overlap: If true, two atoms are considered neighbours if their spheres overlap. If false, two atoms are considered neighbours if the second atom @@ -302,20 +302,16 @@ def get_all_unique_pairs( Columns 3 to 5 contain the supercell indices of the neighbour atom (the one in column 2, column 1 atoms are always in the unit cell). - npairs: - The number of unique pairs that have been found, which - will be less than `max_npairs`. This should presumably - be used to slice the `neighs` array once in python. """ N_ats = list_array.shape[0] - i_pair: cython.size_t = 0 - ref_xyz: cnp.float64_t[:] = np.zeros(3, dtype=np.float64) neigh_isc: cnp.int64_t[:] = np.zeros(3, dtype=np.int64) neighs: cnp.int64_t[:, :] = np.zeros([max_npairs, 5], dtype=np.int64) + # Counter for filling neighs + i_pair: cython.size_t = 0 for at in range(N_ats): if self_interaction: @@ -325,7 +321,7 @@ def get_all_unique_pairs( neighs[i_pair, 2:] = 0 # Increment the pair index - i_pair = i_pair + 1 + i_pair += 1 for j in range(8): # Find the bin index. @@ -389,7 +385,7 @@ def get_all_unique_pairs( # Get the threshold for this pair of atoms threshold: float = thresholds[at] - if sphere_overlap: + if overlap: # If the user wants to check for sphere overlaps, we have # to sum the radius of the neighbor to the threshold threshold = threshold + thresholds[neigh_at] @@ -403,13 +399,13 @@ def get_all_unique_pairs( neighs[i_pair, 4] = neigh_isc[2] # Increment the pair index - i_pair = i_pair + 1 + i_pair += 1 # Get the next atom in this bin. Sum 1 to get fortran index. neigh_at = list_array[neigh_at] # Return the array of neighbours, but only the filled part - return np.asarray(neighs[: i_pair + 1]), i_pair + return np.array(neighs[: i_pair]) @cython.boundscheck(False) @@ -454,8 +450,6 @@ def get_close( It is used to allocate the `neighs` array. This is computed in python with the help of the `counts` array constructed by `build_table`. - self_interaction: bool, optional - whether to consider an atom a neighbour of itself. xyz: the cartesian coordinates of all atoms in the geometry. cell: @@ -466,12 +460,6 @@ def get_close( be considered. thresholds: the threshold radius for each atom in the geometry. - sphere_overlap: - If true, two atoms are considered neighbours if their spheres - overlap. - If false, two atoms are considered neighbours if the second atom - is within the sphere of the first atom. Note that this implies that - atom `i` might be atom `j`'s neighbour while the opposite is not true. Returns -------- From ce22990c93813b0f789cf04c865f520916126934 Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Thu, 15 Feb 2024 11:45:54 +0100 Subject: [PATCH 13/17] ran black Signed-off-by: Nick Papior --- src/sisl/geom/_neighbors/_operations.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/sisl/geom/_neighbors/_operations.py b/src/sisl/geom/_neighbors/_operations.py index d14a89ab8b..36b484f8c1 100644 --- a/src/sisl/geom/_neighbors/_operations.py +++ b/src/sisl/geom/_neighbors/_operations.py @@ -1,4 +1,5 @@ """File meant to be compiled with Cython so that operations are much faster.""" + from __future__ import annotations import cython @@ -405,7 +406,7 @@ def get_all_unique_pairs( neigh_at = list_array[neigh_at] # Return the array of neighbours, but only the filled part - return np.array(neighs[: i_pair]) + return np.array(neighs[:i_pair]) @cython.boundscheck(False) From ac59727cd8c372523c27f8f2a1e5b82628ab05ad Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Thu, 15 Feb 2024 11:48:15 +0100 Subject: [PATCH 14/17] fixed sort order Signed-off-by: Nick Papior --- src/sisl/geom/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sisl/geom/__init__.py b/src/sisl/geom/__init__.py index d67b727d0d..33e178f5f0 100644 --- a/src/sisl/geom/__init__.py +++ b/src/sisl/geom/__init__.py @@ -43,12 +43,12 @@ """ from ._composite import * +from ._neighbors import * from .basic import * from .bilayer import * from .category import * from .flat import * from .nanoribbon import * from .nanotube import * -from ._neighbors import * from .special import * from .surfaces import * From 4101a79fc05dbe684aa966f0c1054dd45817aee7 Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Thu, 15 Feb 2024 13:17:14 +0100 Subject: [PATCH 15/17] fixed cmake for neighbor location Signed-off-by: Nick Papior --- CMakeLists.txt | 16 ++++++++++++++-- src/sisl/geom/CMakeLists.txt | 2 +- src/sisl/geom/_neighbors/CMakeLists.txt | 2 +- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d5b15c8fed..bfab99a53b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,7 +26,19 @@ find_package(Python COMPONENTS REQUIRED) # Retrive the cython executable +message(STATUS "Locating Cython executable") find_program(CYTHON_EXECUTABLE "cython" REQUIRED) +execute_process(COMMAND + "${CYTHON_EXECUTABLE}" --version + OUTPUT_QUIET + OUTPUT_VARIABLE CYTHON_VERSION + RESULT_VARIABLE CYTHON_VERSION_ERR + OUTPUT_STRIP_TRAILING_WHITESPACE) +# Print it out +message(STATUS "Cython version information [${CYTHON_EXECUTABLE} --version]: ${CYTHON_VERSION}") +if(CYTHON_VERSION_ERR) + message(SEND_ERROR "ERROR: Cython version detection failed, this might be problematic") +endif() # Define global definitions @@ -287,7 +299,7 @@ function(add_cython_library) if(NOT DEFINED _c_SOURCE) list(GET _c_UNPARSED_ARGUMENTS 0 _c_SOURCE) endif() - + # Parse simple flags if(NOT DEFINED _c_CYTHON_EXECUTABLE) set(_c_CYTHON_EXECUTABLE "${CYTHON_EXECUTABLE}") @@ -617,7 +629,7 @@ function(add_f2py_library) VERBATIM COMMENT "Generating module file from signature ${_f_SIGNATURE}" ) - + python_add_library(${_f_LIBRARY} WITH_SOABI MODULE "${_module_file}" ${_f_SOURCES} ${_wrappers} ) diff --git a/src/sisl/geom/CMakeLists.txt b/src/sisl/geom/CMakeLists.txt index 51d35ada33..25c1ecc0a4 100644 --- a/src/sisl/geom/CMakeLists.txt +++ b/src/sisl/geom/CMakeLists.txt @@ -1 +1 @@ -add_subdirectory("neighs") \ No newline at end of file +add_subdirectory("_neighbors") diff --git a/src/sisl/geom/_neighbors/CMakeLists.txt b/src/sisl/geom/_neighbors/CMakeLists.txt index fa06fa99d8..e48ed38047 100644 --- a/src/sisl/geom/_neighbors/CMakeLists.txt +++ b/src/sisl/geom/_neighbors/CMakeLists.txt @@ -7,5 +7,5 @@ foreach(source _operations) OUTPUT ${source}_C ) install(TARGETS ${source} LIBRARY - DESTINATION ${SKBUILD_PROJECT_NAME}/geom/neighs) + DESTINATION ${SKBUILD_PROJECT_NAME}/geom/_neighbors) endforeach() From 34446a02ef46013667943c5ad46cf395449101b5 Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Fri, 16 Feb 2024 12:20:09 +0100 Subject: [PATCH 16/17] fixed memory problems in the finder In some cases (when the bin-size is big) the maximum pairs it would allow could be quite big. When the bins has many atoms, the error in the actual neighbors can be significant, leading to memory problems. Now the finder uses a maximum of 200MB of initial neighbor lists. This results in incremental re-allocations which is a bit more heavy, but it reduces the chances of running out of memory. One can change the default memory by changing the class variable: Neighbor.memory accordingly. Added it to the documentation. Signed-off-by: Nick Papior --- benchmarks/comparisons/neighbors.py | 90 ++++++++++++++++ docs/api/basic.rst | 2 +- src/sisl/geom/_neighbors/_finder.py | 84 ++++++++++++--- src/sisl/geom/_neighbors/_operations.py | 136 ++++++++++++++++++------ src/sisl/utils/misc.py | 66 ++++++++++++ 5 files changed, 327 insertions(+), 51 deletions(-) create mode 100644 benchmarks/comparisons/neighbors.py diff --git a/benchmarks/comparisons/neighbors.py b/benchmarks/comparisons/neighbors.py new file mode 100644 index 0000000000..70294cd686 --- /dev/null +++ b/benchmarks/comparisons/neighbors.py @@ -0,0 +1,90 @@ +import timeit +from collections import defaultdict + +from ase.neighborlist import neighbor_list + +from sisl.geom import NeighborFinder, fcc, graphene_nanoribbon + + +def quadratic(geom): + neighs = [] + for at in geom: + neighs.append(geom.close(at, R=1.5)) + + +what = "fcc" +times = defaultdict(list) +na = [] + +# Iterate over multiple tiles +for tiles in range(1, 11): + print(tiles) + + if what == "fcc": + geom = fcc(1.5, "C").tile(3 * tiles, 1).tile(3 * tiles, 2).tile(3 * tiles, 0) + elif what == "ribbon": + geom = graphene_nanoribbon(9).tile(tiles, 0) + + na.append(geom.na) + + # Compute with ASE + ase_time = timeit.timeit( + lambda: neighbor_list("ij", geom.to.ase(), 1.5, self_interaction=False), + number=1, + ) + times["ASE"].append(ase_time) + + for bs in [4, 8, 12]: + print(" bs = ", bs) + + # Compute with this implementation (different variants) + my_time = timeit.timeit( + lambda: NeighborFinder(geom, R=1.5, bin_size=bs).find_neighbors( + None, as_pairs=True + ), + number=1, + ) + times[f"(PAIRS) [{bs}]"].append(my_time) + + my_time = timeit.timeit( + lambda: NeighborFinder(geom, R=1.5, bin_size=bs).find_neighbors( + None, as_pairs=False + ), + number=1, + ) + times[f"(NEIGHS) [{bs}]"].append(my_time) + + my_time = timeit.timeit( + lambda: NeighborFinder(geom, R=1.5, bin_size=bs).find_neighbors( + None, as_pairs=False + ), + number=1, + ) + times[f"(NEIGHS) [{bs}]"].append(my_time) + + my_time = timeit.timeit( + lambda: NeighborFinder(geom, R=1.5, bin_size=bs).find_all_unique_pairs(), + number=1, + ) + times[f"(UNIQUE PAIRS) [{bs}]"].append(my_time) + + # Compute with quadratic search + # quadratic_time = timeit.timeit(lambda: quadratic(geom), number=1) + # times["QUADRATIC"].append(quadratic_time) + +# Plotting +import plotly.graph_objs as go + +fig = go.Figure() +for key, time in times.items(): + fig.add_scatter(x=na, y=time, name=key) + +fig.update_layout( + xaxis_title="Number of atoms", + yaxis_title="Time (s)", + title=f"Finding neighbours on {what}", + yaxis_showgrid=True, + xaxis_showgrid=True, +) +fig.write_image(f"neighbors_{what}.png") +fig.show() diff --git a/docs/api/basic.rst b/docs/api/basic.rst index 94eae50f45..f38d2a9754 100644 --- a/docs/api/basic.rst +++ b/docs/api/basic.rst @@ -66,6 +66,6 @@ In particular `oplist` is useful when calculating averages in Brillouin zones (s .. autosummary:: :toctree: generated/ + NeighborFinder oplist ~sisl.utils.PropertyDict - diff --git a/src/sisl/geom/_neighbors/_finder.py b/src/sisl/geom/_neighbors/_finder.py index 2b52a3b17e..3e5fba8678 100644 --- a/src/sisl/geom/_neighbors/_finder.py +++ b/src/sisl/geom/_neighbors/_finder.py @@ -4,6 +4,7 @@ from sisl import Geometry from sisl.typing import AtomsArgument +from sisl.utils import size_to_elements from . import _operations @@ -35,12 +36,23 @@ class NeighborFinder: overlap. If `False`, two atoms are considered neighbors if the second atom is within the sphere of the first atom. Note that this implies that - atom `i` might be atom `j`'s neighbor while the opposite is not true. + atom ``i`` might be atom ``j``'s neighbor while the opposite is not true. If not provided, it will be `True` if `R` is an array and `False` if it is a single float. + bin_size : float, optional + the factor for the radius to determine how large the bins are. + It can minimally be 2, meaning that the maximum radius to consider + is twice the radius considered. For larger values, more atoms will be in + each bin (and thus fewer bins). + Hence, this value can be used to fine-tune the memory requirement by + decreasing number of bins, at the cost of a bit more run-time searching + bins. """ + #: Memory control of the finder + memory: Tuple[str, float] = ("200MB", 1.5) + geometry: Geometry # Geometry actually used for binning. Can be the provided geometry # or a tiled geometry if the search radius is too big (compared to the lattice size). @@ -63,14 +75,16 @@ def __init__( geometry: Geometry, R: Optional[Union[float, np.ndarray]] = None, overlap: bool = False, + bin_size: float = 2, ): - self.setup(geometry, R=R, overlap=overlap) + self.setup(geometry, R=R, overlap=overlap, bin_size=bin_size) def setup( self, geometry: Optional[Geometry] = None, R: Optional[Union[float, np.ndarray]] = None, overlap: bool = None, + bin_size: float = 2, ): """Prepares everything for neighbor finding. @@ -94,7 +108,15 @@ def setup( overlap. If `False`, two atoms are considered neighbors if the second atom is within the sphere of the first atom. Note that this implies that - atom `i` might be atom `j`'s neighbor while the opposite is not true. + atom ``i`` might be atom ``j``'s neighbor while the opposite is not true. + bin_size : + the factor for the radius to determine how large the bins are. + It can minimally be 2, meaning that the maximum radius to consider + is twice the radius considered. For larger values, more atoms will be in + each bin (and thus fewer bins). + Hence, this value can be used to fine-tune the memory requirement by + decreasing number of bins, at the cost of a bit more run-time searching + bins. """ # Set the geometry. Copy it because we may need to modify the supercell size. if geometry is not None: @@ -116,16 +138,24 @@ def setup( # Determine the bin_size as the maximum DIAMETER to ensure that we ALWAYS # only need to look one bin away for neighbors. - bin_size = np.max(self.R) * 2 + max_R = np.max(self.R) + if overlap: + # In the case when we want to check for sphere overlap, the size should + # be twice as big. + max_R *= 2 + + if bin_size < 2: + raise ValueError( + "The bin_size must be larger than 2 to only search in the " + "neighboring bins. Please increase to a value >=2" + ) + + bin_size = max_R * bin_size # Check that the bin size is positive. if bin_size <= 0: raise ValueError( "All R values are 0 or less. Please provide some positive values" ) - if overlap: - # In the case when we want to check for sphere overlap, the size should - # be twice as big. - bin_size *= 2 # We add a small amount to bin_size to avoid ambiguities when # a position is exactly at the center of a bin. @@ -427,6 +457,12 @@ def find_neighbors( max_pairs = at_counts.sum() if not self_interaction: max_pairs -= search_indices.shape[0] + init_pairs = min( + max_pairs, + # 8 bytes per element (int64) + # While this might be wrong on Win, it shouldn't matter + size_to_elements(self.memory[0], 8), + ) # Find the neighbor pairs neighbor_pairs, split_ind = _operations.get_pairs( @@ -435,13 +471,14 @@ def find_neighbors( isc, self._heads, self._list, - max_pairs, self_interaction, self._bins_geometry.xyz, self._bins_geometry.cell, pbc, thresholds, self._overlap, + init_pairs, + self.memory[1], ) # Correct neighbor indices for the case where R was too big and @@ -454,9 +491,9 @@ def find_neighbors( if as_pairs: # Just return the neighbor pairs return neighbor_pairs[: split_ind[-1]] - else: - # Split to get the neighbors of each atom - return np.split(neighbor_pairs[:, 1:], split_ind, axis=0)[:-1] + + # Split to get the neighbors of each atom + return np.split(neighbor_pairs[:, 1:], split_ind, axis=0)[:-1] def find_all_unique_pairs( self, @@ -537,6 +574,12 @@ def find_all_unique_pairs( max_pairs = at_counts.sum() if not self_interaction: max_pairs -= search_indices.shape[0] + init_pairs = min( + max_pairs, + # 8 bytes per element (int64) + # While this might be wrong on Win, it shouldn't matter + size_to_elements(self.memory[0], 8), + ) # Find all unique neighbor pairs neighbor_pairs = _operations.get_all_unique_pairs( @@ -544,13 +587,14 @@ def find_all_unique_pairs( isc, self._heads, self._list, - max_pairs, self_interaction, self.geometry.xyz, self.geometry.cell, pbc, thresholds, self._overlap, + init_pairs, + self.memory[1], ) return neighbor_pairs @@ -603,6 +647,12 @@ def find_close( at_counts = self._get_search_atom_counts(search_indices) max_pairs = at_counts.sum() + init_pairs = min( + max_pairs, + # 8 bytes per element (int64) + # While this might be wrong on Win, it shouldn't matter + size_to_elements(self.memory[0], 8), + ) # Find the neighbor pairs neighbor_pairs, split_ind = _operations.get_close( @@ -611,11 +661,12 @@ def find_close( isc, self._heads, self._list, - max_pairs, self._bins_geometry.xyz, self._bins_geometry.cell, pbc, thresholds, + init_pairs, + self.memory[1], ) # Correct neighbor indices for the case where R was too big and @@ -628,6 +679,5 @@ def find_close( if as_pairs: # Just return the neighbor pairs return neighbor_pairs[: split_ind[-1]] - else: - # Split to get the neighbors of each position - return np.split(neighbor_pairs[:, 1:], split_ind, axis=0)[:-1] + + return np.split(neighbor_pairs[:, 1:], split_ind, axis=0)[:-1] diff --git a/src/sisl/geom/_neighbors/_operations.py b/src/sisl/geom/_neighbors/_operations.py index 36b484f8c1..a166537014 100644 --- a/src/sisl/geom/_neighbors/_operations.py +++ b/src/sisl/geom/_neighbors/_operations.py @@ -41,8 +41,10 @@ def build_table(nbins: cnp.int64_t, bin_indices: cnp.int64_t[:]): list_array_obj = np.zeros(n_atoms, dtype=np.int64) list_array: cnp.int64_t[:] = list_array_obj + counts_obj = np.zeros(nbins, dtype=np.int64) counts: cnp.int64_t[:] = counts_obj + heads_obj = np.full(nbins, -1, dtype=np.int64) heads: cnp.int64_t[:] = heads_obj @@ -72,13 +74,14 @@ def get_pairs( iscs: cnp.int64_t[:, :, :], heads: cnp.int64_t[:], list_array: cnp.int64_t[:], - max_npairs: cnp.int64_t, self_interaction: cython.bint, xyz: cnp.float64_t[:, :], cell: cnp.float64_t[:, :], pbc: cnp.npy_bool[:], thresholds: cnp.float64_t[:], overlap: cython.bint, + init_npairs: cython.size_t, + grow_factor: cnp.float64_t, ): """Gets (possibly duplicated) pairs of neighbour atoms. @@ -103,11 +106,6 @@ def get_pairs( If an item is -1, it means that there are no more atoms in the same bin. This array is constructed by `build_table`. - max_npairs: - The number of maximum pairs that can be found. - It is used to allocate the `neighs` array. This is computed - in python with the help of the `counts` array constructed - by `build_table`. self_interaction: bool, optional whether to consider an atom a neighbour of itself. xyz: @@ -126,6 +124,16 @@ def get_pairs( If false, two atoms are considered neighbours if the second atom is within the sphere of the first atom. Note that this implies that atom `i` might be atom `j`'s neighbour while the opposite is not true. + init_npairs: + The initial number of pairs that can be found. + It is used to allocate the `neighs` array. This is computed + in python with the help of the `counts` array constructed + by `build_table`. + This is not limiting the final size, but is used to pre-allocate + a growing array. + grow_factor: + the grow factor of the size when the neighbor list needs + to grow. Returns -------- @@ -139,16 +147,27 @@ def get_pairs( """ N_ind = at_indices.shape[0] - i_pair: cython.size_t = 0 + ref_xyz: cnp.float64_t[:] = np.empty(3, dtype=np.float64) + neigh_isc: cnp.int64_t[:] = np.empty(3, dtype=np.int64) - ref_xyz: cnp.float64_t[:] = np.zeros(3, dtype=np.float64) - neigh_isc: cnp.int64_t[:] = np.zeros(3, dtype=np.int64) + def grow(): + nonlocal neighs_obj, neighs, grow_factor - neighs_obj: cnp.ndarray = np.zeros([max_npairs, 5], dtype=np.int64) + n: cython.size_t = neighs.shape[0] + new_neighs_obj = np.empty([int(n * grow_factor), 5], dtype=np.int64) + new_neighs_obj[:n, :] = neighs_obj[:, :] + neighs_obj = new_neighs_obj + neighs = neighs_obj + + neighs_obj: cnp.ndarray = np.empty([init_npairs, 5], dtype=np.int64) neighs: cnp.int64_t[:, :] = neighs_obj + split_indices_obj: cnp.ndarray = np.zeros(N_ind, dtype=np.int64) split_indices: cnp.int64_t[:] = split_indices_obj + # Counter for filling neighs + i_pair: cython.size_t = 0 + for search_index in range(N_ind): at = at_indices[search_index] @@ -217,6 +236,10 @@ def get_pairs( threshold = threshold + thresholds[neigh_at] if dist < threshold: + + if i_pair >= neighs.shape[0]: + grow() + # Store the pair of neighbours. neighs[i_pair, 0] = at neighs[i_pair, 1] = neigh_at @@ -233,7 +256,8 @@ def get_pairs( # We have finished this search, store the breakpoint. split_indices[search_index] = i_pair - return neighs_obj, split_indices_obj + # We copy to allow GC to remove the full array + return neighs_obj[:i_pair, :].copy(), split_indices_obj @cython.boundscheck(False) @@ -243,13 +267,14 @@ def get_all_unique_pairs( iscs: cnp.int64_t[:, :, :], heads: cnp.int64_t[:], list_array: cnp.int64_t[:], - max_npairs: cnp.int64_t, self_interaction: cython.bint, xyz: cnp.float64_t[:, :], cell: cnp.float64_t[:, :], pbc: cnp.npy_bool[:], thresholds: cnp.float64_t[:], overlap: cython.bint, + init_npairs: cython.size_t, + grow_factor: cnp.float64_t, ): """Gets all unique pairs of atoms that are neighbours. @@ -271,11 +296,6 @@ def get_all_unique_pairs( If an item is -1, it means that there are no more atoms in the same bin. This array is constructed by `build_table`. - max_npairs: - The number of maximum pairs that can be found. - It is used to allocate the `neighs` array. This is computed - in python with the help of the `counts` array constructed - by `build_table`. self_interaction: bool, optional whether to consider an atom a neighbour of itself. xyz: @@ -294,6 +314,16 @@ def get_all_unique_pairs( If false, two atoms are considered neighbours if the second atom is within the sphere of the first atom. Note that this implies that atom `i` might be atom `j`'s neighbour while the opposite is not true. + init_npairs: + The initial number of pairs that can be found. + It is used to allocate the `neighs` array. This is computed + in python with the help of the `counts` array constructed + by `build_table`. + This is not limiting the final size, but is used to pre-allocate + a growing array. + grow_factor: + the grow factor of the size when the neighbor list needs + to grow. Returns -------- @@ -307,15 +337,29 @@ def get_all_unique_pairs( N_ats = list_array.shape[0] - ref_xyz: cnp.float64_t[:] = np.zeros(3, dtype=np.float64) - neigh_isc: cnp.int64_t[:] = np.zeros(3, dtype=np.int64) + ref_xyz: cnp.float64_t[:] = np.empty(3, dtype=np.float64) + neigh_isc: cnp.int64_t[:] = np.empty(3, dtype=np.int64) + + def grow(): + nonlocal neighs_obj, neighs, grow_factor + + n: cython.size_t = neighs.shape[0] + new_neighs_obj = np.empty([int(n * grow_factor), 5], dtype=np.int64) + new_neighs_obj[:n, :] = neighs_obj[:, :] + neighs_obj = new_neighs_obj + neighs = neighs_obj + + neighs_obj: cnp.ndarray = np.empty([init_npairs, 5], dtype=np.int64) + neighs: cnp.int64_t[:, :] = neighs_obj - neighs: cnp.int64_t[:, :] = np.zeros([max_npairs, 5], dtype=np.int64) # Counter for filling neighs i_pair: cython.size_t = 0 for at in range(N_ats): if self_interaction: + if i_pair >= neighs.shape[0]: + grow() + # Add the self interaction neighs[i_pair, 0] = at neighs[i_pair, 1] = at @@ -392,6 +436,9 @@ def get_all_unique_pairs( threshold = threshold + thresholds[neigh_at] if dist < threshold: + if i_pair >= neighs.shape[0]: + grow() + # Store the pair of neighbours. neighs[i_pair, 0] = at neighs[i_pair, 1] = neigh_at @@ -406,7 +453,8 @@ def get_all_unique_pairs( neigh_at = list_array[neigh_at] # Return the array of neighbours, but only the filled part - return np.array(neighs[:i_pair]) + # We copy to remove the unneeded data-sizes + return neighs_obj[:i_pair].copy() @cython.boundscheck(False) @@ -417,11 +465,12 @@ def get_close( iscs: cnp.int64_t[:, :, :], heads: cnp.int64_t[:], list_array: cnp.int64_t[:], - max_npairs: cnp.int64_t, xyz: cnp.float64_t[:, :], cell: cnp.float64_t[:, :], pbc: cnp.npy_bool[:], thresholds: cnp.float64_t[:], + init_npairs: cython.size_t, + grow_factor: cnp.float64_t, ): """Gets the atoms that are close to given positions @@ -446,11 +495,6 @@ def get_close( If an item is -1, it means that there are no more atoms in the same bin. This array is constructed by `build_table`. - max_npairs: - The number of maximum pairs that can be found. - It is used to allocate the `neighs` array. This is computed - in python with the help of the `counts` array constructed - by `build_table`. xyz: the cartesian coordinates of all atoms in the geometry. cell: @@ -461,6 +505,16 @@ def get_close( be considered. thresholds: the threshold radius for each atom in the geometry. + init_npairs: + The initial number of pairs that can be found. + It is used to allocate the `neighs` array. This is computed + in python with the help of the `counts` array constructed + by `build_table`. + This is not limiting the final size, but is used to pre-allocate + a growing array. + grow_factor: + the grow factor of the size when the neighbor list needs + to grow. Returns -------- @@ -475,13 +529,25 @@ def get_close( N_ind = search_xyz.shape[0] - i_pair: cython.size_t = 0 + ref_xyz: cnp.float64_t[:] = np.empty(3, dtype=np.float64) + neigh_isc: cnp.int64_t[:] = np.empty(3, dtype=np.int64) + + def grow(): + nonlocal neighs_obj, neighs, grow_factor - ref_xyz: cnp.float64_t[:] = np.zeros(3, dtype=np.float64) - neigh_isc: cnp.int64_t[:] = np.zeros(3, dtype=np.int64) + n: cython.size_t = neighs.shape[0] + new_neighs_obj = np.empty([int(n * grow_factor), 5], dtype=np.int64) + new_neighs_obj[:n, :] = neighs_obj[:, :] + neighs_obj = new_neighs_obj + neighs = neighs_obj - neighs: cnp.int64_t[:, :] = np.zeros((max_npairs, 5), dtype=np.int64) - split_indices: cnp.int64_t[:] = np.zeros(N_ind, dtype=np.int64) + neighs_obj: cnp.ndarray = np.empty([init_npairs, 5], dtype=np.int64) + neighs: cnp.int64_t[:, :] = neighs_obj + + split_indices_obj: cnp.ndarray = np.zeros(N_ind, dtype=np.int64) + split_indices: cnp.int64_t[:] = split_indices_obj + + i_pair: cython.size_t = 0 for search_index in range(N_ind): for j in range(8): @@ -539,6 +605,10 @@ def get_close( threshold: float = thresholds[neigh_at] if dist < threshold: + + if i_pair >= neighs.shape[0]: + grow() + # Store the pair of neighbours. neighs[i_pair, 0] = search_index neighs[i_pair, 1] = neigh_at @@ -555,4 +625,4 @@ def get_close( # We have finished this search, store the breakpoint. split_indices[search_index] = i_pair - return np.asarray(neighs), np.asarray(split_indices) + return neighs_obj[:i_pair, :].copy(), split_indices_obj diff --git a/src/sisl/utils/misc.py b/src/sisl/utils/misc.py index cebb03409d..f67c9f68f4 100644 --- a/src/sisl/utils/misc.py +++ b/src/sisl/utils/misc.py @@ -6,14 +6,17 @@ import importlib import inspect import operator as op +import re import sys from math import pi from numbers import Integral +from typing import Union __all__ = ["merge_instances", "str_spec", "direction", "angle"] __all__ += ["iter_shape", "math_eval", "allow_kwargs"] __all__ += ["import_attr", "lazy_import"] __all__ += ["PropertyDict", "NotNonePropertyDict"] +__all__ += ["size_to_num", "size_to_elements"] # supported operators @@ -81,6 +84,69 @@ def merge_instances(*args, **kwargs): return m +def size_to_num(size: Union[int, float, str], unit: str = "MB") -> float: + """Convert a size-specification to a size in a specific `unit` + + Converts the input value (`size`) into a number corresponding to + the `size` converted to the specified `unit`. + + If `size` is passed as an integer (or float), it will be interpreted + as a size in MB. Otherwise the string may contain the size specification. + """ + if isinstance(size, (float, int)): + return size + + # Now parse the size from a string + match = re.match(r"(\d+[.\d]*)(\D*)", size) + size = float(match[1].strip()) + unit_in = match[2].strip() + + # Now parse things + # We expect data to be in MB (default unit) + # Then we can always convert + conv = { + "b": 1 / (1024 * 1024), + "B": 1 / (1024 * 1024), + "k": 1 / 1024, + "kB": 1 / 1024, + "kb": 1 / 1024, + "kB": 1 / 1024, + "mb": 1, + "M": 1, + "MB": 1, + "G": 1024, + "GB": 1024, + "T": 1024 * 1024, + "TB": 1024 * 1024, + } + + if unit_in: + unit_in = conv[unit_in] + else: + unit_in = 1 + + # Convert the requested unit + unit = conv[unit] + + return size * unit_in / unit + + +def size_to_elements(size: Union[int, str], byte_per_elem: int = 8) -> int: + """Calculate the number of elements such that they occupy `size` memory + + Parameters + ---------- + size : + a size specification, either by an integer, or a str. If an integer it is + assumed to be in MB, otherwise the str can hold a unit specification. + byte_per_elem + number of bytes per element when doing the conversion + """ + size = size_to_num(size, unit="B") + + return int(size // byte_per_elem) + + def iter_shape(shape): """Generator for iterating a shape by returning consecutive slices From 7f9b4dc87cd2b16ca5302f28d0aefddb3281f50a Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Fri, 16 Feb 2024 12:58:18 +0100 Subject: [PATCH 17/17] allowed tuple input of bin_size Signed-off-by: Nick Papior --- src/sisl/geom/_neighbors/_finder.py | 33 ++++++++++--------- src/sisl/geom/_neighbors/tests/test_finder.py | 23 +++++++++++++ src/sisl/utils/misc.py | 1 - 3 files changed, 40 insertions(+), 17 deletions(-) diff --git a/src/sisl/geom/_neighbors/_finder.py b/src/sisl/geom/_neighbors/_finder.py index 3e5fba8678..15791687b1 100644 --- a/src/sisl/geom/_neighbors/_finder.py +++ b/src/sisl/geom/_neighbors/_finder.py @@ -40,8 +40,9 @@ class NeighborFinder: If not provided, it will be `True` if `R` is an array and `False` if it is a single float. - bin_size : float, optional - the factor for the radius to determine how large the bins are. + bin_size : float or tuple of float, optional + the factor for the radius to determine how large the bins are, + optionally along each lattice vector. It can minimally be 2, meaning that the maximum radius to consider is twice the radius considered. For larger values, more atoms will be in each bin (and thus fewer bins). @@ -75,7 +76,7 @@ def __init__( geometry: Geometry, R: Optional[Union[float, np.ndarray]] = None, overlap: bool = False, - bin_size: float = 2, + bin_size: Union[float, Tuple[float, float, float]] = 2, ): self.setup(geometry, R=R, overlap=overlap, bin_size=bin_size) @@ -84,7 +85,7 @@ def setup( geometry: Optional[Geometry] = None, R: Optional[Union[float, np.ndarray]] = None, overlap: bool = None, - bin_size: float = 2, + bin_size: Union[float, Tuple[float, float, float]] = 2, ): """Prepares everything for neighbor finding. @@ -110,7 +111,8 @@ def setup( is within the sphere of the first atom. Note that this implies that atom ``i`` might be atom ``j``'s neighbor while the opposite is not true. bin_size : - the factor for the radius to determine how large the bins are. + the factor for the radius to determine how large the bins are, + optionally along each lattice vector. It can minimally be 2, meaning that the maximum radius to consider is twice the radius considered. For larger values, more atoms will be in each bin (and thus fewer bins). @@ -144,27 +146,28 @@ def setup( # be twice as big. max_R *= 2 - if bin_size < 2: + if max_R <= 0: + raise ValueError( + "All R values are 0 or less. Please provide some positive values" + ) + + bin_size = np.asarray(bin_size) + if np.any(bin_size < 2): raise ValueError( "The bin_size must be larger than 2 to only search in the " "neighboring bins. Please increase to a value >=2" ) bin_size = max_R * bin_size - # Check that the bin size is positive. - if bin_size <= 0: - raise ValueError( - "All R values are 0 or less. Please provide some positive values" - ) # We add a small amount to bin_size to avoid ambiguities when # a position is exactly at the center of a bin. - bin_size += 0.01 + bin_size += 0.001 lattice_sizes = self.geometry.length - if bin_size > np.min(lattice_sizes): - self._R_too_big = True + self._R_too_big = np.any(bin_size > lattice_sizes) + if self._R_too_big: # This means that nsc must be at least 5. # We round the amount of cells needed in each direction @@ -188,8 +191,6 @@ def setup( lattice_sizes = self._bins_geometry.length else: - # Nothing to modify, we can use the geometry and bin it as it is. - self._R_too_big = False self._bins_geometry = self.geometry # Get the number of bins along each cell direction. diff --git a/src/sisl/geom/_neighbors/tests/test_finder.py b/src/sisl/geom/_neighbors/tests/test_finder.py index 68801fafa2..67d6892445 100644 --- a/src/sisl/geom/_neighbors/tests/test_finder.py +++ b/src/sisl/geom/_neighbors/tests/test_finder.py @@ -260,3 +260,26 @@ def test_R_too_big(pbc): assert neighs.shape == (len(expected_neighs), 5) assert np.all(neighs == expected_neighs) + + +def test_bin_sizes(): + geom = Geometry([[0, 0, 0], [1, 0, 0]], lattice=[2, 10, 10]) + + # We should have fewer bins along the first lattice vector + n1 = NeighborFinder(geom, R=1.5, bin_size=2) + n2 = NeighborFinder(geom, R=1.5, bin_size=4) + + assert n1.total_nbins > n2.total_nbins + # When the bin is bigger than the unit cell, this situation + # occurs + assert n1.nbins[0] == n2.nbins[0] + assert n1.nbins[1] > n2.nbins[1] + assert n1.nbins[2] > n2.nbins[2] + + # We should have the same number of bins the 2nd and 3rd lattice vectors + n3 = NeighborFinder(geom, R=1.5, bin_size=2) + n4 = NeighborFinder(geom, R=1.5, bin_size=(2, 4, 4)) + + assert n3.nbins[0] == n4.nbins[0] + assert n3.nbins[1] > n4.nbins[1] + assert n3.nbins[2] > n4.nbins[2] diff --git a/src/sisl/utils/misc.py b/src/sisl/utils/misc.py index f67c9f68f4..78dec16935 100644 --- a/src/sisl/utils/misc.py +++ b/src/sisl/utils/misc.py @@ -108,7 +108,6 @@ def size_to_num(size: Union[int, float, str], unit: str = "MB") -> float: "b": 1 / (1024 * 1024), "B": 1 / (1024 * 1024), "k": 1 / 1024, - "kB": 1 / 1024, "kb": 1 / 1024, "kB": 1 / 1024, "mb": 1,