Skip to content

Commit

Permalink
smallish refactor of ufunc handling
Browse files Browse the repository at this point in the history
This should fix lots of corner cases of ufuncs
handling stuff unexpectedly.

- added _rows_and_cols which returns a DOK format
  of the rows and columns
- added documentation about creating a sparse matrix
  from data, indices and pointers.
- much faster creation of diagonal matrices
  And handles correctly offsets, out-of-bounds
  will be removed from the SparseCSR
- added SparseCSR.toarray
- added SparseCSR.__array__ to allow numpy handling
- fixed SparseCSR dtype handling in ufuncs.
  The ufuncs result-type handling will be used.
  In some corner cases could the casting be wrong, now
  the dtype is explicitly created from the ufunc.resolve_dtypes
- added more SparseCSR tests which should cover cases
  of non-sorted, sorted, and non-finalized matrices.
- major clean-up of SparseCSR tests, removed setup and instantiated
  specific matrix fixtures.
- lots of minor bug-fixes related to in-place operations

Fixes #661

Signed-off-by: Nick Papior <[email protected]>
  • Loading branch information
zerothi committed Dec 20, 2023
1 parent 0bc5d5e commit 7cf88f1
Show file tree
Hide file tree
Showing 10 changed files with 506 additions and 329 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ we hit release version 1.0.0.
## [0.14.4] - YYYY-MM-DD

### Added
- `SparseCSR.toarray` to comply with array handling (equivalent to `todense`)
- enabled `Grid.to|new` with the most basic stuff
str|Path|Grid|pyamg
- `Shape.translate`, to easily translate entire shape constructs, #655
Expand All @@ -18,6 +19,11 @@ we hit release version 1.0.0.
- added `offset` argument in `Geometry.add_vacuum` to enable shifting atomic coordinates

### Fixed
- `SparseCSR` ufunc handling, in some corner cases could the dtype casting do things
wrongly.
- fixed corner cases where the `SparseCSR.diags(offsets=)` would add elements
in non-existing elements
- some cases of writing orthogonal matrices to TSHS/nc file formats #661
- `BDOS` from TBtrans calculations now returns the full DOS of all (Bloch-expanded)
atoms
- `Lattice` objects now issues a warning when created with 0-length vectors
Expand All @@ -30,6 +36,8 @@ we hit release version 1.0.0.

### Changed
- `vacuum` is now an optional parameter for all ribbon structures
- enabled `array_fill_repeat` with custom axis, to tile along specific
dimensions


## [0.14.3] - 2023-11-07
Expand Down
26 changes: 13 additions & 13 deletions src/sisl/_help.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,32 +36,32 @@
from xml.etree.ElementTree import parse as xml_parse


def array_fill_repeat(array, size, cls=None):
def array_fill_repeat(array, size, axis=-1, cls=None):
"""
This repeats an array along the zeroth axis until it
has size `size`. Note that initial size of `array` has
to be an integer part of `size`.
"""
array = np.asarray(array, dtype=cls)
try:
reps = size // len(array)
reps = size // array.shape[axis]
except Exception:
array = [array]
reps = size // len(array)
if size % len(array) != 0:
# likely a scalar
array = np.expand_dims(array, axis=0)
reps = size // array.shape[axis]
if size % array.shape[axis] != 0:
# We do not have it correctly formatted (either an integer
# repeatable part, full, or a single)
raise ValueError(
"Repetition of or array is not divisible with actual length. "
"Hence we cannot create a repeated size."
)
if cls is None:
if reps > 1:
return np.tile(array, reps)
return array
else:
if reps > 1:
return np.tile(np.array(array, dtype=cls), reps)
return np.array(array, dtype=cls)

if reps > 1:
tile_reps = list(1 for _ in array.shape)
tile_reps[axis] = reps
return np.tile(array, tile_reps)
return array


@set_module("sisl")
Expand Down
3 changes: 2 additions & 1 deletion src/sisl/_sparse.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -251,12 +251,13 @@ def _sparse_dense(shape,

cdef Py_ssize_t nr = ncol.shape[0]
cdef V = np.zeros(shape, dtype=dtype)
cdef VV = V[:, ::1]
cdef Py_ssize_t r, ind, ix, s2

s2 = shape[2]
for r in range(nr):
for ind in range(ptr[r], ptr[r] + ncol[r]):
for ix in range(s2):
V[r, col[ind], ix] += D[ind, ix]
VV[r, col[ind], ix] += D[ind, ix]

return V
2 changes: 1 addition & 1 deletion src/sisl/geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ def _sanitize_atoms(self, atoms) -> ndarray:
atoms = _a.asarray(atoms)
if atoms.size == 0:
return _a.asarrayl([])
elif atoms.dtype == bool_:
if atoms.dtype == bool_:
return atoms.nonzero()[0]
return atoms

Expand Down
23 changes: 8 additions & 15 deletions src/sisl/io/siesta/_help.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import numpy as np

import sisl._array as _a
from sisl.sparse import _rows_and_cols
from sisl.messages import warn
from sisl import SislError

try:
Expand All @@ -25,21 +27,12 @@ def _ensure_diagonal(csr):
since missing items will be set to 0 which should be 1 in
non-orthogonal basis sets.
"""
# Create index arrays
row = (csr.ncol > 0).nonzero()[0]
row = np.repeat(row, csr.ncol[row])
# Ensure we only take those elements that are in play (i.e. this
# will work regardless of the csr being finalized or not)
col = csr.col[_a.array_arange(csr.ptr[:-1], n=csr.ncol, dtype=np.int32)]

# figure out where they are the same (diagonals)
present_diags = row[row == col]

# Now figure out which elements are missing
missing_diags = np.delete(np.arange(csr.shape[0]), present_diags)

for row in missing_diags:
csr[row, row] = 0.0
old_nnz = csr.nnz
csr += csr.diags(0, dim=1)
if csr.nnz != old_nnz:
warn(
"ensuring the sparse matrix having diagonal elements changed the sparsity pattern."
)


def _csr_from_siesta(geom, csr):
Expand Down
27 changes: 17 additions & 10 deletions src/sisl/io/siesta/binaries.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,9 +403,11 @@ def write_hamiltonian(self, H, **kwargs):
"a zero element sparse matrix!"
)

sort = kwargs.get("sort", True)

# Convert to siesta CSR
_csr_to_siesta(H.geometry, csr)
csr.finalize(sort=kwargs.get("sort", True))
_csr_to_siesta(H.geometry, csr, diag=True)
csr.finalize(sort=sort)
_mat_spin_convert(csr, H.spin)

# Extract the data to pass to the fortran routine
Expand All @@ -414,17 +416,22 @@ def write_hamiltonian(self, H, **kwargs):

# Get H and S
if H.orthogonal:
h = csr._D
s = csr.diags(1.0, dim=1)
# Ensure all data is correctly formatted (i.e. have the same sparsity pattern)
s.align(csr)
s.finalize(sort=kwargs.get("sort", True))
if s.nnz != len(h):
s = csr.copy(dims=0)
s.empty(keep_nnz=True)
s += s.diags(1)
missing_diags = s.nnz - csr.nnz
if missing_diags:
# This should never happen as _csr_to_siesta should ensure
# that the diagonal entries always exists.
# Hence it gets changed before writing.
# Not compeletely optimal, but for now this is OK
raise SislError(
f"{self.__class__.__name__}.write_hamiltonian "
"The diagonal elements of your orthogonal Hamiltonian "
"have not been defined, this is a requirement."
f"have not been defined. Got {len(csr) - missing_diags} elements, expected {len(csr)}."
)
s = s._D[:, 0]
h = csr._D
s = s._D
else:
h = csr._D[:, : H.S_idx]
s = csr._D[:, H.S_idx]
Expand Down
6 changes: 1 addition & 5 deletions src/sisl/io/siesta/tests/test_siesta.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,7 @@ def test_nc_multiple_checks(sisl_tmp, sisl_system, sort):
shuffle(edges)
DM[io, edges] = 2.0

if not sort:
with pytest.raises(ValueError):
DM.write(sile, sort=sort)
else:
DM.write(sile, sort=sort)
DM.write(sile, sort=sort)


def test_nc_overlap(sisl_tmp, sisl_system):
Expand Down
Loading

0 comments on commit 7cf88f1

Please sign in to comment.