Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

updates to incorporate the matrix chunking method #79

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 12 additions & 29 deletions src/matvis/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from pathlib import Path
from pyuvdata import UVBeam
from pyuvsim import AnalyticBeam, simsetup
from submatrices import get_matrix_sets
from typing import Literal

from matvis import DATA_PATH, HAVE_GPU, coordinates, cpu, simulate_vis
Expand Down Expand Up @@ -78,7 +79,7 @@ def run_profile(
naz=360,
nza=180,
pairs=None,
matsets=None,
matsets=None,
nchunks=1,
source_buffer=1.0,
):
Expand Down Expand Up @@ -114,7 +115,6 @@ def run_profile(
print(f" ANALYTIC-BEAM: {analytic_beam:>7}")
print(f" METHOD: {method:>7}")
print(f" NPAIRS: {len(pairs) if pairs is not None else nants**2:>7}")
#print(f" NPAIRS: {sum(np.array([len(pairs[i][0])*len(pairs[i][1]) for i in range(len(pairs))])) if pairs is not None else nants**2:>7}")
print("---------------------------------")

if gpu:
Expand All @@ -138,7 +138,7 @@ def run_profile(
beam_idx=beam_idx,
matprod_method=f"{'GPU' if gpu else 'CPU'}{method}",
antpairs=pairs,
matsets=matsets,
matsets=matsets,
min_chunks=nchunks,
source_buffer=source_buffer,
)
Expand Down Expand Up @@ -267,28 +267,6 @@ def get_redundancies(bls, ndecimals: int = 2):

return pairs

# Chris: for now I have duplicated the get_redundancies function as a placeholder for generating the matrix sets and just assumed
# 1x1 sub-matrices (the vector method). In the future this will be replaced by a function that takes in a set of sub-matrices provided
# by the user.
def get_matrix_sets(bls, ndecimals: int = 2):
"""Find redundant baselines."""
uvbins = set()
msets = []

# Everything here is in wavelengths
bls = np.round(bls, decimals=ndecimals)
nant = bls.shape[0]

# group redundant baselines
for i in range(nant):
for j in range(i + 1, nant):
u, v = bls[i, j]
if (u, v) not in uvbins and (-u, -v) not in uvbins:
uvbins.add((u, v))
msets.append([np.array([i]), np.array([j]))

return msets


@main.command()
@click.option(
Expand All @@ -315,10 +293,15 @@ def hera_profile(hex_num, nside, keep_ants, outriggers, **kwargs):

bls = antpos[np.newaxis, :, :2] - antpos[:, np.newaxis, :2]
pairs = np.array(get_redundancies(bls.value))
matsets = list(get_matrix_sets(bls.value))
#pairs = list(get_redundancies(bls.value))

run_profile(nsource=12 * nside**2, nants=antpos.shape[0], pairs=pairs, matsets=matsets, **kwargs)
matsets = list(get_matrix_sets(bls.value))

run_profile(
nsource=12 * nside**2,
nants=antpos.shape[0],
pairs=pairs,
matsets=matsets,
**kwargs,
)


def get_line_based_stats(lstats) -> tuple[dict, float]:
Expand Down
37 changes: 27 additions & 10 deletions src/matvis/core/matprod.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ class MatProd(ABC):
Number of antennas.
antpairs
The antenna pairs to sum over. If None, all pairs are used.
matsets
The list of sub-matrices to calculate. If None, all pairs are used in a single
matrix multiplication.
matsets
The list of sub-matrices to calculate. If None, all pairs are used in a single
matrix multiplication.
precision
The precision of the data (1 or 2).
"""
Expand All @@ -39,13 +39,7 @@ def __init__(
if antpairs is None:
self.all_pairs = True
self.antpairs = np.array([(i, j) for i in range(nant) for j in range(nant)])
self.matsets = list(
[
(np.array([i]), np.array([j]))
for i in range(nant)
for j in range(nant)
]
)
self.matsets = None
else:
self.all_pairs = False
self.antpairs = antpairs
Expand All @@ -72,10 +66,33 @@ def allocate_vis(self):
(self.nchunks, self.npairs, self.nfeed, self.nfeed), 0.0, dtype=self.ctype
)

def check_antpairs_in_matsets(self):
"""Check that all non-redundant ant pairs are included in set of sub-matrices.

If using the CPUMatChunk method, make sure that all non-redudant antpairs are included somewhere
in the set of sub-matrices to be calculated. Otherwise, throw an exception.
"""
antpairs_set = set()
matsets_set = set()

for _, (ai, aj) in enumerate(self.antpairs):
antpairs_set.add((ai, aj))
Comment on lines +88 to +89
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you could simply do this as antpairs_set = set(self.antpairs)


for _, (ai, aj) in enumerate(self.matsets):
for i in range(len(ai)):
for j in range(len(aj)):
matsets_set.add((ai[i], aj[j]))

if not antpairs_set.issubset(matsets_set):
raise Exception("Some non-redundant pairs are missing from sub-matrices. ")

def setup(self):
"""Setup the memory for the object."""
self.allocate_vis()

if self.matsets is not None:
self.check_antpairs_in_matsets()

@abstractmethod
def compute(self, z: np.ndarray, out: np.ndarray):
"""
Expand Down
44 changes: 17 additions & 27 deletions src/matvis/cpu/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,13 @@ def simulate(
Either a 2D array, shape ``(Npairs, 2)``, or list of 2-tuples of ints, with
the list of antenna-pairs to return as visibilities (all feed-pairs are always
calculated). If None, all feed-pairs are returned.
matsets :: array_like, optional
A list of containing a set of 2-tuples of numpy arrays. Each entry in the list
corresponds to a different sub-matrix. The first element of the ith tuple lists the rows of Z corresponding
to the rows of the ith sub-matrix, and the second tuple element contains the list of columns. In the case
of vector-vector loop, the number of tuples is equal to Npairs and each element of the tuple contains only
one int, such that the sub-matrices contain only one element each. Outer dimension has to be a list because
numpy doesn't like inhomogeneous arrays.
matsets : array_like, optional
A list of containing a set of 2-tuples of numpy arrays. Each entry in the list
corresponds to a different sub-matrix. The first element of the ith tuple lists the rows of Z corresponding
to the rows of the ith sub-matrix, and the second tuple element contains the list of columns. In the case
of vector-vector loop, the number of tuples is equal to Npairs and each element of the tuple contains only
one int, such that the sub-matrices contain only one element each. Outer dimension has to be a list because
numpy doesn't like inhomogeneous arrays.
precision : int, optional
Which precision level to use for floats and complex numbers.
Allowed values:
Expand All @@ -107,27 +107,17 @@ def simulate(
max_progress_reports : int, optional
Maximum number of progress reports to print to the screen (if logging level
allows). Default is 100.
#matprod_method : str, optional
# The method to use for the final matrix multiplication. Default is 'CPUMatMul',
# which simply uses `np.dot` over the two full matrices. Currently, the other
# option is `CPUVectorLoop`, which uses a loop over the antenna pairs,
# computing the sum over sources as a vector dot product.
# Whether to calculate visibilities for each antpair in antpairs as a vector
# dot-product instead of using a full matrix-matrix multiplication for all
# possible pairs. Default is False. Setting to True can be faster for large
# arrays where `antpairs` is small (possibly from high redundancy). You should
# run a performance test before using this.
matprod_method : str, optional
The method to use for the final matrix multiplication. Default is 'CPUMatMul',
matprod_method : str, optional
The method to use for the final matrix multiplication. Default is 'CPUMatMul',
which simply uses `np.dot` over the two full matrices. The second option is
`CPUVectorLoop`, which uses a loop over the antenna pairs, computing the sum
over sources as a vector dot product. The third option is ``CPUMatChunk'', which
divides the product into several matrix multiplications that are optimized to have the
highest possible density of non-redundant pairs without invoking the overhead of a large
for loop over vector products. For very large arrays, with very high redundancy, CPUVectorLoop
is usually fastest, while for cases with low redundancy, 'CPUMatMul' is fastest. The ``CPUMatChunk''
can sometimes be fastest for intermediate levels of redundancy and intermediately sized arrays. You should
run a performance test before choosing one of these options.
`CPUVectorLoop`, which uses a loop over the antenna pairs, computing the sum
over sources as a vector dot product. The third option is `CPUMatChunk`, which
divides the product into several matrix multiplications that are optimized to have the
highest possible density of non-redundant pairs without invoking the overhead of a large
for loop over vector products. For very large arrays, with very high redundancy, CPUVectorLoop
is usually fastest, while for cases with low redundancy, `CPUMatMul` is fastest. The `CPUMatChunk`
can sometimes be fastest for intermediate levels of redundancy and intermediately sized arrays. You should
run a performance test before choosing one of these options.
max_memory : int, optional
The maximum memory (in bytes) to use for the visibility calculation. This is
not a hard-set limit, but rather a guideline for how much memory to use. If the
Expand Down
7 changes: 2 additions & 5 deletions src/matvis/cpu/matprod.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ def compute(self, z: np.ndarray, out: np.ndarray) -> np.ndarray:


class CPUMatChunk(MatProd):
steven-murray marked this conversation as resolved.
Show resolved Hide resolved
"""Loop over a small set of sub-matrix products which collectively contain all nont-redundant pairs."""

def compute(self, z: np.ndarray, out: np.ndarray) -> np.ndarray:
"""Perform the source-summing operation for a single time and chunk.

Expand All @@ -63,7 +65,6 @@ def compute(self, z: np.ndarray, out: np.ndarray) -> np.ndarray:
out
Output array, shaped as (Nfeed, Nfeed, Npairs).
"""

z = z.reshape((self.nant, self.nfeed, -1))

mat_product = np.zeros(
Expand All @@ -75,10 +76,6 @@ def compute(self, z: np.ndarray, out: np.ndarray) -> np.ndarray:
for k in range(self.nfeed):
for i, (ai, aj) in enumerate(self.matsets):
AI, AJ = np.meshgrid(ai, aj)
# print(ai)
# print(aj)
# print(AI)
# print(AJ)
mat_product[AI, AJ, j, k] = z[ai[:], j].conj().dot(z[aj[:], k].T).T

# Now, we need to identify the non-redundant pairs and put them into the final output array
Expand Down
22 changes: 22 additions & 0 deletions src/matvis/submatrices.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""Module containing several options for computing sub-matrices for the MatChunk method."""
import numpy as np


def get_matrix_sets(bls, ndecimals: int = 2):
"""Find redundant baselines."""
uvbins = set()
msets = []

# Everything here is in wavelengths
bls = np.round(bls, decimals=ndecimals)
nant = bls.shape[0]

# group redundant baselines
for i in range(nant):
for j in range(i + 1, nant):
u, v = bls[i, j]
if (u, v) not in uvbins and (-u, -v) not in uvbins:
uvbins.add((u, v))
msets.append([np.array([i]), np.array([j])])

return msets
1 change: 0 additions & 1 deletion src/matvis/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,6 @@ def simulate_vis(
]

npairs = len(antpairs) if antpairs is not None else nants * nants
# npairs = sum(np.array([len(antpairs[i][0])*len(antpairs[i][1]) for i in range(len(antpairs))])) if antpairs is not None else nants * nants
if polarized:
vis = np.zeros(
(freqs.size, lsts.size, npairs, nfeeds, nfeeds), dtype=complex_dtype
Expand Down
Loading