Skip to content

Commit

Permalink
Implemented ReconstructLri3D operator, refs #ARRUS-126
Browse files Browse the repository at this point in the history
Note: the implementation of the operator is specific for the Vermon matrix probe.
  • Loading branch information
pjarosik committed Feb 14, 2022
1 parent 02d45b3 commit c6ecb45
Show file tree
Hide file tree
Showing 2 changed files with 356 additions and 0 deletions.
206 changes: 206 additions & 0 deletions api/python/arrus/utils/imaging.py
Original file line number Diff line number Diff line change
Expand Up @@ -2152,3 +2152,209 @@ def process(self, data):
.reshape(self._n_frames, -1) \
.copy()


class ReconstructLri3D(Operation):
"""
Rx beamforming for synthetic aperture imaging for matrix array.
tx_foc, tx_ang_zx, tx_ang_zy: arrays
Expected input data shape: batch_size, n_emissions, n_rx_x, n_rx_y, n_samples
:param x_grid: output image grid points (OX coordinates)
:param y_grid: output image grid points (OY coordinates)
:param z_grid: output image grid points (OZ coordinates)
:param rx_tang_limits: RX apodization angle limits (given as the tangent of the angle), \
a pair of values (min, max). If not provided or None, [-0.5, 0.5] range will be used
"""

def __init__(self, x_grid, y_grid, z_grid, tx_foc, tx_ang_zx, tx_ang_zy,
tx_ap_cent_x, tx_ap_cent_y,
speed_of_sound, rx_tang_limits=None):
self.tx_ap_cent_y = tx_ap_cent_y
self.tx_ap_cent_x = tx_ap_cent_x
self.tx_ang_zy = tx_ang_zy
self.tx_ang_zx = tx_ang_zx
self.tx_foc = tx_foc
self.x_grid = x_grid
self.y_grid = y_grid
self.z_grid = z_grid
self.speed_of_sound = speed_of_sound
import cupy as cp
self.num_pkg = cp
self.rx_tang_limits = rx_tang_limits

def set_pkgs(self, num_pkg, **kwargs):
if num_pkg is np:
raise ValueError("ReconstructLri3D operation is implemented for GPU only.")

def prepare(self, const_metadata):
import cupy as cp

current_dir = os.path.dirname(os.path.join(os.path.abspath(__file__)))
_kernel_source = Path(os.path.join(current_dir, "iq_raw_2_lri_3d.cu")).read_text()
self._kernel_module = self.num_pkg.RawModule(code=_kernel_source)
self._kernel_module.compile()
self._kernel = self._kernel_module.get_function("iqRaw2Lri3D")

# INPUT PARAMETERS.
# Input data shape.
self.n_seq, self.n_tx, self.n_rx_y, self.n_rx_x, self.n_samples = const_metadata.input_shape

seq = const_metadata.context.raw_sequence
# TODO note: we assume here that a single TX/RX has the below properties
# the same for each TX/RX. Validation is missing here.
ref_tx_rx = seq.ops[0]
ref_tx = ref_tx_rx.tx
ref_rx = ref_tx_rx.rx
probe_model = const_metadata.context.device.probe.model
acq_fs = (const_metadata.context.device.sampling_frequency / ref_rx.downsampling_factor)
start_sample = ref_rx.sample_range[0]

self.y_size = len(self.y_grid)
self.x_size = len(self.x_grid)
self.z_size = len(self.z_grid)
output_shape = (self.n_seq, self.n_tx, self.y_size, self.x_size, self.z_size)
self.output_buffer = self.num_pkg.zeros(output_shape, dtype=self.num_pkg.complex64)
x_block_size = min(self.z_size, 8)
y_block_size = min(self.x_size, 8)
z_block_size = min(self.y_size, 8)
self.block_size = (x_block_size, y_block_size, z_block_size)
self.grid_size = (int((self.z_size-1)//z_block_size + 1),
int((self.x_size-1)//x_block_size + 1),
int((self.y_size-1)//y_block_size + 1))

self.y_pix = self.num_pkg.asarray(self.y_grid, dtype=self.num_pkg.float32)
self.x_pix = self.num_pkg.asarray(self.x_grid, dtype=self.num_pkg.float32)
self.z_pix = self.num_pkg.asarray(self.z_grid, dtype=self.num_pkg.float32)

# System and transmit properties.
self.sos = self.num_pkg.float32(self.speed_of_sound)
self.fs = self.num_pkg.float32(const_metadata.data_description.sampling_frequency)
self.fn = self.num_pkg.float32(ref_tx.excitation.center_frequency)
self.tx_foc = self.num_pkg.asarray(self.tx_foc).astype(np.float32)
self.tx_ang_zx = self.num_pkg.asarray(self.tx_ang_zx).astype(np.float32)
self.tx_ang_zy = self.num_pkg.asarray(self.tx_ang_zy).astype(np.float32)

# Could be determined automatically based on
self.tx_ap_cent_x = self.num_pkg.asarray(self.tx_ap_cent_x).astype(np.float32)
self.tx_ap_cent_y = self.num_pkg.asarray(self.tx_ap_cent_y).astype(np.float32)

# Probe description
# TODO specific for Vermon mat-3d probe.
probe_model = const_metadata.context.device.probe.model
pitch = 0.3e-3 # probe_model.pitch
self.n_elements = 32
# General regular position of elements
element_pos = np.arange(-(self.n_elements - 1)/2, self.n_elements/2)
element_pos_x = element_pos*pitch
element_pos_y = np.zeros_like(element_pos)
# E.g. rows
# 0:8 -> 0:8
# 8:16 -> 9:17 (+ 1 offset)
# 16:24 -> 18:26 (+2 offset)
# 24:32 -> 27:35 (+ 3 offset)
for i in range(4):
element_pos_y[i*8:(i+1)*8] = (element_pos[i*8:(i+1)*8]+i)*pitch

element_pos_x = element_pos_x.astype(np.float32)
element_pos_y = element_pos_y.astype(np.float32)
# Put the data into GPU constant memory.
device_props = cp.cuda.runtime.getDeviceProperties(0)
if device_props["totalConstMem"] < 256*2*4: # 2 float32 arrays, 256 elements max
raise ValueError("There is not enough constant memory available!")
x_elem = np.asarray(element_pos_x, dtype=self.num_pkg.float32)
self._x_elem_const = _get_const_memory_array(
self._kernel_module, name="xElemConst", input_array=x_elem)
y_elem = np.asarray(element_pos_y, dtype=self.num_pkg.float32)
self._y_elem_const = _get_const_memory_array(
self._kernel_module, name="yElemConst", input_array=y_elem)

def get_min_max_x_y(aperture):
cords = np.argwhere(aperture)
y, x = zip(*cords)
return np.min(x), np.max(x), np.min(y), np.max(y)

# TODO assumption, that probe has the same number elements in both dimensions
apertures = (tx_rx.tx.aperture.reshape((self.n_elements, self.n_elements)) for tx_rx in seq.ops)
min_max_x_y = (get_min_max_x_y(aperture) for aperture in apertures)
min_x, max_x, min_y, max_y = zip(*min_max_x_y)
min_x, max_x = np.atleast_1d(min_x), np.atleast_1d(max_x)
min_y, max_y = np.atleast_1d(min_y), np.atleast_1d(max_y)
self.tx_ap_first_elem_x = self.num_pkg.asarray(min_x, dtype=self.num_pkg.int32)
self.tx_ap_last_elem_x = self.num_pkg.asarray(max_x, dtype=self.num_pkg.int32)
self.tx_ap_first_elem_y = self.num_pkg.asarray(min_y, dtype=self.num_pkg.int32)
self.tx_ap_last_elem_y = self.num_pkg.asarray(max_y, dtype=self.num_pkg.int32)

# Find the tx_center_delay
# 1. Find the position of the center.
ap_center_x = (element_pos_x[min_x] + element_pos_x[max_x])/2
ap_center_y = (element_pos_y[min_y] + element_pos_y[max_y])/2
ap_center_elem_x = np.interp(ap_center_x, element_pos_x, np.arange(len(element_pos_x)))
ap_center_elem_y = np.interp(ap_center_y, element_pos_y, np.arange(len(element_pos_y)))
ap_center_elem_x = np.round(ap_center_elem_x).astype(np.int32)
ap_center_elem_y = np.round(ap_center_elem_y).astype(np.int32)
# Make sure, that for all TX/RXs:
# The center element is in the aperture
# All TX/RX have (almost) the same delay in the aperture's center
tx_center_delay = None
for i, tx_rx in enumerate(seq.ops):
tx = tx_rx.tx
tx_center_x = ap_center_elem_x[i]
tx_center_y = ap_center_elem_y[i]
aperture = tx.aperture.reshape((self.n_elements, self.n_elements))
# TODO the below will work only for full aperture transmits
delays = tx.delays.reshape((self.n_elements, self.n_elements))
if not aperture[tx_center_y, tx_center_x]:
# The aperture's center should transmit signal
raise ValueError("TX aperture center should be turned on.")
if tx_center_delay is None:
tx_center_delay = delays[tx_center_y, tx_center_x]
else:
# Make sure that the center' delays is the same for all TX/RXs
current_center_delay = delays[tx_center_y, tx_center_x]
if not np.isclose(tx_center_delay, current_center_delay):
raise ValueError(f"TX/RX {i}: center delay is not close "
f"to the center delay of other TX/RXs."
f"Assumed that the center element is:"
f"({tx_center_y, tx_center_x}). "
f"Center delays should be equalized "
f"for all TX/RXs. ")

# Min/max tang
if self.rx_tang_limits is not None:
self.min_tang, self.max_tang = self.rx_tang_limits
else:
# Default:
self.min_tang, self.max_tang = -0.5, 0.5
self.min_tang = self.num_pkg.float32(self.min_tang)
self.max_tang = self.num_pkg.float32(self.max_tang)
burst_factor = ref_tx.excitation.n_periods / (2*self.fn)
self.initial_delay = -start_sample/65e6+burst_factor+tx_center_delay
self.initial_delay = self.num_pkg.float32(self.initial_delay)

self.rx_apod = scipy.signal.windows.hamming(20).astype(np.float32)
self.rx_apod = self.num_pkg.asarray(self.rx_apod)
self.n_rx_apod = self.num_pkg.int32(len(self.rx_apod))

return const_metadata.copy(input_shape=output_shape)

def process(self, data):
data = self.num_pkg.ascontiguousarray(data)
params = (
self.output_buffer, data,
self.n_seq, self.n_tx, self.n_rx_y, self.n_rx_x, self.n_samples,
self.z_pix, self.z_size,
self.x_pix, self.x_size,
self.y_pix, self.y_size,
self.sos, self.fs, self.fn,
self.tx_foc, self.tx_ang_zx, self.tx_ang_zy,
self.tx_ap_cent_x, self.tx_ap_cent_y,
self.tx_ap_first_elem_x, self.tx_ap_last_elem_x,
self.tx_ap_first_elem_y, self.tx_ap_last_elem_y,
self.min_tang, self.max_tang,
self.min_tang, self.max_tang,
self.initial_delay,
self.rx_apod, self.n_rx_apod
)
self._kernel(self.grid_size, self.block_size, params)
return self.output_buffer
150 changes: 150 additions & 0 deletions api/python/arrus/utils/iq_raw_2_lri_3d.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
#include <cupy/complex.cuh>
#define M_PI 3.14159265358979

__constant__ float xElemConst[256];
__constant__ float yElemConst[256];

__forceinline__ __device__ float ownHypotf(float z, float x)
{
return sqrtf(z*z + x*x);
}

__forceinline__ __device__ float ownHypotf(float z, float x, float y)
{
return sqrtf(z*z + x*x + y*y);
}

__forceinline__ __device__ complex<float> interpLinear(const complex<float> *input, float sample) {
float interpWgh = modff(sample, &sample);
int intSample = int(sample);
return input[intSample]*(1 - interpWgh) + input[intSample+1]*interpWgh;
}

__forceinline__ __device__ float interpLinearNormalized(const float *input, int inputSize, float sample) {
// Normalized => sample == 0 means input[0], sample == 1 means input[inputSize-1]
sample = sample*(inputSize-1);
float interpWgh = modff(sample, &sample);
int intSample = int(sample);
return input[intSample]*(1-interpWgh) + input[intSample+1]*interpWgh;
}

extern "C"
__global__ void iqRaw2Lri3D(complex<float> *iqLri, const complex<float> *input,
const int nSeq, const int nTx, const int nElemY, const int nElemX, const int nSamp,
const float *zPix, const int nZPix,
const float *xPix, const int nXPix,
const float *yPix, const int nYPix,
const float sos, const float fs, const float fn,
const float *txFoc, const float *txAngZX, const float *txAngZY,
const float *txApCentX, const float *txApCentY,
const int *txApFstElemX, const int *txApLstElemX,
const int *txApFstElemY, const int *txApLstElemY,
const float minRxTangZX, const float maxRxTangZX,
const float minRxTangZY, const float maxRxTangZY,
const float initDel,
const float *rxApod, const int nRxApod) {

int z = blockIdx.x * blockDim.x + threadIdx.x;
int x = blockIdx.y * blockDim.y + threadIdx.y;
int y = blockIdx.z * blockDim.z + threadIdx.z;
if (z >= nZPix || x >= nXPix || y >= nYPix) {
return;
}
unsigned txOffset, offset;
float txAngZn, txAngAz;
float txDist, rxDist, rxTang, txApod, rxApodX, rxApodY, time, iSamp;
float modSin, modCos, pixWgh;
complex<float> samp, pix;
complex<float> modFactor;
const float omega = 2 * M_PI * fn;
const float sosInv = 1 / sos;
const float zDistInv = 1 / zPix[z];
const float rngRxTangZXInv = 1 / (maxRxTangZX - minRxTangZX); // inverted zx-tangent range TODO parameter?
const float rngRxTangZYInv = 1 / (maxRxTangZY - minRxTangZY); // inverted zy-tangent range TODO parameter?
iqLri[z + x*nZPix + y*nZPix*nXPix] = 0;

for (int iTx=0; iTx < nTx; ++iTx) {
// Plane inclinations -> spherical coordinates
txAngZn = atanf(ownHypotf(tanf(txAngZX[iTx]), tanf(txAngZY[iTx]))); // Zenith
txAngAz = atan2f(tanf(txAngZY[iTx]), tanf(txAngZX[iTx])); // Azimuth
txOffset = iTx*nSamp*nElemX*nElemY; // TODO number of sequences in transmit?

if (!isinf(txFoc[iTx])) {
/* STA */
float zFoc = txFoc[iTx] * cosf(txAngZn);
float xFoc = txFoc[iTx] * sinf(txAngZn) * cosf(txAngAz) + txApCentX[iTx];
float yFoc = txFoc[iTx] * sinf(txAngZn) * sinf(txAngAz) + txApCentY[iTx];

float pixFocArrang;

if (txFoc[iTx] <= 0.f) {
/* Virtual Point Source BEHIND probe surface */
// Valid pixels are assumed to be always in front of the focal point (VSP)
pixFocArrang = 1.f;
}
else {
/* Virtual Point Source IN FRONT OF probe surface */
// Projection of the Foc-Pix vector on the ApCent-Foc vector (dot product) ...
// to determine if the pixel is behind (-) or in front of (+) the focal point (VSP).
pixFocArrang = (((zPix[z]-zFoc)* zFoc + (xPix[x]-xFoc)*(xFoc-txApCentX[iTx]) + (yPix[x]-yFoc)*(yFoc-txApCentY[iTx])) >= 0.f) ? 1.f : -1.f;
}
txDist = ownHypotf(zPix[z]-zFoc, xPix[x]-xFoc, yPix[y]-yFoc);
txDist *= pixFocArrang; // Compensation for the Pix-Foc arrangement
// txFoc is an artificial time, that actually does not happen
txDist += txFoc[iTx]; // Compensation for the reference time being the moment when txApCent fires.

// Projections of Foc-Pix vector on the rotated Foc-ApEdge vectors (dot products) ...
// to determine if the pixel is in the sonified area (dot product >= 0).
// Foc-ApEdgeFst vector is rotated left, Foc-ApEdgeLst vector is rotated right.
txApod = (
((-(zPix[z] - zFoc)*(xElemConst[txApFstElemX[iTx]] - xFoc) - (xPix[x] - xFoc)*zFoc) * pixFocArrang >= 0.f) &&
( ((zPix[z] - zFoc)*(xElemConst[txApLstElemX[iTx]] - xFoc) + (xPix[x] - xFoc)*zFoc) * pixFocArrang >= 0.f) &&
((-(zPix[z] - zFoc)*(yElemConst[txApFstElemY[iTx]] - yFoc) - (yPix[y] - yFoc)*zFoc) * pixFocArrang >= 0.f) &&
(( (zPix[z] - zFoc)*(yElemConst[txApLstElemY[iTx]] - yFoc) + (yPix[y] - yFoc)*zFoc) * pixFocArrang >= 0.f)
) ? 1.f : 0.f;
}
else {
/* PWI */
txDist = zPix[z] * cosf(txAngZn) + ((xPix[x] - txApCentX[iTx]) * cosf(txAngAz) + (yPix[y] - txApCentY[iTx]) * sinf(txAngAz)) * sinf(txAngZn);

// Projections of ApEdge-Pix vector on the rotated unit vector of tx direction (dot products) ...
// to determine if the pixel is in the sonified area (dot product >= 0).
// For ApEdgeFst, the vector is rotated left, for ApEdgeLst the vector is rotated right.
txApod = (((-sinf(txAngZX[iTx])*zPix[z] + cosf(txAngZX[iTx]) * (xPix[x]-xElemConst[txApFstElemX[iTx]])) >= 0.f) &&
( (sinf(txAngZX[iTx])*zPix[z] - cosf(txAngZX[iTx]) * (xPix[x]-xElemConst[txApLstElemX[iTx]])) >= 0.f) &&
( (-sinf(txAngZY[iTx])*zPix[z] + cosf(txAngZY[iTx]) * (yPix[x]-yElemConst[txApFstElemY[iTx]])) >= 0.f) &&
( (sinf(txAngZY[iTx])*zPix[z] - cosf(txAngZY[iTx]) * (yPix[x]-yElemConst[txApLstElemY[iTx]])) >= 0.f )) ? 1.f : 0.f;
}

pix.real(0.0f);
pix.imag(0.0f);
pixWgh = 0.0f;

if (txApod != 0.f) {
for (int iElemY = 0; iElemY < nElemY; ++iElemY) {
rxTang = (yPix[y]-yElemConst[iElemY])*zDistInv;
if (rxTang < minRxTangZY || rxTang > maxRxTangZY) continue;
rxApodY = interpLinearNormalized(rxApod, nRxApod, (rxTang-minRxTangZY)*rngRxTangZYInv);
for (int iElemX = 0; iElemX < nElemX; ++iElemX) {
offset = iTx*nElemY*nElemX*nSamp + iElemY*nElemX*nSamp + iElemX*nSamp;
rxTang = (xPix[x] - xElemConst[iElemX])*zDistInv;
if (rxTang < minRxTangZX || rxTang > maxRxTangZX) continue;
rxApodX = interpLinearNormalized(rxApod, nRxApod, (rxTang-minRxTangZX)*rngRxTangZXInv);
rxDist = ownHypotf(zPix[z], xPix[x] - xElemConst[iElemX], yPix[y] - yElemConst[iElemY]);
time = (txDist+rxDist)*sosInv + initDel;
iSamp = time*fs;
if (iSamp < 0 || iSamp > static_cast<float>(nSamp-1)) continue;
__sincosf(omega*time, &modSin, &modCos);
modFactor = complex<float>(modCos, modSin);
samp = interpLinear(input+offset, iSamp);
pix += samp*modFactor*rxApodX*rxApodY;
pixWgh += rxApodX*rxApodY;
}
}
}
if (pixWgh != 0.0f) {
iqLri[z + x*nZPix + y*nZPix*nXPix] += pix * txApod / pixWgh;
}
}
}

0 comments on commit c6ecb45

Please sign in to comment.