Use "tensorised-Pauli decomposition" in SparsePauliOp.from_operator (…

…#11133) * Use "tensorised-Pauli decomposition" in `SparsePauliOp.from_operator` This switches the implementation of `SparsePauliOp.from_operator` to the "tensorised-Pauli decomposition" described in https://arxiv.org/abs/2310.13421. This initial implementation is a relatively naive implementation of the algorithm as described in the paper, with a couple of engineering improvements over the paper's accompaniment at HANTLUK/PauliDecomposition@932ce3926 (i.e. constant-factor improvements, rather than complexity improvements). This makes the "zero check" on the recursions short-circuiting, which means rather matrix elements need to be examined on average after each recursive step. Further, the constant-factor multiplication is tracked separately as a single scalar throughout the recursion to avoid a full-matrix complex multiplication at every recursive step (which is approximately six real-floating-point operations per complex multiplication). There is plenty of space in this implementation to reduce internal memory allocations by reusing swap space, and to dispatch the different components of the decomposition to threaded workers. _Effective_ threading is more non-trivial, as a typical operator will have structure that could be exploiting to more optimally dispatch the threads. These can be investigated later. * Remove unused import * Remove superfluous `num_qubits` argument * Export `ZXPaulis` to Python space * Add comment on array-builder logic * Use `num_traits::Zero` trait for zero checking * Use custom ilog2 * Use stdlib `ilog2`
Qiskit · Jan 12, 2024 · 52affa5 · 52affa5
1 parent e573245
commit 52affa5
Show file tree

Hide file tree

Showing 5 changed files with 231 additions and 30 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/crates/accelerate/Cargo.toml b/crates/accelerate/Cargo.toml
@@ -21,6 +21,7 @@ rand = "0.8"
 rand_pcg = "0.3"
 rand_distr = "0.4.3"
 ahash = "0.8.6"
+num-traits = "0.2"
 num-complex = "0.4"
 num-bigint = "0.4"
 rustworkx-core = "0.13"

diff --git a/crates/accelerate/src/sparse_pauli_op.rs b/crates/accelerate/src/sparse_pauli_op.rs
@@ -10,13 +10,16 @@
 // copyright notice, and modified files need to carry a notice indicating
 // that they have been altered from the originals.
 
+use pyo3::exceptions::PyValueError;
 use pyo3::prelude::*;
 use pyo3::wrap_pyfunction;
 use pyo3::Python;
 
 use hashbrown::HashMap;
-use ndarray::{ArrayView1, Axis};
-use numpy::{IntoPyArray, PyReadonlyArray2};
+use ndarray::{s, Array1, Array2, ArrayView1, ArrayView2, Axis};
+use num_complex::Complex64;
+use num_traits::Zero;
+use numpy::{IntoPyArray, PyArray1, PyArray2, PyReadonlyArray2};
 
 /// Find the unique elements of an array.
 ///
@@ -60,8 +63,204 @@ pub fn unordered_unique(py: Python, array: PyReadonlyArray2<u16>) -> (PyObject,
     )
 }
 
+#[derive(Clone, Copy)]
+enum Pauli {
+    I,
+    X,
+    Y,
+    Z,
+}
+
+/// A complete ZX-convention representation of a Pauli decomposition.  This is all the components
+/// necessary to construct a Qiskit-space :class:`.SparsePauliOp`, where :attr:`phases` is in the
+/// ZX convention.
+#[pyclass(module = "qiskit._accelerate.sparse_pauli_op")]
+pub struct ZXPaulis {
+    #[pyo3(get)]
+    pub z: Py<PyArray2<bool>>,
+    #[pyo3(get)]
+    pub x: Py<PyArray2<bool>>,
+    #[pyo3(get)]
+    pub phases: Py<PyArray1<u8>>,
+    #[pyo3(get)]
+    pub coeffs: Py<PyArray1<Complex64>>,
+}
+
+/// Decompose a dense complex operator into the symplectic Pauli representation in the
+/// ZX-convention.
+///
+/// This is an implementation of the "tensorized Pauli decomposition" presented in
+/// `Hantzko, Binkowski and Gupta (2023) <https://arxiv.org/abs/2310.13421>`__.
+#[pyfunction]
+pub fn decompose_dense(
+    py: Python,
+    operator: PyReadonlyArray2<Complex64>,
+    tolerance: f64,
+) -> PyResult<ZXPaulis> {
+    let num_qubits = operator.shape()[0].ilog2() as usize;
+    let size = 1 << num_qubits;
+    if operator.shape() != [size, size] {
+        return Err(PyValueError::new_err(format!(
+            "input with shape {:?} cannot be interpreted as a multiqubit operator",
+            operator.shape()
+        )));
+    }
+    let mut paulis = vec![];
+    let mut coeffs = vec![];
+    if num_qubits > 0 {
+        decompose_dense_inner(
+            Complex64::new(1.0, 0.0),
+            num_qubits,
+            &[],
+            operator.as_array(),
+            &mut paulis,
+            &mut coeffs,
+            tolerance * tolerance,
+        );
+    }
+    if coeffs.is_empty() {
+        Ok(ZXPaulis {
+            z: PyArray2::zeros(py, [0, num_qubits], false).into(),
+            x: PyArray2::zeros(py, [0, num_qubits], false).into(),
+            phases: PyArray1::zeros(py, [0], false).into(),
+            coeffs: PyArray1::zeros(py, [0], false).into(),
+        })
+    } else {
+        // Constructing several arrays of different shapes at once is rather awkward in iterator
+        // logic, so we just loop manually.
+        let mut z = Array2::<bool>::uninit([paulis.len(), num_qubits]);
+        let mut x = Array2::<bool>::uninit([paulis.len(), num_qubits]);
+        let mut phases = Array1::<u8>::uninit(paulis.len());
+        for (i, paulis) in paulis.drain(..).enumerate() {
+            let mut phase = 0u8;
+            for (j, pauli) in paulis.into_iter().rev().enumerate() {
+                match pauli {
+                    Pauli::I => {
+                        z[[i, j]].write(false);
+                        x[[i, j]].write(false);
+                    }
+                    Pauli::X => {
+                        z[[i, j]].write(false);
+                        x[[i, j]].write(true);
+                    }
+                    Pauli::Y => {
+                        z[[i, j]].write(true);
+                        x[[i, j]].write(true);
+                        phase = phase.wrapping_add(1);
+                    }
+                    Pauli::Z => {
+                        z[[i, j]].write(true);
+                        x[[i, j]].write(false);
+                    }
+                }
+            }
+            phases[i].write(phase % 4);
+        }
+        // These are safe because the above loops write into every element.  It's guaranteed that
+        // each of the elements of the `paulis` vec will have `num_qubits` because they're all
+        // reading from the same base array.
+        let z = unsafe { z.assume_init() };
+        let x = unsafe { x.assume_init() };
+        let phases = unsafe { phases.assume_init() };
+        Ok(ZXPaulis {
+            z: z.into_pyarray(py).into(),
+            x: x.into_pyarray(py).into(),
+            phases: phases.into_pyarray(py).into(),
+            coeffs: PyArray1::from_vec(py, coeffs).into(),
+        })
+    }
+}
+
+/// Recurse worker routine of `decompose_dense`.  Should be called with at least one qubit.
+fn decompose_dense_inner(
+    factor: Complex64,
+    num_qubits: usize,
+    paulis: &[Pauli],
+    block: ArrayView2<Complex64>,
+    out_paulis: &mut Vec<Vec<Pauli>>,
+    out_coeffs: &mut Vec<Complex64>,
+    square_tolerance: f64,
+) {
+    if num_qubits == 0 {
+        // It would be safe to `return` here, but if it's unreachable then LLVM is allowed to
+        // optimise out this branch entirely in release mode, which is good for a ~2% speedup.
+        unreachable!("should not call this with an empty operator")
+    }
+    // Base recursion case.
+    if num_qubits == 1 {
+        let mut push_if_nonzero = |extra: Pauli, value: Complex64| {
+            if value.norm_sqr() <= square_tolerance {
+                return;
+            }
+            let paulis = {
+                let mut vec = Vec::with_capacity(paulis.len() + 1);
+                vec.extend_from_slice(paulis);
+                vec.push(extra);
+                vec
+            };
+            out_paulis.push(paulis);
+            out_coeffs.push(value);
+        };
+        push_if_nonzero(Pauli::I, 0.5 * factor * (block[[0, 0]] + block[[1, 1]]));
+        push_if_nonzero(Pauli::X, 0.5 * factor * (block[[0, 1]] + block[[1, 0]]));
+        push_if_nonzero(
+            Pauli::Y,
+            0.5 * Complex64::i() * factor * (block[[0, 1]] - block[[1, 0]]),
+        );
+        push_if_nonzero(Pauli::Z, 0.5 * factor * (block[[0, 0]] - block[[1, 1]]));
+        return;
+    }
+    let mut recurse_if_nonzero = |extra: Pauli, factor: Complex64, values: Array2<Complex64>| {
+        let mut is_zero = true;
+        for value in values.iter() {
+            if !value.is_zero() {
+                is_zero = false;
+                break;
+            }
+        }
+        if is_zero {
+            return;
+        }
+        let mut new_paulis = Vec::with_capacity(paulis.len() + 1);
+        new_paulis.extend_from_slice(paulis);
+        new_paulis.push(extra);
+        decompose_dense_inner(
+            factor,
+            num_qubits - 1,
+            &new_paulis,
+            values.view(),
+            out_paulis,
+            out_coeffs,
+            square_tolerance,
+        );
+    };
+    let mid = 1usize << (num_qubits - 1);
+    recurse_if_nonzero(
+        Pauli::I,
+        0.5 * factor,
+        &block.slice(s![..mid, ..mid]) + &block.slice(s![mid.., mid..]),
+    );
+    recurse_if_nonzero(
+        Pauli::X,
+        0.5 * factor,
+        &block.slice(s![..mid, mid..]) + &block.slice(s![mid.., ..mid]),
+    );
+    recurse_if_nonzero(
+        Pauli::Y,
+        0.5 * Complex64::i() * factor,
+        &block.slice(s![..mid, mid..]) - &block.slice(s![mid.., ..mid]),
+    );
+    recurse_if_nonzero(
+        Pauli::Z,
+        0.5 * factor,
+        &block.slice(s![..mid, ..mid]) - &block.slice(s![mid.., mid..]),
+    );
+}
+
 #[pymodule]
 pub fn sparse_pauli_op(_py: Python, m: &PyModule) -> PyResult<()> {
     m.add_wrapped(wrap_pyfunction!(unordered_unique))?;
+    m.add_wrapped(wrap_pyfunction!(decompose_dense))?;
+    m.add_class::<ZXPaulis>()?;
     Ok(())
 }
diff --git a/qiskit/quantum_info/operators/symplectic/sparse_pauli_op.py b/qiskit/quantum_info/operators/symplectic/sparse_pauli_op.py
@@ -24,7 +24,7 @@
 import numpy as np
 import rustworkx as rx
 
-from qiskit._accelerate.sparse_pauli_op import unordered_unique
+from qiskit._accelerate.sparse_pauli_op import unordered_unique, decompose_dense
 from qiskit.circuit.parameter import Parameter
 from qiskit.circuit.parameterexpression import ParameterExpression
 from qiskit.circuit.parametertable import ParameterView
@@ -35,7 +35,6 @@
 from qiskit.quantum_info.operators.operator import Operator
 from qiskit.quantum_info.operators.symplectic.pauli import BasePauli
 from qiskit.quantum_info.operators.symplectic.pauli_list import PauliList
-from qiskit.quantum_info.operators.symplectic.pauli_utils import pauli_basis
 from qiskit.quantum_info.operators.symplectic.pauli import Pauli
 
 
@@ -131,8 +130,8 @@ def __init__(
 
         pauli_list = PauliList(data.copy() if copy and hasattr(data, "copy") else data)
 
-        if isinstance(coeffs, np.ndarray) and coeffs.dtype == object:
-            dtype = object
+        if isinstance(coeffs, np.ndarray):
+            dtype = object if coeffs.dtype == object else complex
         elif coeffs is not None:
             if not isinstance(coeffs, (np.ndarray, Sequence)):
                 coeffs = [coeffs]
@@ -727,15 +726,20 @@ def from_operator(
     ) -> SparsePauliOp:
         """Construct from an Operator objector.
 
-        Note that the cost of this construction is exponential as it involves
-        taking inner products with every element of the N-qubit Pauli basis.
+        Note that the cost of this construction is exponential in general because the number of
+        possible Pauli terms in the decomposition is exponential in the number of qubits.
+
+        Internally this uses an implementation of the "tensorized Pauli decomposition" presented in
+        `Hantzko, Binkowski and Gupta (2023) <https://arxiv.org/abs/2310.13421>`__.
 
         Args:
             obj (Operator): an N-qubit operator.
-            atol (float): Optional. Absolute tolerance for checking if
-                          coefficients are zero (Default: 1e-8).
-            rtol (float): Optional. relative tolerance for checking if
-                          coefficients are zero (Default: 1e-5).
+            atol (float): Optional. Absolute tolerance for checking if coefficients are zero
+                (Default: 1e-8).  Since the comparison is to zero, in effect the tolerance used is
+                the maximum of ``atol`` and ``rtol``.
+            rtol (float): Optional. relative tolerance for checking if coefficients are zero
+                (Default: 1e-5).  Since the comparison is to zero, in effect the tolerance used is
+                the maximum of ``atol`` and ``rtol``.
 
         Returns:
             SparsePauliOp: the SparsePauliOp representation of the operator.
@@ -748,6 +752,7 @@ def from_operator(
             atol = SparsePauliOp.atol
         if rtol is None:
             rtol = SparsePauliOp.rtol
+        tol = max((atol, rtol))
 
         if not isinstance(obj, Operator):
             obj = Operator(obj)
@@ -756,24 +761,11 @@ def from_operator(
         num_qubits = obj.num_qubits
         if num_qubits is None:
             raise QiskitError("Input Operator is not an N-qubit operator.")
-        data = obj.data
-
-        # Index of non-zero basis elements
-        inds = []
-        # Non-zero coefficients
-        coeffs = []
-        # Non-normalized basis factor
-        denom = 2**num_qubits
-        # Compute coefficients from basis
-        basis = pauli_basis(num_qubits)
-        for i, mat in enumerate(basis.matrix_iter()):
-            coeff = np.trace(mat.dot(data)) / denom
-            if not np.isclose(coeff, 0, atol=atol, rtol=rtol):
-                inds.append(i)
-                coeffs.append(coeff)
-        # Get Non-zero coeff PauliList terms
-        paulis = basis[inds]
-        return SparsePauliOp(paulis, coeffs, copy=False)
+        zx_paulis = decompose_dense(obj.data, tol)
+        # Indirection through `BasePauli` is because we're already supplying the phase in the ZX
+        # convention.
+        pauli_list = PauliList(BasePauli(zx_paulis.z, zx_paulis.x, zx_paulis.phases))
+        return SparsePauliOp(pauli_list, zx_paulis.coeffs, ignore_pauli_phase=True, copy=False)
 
     @staticmethod
     def from_list(

diff --git a/releasenotes/notes/fast-sparse-pauli-operator-b41cacf11e8c4e0e.yaml b/releasenotes/notes/fast-sparse-pauli-operator-b41cacf11e8c4e0e.yaml
@@ -0,0 +1,8 @@
+---
+features:
+  - |
+    :meth:`.SparsePauliOp.from_operator` now uses an implementation of the
+    "tensorized Pauli decomposition algorithm" presented in 
+    Hatznko, Binkowski and Gupta (2023) <https://arxiv.org/abs/2310.13421>`__.  The method is now
+    several orders of magnitude faster; for example, it is possible to decompose a random
+    10-qubit operator in around 250ms on a consumer Macbook Pro (Intel i7, 2020).