Oxidize synth_cnot_count_full_pmh (Qiskit#12588)

* port PMH synth to Rust * Raise an error if the section size is invalid Co-authored-by: Abdalla01001 <[email protected]> Co-authored-by: Tarun-Kumar07 <[email protected]> * Review comments of Shelly & Sasha --------- Co-authored-by: Abdalla01001 <[email protected]> Co-authored-by: Tarun-Kumar07 <[email protected]>
Procatv · Aug 1, 2024 · a525e6d · a525e6d
1 parent 2b0bee4
commit a525e6d
Show file tree

Hide file tree

Showing 5 changed files with 272 additions and 95 deletions.
diff --git a/crates/accelerate/src/synthesis/linear/mod.rs b/crates/accelerate/src/synthesis/linear/mod.rs
@@ -14,6 +14,7 @@ use crate::QiskitError;
 use numpy::{IntoPyArray, PyArray2, PyReadonlyArray2, PyReadwriteArray2};
 use pyo3::prelude::*;
 
+mod pmh;
 pub mod utils;
 
 #[pyfunction]
@@ -186,5 +187,6 @@ pub fn linear(m: &Bound<PyModule>) -> PyResult<()> {
     m.add_wrapped(wrap_pyfunction!(binary_matmul))?;
     m.add_wrapped(wrap_pyfunction!(random_invertible_binary_matrix))?;
     m.add_wrapped(wrap_pyfunction!(check_invertible_binary_matrix))?;
+    m.add_wrapped(wrap_pyfunction!(pmh::synth_cnot_count_full_pmh))?;
     Ok(())
 }
diff --git a/crates/accelerate/src/synthesis/linear/pmh.rs b/crates/accelerate/src/synthesis/linear/pmh.rs
@@ -0,0 +1,195 @@
+// This code is part of Qiskit.
+//
+// (C) Copyright IBM 2024
+//
+// This code is licensed under the Apache License, Version 2.0. You may
+// obtain a copy of this license in the LICENSE.txt file in the root directory
+// of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+//
+// Any modifications or derivative works of this code must retain this
+// copyright notice, and modified files need to carry a notice indicating
+// that they have been altered from the originals.
+
+use hashbrown::HashMap;
+use ndarray::{s, Array1, Array2, ArrayViewMut2, Axis};
+use numpy::PyReadonlyArray2;
+use smallvec::smallvec;
+use std::cmp;
+
+use qiskit_circuit::circuit_data::CircuitData;
+use qiskit_circuit::operations::{Param, StandardGate};
+use qiskit_circuit::Qubit;
+
+use pyo3::prelude::*;
+
+use super::utils::_add_row_or_col;
+
+/// This helper function allows transposed access to a matrix.
+fn _index(transpose: bool, i: usize, j: usize) -> (usize, usize) {
+    if transpose {
+        (j, i)
+    } else {
+        (i, j)
+    }
+}
+
+fn _ceil_fraction(numerator: usize, denominator: usize) -> usize {
+    let mut fraction = numerator / denominator;
+    if numerator % denominator > 0 {
+        fraction += 1;
+    }
+    fraction
+}
+
+/// This function is a helper function of the algorithm for optimal synthesis
+/// of linear reversible circuits (the Patel–Markov–Hayes algorithm). It works
+/// like gaussian elimination, except that it works a lot faster, and requires
+/// fewer steps (and therefore fewer CNOTs). It takes the matrix and
+/// splits it into sections of size section_size. Then it eliminates all non-zero
+/// sub-rows within each section, which are the same as a non-zero sub-row
+/// above. Once this has been done, it continues with normal gaussian elimination.
+/// The benefit is that with small section sizes, most of the sub-rows will
+/// be cleared in the first step, resulting in a factor ``section_size`` fewer row row operations
+/// during Gaussian elimination.
+///
+/// The algorithm is described in detail in the following paper
+/// "Optimal synthesis of linear reversible circuits."
+/// Patel, Ketan N., Igor L. Markov, and John P. Hayes.
+/// Quantum Information & Computation 8.3 (2008): 282-294.
+///
+/// Note:
+/// This implementation tweaks the Patel, Markov, and Hayes algorithm by adding
+/// a "back reduce" which adds rows below the pivot row with a high degree of
+/// overlap back to it. The intuition is to avoid a high-weight pivot row
+/// increasing the weight of lower rows.
+///
+/// Args:
+///     matrix: square matrix, describing a linear quantum circuit
+///     section_size: the section size the matrix columns are divided into
+///
+/// Returns:
+///     A vector of CX locations (control, target) that need to be applied.
+fn lower_cnot_synth(
+    mut matrix: ArrayViewMut2<bool>,
+    section_size: usize,
+    transpose: bool,
+) -> Vec<(usize, usize)> {
+    // The vector of CNOTs to be applied. Called ``circuit`` here for consistency with the paper.
+    let mut circuit: Vec<(usize, usize)> = Vec::new();
+    let cutoff = 1;
+
+    // to apply to the transposed matrix, we can just set axis = 1
+    let row_axis = if transpose { Axis(1) } else { Axis(0) };
+
+    // get number of columns (same as rows) and the number of sections
+    let n = matrix.raw_dim()[0];
+    let num_sections = _ceil_fraction(n, section_size);
+
+    // iterate over the columns
+    for section in 1..num_sections + 1 {
+        // store sub section row patterns here, which we saw already
+        let mut patterns: HashMap<Array1<bool>, usize> = HashMap::new();
+        let section_slice = s![(section - 1) * section_size..cmp::min(section * section_size, n)];
+
+        // iterate over the rows (note we only iterate from the diagonal downwards)
+        for row_idx in (section - 1) * section_size..n {
+            // we need to keep track of the rows we saw already, called ``pattern`` here
+            let pattern: Array1<bool> = matrix
+                .index_axis(row_axis, row_idx)
+                .slice(section_slice)
+                .to_owned();
+
+            // skip if the row is empty (i.e. all elements are false)
+            if pattern.iter().any(|&el| el) {
+                if patterns.contains_key(&pattern) {
+                    // store CX location
+                    circuit.push((patterns[&pattern], row_idx));
+                    // remove the row
+                    _add_row_or_col(matrix.view_mut(), &transpose, patterns[&pattern], row_idx);
+                } else {
+                    // if we have not seen this pattern yet, keep track of it
+                    patterns.insert(pattern, row_idx);
+                }
+            }
+        }
+
+        // gaussian eliminate the remainder of the section
+        for col_idx in (section - 1) * section_size..cmp::min(section * section_size, n) {
+            let mut diag_el = matrix[[col_idx, col_idx]];
+
+            for r in col_idx + 1..n {
+                if matrix[_index(transpose, r, col_idx)] {
+                    if !diag_el {
+                        _add_row_or_col(matrix.view_mut(), &transpose, r, col_idx);
+                        circuit.push((r, col_idx));
+                        diag_el = true
+                    }
+                    _add_row_or_col(matrix.view_mut(), &transpose, col_idx, r);
+                    circuit.push((col_idx, r));
+                }
+
+                // back-reduce to the pivot row: this one-line-magic checks if the logical AND
+                // between the two target rows has more ``true`` elements is larger than the cutoff
+                if matrix
+                    .index_axis(row_axis, col_idx)
+                    .iter()
+                    .zip(matrix.index_axis(row_axis, r).iter())
+                    .map(|(&i, &j)| i & j)
+                    .filter(|&x| x)
+                    .count()
+                    > cutoff
+                {
+                    _add_row_or_col(matrix.view_mut(), &transpose, r, col_idx);
+                    circuit.push((r, col_idx));
+                }
+            }
+        }
+    }
+    circuit
+}
+
+/// Synthesize a linear function, given by a boolean square matrix, into a circuit.
+/// This function uses the Patel-Markov-Hayes algorithm, described in arXiv:quant-ph/0302002,
+/// using section-wise elimination of the rows.
+#[pyfunction]
+#[pyo3(signature = (matrix, section_size=None))]
+pub fn synth_cnot_count_full_pmh(
+    py: Python,
+    matrix: PyReadonlyArray2<bool>,
+    section_size: Option<i64>,
+) -> PyResult<CircuitData> {
+    let arrayview = matrix.as_array();
+    let mut mat: Array2<bool> = arrayview.to_owned();
+    let num_qubits = mat.nrows(); // is a quadratic matrix
+
+    // If given, use the user-specified input size. If None, we default to
+    // alpha * log2(num_qubits), as suggested in the paper, where the coefficient alpha
+    // is calibrated to minimize the upper bound on the number of row operations.
+    // In addition, we at least set a block size of 2, which, in practice, minimizes the bound
+    // until ~100 qubits.
+    let alpha = 0.56;
+    let blocksize = match section_size {
+        Some(section_size) => section_size as usize,
+        None => std::cmp::max(2, (alpha * (num_qubits as f64).log2()).floor() as usize),
+    };
+
+    // compute the synthesis for the lower triangular part of the matrix, and then
+    // apply it on the transposed part for the full synthesis
+    let lower_cnots = lower_cnot_synth(mat.view_mut(), blocksize, false);
+    let upper_cnots = lower_cnot_synth(mat.view_mut(), blocksize, true);
+
+    // iterator over the gates
+    let instructions = upper_cnots
+        .iter()
+        .map(|(i, j)| (*j, *i))
+        .chain(lower_cnots.into_iter().rev())
+        .map(|(ctrl, target)| {
+            (
+                StandardGate::CXGate,
+                smallvec![],
+                smallvec![Qubit(ctrl as u32), Qubit(target as u32)],
+            )
+        });
+
+    CircuitData::from_standard_gates(py, num_qubits as u32, instructions, Param::Float(0.0))
+}
diff --git a/qiskit/synthesis/linear/cnot_synth.py b/qiskit/synthesis/linear/cnot_synth.py
@@ -17,126 +17,53 @@
 """
 
 from __future__ import annotations
-import copy
+
 import numpy as np
 from qiskit.circuit import QuantumCircuit
-from qiskit.exceptions import QiskitError
+
+from qiskit._accelerate.synthesis.linear import synth_cnot_count_full_pmh as fast_pmh
 
 
 def synth_cnot_count_full_pmh(
-    state: list[list[bool]] | np.ndarray[bool], section_size: int = 2
+    state: list[list[bool]] | np.ndarray[bool], section_size: int | None = None
 ) -> QuantumCircuit:
-    """
+    r"""
     Synthesize linear reversible circuits for all-to-all architecture
     using Patel, Markov and Hayes method.
 
     This function is an implementation of the Patel, Markov and Hayes algorithm from [1]
     for optimal synthesis of linear reversible circuits for all-to-all architecture,
-    as specified by an :math:`n \\times n` matrix.
+    as specified by an :math:`n \times n` matrix.
 
     Args:
-        state: :math:`n \\times n` boolean invertible matrix, describing
-            the state of the input circuit
+        state: :math:`n \times n` boolean invertible matrix, describing
+            the state of the input circuit.
         section_size: The size of each section in the Patel–Markov–Hayes algorithm [1].
+            If ``None`` it is chosen to be :math:`\max(2, \alpha\log_2(n))` with
+            :math:`\alpha = 0.56`, which approximately minimizes the upper bound on the number
+            of row operations given in [1] Eq. (3).
 
     Returns:
-        QuantumCircuit: a CX-only circuit implementing the linear transformation.
+        A CX-only circuit implementing the linear transformation.
 
     Raises:
-        QiskitError: when variable ``state`` isn't of type ``numpy.ndarray``
+        ValueError: When ``section_size`` is larger than the number of columns.
 
     References:
         1. Patel, Ketan N., Igor L. Markov, and John P. Hayes,
            *Optimal synthesis of linear reversible circuits*,
            Quantum Information & Computation 8.3 (2008): 282-294.
            `arXiv:quant-ph/0302002 [quant-ph] <https://arxiv.org/abs/quant-ph/0302002>`_
     """
-    if not isinstance(state, (list, np.ndarray)):
-        raise QiskitError(
-            f"state should be of type list or numpy.ndarray, but was of the type {type(state)}"
+    normalized = np.asarray(state).astype(bool)
+    if section_size is not None and normalized.shape[1] < section_size:
+        raise ValueError(
+            f"The section_size ({section_size}) cannot be larger than the number of columns "
+            f"({normalized.shape[1]})."
         )
-    state = np.array(state)
-    # Synthesize lower triangular part
-    [state, circuit_l] = _lwr_cnot_synth(state, section_size)
-    state = np.transpose(state)
-    # Synthesize upper triangular part
-    [state, circuit_u] = _lwr_cnot_synth(state, section_size)
-    circuit_l.reverse()
-    for i in circuit_u:
-        i.reverse()
-    # Convert the list into a circuit of C-NOT gates
-    circ = QuantumCircuit(state.shape[0])
-    for i in circuit_u + circuit_l:
-        circ.cx(i[0], i[1])
-    return circ
-
-
-def _lwr_cnot_synth(state, section_size):
-    """
-    This function is a helper function of the algorithm for optimal synthesis
-    of linear reversible circuits (the Patel–Markov–Hayes algorithm). It works
-    like gaussian elimination, except that it works a lot faster, and requires
-    fewer steps (and therefore fewer CNOTs). It takes the matrix "state" and
-    splits it into sections of size section_size. Then it eliminates all non-zero
-    sub-rows within each section, which are the same as a non-zero sub-row
-    above. Once this has been done, it continues with normal gaussian elimination.
-    The benefit is that with small section sizes (m), most of the sub-rows will
-    be cleared in the first step, resulting in a factor m fewer row row operations
-    during Gaussian elimination.
 
-    The algorithm is described in detail in the following paper
-    "Optimal synthesis of linear reversible circuits."
-    Patel, Ketan N., Igor L. Markov, and John P. Hayes.
-    Quantum Information & Computation 8.3 (2008): 282-294.
-
-    Note:
-    This implementation tweaks the Patel, Markov, and Hayes algorithm by adding
-    a "back reduce" which adds rows below the pivot row with a high degree of
-    overlap back to it. The intuition is to avoid a high-weight pivot row
-    increasing the weight of lower rows.
-
-    Args:
-        state (ndarray): n x n matrix, describing a linear quantum circuit
-        section_size (int): the section size the matrix columns are divided into
-
-    Returns:
-        numpy.matrix: n by n matrix, describing the state of the output circuit
-        list: a k by 2 list of C-NOT operations that need to be applied
-    """
-    circuit = []
-    num_qubits = state.shape[0]
-    cutoff = 1
+    # call Rust implementation with normalized input
+    circuit_data = fast_pmh(normalized, section_size)
 
-    # Iterate over column sections
-    for sec in range(1, int(np.floor(num_qubits / section_size) + 1)):
-        # Remove duplicate sub-rows in section sec
-        patt = {}
-        for row in range((sec - 1) * section_size, num_qubits):
-            sub_row_patt = copy.deepcopy(state[row, (sec - 1) * section_size : sec * section_size])
-            if np.sum(sub_row_patt) == 0:
-                continue
-            if str(sub_row_patt) not in patt:
-                patt[str(sub_row_patt)] = row
-            else:
-                state[row, :] ^= state[patt[str(sub_row_patt)], :]
-                circuit.append([patt[str(sub_row_patt)], row])
-        # Use gaussian elimination for remaining entries in column section
-        for col in range((sec - 1) * section_size, sec * section_size):
-            # Check if 1 on diagonal
-            diag_one = 1
-            if state[col, col] == 0:
-                diag_one = 0
-            # Remove ones in rows below column col
-            for row in range(col + 1, num_qubits):
-                if state[row, col] == 1:
-                    if diag_one == 0:
-                        state[col, :] ^= state[row, :]
-                        circuit.append([row, col])
-                        diag_one = 1
-                    state[row, :] ^= state[col, :]
-                    circuit.append([col, row])
-                # Back reduce the pivot row using the current row
-                if sum(state[col, :] & state[row, :]) > cutoff:
-                    state[col, :] ^= state[row, :]
-                    circuit.append([row, col])
-    return [state, circuit]
+    # construct circuit from the data
+    return QuantumCircuit._from_circuit_data(circuit_data)
diff --git a/releasenotes/notes/oxidize-pmh-ec74e4002510eaad.yaml b/releasenotes/notes/oxidize-pmh-ec74e4002510eaad.yaml
@@ -0,0 +1,17 @@
+---
+features_synthesis:
+  - |
+    Port :func:`.synth_cnot_full_pmh`, used to synthesize a linear function
+    into a CX network, to Rust. This produces approximately 44x speedup,
+    as measured on 100 qubit circuits. 
+  - |
+    The function :func:`.synth_cnot_full_pmh` now allows choosing the 
+    (heuristically) optimal ``section_size`` by setting it to ``None``. Then, a value is 
+    chosen which attempts to minimize the upper bound on the number of CX gates, that is
+    :math:`\alpha \log_2(n)` where :math:`n` is the number of qubits and :math:`\alpha \approx 0.56`.
+fixes:
+  - |
+    Fixed a bug in :func:`.synth_cnot_full_pmh` where providing a ``section_size`` that did 
+    not divide the number of qubits without remainder could lead to wrong results. 
+    Now any ``section_size`` (at most equal to the number of qubits) synthesizes the correct
+    circuit. For a (heuristically) optimal value, set ``section_size=None``.