diff --git a/docs/differentiation.md b/docs/differentiation.md
index 359e8416..8437e103 100644
--- a/docs/differentiation.md
+++ b/docs/differentiation.md
@@ -13,7 +13,7 @@ The [adjoint differentiation mode](https://arxiv.org/abs/2009.02823) computes fi
 The Generalized parameter shift rule (GPSR mode) is an extension of the well known [parameter shift rule (PSR)](https://arxiv.org/abs/1811.11184) algorithm [to arbitrary quantum operations](https://arxiv.org/abs/2108.01218). PSR only works for quantum operations whose generator has a single gap in its eigenvalue spectrum, while GPSR extends to the multi-gap case.
 !!! warning "Usage restrictions"
-    At the moment, circuits with one or more Scale or HamiltonianEvolution operations are not supported.
+    At the moment, circuits with one or more Scale operations, or HamiltonianEvolution operations with parametric generators, are not supported.
     They should be handled differently as GPSR requires operations to be of the form presented below.
 
 Also, circuits with operations sharing the same parameter name are not supported, as such cases are handled by our other Python package for differentiable digital-analog quantum programs, Qadence
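The single-gap rule that `single_gap_shift` implements in the GPSR code below can be checked on a closed-form example. A minimal plain-`torch` sketch, assuming `f` stands in for the circuit expectation `<0|RX(θ)† Z RX(θ)|0> = cos(θ)`, whose generator X has spectral gap 2:

```python
# Single-gap PSR check: d/dθ cos(θ) recovered exactly from two shifted evaluations.
import torch


def f(theta: torch.Tensor) -> torch.Tensor:
    # Exact expectation <0|RX(θ)† Z RX(θ)|0> for RX(θ) = exp(-i θ X / 2).
    return torch.cos(theta)


theta = torch.tensor(0.7)
gap = torch.tensor(2.0)               # spectral gap of X (eigenvalues ±1)
shift = torch.tensor(torch.pi) / 2.0  # the shift_pi2 default used by the code

deriv_psr = gap * (f(theta + shift) - f(theta - shift)) / (
    4.0 * torch.sin(gap * shift / 2.0)
)
assert torch.allclose(deriv_psr, -torch.sin(theta))  # analytic derivative of cos(θ)
```

With gap 2 and shift π/2 the denominator equals 4, recovering the familiar (f₊ − f₋)/2 form of PSR.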
diff --git a/pyqtorch/differentiation/gpsr.py b/pyqtorch/differentiation/gpsr.py
index e784c748..8009401d 100644
--- a/pyqtorch/differentiation/gpsr.py
+++ b/pyqtorch/differentiation/gpsr.py
@@ -8,9 +8,9 @@
 from torch.autograd import Function
 
 from pyqtorch.circuit import QuantumCircuit
-from pyqtorch.composite import Scale, Sequence
+from pyqtorch.composite import Scale
 from pyqtorch.embed import Embedding
-from pyqtorch.hamiltonians import HamiltonianEvolution, Observable
+from pyqtorch.hamiltonians import GeneratorType, HamiltonianEvolution, Observable
 from pyqtorch.matrices import DEFAULT_REAL_DTYPE
 from pyqtorch.primitives import Parametric
 from pyqtorch.utils import param_dict
@@ -123,9 +123,6 @@ def backward(ctx: Any, grad_out: Tensor) -> Tuple[None, ...]:
         """
         values = param_dict(ctx.param_names, ctx.saved_tensors)
 
-        shift_pi2 = torch.tensor(torch.pi) / 2.0
-        shift_multi = 0.5
-
         dtype_values = DEFAULT_REAL_DTYPE
         device = torch.device("cpu")
         try:
@@ -133,6 +130,9 @@ def backward(ctx: Any, grad_out: Tensor) -> Tuple[None, ...]:
         except Exception:
             pass
 
+        shift_pi2 = torch.tensor(torch.pi, dtype=dtype_values) / 2.0
+        shift_multi = 0.5
+
         def expectation_fn(values: dict[str, Tensor]) -> Tensor:
             """Use the PSRExpectation for nested grad calls.
@@ -156,7 +156,7 @@ def single_gap_shift(
             param_name: str,
             values: dict[str, Tensor],
             spectral_gap: Tensor,
-            shift: Tensor = torch.tensor(torch.pi) / 2.0,
+            shift: Tensor = shift_pi2,
         ) -> Tensor:
             """Implements single gap PSR rule.
@@ -183,14 +183,14 @@ def single_gap_shift(
             return (
                 spectral_gap
                 * (f_plus - f_minus)
-                / (4 * torch.sin(spectral_gap * shift / 2))
+                / (4.0 * torch.sin(spectral_gap * shift / 2.0))
             )
 
         def multi_gap_shift(
             param_name: str,
             values: dict[str, Tensor],
             spectral_gaps: Tensor,
-            shift_prefac: float = 0.5,
+            shift_prefac: float = shift_multi,
         ) -> Tensor:
             """Implement multi gap PSR rule.
@@ -244,38 +244,46 @@ def multi_gap_shift(
             F = torch.stack(F).reshape(n_eqs, -1)
             R = torch.linalg.solve(M, F)
             dfdx = torch.sum(spectral_gaps * R, dim=0).reshape(batch_size)
+
             return dfdx
 
-        def vjp(operation: Parametric, values: dict[str, Tensor]) -> Tensor:
+        def vjp(
+            param_name: str, spectral_gap: Tensor, values: dict[str, Tensor]
+        ) -> Tensor:
             """Vector-jacobian product between `grad_out` and jacobians of parameters.
 
             Args:
-                operation: Parametric operation to compute PSR.
+                param_name: Parameter name to compute gradient over.
+                spectral_gap: Spectral gap of the corresponding operation.
                 values: Dictionary with parameter values.
 
             Returns:
                 Updated jacobian by PSR.
             """
-            psr_fn, shift = (
-                (multi_gap_shift, shift_multi)
-                if len(operation.spectral_gap) > 1
-                else (single_gap_shift, shift_pi2)
-            )
+            psr_fn = multi_gap_shift if len(spectral_gap) > 1 else single_gap_shift
+
             return grad_out * psr_fn(  # type: ignore[operator]
-                operation.param_name,  # type: ignore
+                param_name,  # type: ignore
                 values,
-                operation.spectral_gap,
-                shift,
+                spectral_gap,
             )
 
         grads = {p: None for p in ctx.param_names}
+
+        def update_gradient(param_name: str, spectral_gap: Tensor):
+            if values[param_name].requires_grad:
+                if grads[param_name] is not None:
+                    grads[param_name] += vjp(param_name, spectral_gap, values)
+                else:
+                    grads[param_name] = vjp(param_name, spectral_gap, values)
+
         for op in ctx.circuit.flatten():
-            if isinstance(op, Parametric) and isinstance(op.param_name, str):
-                if values[op.param_name].requires_grad:
-                    if grads[op.param_name] is not None:
-                        grads[op.param_name] += vjp(op, values)
-                    else:
-                        grads[op.param_name] = vjp(op, values)
+
+            if isinstance(op, (Parametric, HamiltonianEvolution)) and isinstance(
+                op.param_name, str
+            ):
+                factor = 1.0 if isinstance(op, Parametric) else 2.0
+                update_gradient(op.param_name, factor * op.spectral_gap)
 
         return (
             None,
@@ -300,24 +308,25 @@ def check_support_psr(circuit: QuantumCircuit):
     """
 
     param_names = list()
-    for op in circuit.operations:
-        if isinstance(op, Scale) or isinstance(op, HamiltonianEvolution):
+    for op in circuit.flatten():
+        if isinstance(op, Scale):
             raise ValueError(
                 f"PSR is not applicable as circuit contains an operation of type: {type(op)}."
             )
-        if isinstance(op, Sequence):
-            for subop in op.flatten():
-                if isinstance(subop, Scale) or isinstance(subop, HamiltonianEvolution):
-                    raise ValueError(
-                        f"PSR is not applicable as circuit contains \
-                        an operation of type: {type(subop)}."
-                    )
-                if isinstance(subop, Parametric):
-                    if isinstance(subop.param_name, str):
-                        param_names.append(subop.param_name)
+        if isinstance(op, HamiltonianEvolution) and op.generator_type in [
+            GeneratorType.SYMBOL,
+            GeneratorType.PARAMETRIC_OPERATION,
+        ]:
+            raise ValueError(
+                f"PSR is not applicable as circuit contains an operation of type: {type(op)} \
+                whose generator type is {op.generator_type}."
+            )
         elif isinstance(op, Parametric):
             if isinstance(op.param_name, str):
                 param_names.append(op.param_name)
+        elif isinstance(op, HamiltonianEvolution):
+            if isinstance(op.time, str):
+                param_names.append(op.time)
         else:
             continue
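The `factor = 1.0 if isinstance(op, Parametric) else 2.0` line above encodes a convention difference: `Parametric` gates apply `exp(-i θ G / 2)` while `HamiltonianEvolution` applies `exp(-i t H)`, so the gap fed into the shift rule doubles. A plain-`torch` sketch of that doubling, assuming `f(t) = cos(2t)` as the exact expectation `<0| exp(i t X) Z exp(-i t X) |0>`:

```python
# The same shift rule, with the generator gap doubled for exp(-i t H) evolution.
import torch


def f(t: torch.Tensor) -> torch.Tensor:
    # Exact expectation <0| exp(i t X) Z exp(-i t X) |0>.
    return torch.cos(2.0 * t)


t = torch.tensor(0.3)
gap = 2.0 * torch.tensor(2.0)  # generator gap of X (=2), doubled by the factor above
shift = torch.tensor(torch.pi) / (2.0 * gap)  # any shift with sin(gap * shift / 2) != 0

deriv_psr = gap * (f(t + shift) - f(t - shift)) / (4.0 * torch.sin(gap * shift / 2.0))
assert torch.allclose(deriv_psr, -2.0 * torch.sin(2.0 * t))  # d/dt cos(2t)
```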
diff --git a/pyqtorch/hamiltonians/evolution.py b/pyqtorch/hamiltonians/evolution.py
index 9e0e3019..d09db497 100644
--- a/pyqtorch/hamiltonians/evolution.py
+++ b/pyqtorch/hamiltonians/evolution.py
@@ -2,6 +2,7 @@
 
 import logging
 from collections import OrderedDict
+from functools import cached_property
 from logging import getLogger
 from typing import Callable, Tuple, Union
 
@@ -18,6 +19,7 @@
     Operator,
     State,
     StrEnum,
+    _round_operator,
     expand_operator,
     is_diag,
 )
@@ -223,6 +225,13 @@ def generator(self) -> ModuleList:
         """
         return self.operations
 
+    def flatten(self) -> ModuleList:
+        return ModuleList([self])
+
+    @property
+    def param_name(self) -> Tensor | str:
+        return self.time
+
     def _symbolic_generator(
         self,
         values: dict,
@@ -273,6 +282,32 @@ def create_hamiltonian(self) -> Callable[[dict], Operator]:
         """
         return self._generator_map[self.generator_type]
 
+    @cached_property
+    def eigenvals_generator(self) -> Tensor:
+        """Get eigenvalues of the underlying Hamiltonian.
+
+        Note: Only works for GeneratorType.TENSOR
+        or GeneratorType.OPERATION.
+
+        Returns:
+            Eigenvalues of the operation.
+        """
+
+        return self.generator[0].eigenvalues
+
+    @cached_property
+    def spectral_gap(self) -> Tensor:
+        """Difference between the moduli of the two largest eigenvalues of the generator.
+
+        Returns:
+            Tensor: Spectral gap value.
+        """
+        spectrum = self.eigenvals_generator
+        diffs = spectrum - spectrum.T
+        diffs = _round_operator(diffs)
+        spectral_gap = torch.unique(torch.abs(torch.tril(diffs)))
+        return spectral_gap[spectral_gap.nonzero()]
+
     def forward(
         self,
         state: Tensor,
diff --git a/pyqtorch/matrices.py b/pyqtorch/matrices.py
index d5e94c97..59a3e7f7 100644
--- a/pyqtorch/matrices.py
+++ b/pyqtorch/matrices.py
@@ -67,14 +67,38 @@ def PROJMAT(ket: Tensor, bra: Tensor) -> Tensor:
 
 
 def parametric_unitary(
-    theta: torch.Tensor, P: torch.Tensor, I: torch.Tensor, batch_size: int  # noqa: E741
+    theta: torch.Tensor,
+    P: torch.Tensor,
+    identity_mat: torch.Tensor,
+    batch_size: int,
+    a: float = 0.5,
 ) -> torch.Tensor:
-    cos_t = torch.cos(theta / 2).unsqueeze(0).unsqueeze(1)
+    """Compute the exponentiation of a Pauli matrix :math:`P`.
+
+    The exponentiation is given by:
+    :math:`exp(-i a \\theta P) = I cos(r \\theta) - i a P sin(r \\theta) / r`
+
+    where :math:`a` is a prefactor
+    and :math:`r = a * sg / 2`, :math:`sg` corresponding to the spectral gap.
+
+    Here, we assume :math:`sg = 2`.
+
+    Args:
+        theta (torch.Tensor): Parameter values.
+        P (torch.Tensor): Pauli matrix to exponentiate.
+        identity_mat (torch.Tensor): Identity matrix.
+        batch_size (int): Batch size of parameters.
+        a (float): Prefactor.
+
+    Returns:
+        torch.Tensor: The exponentiation of P.
+    """
+    cos_t = torch.cos(theta * a).unsqueeze(0).unsqueeze(1)
     cos_t = cos_t.repeat((2, 2, 1))
-    sin_t = torch.sin(theta / 2).unsqueeze(0).unsqueeze(1)
+    sin_t = torch.sin(theta * a).unsqueeze(0).unsqueeze(1)
     sin_t = sin_t.repeat((2, 2, 1))
 
-    batch_imat = I.unsqueeze(2).repeat(1, 1, batch_size)
+    batch_imat = identity_mat.unsqueeze(2).repeat(1, 1, batch_size)
     batch_operation_mat = P.unsqueeze(2).repeat(1, 1, batch_size)
 
     return cos_t * batch_imat - 1j * sin_t * batch_operation_mat
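The closed form documented in `parametric_unitary` holds for any involutory generator (`P @ P = I`), which is what lets the code avoid a matrix exponential. A throwaway numerical check against `torch.matrix_exp` (variable names here are illustrative):

```python
# Verify exp(-i a θ P) = I cos(a θ) - i P sin(a θ) for an involutory P (Pauli X).
import math

import torch

P = torch.tensor([[0.0, 1.0], [1.0, 0.0]], dtype=torch.complex128)  # Pauli X
identity = torch.eye(2, dtype=torch.complex128)
theta, a = 0.9, 0.5

closed_form = identity * math.cos(a * theta) - 1j * P * math.sin(a * theta)
exact = torch.matrix_exp(-1j * a * theta * P)
assert torch.allclose(closed_form, exact)
```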
diff --git a/pyqtorch/quantum_operation.py b/pyqtorch/quantum_operation.py
index 0b408968..7ae6aced 100644
--- a/pyqtorch/quantum_operation.py
+++ b/pyqtorch/quantum_operation.py
@@ -245,6 +245,24 @@ def eigenvals_generator(self) -> Tensor:
         """
         return torch.linalg.eigvalsh(self.operation).reshape(-1, 1)
 
+    @cached_property
+    def eigenvalues(
+        self,
+        values: dict[str, Tensor] | Tensor = dict(),
+        embedding: Embedding | None = None,
+    ) -> Tensor:
+        """Get eigenvalues of the tensor of QuantumOperation.
+
+        Args:
+            values (dict[str, Tensor], optional): Parameter values. Defaults to dict().
+            embedding (Embedding | None, optional): Optional embedding. Defaults to None.
+
+        Returns:
+            Eigenvalues of the related tensor.
+        """
+        blockmat = self.tensor(values, embedding)
+        return torch.linalg.eigvals(blockmat.permute((2, 0, 1))).reshape(-1, 1)
+
     @cached_property
     def spectral_gap(self) -> Tensor:
         """Difference between the moduli of the two largest eigenvalues of the generator.
@@ -253,7 +271,8 @@ def spectral_gap(self) -> Tensor:
             Tensor: Spectral gap value.
         """
         spectrum = self.eigenvals_generator
-        spectral_gap = torch.unique(torch.abs(torch.tril(spectrum - spectrum.T)))
+        diffs = spectrum - spectrum.T
+        spectral_gap = torch.unique(torch.abs(torch.tril(diffs)))
         return spectral_gap[spectral_gap.nonzero()]
 
     def _default_operator_function(
diff --git a/pyqtorch/utils.py b/pyqtorch/utils.py
index dc7b18cd..1e178337 100644
--- a/pyqtorch/utils.py
+++ b/pyqtorch/utils.py
@@ -27,7 +27,6 @@
 GRADCHECK_ATOL = 1e-05
 GRADCHECK_sampling_ATOL = 1e-01
 PSR_ACCEPTANCE = 1e-05
-GPSR_ACCEPTANCE = 1e-05
 ABC_ARRAY: NDArray = array(list(ABC))
 
 logger = getLogger(__name__)
@@ -48,6 +47,23 @@ def qubit_support_as_tuple(support: int | tuple[int, ...]) -> tuple[int, ...]:
     return qubit_support
 
 
+def _round_operator(t: Tensor, decimals: int = 4) -> Tensor:
+    if torch.is_complex(t):
+
+        def _round(_t: Tensor) -> Tensor:
+            r = _t.real.round(decimals=decimals)
+            i = _t.imag.round(decimals=decimals)
+            return torch.complex(r, i)
+
+    else:
+
+        def _round(_t: Tensor) -> Tensor:
+            return _t.round(decimals=decimals)
+
+    fn = torch.vmap(_round)
+    return fn(t)
+
+
 def inner_prod(bra: Tensor, ket: Tensor) -> Tensor:
     """
     Compute the inner product :math:`\\langle\\bra|\\ket\\rangle`
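The `spectral_gap` refactor and `_round_operator` work together: pairwise eigenvalue differences are rounded before `torch.unique`, so floating-point noise (e.g. from `torch.linalg.eigvals`) cannot register spurious extra gaps. A real-valued sketch of the same pipeline (`_round_operator` itself additionally handles complex tensors via `torch.vmap`):

```python
# Extracting spectral gaps: pairwise differences -> round -> lower triangle
# -> absolute value -> unique nonzero entries.
import torch

spectrum = torch.tensor([[-1.0], [1.0 + 1e-6]])    # eigenvalues as a column, with noise
diffs = (spectrum - spectrum.T).round(decimals=4)  # rounding collapses the noise
gaps = torch.unique(torch.abs(torch.tril(diffs)))
gaps = gaps[gaps.nonzero()]
print(gaps)  # tensor([[2.]]) -- a single gap, as expected for a Pauli spectrum
```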
diff --git a/tests/test_gpsr.py b/tests/test_gpsr.py
index e6d14e73..1de4e90b 100644
--- a/tests/test_gpsr.py
+++ b/tests/test_gpsr.py
@@ -5,14 +5,15 @@
 import pytest
 import torch
 from helpers import random_pauli_hamiltonian
+from test_analog import Hamiltonian_general
 
 import pyqtorch as pyq
 from pyqtorch import DiffMode, expectation
 from pyqtorch.circuit import QuantumCircuit
-from pyqtorch.hamiltonians import Observable
-from pyqtorch.matrices import COMPLEX_TO_REAL_DTYPES
+from pyqtorch.hamiltonians import HamiltonianEvolution, Observable
+from pyqtorch.matrices import COMPLEX_TO_REAL_DTYPES, DEFAULT_MATRIX_DTYPE
 from pyqtorch.primitives import Parametric
-from pyqtorch.utils import GPSR_ACCEPTANCE, PSR_ACCEPTANCE, GRADCHECK_sampling_ATOL
+from pyqtorch.utils import PSR_ACCEPTANCE, GRADCHECK_sampling_ATOL
 
 
 def circuit_psr(n_qubits: int) -> QuantumCircuit:
@@ -66,6 +67,129 @@ def circuit_sequence(n_qubits: int) -> QuantumCircuit:
     return circ
 
 
+def circuit_hamevo_tensor_gpsr(n_qubits: int) -> QuantumCircuit:
+    """Helper function to make an example circuit."""
+
+    ham = Hamiltonian_general(n_qubits)
+    ham_op = pyq.HamiltonianEvolution(ham, "t", qubit_support=tuple(range(n_qubits)))
+
+    ops = [
+        pyq.CRX(0, 1, "theta_0"),
+        pyq.X(1),
+        pyq.CRY(1, 2, "theta_1"),
+        ham_op,
+        pyq.CRX(1, 2, "theta_2"),
+        pyq.X(0),
+        pyq.CRY(0, 1, "theta_3"),
+        pyq.CNOT(0, 1),
+    ]
+
+    circ = QuantumCircuit(n_qubits, ops)
+
+    return circ
+
+
+def circuit_hamevo_pauligen_gpsr(n_qubits: int) -> QuantumCircuit:
+    """Helper function to make an example circuit."""
+
+    ham = random_pauli_hamiltonian(
+        n_qubits, k_1q=n_qubits, k_2q=0, default_scale_coeffs=1.0
+    )[0]
+    ham_op = pyq.HamiltonianEvolution(ham, "t", qubit_support=tuple(range(n_qubits)))
+
+    ops = [
+        pyq.CRX(0, 1, "theta_0"),
+        pyq.X(1),
+        pyq.CRY(1, 2, "theta_1"),
+        ham_op,
+        pyq.CRX(1, 2, "theta_2"),
+        pyq.X(0),
+        pyq.CRY(0, 1, "theta_3"),
+        pyq.CNOT(0, 1),
+    ]
+
+    circ = QuantumCircuit(n_qubits, ops)
+
+    return circ
+
+
+@pytest.mark.parametrize(
+    ["n_qubits", "batch_size", "circuit_fn"],
+    [
+        (3, 1, circuit_hamevo_tensor_gpsr),
+        (3, 1, circuit_hamevo_pauligen_gpsr),
+    ],
+)
+def test_expectation_gpsr_hamevo(
+    n_qubits: int,
+    batch_size: int,
+    circuit_fn: Callable,
+    dtype: torch.dtype = torch.complex128,
+) -> None:
+    torch.manual_seed(42)
+    circ = circuit_fn(n_qubits).to(dtype)
+    obs = Observable(
+        random_pauli_hamiltonian(
+            n_qubits, k_1q=n_qubits, k_2q=0, default_scale_coeffs=1.0
+        )[0]
+    ).to(dtype)
+    values = {
+        op.param_name: torch.rand(
+            batch_size, requires_grad=True, dtype=COMPLEX_TO_REAL_DTYPES[dtype]
+        )
+        for op in circ.flatten()
+        if isinstance(op, Parametric) and isinstance(op.param_name, str)
+    }
+    values.update(
+        {
+            op.time: torch.rand(
+                batch_size, requires_grad=True, dtype=COMPLEX_TO_REAL_DTYPES[dtype]
+            )
+            for op in circ.operations
+            if isinstance(op, HamiltonianEvolution) and isinstance(op.time, str)
+        }
+    )
+    state = pyq.random_state(n_qubits, dtype=dtype)
+
+    # Apply adjoint
+    exp_ad = expectation(circ, state, values, obs, DiffMode.AD)
+    grad_ad = torch.autograd.grad(
+        exp_ad, tuple(values.values()), torch.ones_like(exp_ad), create_graph=True
+    )
+
+    # Apply PSR
+    exp_gpsr = expectation(circ, state, values, obs, DiffMode.GPSR)
+    grad_gpsr = torch.autograd.grad(
+        exp_gpsr, tuple(values.values()), torch.ones_like(exp_gpsr), create_graph=True
+    )
+
+    # first order checks
+    for i in range(len(grad_ad)):
+        assert torch.allclose(grad_ad[i], grad_gpsr[i], atol=PSR_ACCEPTANCE)
+
+    # second order checks
+    for i in range(len(grad_ad)):
+        gradgrad_ad = torch.autograd.grad(
+            grad_ad[i],
+            tuple(values.values()),
+            torch.ones_like(grad_ad[i]),
+            create_graph=True,
+        )
+
+        gradgrad_gpsr = torch.autograd.grad(
+            grad_gpsr[i],
+            tuple(values.values()),
+            torch.ones_like(grad_gpsr[i]),
+            create_graph=True,
+        )
+
+        assert len(gradgrad_ad) == len(gradgrad_gpsr)
+
+        # check second order gradients
+        for j in range(len(gradgrad_ad)):
+            assert torch.allclose(gradgrad_ad[j], gradgrad_gpsr[j], atol=1.0e-2)
+
+
 @pytest.mark.parametrize(
     ["n_qubits", "batch_size", "circuit_fn"],
     [
@@ -128,12 +252,9 @@ def test_expectation_gpsr(
     )
     assert torch.allclose(exp_gpsr, exp_gpsr_sampled, atol=1e-01)
 
-    atol = PSR_ACCEPTANCE if circuit_fn != circuit_gpsr else GPSR_ACCEPTANCE
-
     # first order checks
     for i in range(len(grad_ad)):
-        assert torch.allclose(grad_ad[i], grad_gpsr[i], atol=atol)
+        assert torch.allclose(grad_ad[i], grad_gpsr[i], atol=PSR_ACCEPTANCE)
         assert torch.allclose(
             grad_gpsr[i], grad_gpsr_sampled[i], atol=GRADCHECK_sampling_ATOL
         )
@@ -158,7 +279,7 @@ def test_expectation_gpsr(
 
         # check second order gradients
         for j in range(len(gradgrad_ad)):
-            assert torch.allclose(gradgrad_ad[j], gradgrad_gpsr[j], atol=atol)
+            assert torch.allclose(gradgrad_ad[j], gradgrad_gpsr[j], atol=PSR_ACCEPTANCE)
 
 
 @pytest.mark.parametrize("gate_type", ["scale", "hamevo", "same", ""])
@@ -171,7 +292,9 @@ def test_compatibility_gpsr(gate_type: str, sequence_circuit: bool) -> None:
         scale = pyq.Scale(seq_gate, pname)
         ops = [scale]
     elif gate_type == "hamevo":
-        hamevo = pyq.HamiltonianEvolution(pyq.Sequence([pyq.X(0)]), pname, (0,))
+        symbol = pname
+        t_evo = torch.tensor([torch.pi / 4], dtype=DEFAULT_MATRIX_DTYPE)
+        hamevo = pyq.HamiltonianEvolution(symbol, t_evo, (0,))
         ops = [hamevo]
     elif gate_type == "same":
         ops = [pyq.RY(0, pname), pyq.RZ(0, pname)]
@@ -186,20 +309,21 @@ def test_compatibility_gpsr(gate_type: str, sequence_circuit: bool) -> None:
     obs = pyq.Observable([pyq.Z(0)])
     state = pyq.zero_state(2)
 
-    param_value = torch.pi / 2
-    values = {"theta_0": torch.tensor([param_value], requires_grad=True)}
+    if gate_type == "hamevo":
+        H = pyq.X(0).tensor()
+        H.requires_grad = True
+        values = {"theta_0": H}
+    else:
+        param_value = torch.pi / 2
+        values = {"theta_0": torch.tensor([param_value], requires_grad=True)}
 
     if gate_type != "":
         with pytest.raises(ValueError):
            exp_gpsr = expectation(circ, state, values, obs, DiffMode.GPSR)
-
-            grad_gpsr = torch.autograd.grad(
-                exp_gpsr, tuple(values.values()), torch.ones_like(exp_gpsr)
-            )
     else:
         exp_gpsr = expectation(circ, state, values, obs, DiffMode.GPSR)
         grad_gpsr = torch.autograd.grad(
             exp_gpsr, tuple(values.values()), torch.ones_like(exp_gpsr)
         )
-        assert len(grad_gpsr) > 0
+        assert len(grad_gpsr) == 1
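End to end, these changes let GPSR differentiate through the evolution time of a `HamiltonianEvolution` with a tensor or operation generator. A hedged usage sketch mirroring the new tests (the single-qubit `X` generator and parameter names are illustrative, not from the diff; the tests use random Pauli Hamiltonians instead):

```python
import torch

import pyqtorch as pyq
from pyqtorch import DiffMode, expectation
from pyqtorch.circuit import QuantumCircuit

n_qubits = 1
# Operation-type generator; a SYMBOL or PARAMETRIC_OPERATION generator would raise.
ham_op = pyq.HamiltonianEvolution(pyq.X(0), "t", qubit_support=(0,))
circ = QuantumCircuit(n_qubits, [pyq.RY(0, "theta"), ham_op])
obs = pyq.Observable([pyq.Z(0)])
state = pyq.zero_state(n_qubits)

values = {
    "theta": torch.rand(1, requires_grad=True),
    "t": torch.rand(1, requires_grad=True),  # evolution time, now differentiable
}
exp_gpsr = expectation(circ, state, values, obs, DiffMode.GPSR)
grads = torch.autograd.grad(
    exp_gpsr, tuple(values.values()), torch.ones_like(exp_gpsr)
)
```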