diff --git a/docs/differentiation.md b/docs/differentiation.md index 8e29f83c..25d71031 100644 --- a/docs/differentiation.md +++ b/docs/differentiation.md @@ -10,7 +10,32 @@ It uses the `torch` native automatic differentiation engine which tracks operati The [adjoint differentiation mode](https://arxiv.org/abs/2009.02823) computes first-order gradients by only requiring at most three states in memory in `O(P)` time where `P` is the number of parameters in a circuit. ### Generalized Parameter-Shift rules (DiffMode.GPSR) -To be added. +The Generalized parameter shift rule (GPSR mode) is an extension of the well known [parameter shift rule (PSR)](https://arxiv.org/abs/1811.11184) algorithm [to arbitrary quantum operations](https://arxiv.org/abs/2108.01218). Indeed, PSR only works for quantum operations whose generator has a single gap in its eigenvalue spectrum, GPSR extending to multi-gap. + +For this, we define the differentiable function as quantum expectation value + +$$ +f(x) = \left\langle 0\right|\hat{U}^{\dagger}(x)\hat{C}\hat{U}(x)\left|0\right\rangle +$$ + +where $\hat{U}(x)={\rm exp}{\left( -i\frac{x}{2}\hat{G}\right)}$ is the quantum evolution operator with generator $\hat{G}$ representing the structure of the underlying quantum circuit and $\hat{C}$ is the cost operator. Then using the eigenvalue spectrum $\left\{ \lambda_n\right\}$ of the generator $\hat{G}$ we calculate the full set of corresponding unique non-zero spectral gaps $\left\{ \Delta_s\right\}$ (differences between eigenvalues). It can be shown that the final expression of derivative of $f(x)$ is then given by the following expression: + +$\begin{equation} +\frac{{\rm d}f\left(x\right)}{{\rm d}x}=\overset{S}{\underset{s=1}{\sum}}\Delta_{s}R_{s}, +\end{equation}$ + +where $S$ is the number of unique non-zero spectral gaps and $R_s$ are real quantities that are solutions of a system of linear equations + +$\begin{equation} +\begin{cases} +F_{1} & =4\overset{S}{\underset{s=1}{\sum}}{\rm sin}\left(\frac{\delta_{1}\Delta_{s}}{2}\right)R_{s},\\ +F_{2} & =4\overset{S}{\underset{s=1}{\sum}}{\rm sin}\left(\frac{\delta_{2}\Delta_{s}}{2}\right)R_{s},\\ + & ...\\ +F_{S} & =4\overset{S}{\underset{s=1}{\sum}}{\rm sin}\left(\frac{\delta_{M}\Delta_{s}}{2}\right)R_{s}. +\end{cases} +\end{equation}$ + +Here $F_s=f(x+\delta_s)-f(x-\delta_s)$ denotes the difference between values of functions evaluated at shifted arguments $x\pm\delta_s$. ### Example ```python exec="on" source="material-block" html="1" @@ -23,7 +48,7 @@ batch_size = 1 ry = pyq.RY(0, param_name="x") cnot = pyq.CNOT(1, 2) -ops = [ry, cnot] +ops = [ry] n_qubits = 3 circ = pyq.QuantumCircuit(n_qubits, ops) @@ -32,8 +57,11 @@ state = pyq.zero_state(n_qubits) values_ad = {"x": torch.tensor([torch.pi / 2], requires_grad=True)} values_adjoint = {"x": torch.tensor([torch.pi / 2], requires_grad=True)} +values_gpsr = {"x": torch.tensor([torch.pi / 2], requires_grad=True)} + exp_ad = pyq.expectation(circ, state, values_ad, obs, DiffMode.AD) exp_adjoint = pyq.expectation(circ, state, values_adjoint, obs, DiffMode.ADJOINT) +exp_gpsr = pyq.expectation(circ, state, values_gpsr, obs, DiffMode.GPSR) dfdx_ad = torch.autograd.grad(exp_ad, tuple(values_ad.values()), torch.ones_like(exp_ad)) @@ -41,7 +69,12 @@ dfdx_adjoint = torch.autograd.grad( exp_adjoint, tuple(values_adjoint.values()), torch.ones_like(exp_adjoint) ) -assert len(dfdx_ad) == len(dfdx_adjoint) +dfdx_gpsr = torch.autograd.grad( + exp_gpsr, tuple(values_gpsr.values()), torch.ones_like(exp_gpsr) +) + +assert len(dfdx_ad) == len(dfdx_adjoint) == len(dfdx_gpsr) for i in range(len(dfdx_ad)): assert torch.allclose(dfdx_ad[i], dfdx_adjoint[i]) + assert torch.allclose(dfdx_ad[i], dfdx_gpsr[i]) ``` diff --git a/pyqtorch/api.py b/pyqtorch/api.py index 92f78a83..24af0ecb 100644 --- a/pyqtorch/api.py +++ b/pyqtorch/api.py @@ -8,6 +8,7 @@ from pyqtorch.adjoint import AdjointExpectation from pyqtorch.analog import Observable from pyqtorch.circuit import QuantumCircuit +from pyqtorch.gpsr import PSRExpectation from pyqtorch.utils import DiffMode, inner_prod logger = getLogger(__name__) @@ -94,6 +95,8 @@ def expectation( circuit, observable, state, values.keys(), *values.values() ) elif diff_mode == DiffMode.GPSR: - raise NotImplementedError("To be added.") + return PSRExpectation.apply( + circuit, observable, state, values.keys(), *values.values() + ) else: logger.error(f"Requested diff_mode '{diff_mode}' not supported.") diff --git a/pyqtorch/gpsr.py b/pyqtorch/gpsr.py new file mode 100644 index 00000000..3e47c46f --- /dev/null +++ b/pyqtorch/gpsr.py @@ -0,0 +1,152 @@ +from __future__ import annotations + +from logging import getLogger +from typing import Any, Tuple + +import torch +from torch import Tensor, no_grad +from torch.autograd import Function + +import pyqtorch as pyq +from pyqtorch.analog import HamiltonianEvolution, Observable, Scale +from pyqtorch.circuit import QuantumCircuit +from pyqtorch.parametric import Parametric +from pyqtorch.utils import inner_prod, param_dict + +logger = getLogger(__name__) + + +class PSRExpectation(Function): + r""" + Implementation of the generalized parameter shift rule. + + Compared to the original parameter shift rule + which only works for quantum operations whose generator has a single gap + in its eigenvalue spectrum, GPSR works with arbitrary + generators of quantum operations. + + For this, we define the differentiable function as quantum expectation value + + $$ + f(x) = \left\langle 0\right|\hat{U}^{\dagger}(x)\hat{C}\hat{U}(x)\left|0\right\rangle + $$ + + where $\hat{U}(x)={\rm exp}{\left( -i\frac{x}{2}\hat{G}\right)}$ + is the quantum evolution operator with generator $\hat{G}$ representing the structure + of the underlying quantum circuit and $\hat{C}$ is the cost operator. + Then using the eigenvalue spectrum $\left\{ \lambda_n\right\}$ of the generator $\hat{G}$ + we calculate the full set of corresponding unique non-zero spectral gaps + $\left\{ \Delta_s\right\}$ (differences between eigenvalues). + It can be shown that the final expression of derivative of $f(x)$ + is then given by the following expression: + + $\begin{equation} + \frac{{\rm d}f\left(x\right)}{{\rm d}x}=\overset{S}{\underset{s=1}{\sum}}\Delta_{s}R_{s}, + \end{equation}$ + + where $S$ is the number of unique non-zero spectral gaps and $R_s$ are real quantities that + are solutions of a system of linear equations + + $\begin{equation} + \begin{cases} + F_{1} & =4\overset{S}{\underset{s=1}{\sum}}{\rm sin} + \left(\frac{\delta_{1}\Delta_{s}}{2}\right)R_{s},\\ + F_{2} & =4\overset{S}{\underset{s=1}{\sum}}{\rm sin} + \left(\frac{\delta_{2}\Delta_{s}}{2}\right)R_{s},\\ + & ...\\ + F_{S} & =4\overset{S}{\underset{s=1}{\sum}}{\rm sin} + \left(\frac{\delta_{M}\Delta_{s}}{2}\right)R_{s}. + \end{cases} + \end{equation}$ + + Here $F_s=f(x+\delta_s)-f(x-\delta_s)$ denotes the difference between values + of functions evaluated at shifted arguments $x\pm\delta_s$. + + Arguments: + circuit: A QuantumCircuit instance + observable: A hamiltonian. + state: A state in the form of [2 * n_qubits + [batch_size]] + param_names: A list of parameter names. + *param_values: A unpacked tensor of values for each parameter. + """ + + @staticmethod + @no_grad() + def forward( + ctx: Any, + circuit: QuantumCircuit, + observable: Observable, + state: Tensor, + param_names: list[str], + *param_values: Tensor, + ) -> Tensor: + ctx.circuit = circuit + ctx.observable = observable + ctx.param_names = param_names + ctx.state = state + values = param_dict(param_names, param_values) + ctx.out_state = circuit.run(state, values) + ctx.projected_state = observable.run(ctx.out_state, values) + ctx.save_for_backward(*param_values) + return inner_prod(ctx.out_state, ctx.projected_state).real + + @staticmethod + def backward(ctx: Any, grad_out: Tensor) -> Tuple[None, ...]: + check_support_psr(ctx.circuit) + param_values = ctx.saved_tensors + values = param_dict(ctx.param_names, param_values) + grads_dict = {k: None for k in values.keys()} + shift = torch.tensor(torch.pi) / 2.0 + + for op in ctx.circuit.flatten(): + if isinstance(op, Parametric) and isinstance(op.param_name, str): + spectrum = torch.linalg.eigvalsh(op.pauli).reshape(-1, 1) + spectral_gap = torch.unique( + torch.abs(torch.tril(spectrum - spectrum.T)) + ) + spectral_gap = spectral_gap[spectral_gap.nonzero()] + assert ( + len(spectral_gap) == 1 + ), "PSRExpectation only works on single_gap for now." + + if values[op.param_name].requires_grad: + with no_grad(): + copied_values = values.copy() + copied_values[op.param_name] += shift + f_plus = pyq.expectation( + ctx.circuit, ctx.state, copied_values, ctx.observable + ) + copied_values[op.param_name] -= 2.0 * shift + f_min = pyq.expectation( + ctx.circuit, ctx.state, copied_values, ctx.observable + ) + + grad = ( + spectral_gap + * (f_plus - f_min) + / (4 * torch.sin(spectral_gap * shift / 2)) + ) + grad *= grad_out + if grads_dict[op.param_name] is not None: + grads_dict[op.param_name] += grad + else: + grads_dict[op.param_name] = grad + else: + logger.error(f"PSRExpectation does not support operation: {type(op)}.") + return (None, None, None, None, *grads_dict.values()) + + +def check_support_psr(circuit: QuantumCircuit): + """Checking that circuit has only compatible operations for PSR. + + Args: + circuit (QuantumCircuit): Circuit to check. + + Raises: + ValueError: When circuit contains Scale or HamiltonianEvolution. + """ + for op in circuit.operations: + if isinstance(op, Scale) or isinstance(op, HamiltonianEvolution): + raise ValueError( + f"PSR is not applicable as circuit contains an operation of type: {type(op)}." + ) diff --git a/pyqtorch/utils.py b/pyqtorch/utils.py index 5bdda042..140e57fa 100644 --- a/pyqtorch/utils.py +++ b/pyqtorch/utils.py @@ -87,6 +87,7 @@ class DiffMode(StrEnum): """An implementation of "Efficient calculation of gradients in classical simulations of variational quantum algorithms", Jones & Gacon, 2020""" + GPSR = "gpsr" """To be added.""" diff --git a/tests/test_circuit.py b/tests/test_circuit.py index 689034cc..5faf415c 100644 --- a/tests/test_circuit.py +++ b/tests/test_circuit.py @@ -13,16 +13,21 @@ from pyqtorch.noise import Noise from pyqtorch.parametric import Parametric from pyqtorch.primitive import Primitive -from pyqtorch.utils import GRADCHECK_ATOL, DensityMatrix, product_state +from pyqtorch.utils import ( + GRADCHECK_ATOL, + DensityMatrix, + product_state, +) -def test_adjoint_diff() -> None: +# TODO add GPSR when multigap is implemented for this test +@pytest.mark.parametrize("n_qubits", [3, 4, 5]) +def test_adjoint_diff(n_qubits: int) -> None: rx = pyq.RX(0, param_name="theta_0") cry = pyq.CPHASE(0, 1, param_name="theta_1") rz = pyq.RZ(2, param_name="theta_2") cnot = pyq.CNOT(1, 2) ops = [rx, cry, rz, cnot] - n_qubits = 3 circ = pyq.QuantumCircuit(n_qubits, ops) obs = pyq.QuantumCircuit(n_qubits, [pyq.Z(0)]) @@ -60,7 +65,7 @@ def test_adjoint_diff() -> None: assert len(grad_ad) == len(grad_adjoint) for i in range(len(grad_ad)): - assert torch.allclose(grad_ad[i], grad_adjoint[i]) + assert torch.allclose(grad_ad[i], grad_adjoint[i], atol=GRADCHECK_ATOL) @pytest.mark.parametrize("dtype", [torch.complex64, torch.complex128]) @@ -155,7 +160,7 @@ def test_adjoint_duplicate_params() -> None: grad_adjoint = torch.autograd.grad( exp_adjoint, tuple(values.values()), torch.ones_like(exp_adjoint) )[0] - assert torch.allclose(grad_ad, grad_adjoint) + assert torch.allclose(grad_ad, grad_adjoint, atol=GRADCHECK_ATOL) @pytest.mark.parametrize("fn", [pyq.X, pyq.Z, pyq.Y]) @@ -310,3 +315,74 @@ def test_sample_run() -> None: assert torch.allclose(wf, product_state("1100")) assert torch.allclose(pyq.QuantumCircuit(4, [pyq.I(0)]).run("1100"), wf) assert "1100" in samples[0] + + +# TODO delete this test when first one up is +@pytest.mark.parametrize("n_qubits", [3, 4, 5]) +def test_all_diff(n_qubits: int) -> None: + rx = pyq.RX(0, param_name="theta_0") + rz = pyq.RZ(2, param_name="theta_1") + cnot = pyq.CNOT(1, 2) + ops = [rx, rz, cnot] + circ = pyq.QuantumCircuit(n_qubits, ops) + obs = pyq.QuantumCircuit(n_qubits, [pyq.Z(0)]) + + theta_0_value = torch.pi / 2 + theta_1_value = torch.pi + + state = pyq.zero_state(n_qubits) + + theta_0 = torch.tensor([theta_0_value], requires_grad=True) + + theta_1 = torch.tensor([theta_1_value], requires_grad=True) + + values = {"theta_0": theta_0, "theta_1": theta_1} + + exp_ad = expectation(circ, state, values, obs, DiffMode.AD) + exp_adjoint = expectation(circ, state, values, obs, DiffMode.ADJOINT) + exp_gpsr = expectation(circ, state, values, obs, DiffMode.GPSR) + + grad_ad = torch.autograd.grad( + exp_ad, tuple(values.values()), torch.ones_like(exp_ad) + ) + + grad_adjoint = torch.autograd.grad( + exp_adjoint, tuple(values.values()), torch.ones_like(exp_adjoint) + ) + + grad_gpsr = torch.autograd.grad( + exp_gpsr, tuple(values.values()), torch.ones_like(exp_gpsr) + ) + + assert len(grad_ad) == len(grad_adjoint) == len(grad_gpsr) + + for i in range(len(grad_ad)): + assert torch.allclose( + grad_ad[i], grad_adjoint[i], atol=GRADCHECK_ATOL + ) and torch.allclose(grad_ad[i], grad_gpsr[i], atol=GRADCHECK_ATOL) + + +@pytest.mark.xfail(raises=ValueError) +@pytest.mark.parametrize("gate_type", ["scale", "hamevo"]) +def test_compatibility_gpsr(gate_type: str) -> None: + + if gate_type == "scale": + seq_gate = pyq.Sequence([pyq.X(0)]) + scale = pyq.Scale(seq_gate, "theta_0") + ops = [scale] + else: + hamevo = pyq.HamiltonianEvolution(pyq.Sequence([pyq.X(0)]), "theta_0", (0,)) + + ops = [hamevo] + + circ = pyq.QuantumCircuit(1, ops) + obs = pyq.QuantumCircuit(1, [pyq.Z(0)]) + state = pyq.zero_state(1) + + param_value = torch.pi / 2 + values = {"theta_0": torch.tensor([param_value], requires_grad=True)} + exp_gpsr = expectation(circ, state, values, obs, DiffMode.GPSR) + + grad_gpsr = torch.autograd.grad( + exp_gpsr, tuple(values.values()), torch.ones_like(exp_gpsr) + )