
[Feature] Single gap GPSR #213

Merged
merged 12 commits into from
Jul 4, 2024
37 changes: 35 additions & 2 deletions docs/differentiation.md
@@ -10,7 +10,32 @@ It uses the `torch` native automatic differentiation engine which tracks operations
The [adjoint differentiation mode](https://arxiv.org/abs/2009.02823) computes first-order gradients in `O(P)` time, where `P` is the number of parameters in a circuit, while requiring at most three states in memory.

### Generalized Parameter-Shift rules (DiffMode.GPSR)
To be added.
The generalized parameter shift rule (GPSR) mode is an extension of the well-known [parameter shift rule (PSR)](https://arxiv.org/abs/1811.11184) algorithm to [arbitrary quantum operations](https://arxiv.org/abs/2108.01218). PSR applies only to quantum operations whose generator has a single gap in its eigenvalue spectrum; GPSR generalizes it to generators with multiple gaps.

For this, we define the differentiable function as the quantum expectation value

$$
f(x) = \left\langle 0\right|\hat{U}^{\dagger}(x)\hat{C}\hat{U}(x)\left|0\right\rangle
$$

where $\hat{U}(x)={\rm exp}{\left( -i\frac{x}{2}\hat{G}\right)}$ is the quantum evolution operator with generator $\hat{G}$ representing the structure of the underlying quantum circuit and $\hat{C}$ is the cost operator. Then, using the eigenvalue spectrum $\left\{ \lambda_n\right\}$ of the generator $\hat{G}$, we calculate the set of unique non-zero spectral gaps $\left\{ \Delta_s\right\}$ (differences between eigenvalues). It can be shown that the derivative of $f(x)$ is then given by:

$\begin{equation}
\frac{{\rm d}f\left(x\right)}{{\rm d}x}=\overset{S}{\underset{s=1}{\sum}}\Delta_{s}R_{s},
\end{equation}$

where $S$ is the number of unique non-zero spectral gaps and $R_s$ are real quantities that are solutions of a system of linear equations

$\begin{equation}
\begin{cases}
F_{1} & =4\overset{S}{\underset{s=1}{\sum}}{\rm sin}\left(\frac{\delta_{1}\Delta_{s}}{2}\right)R_{s},\\
F_{2} & =4\overset{S}{\underset{s=1}{\sum}}{\rm sin}\left(\frac{\delta_{2}\Delta_{s}}{2}\right)R_{s},\\
& ...\\
F_{S} & =4\overset{S}{\underset{s=1}{\sum}}{\rm sin}\left(\frac{\delta_{S}\Delta_{s}}{2}\right)R_{s}.
\end{cases}
\end{equation}$

Here $F_s=f(x+\delta_s)-f(x-\delta_s)$ denotes the difference between the values of the function evaluated at the shifted arguments $x\pm\delta_s$, where the shifts $\delta_s$ are distinct values chosen so that the linear system above is solvable.
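
In the single-gap case ($S=1$), e.g. for rotations generated by a Pauli operator where $\Delta_1=2$, the system above reduces to a single equation and recovers the standard parameter shift rule. The following minimal sketch (plain `torch`, independent of the library API) illustrates this for $f(x)=\cos(x)$, the expectation of $\hat{Z}$ after an $RX(x)$ rotation applied to $\left|0\right\rangle$:

```python
import torch

# Sketch under the single-gap assumption: the generator X has eigenvalues ±1,
# so its only spectral gap is Delta = 2 and the linear system reduces to one equation.
def f(x: torch.Tensor) -> torch.Tensor:
    return torch.cos(x)  # <0| RX(x)^† Z RX(x) |0>

x = torch.tensor(0.7)
gap = torch.tensor(2.0)             # Delta
shift = torch.tensor(torch.pi / 2)  # delta

# df/dx = Delta * (f(x + delta) - f(x - delta)) / (4 * sin(delta * Delta / 2))
dfdx = gap * (f(x + shift) - f(x - shift)) / (4 * torch.sin(shift * gap / 2))
assert torch.allclose(dfdx, -torch.sin(x))  # matches the analytic derivative of cos(x)
```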

### Example
```python exec="on" source="material-block" html="1"
@@ -32,16 +57,24 @@ state = pyq.zero_state(n_qubits)

values_ad = {"x": torch.tensor([torch.pi / 2], requires_grad=True)}
values_adjoint = {"x": torch.tensor([torch.pi / 2], requires_grad=True)}
values_gpsr = {"x": torch.tensor([torch.pi / 2], requires_grad=True)}

exp_ad = pyq.expectation(circ, state, values_ad, obs, DiffMode.AD)
exp_adjoint = pyq.expectation(circ, state, values_adjoint, obs, DiffMode.ADJOINT)
exp_gpsr = pyq.expectation(circ, state, values_gpsr, obs, DiffMode.GPSR)

dfdx_ad = torch.autograd.grad(exp_ad, tuple(values_ad.values()), torch.ones_like(exp_ad))

dfdx_adjoint = torch.autograd.grad(
    exp_adjoint, tuple(values_adjoint.values()), torch.ones_like(exp_adjoint)
)

assert len(dfdx_ad) == len(dfdx_adjoint)
dfdx_gpsr = torch.autograd.grad(
    exp_gpsr, tuple(values_gpsr.values()), torch.ones_like(exp_gpsr)
)

assert len(dfdx_ad) == len(dfdx_adjoint) == len(dfdx_gpsr)
for i in range(len(dfdx_ad)):
    assert torch.allclose(dfdx_ad[i], dfdx_adjoint[i])
    assert torch.allclose(dfdx_ad[i], dfdx_gpsr[i])
```
5 changes: 4 additions & 1 deletion pyqtorch/api.py
@@ -8,6 +8,7 @@
from pyqtorch.adjoint import AdjointExpectation
from pyqtorch.analog import Observable
from pyqtorch.circuit import QuantumCircuit
from pyqtorch.gpsr import PSRExpectation
from pyqtorch.utils import DiffMode, inner_prod

logger = getLogger(__name__)
@@ -94,6 +95,8 @@ def expectation(
            circuit, observable, state, values.keys(), *values.values()
        )
    elif diff_mode == DiffMode.GPSR:
        raise NotImplementedError("To be added.")
        return PSRExpectation.apply(
            circuit, observable, state, values.keys(), *values.values()
        )
    else:
        logger.error(f"Requested diff_mode '{diff_mode}' not supported.")
86 changes: 86 additions & 0 deletions pyqtorch/gpsr.py
@@ -0,0 +1,86 @@
from __future__ import annotations

from logging import getLogger
from typing import Any, Tuple

import torch
from torch import Tensor, no_grad
from torch.autograd import Function

import pyqtorch as pyq
from pyqtorch.analog import Observable
from pyqtorch.circuit import QuantumCircuit
from pyqtorch.parametric import Parametric
from pyqtorch.utils import inner_prod, param_dict

logger = getLogger(__name__)


class PSRExpectation(Function):
    """
    Expectation of an observable over a parametrized circuit, differentiated
    with the parameter shift rule for generators with a single spectral gap:
    the backward pass re-evaluates the expectation at parameter values shifted
    by ±pi/2 and combines the results using the generator's spectral gap.
    """

    @staticmethod
    @no_grad()
    def forward(
        ctx: Any,
        circuit: QuantumCircuit,
        observable: Observable,
        state: Tensor,
        param_names: list[str],
        *param_values: Tensor,
    ) -> Tensor:
        ctx.circuit = circuit
        ctx.observable = observable
        ctx.param_names = param_names
        ctx.state = state
        values = param_dict(param_names, param_values)
        ctx.out_state = circuit.run(state, values)
        ctx.projected_state = observable.run(ctx.out_state, values)
        ctx.save_for_backward(*param_values)
        return inner_prod(ctx.out_state, ctx.projected_state).real

    @staticmethod
    def backward(ctx: Any, grad_out: Tensor) -> Tuple[None, ...]:
        param_values = ctx.saved_tensors
        values = param_dict(ctx.param_names, param_values)
        grads_dict = {k: None for k in values.keys()}
        shift = torch.tensor(torch.pi) / 2.0

        for op in ctx.circuit.flatten():
            if isinstance(op, Parametric) and isinstance(op.param_name, str):
                # Unique non-zero spectral gaps of the generator, computed from
                # pairwise differences of its eigenvalues.
                spectrum = torch.linalg.eigvalsh(op.pauli).reshape(-1, 1)
                spectral_gap = torch.unique(
                    torch.abs(torch.tril(spectrum - spectrum.T))
                )
                spectral_gap = spectral_gap[spectral_gap.nonzero()]
                assert (
                    len(spectral_gap) == 1
                ), "PSRExpectation only works on single_gap for now."

                if values[op.param_name].requires_grad:
                    # Evaluate the expectation at the shifted parameter values x ± δ.
                    with no_grad():
                        copied_values = values.copy()
                        copied_values[op.param_name] += shift
                        f_plus = pyq.expectation(
                            ctx.circuit, ctx.state, copied_values, ctx.observable
                        )
                        copied_values[op.param_name] -= 2.0 * shift
                        f_min = pyq.expectation(
                            ctx.circuit, ctx.state, copied_values, ctx.observable
                        )

                    # Single-gap shift rule: df/dx = Δ (f(x+δ) - f(x-δ)) / (4 sin(δΔ/2)).
                    grad = (
                        spectral_gap
                        * (f_plus - f_min)
                        / (4 * torch.sin(spectral_gap * shift / 2))
                    )
                    grad *= grad_out
                    if grads_dict[op.param_name] is not None:
                        grads_dict[op.param_name] += grad
                    else:
                        grads_dict[op.param_name] = grad
            else:
                logger.error(f"PSRExpectation does not support operation: {type(op)}.")
        return (None, None, None, None, *grads_dict.values())
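
For reference, the single-gap restriction asserted in `backward` holds for Pauli-generated rotations. A small standalone check (using the same eigenvalue computation as above, not taken from this diff) illustrates it: the Pauli-X generator has eigenvalues ±1 and hence exactly one non-zero spectral gap, Δ = 2.

```python
import torch

# Illustrative check (not part of the PR): compute the spectral gaps of Pauli-X
# exactly as PSRExpectation.backward does and verify there is a single gap of 2.
pauli_x = torch.tensor([[0.0, 1.0], [1.0, 0.0]], dtype=torch.cdouble)
spectrum = torch.linalg.eigvalsh(pauli_x).reshape(-1, 1)
gaps = torch.unique(torch.abs(torch.tril(spectrum - spectrum.T)))
gaps = gaps[gaps.nonzero()]
assert gaps.numel() == 1
assert torch.allclose(gaps, torch.tensor(2.0, dtype=gaps.dtype))
```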
1 change: 1 addition & 0 deletions pyqtorch/utils.py
@@ -87,6 +87,7 @@ class DiffMode(StrEnum):
"""An implementation of "Efficient calculation of gradients
in classical simulations of variational quantum algorithms",
Jones & Gacon, 2020"""

GPSR = "gpsr"
"""To be added."""

54 changes: 50 additions & 4 deletions tests/test_circuit.py
@@ -16,13 +16,14 @@
from pyqtorch.utils import GRADCHECK_ATOL, DensityMatrix, product_state


def test_adjoint_diff() -> None:
# TODO add GPSR when multigap is implemented for this test
@pytest.mark.parametrize("n_qubits", [3, 4, 5])
def test_adjoint_diff(n_qubits: int) -> None:
rx = pyq.RX(0, param_name="theta_0")
cry = pyq.CPHASE(0, 1, param_name="theta_1")
rz = pyq.RZ(2, param_name="theta_2")
cnot = pyq.CNOT(1, 2)
ops = [rx, cry, rz, cnot]
n_qubits = 3
circ = pyq.QuantumCircuit(n_qubits, ops)
obs = pyq.QuantumCircuit(n_qubits, [pyq.Z(0)])

@@ -60,7 +61,7 @@ def test_adjoint_diff() -> None:

    assert len(grad_ad) == len(grad_adjoint)
    for i in range(len(grad_ad)):
        assert torch.allclose(grad_ad[i], grad_adjoint[i])
        assert torch.allclose(grad_ad[i], grad_adjoint[i], atol=GRADCHECK_ATOL)


@pytest.mark.parametrize("dtype", [torch.complex64, torch.complex128])
@@ -155,7 +156,7 @@ def test_adjoint_duplicate_params() -> None:
    grad_adjoint = torch.autograd.grad(
        exp_adjoint, tuple(values.values()), torch.ones_like(exp_adjoint)
    )[0]
    assert torch.allclose(grad_ad, grad_adjoint)
    assert torch.allclose(grad_ad, grad_adjoint, atol=GRADCHECK_ATOL)


@pytest.mark.parametrize("fn", [pyq.X, pyq.Z, pyq.Y])
@@ -310,3 +311,48 @@ def test_sample_run() -> None:
assert torch.allclose(wf, product_state("1100"))
assert torch.allclose(pyq.QuantumCircuit(4, [pyq.I(0)]).run("1100"), wf)
assert "1100" in samples[0]


# TODO: delete this test once GPSR is added to test_adjoint_diff above (when multi-gap is implemented)
@pytest.mark.parametrize("n_qubits", [3, 4, 5])
def test_all_diff(n_qubits: int) -> None:
rx = pyq.RX(0, param_name="theta_0")
rz = pyq.RZ(2, param_name="theta_1")
cnot = pyq.CNOT(1, 2)
ops = [rx, rz, cnot]
circ = pyq.QuantumCircuit(n_qubits, ops)
obs = pyq.QuantumCircuit(n_qubits, [pyq.Z(0)])

theta_0_value = torch.pi / 2
theta_1_value = torch.pi

state = pyq.zero_state(n_qubits)

theta_0 = torch.tensor([theta_0_value], requires_grad=True)

theta_1 = torch.tensor([theta_1_value], requires_grad=True)

values = {"theta_0": theta_0, "theta_1": theta_1}

exp_ad = expectation(circ, state, values, obs, DiffMode.AD)
exp_adjoint = expectation(circ, state, values, obs, DiffMode.ADJOINT)
exp_gpsr = expectation(circ, state, values, obs, DiffMode.GPSR)

grad_ad = torch.autograd.grad(
exp_ad, tuple(values.values()), torch.ones_like(exp_ad)
)

grad_adjoint = torch.autograd.grad(
exp_adjoint, tuple(values.values()), torch.ones_like(exp_adjoint)
)

grad_gpsr = torch.autograd.grad(
exp_gpsr, tuple(values.values()), torch.ones_like(exp_gpsr)
)

assert len(grad_ad) == len(grad_adjoint) == len(grad_gpsr)

for i in range(len(grad_ad)):
assert torch.allclose(
grad_ad[i], grad_adjoint[i], atol=GRADCHECK_ATOL
) and torch.allclose(grad_ad[i], grad_gpsr[i], atol=GRADCHECK_ATOL)