[Feature] Single gap GPSR (#213)
Co-authored-by: Charles MOUSSA <[email protected]>
Co-authored-by: Roland-djee <[email protected]>
3 people authored Jul 4, 2024
1 parent e2b4860 commit 7e38f2e
Showing 5 changed files with 274 additions and 9 deletions.
39 changes: 36 additions & 3 deletions docs/differentiation.md
@@ -10,7 +10,32 @@ It uses the `torch` native automatic differentiation engine which tracks operati
The [adjoint differentiation mode](https://arxiv.org/abs/2009.02823) computes first-order gradients by only requiring at most three states in memory in `O(P)` time where `P` is the number of parameters in a circuit.

### Generalized Parameter-Shift rules (DiffMode.GPSR)
To be added.
The generalized parameter-shift rule (GPSR mode) extends the well-known [parameter shift rule (PSR)](https://arxiv.org/abs/1811.11184) algorithm [to arbitrary quantum operations](https://arxiv.org/abs/2108.01218). Whereas PSR only applies to quantum operations whose generator has a single gap in its eigenvalue spectrum, GPSR also covers the multi-gap case.

For this, we define the differentiable function as the quantum expectation value

$$
f(x) = \left\langle 0\right|\hat{U}^{\dagger}(x)\hat{C}\hat{U}(x)\left|0\right\rangle
$$

where $\hat{U}(x)={\rm exp}\left( -i\frac{x}{2}\hat{G}\right)$ is the quantum evolution operator with generator $\hat{G}$ representing the structure of the underlying quantum circuit, and $\hat{C}$ is the cost operator. Using the eigenvalue spectrum $\left\{ \lambda_n\right\}$ of the generator $\hat{G}$, we compute the full set of corresponding unique non-zero spectral gaps $\left\{ \Delta_s\right\}$ (differences between eigenvalues). It can be shown that the derivative of $f(x)$ is then given by:

$\begin{equation}
\frac{{\rm d}f\left(x\right)}{{\rm d}x}=\overset{S}{\underset{s=1}{\sum}}\Delta_{s}R_{s},
\end{equation}$

where $S$ is the number of unique non-zero spectral gaps and $R_s$ are real quantities that are solutions of a system of linear equations

$\begin{equation}
\begin{cases}
F_{1} & =4\overset{S}{\underset{s=1}{\sum}}{\rm sin}\left(\frac{\delta_{1}\Delta_{s}}{2}\right)R_{s},\\
F_{2} & =4\overset{S}{\underset{s=1}{\sum}}{\rm sin}\left(\frac{\delta_{2}\Delta_{s}}{2}\right)R_{s},\\
& ...\\
F_{S} & =4\overset{S}{\underset{s=1}{\sum}}{\rm sin}\left(\frac{\delta_{S}\Delta_{s}}{2}\right)R_{s}.
\end{cases}
\end{equation}$

Here $F_s=f(x+\delta_s)-f(x-\delta_s)$ denotes the difference between function values evaluated at the shifted arguments $x\pm\delta_s$.
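
As an illustration of the quantities above, here is a small standalone sketch (plain `torch`; the two-qubit generator $\hat{G} = Z\otimes I + I\otimes Z$ is a hypothetical choice) that extracts the unique non-zero spectral gaps $\left\{\Delta_s\right\}$ from a generator's eigenvalue spectrum:

```python
import torch

# Hypothetical generator G = Z ⊗ I + I ⊗ Z, with eigenvalues {-2, 0, 0, 2}.
Z = torch.tensor([[1.0, 0.0], [0.0, -1.0]], dtype=torch.complex128)
Id = torch.eye(2, dtype=torch.complex128)
G = torch.kron(Z, Id) + torch.kron(Id, Z)

# Eigenvalue spectrum {λ_n} and all pairwise differences |λ_n - λ_m|.
spectrum = torch.linalg.eigvalsh(G).reshape(-1, 1)
diffs = torch.abs(spectrum - spectrum.T)

# Unique non-zero spectral gaps {Δ_s}: two gaps here, so this generator needs GPSR.
gaps = torch.unique(diffs)
gaps = gaps[gaps > 0]
print(gaps)  # tensor([2., 4.], dtype=torch.float64)
```

When the generator has a single such gap, the linear system above collapses to one equation; a worked sketch of that case follows the example below.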

### Example
```python exec="on" source="material-block" html="1"
@@ -23,7 +48,7 @@ batch_size = 1

ry = pyq.RY(0, param_name="x")
cnot = pyq.CNOT(1, 2)
ops = [ry, cnot]
ops = [ry]
n_qubits = 3
circ = pyq.QuantumCircuit(n_qubits, ops)

@@ -32,16 +57,24 @@ state = pyq.zero_state(n_qubits)

values_ad = {"x": torch.tensor([torch.pi / 2], requires_grad=True)}
values_adjoint = {"x": torch.tensor([torch.pi / 2], requires_grad=True)}
values_gpsr = {"x": torch.tensor([torch.pi / 2], requires_grad=True)}

exp_ad = pyq.expectation(circ, state, values_ad, obs, DiffMode.AD)
exp_adjoint = pyq.expectation(circ, state, values_adjoint, obs, DiffMode.ADJOINT)
exp_gpsr = pyq.expectation(circ, state, values_gpsr, obs, DiffMode.GPSR)

dfdx_ad = torch.autograd.grad(exp_ad, tuple(values_ad.values()), torch.ones_like(exp_ad))

dfdx_adjoint = torch.autograd.grad(
exp_adjoint, tuple(values_adjoint.values()), torch.ones_like(exp_adjoint)
)

assert len(dfdx_ad) == len(dfdx_adjoint)
dfdx_gpsr = torch.autograd.grad(
exp_gpsr, tuple(values_gpsr.values()), torch.ones_like(exp_gpsr)
)

assert len(dfdx_ad) == len(dfdx_adjoint) == len(dfdx_gpsr)
for i in range(len(dfdx_ad)):
assert torch.allclose(dfdx_ad[i], dfdx_adjoint[i])
assert torch.allclose(dfdx_ad[i], dfdx_gpsr[i])
```
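
For the single-gap case implemented by this commit, the system above reduces to one equation and the derivative becomes $\frac{{\rm d}f(x)}{{\rm d}x}=\Delta\,\frac{f(x+\delta)-f(x-\delta)}{4\,{\rm sin}\left(\delta\Delta/2\right)}$. The following minimal sketch applies this formula by hand to a single `RY` rotation (generator $Y$, single gap $\Delta=2$, shift $\delta=\pi/2$) and cross-checks it against automatic differentiation; it reuses the `pyq.expectation` API shown above, and the parameter value `0.3` is arbitrary:

```python
import math

import torch
import pyqtorch as pyq
from pyqtorch.utils import DiffMode

# Single-qubit circuit: RY(x) = exp(-i x/2 Y). The generator Y has eigenvalues ±1,
# hence a single spectral gap Δ = 2.
n_qubits = 1
circ = pyq.QuantumCircuit(n_qubits, [pyq.RY(0, param_name="x")])
obs = pyq.QuantumCircuit(n_qubits, [pyq.Z(0)])
state = pyq.zero_state(n_qubits)

x = torch.tensor([0.3])
shift, gap = math.pi / 2, 2.0

# Shift-rule derivative: Δ (f(x+δ) - f(x-δ)) / (4 sin(δΔ/2)).
f_plus = pyq.expectation(circ, state, {"x": x + shift}, obs, DiffMode.AD)
f_minus = pyq.expectation(circ, state, {"x": x - shift}, obs, DiffMode.AD)
dfdx_shift = gap * (f_plus - f_minus) / (4 * math.sin(shift * gap / 2))

# Cross-check against automatic differentiation.
x_ad = torch.tensor([0.3], requires_grad=True)
exp_ad = pyq.expectation(circ, state, {"x": x_ad}, obs, DiffMode.AD)
(dfdx_ad,) = torch.autograd.grad(exp_ad, x_ad, torch.ones_like(exp_ad))
assert torch.allclose(dfdx_shift, dfdx_ad)
```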
5 changes: 4 additions & 1 deletion pyqtorch/api.py
@@ -8,6 +8,7 @@
from pyqtorch.adjoint import AdjointExpectation
from pyqtorch.analog import Observable
from pyqtorch.circuit import QuantumCircuit
from pyqtorch.gpsr import PSRExpectation
from pyqtorch.utils import DiffMode, inner_prod

logger = getLogger(__name__)
@@ -94,6 +95,8 @@ def expectation(
circuit, observable, state, values.keys(), *values.values()
)
elif diff_mode == DiffMode.GPSR:
raise NotImplementedError("To be added.")
return PSRExpectation.apply(
circuit, observable, state, values.keys(), *values.values()
)
else:
logger.error(f"Requested diff_mode '{diff_mode}' not supported.")
152 changes: 152 additions & 0 deletions pyqtorch/gpsr.py
@@ -0,0 +1,152 @@
from __future__ import annotations

from logging import getLogger
from typing import Any, Tuple

import torch
from torch import Tensor, no_grad
from torch.autograd import Function

import pyqtorch as pyq
from pyqtorch.analog import HamiltonianEvolution, Observable, Scale
from pyqtorch.circuit import QuantumCircuit
from pyqtorch.parametric import Parametric
from pyqtorch.utils import inner_prod, param_dict

logger = getLogger(__name__)


class PSRExpectation(Function):
r"""
Implementation of the generalized parameter shift rule.
Compared to the original parameter shift rule
which only works for quantum operations whose generator has a single gap
in its eigenvalue spectrum, GPSR works with arbitrary
generators of quantum operations.
For this, we define the differentiable function as quantum expectation value
$$
f(x) = \left\langle 0\right|\hat{U}^{\dagger}(x)\hat{C}\hat{U}(x)\left|0\right\rangle
$$
where $\hat{U}(x)={\rm exp}{\left( -i\frac{x}{2}\hat{G}\right)}$
is the quantum evolution operator with generator $\hat{G}$ representing the structure
of the underlying quantum circuit and $\hat{C}$ is the cost operator.
Then using the eigenvalue spectrum $\left\{ \lambda_n\right\}$ of the generator $\hat{G}$
we calculate the full set of corresponding unique non-zero spectral gaps
$\left\{ \Delta_s\right\}$ (differences between eigenvalues).
It can be shown that the final expression of derivative of $f(x)$
is then given by the following expression:
$\begin{equation}
\frac{{\rm d}f\left(x\right)}{{\rm d}x}=\overset{S}{\underset{s=1}{\sum}}\Delta_{s}R_{s},
\end{equation}$
where $S$ is the number of unique non-zero spectral gaps and $R_s$ are real quantities that
are solutions of a system of linear equations
$\begin{equation}
\begin{cases}
F_{1} & =4\overset{S}{\underset{s=1}{\sum}}{\rm sin}
\left(\frac{\delta_{1}\Delta_{s}}{2}\right)R_{s},\\
F_{2} & =4\overset{S}{\underset{s=1}{\sum}}{\rm sin}
\left(\frac{\delta_{2}\Delta_{s}}{2}\right)R_{s},\\
& ...\\
F_{S} & =4\overset{S}{\underset{s=1}{\sum}}{\rm sin}
\left(\frac{\delta_{S}\Delta_{s}}{2}\right)R_{s}.
\end{cases}
\end{equation}$
Here $F_s=f(x+\delta_s)-f(x-\delta_s)$ denotes the difference between values
of functions evaluated at shifted arguments $x\pm\delta_s$.
Arguments:
circuit: A QuantumCircuit instance
observable: A Hamiltonian.
state: A state tensor of shape [2] * n_qubits + [batch_size].
param_names: A list of parameter names.
*param_values: An unpacked tensor of values for each parameter.
"""

@staticmethod
@no_grad()
def forward(
ctx: Any,
circuit: QuantumCircuit,
observable: Observable,
state: Tensor,
param_names: list[str],
*param_values: Tensor,
) -> Tensor:
ctx.circuit = circuit
ctx.observable = observable
ctx.param_names = param_names
ctx.state = state
values = param_dict(param_names, param_values)
ctx.out_state = circuit.run(state, values)
ctx.projected_state = observable.run(ctx.out_state, values)
ctx.save_for_backward(*param_values)
return inner_prod(ctx.out_state, ctx.projected_state).real

@staticmethod
def backward(ctx: Any, grad_out: Tensor) -> Tuple[None, ...]:
check_support_psr(ctx.circuit)
param_values = ctx.saved_tensors
values = param_dict(ctx.param_names, param_values)
grads_dict = {k: None for k in values.keys()}
shift = torch.tensor(torch.pi) / 2.0

for op in ctx.circuit.flatten():
if isinstance(op, Parametric) and isinstance(op.param_name, str):
spectrum = torch.linalg.eigvalsh(op.pauli).reshape(-1, 1)
spectral_gap = torch.unique(
torch.abs(torch.tril(spectrum - spectrum.T))
)
spectral_gap = spectral_gap[spectral_gap.nonzero()]
assert (
len(spectral_gap) == 1
), "PSRExpectation only works on single_gap for now."

if values[op.param_name].requires_grad:
with no_grad():
copied_values = values.copy()
copied_values[op.param_name] += shift
f_plus = pyq.expectation(
ctx.circuit, ctx.state, copied_values, ctx.observable
)
copied_values[op.param_name] -= 2.0 * shift
f_min = pyq.expectation(
ctx.circuit, ctx.state, copied_values, ctx.observable
)

grad = (
spectral_gap
* (f_plus - f_min)
/ (4 * torch.sin(spectral_gap * shift / 2))
)
grad *= grad_out
if grads_dict[op.param_name] is not None:
grads_dict[op.param_name] += grad
else:
grads_dict[op.param_name] = grad
else:
logger.error(f"PSRExpectation does not support operation: {type(op)}.")
return (None, None, None, None, *grads_dict.values())


def check_support_psr(circuit: QuantumCircuit):
"""Checking that circuit has only compatible operations for PSR.
Args:
circuit (QuantumCircuit): Circuit to check.
Raises:
ValueError: When circuit contains Scale or HamiltonianEvolution.
"""
for op in circuit.operations:
if isinstance(op, Scale) or isinstance(op, HamiltonianEvolution):
raise ValueError(
f"PSR is not applicable as circuit contains an operation of type: {type(op)}."
)
1 change: 1 addition & 0 deletions pyqtorch/utils.py
@@ -87,6 +87,7 @@ class DiffMode(StrEnum):
"""An implementation of "Efficient calculation of gradients
in classical simulations of variational quantum algorithms",
Jones & Gacon, 2020"""

GPSR = "gpsr"
"""To be added."""

86 changes: 81 additions & 5 deletions tests/test_circuit.py
@@ -13,16 +13,21 @@
from pyqtorch.noise import Noise
from pyqtorch.parametric import Parametric
from pyqtorch.primitive import Primitive
from pyqtorch.utils import GRADCHECK_ATOL, DensityMatrix, product_state
from pyqtorch.utils import (
GRADCHECK_ATOL,
DensityMatrix,
product_state,
)


def test_adjoint_diff() -> None:
# TODO add GPSR when multigap is implemented for this test
@pytest.mark.parametrize("n_qubits", [3, 4, 5])
def test_adjoint_diff(n_qubits: int) -> None:
rx = pyq.RX(0, param_name="theta_0")
cry = pyq.CPHASE(0, 1, param_name="theta_1")
rz = pyq.RZ(2, param_name="theta_2")
cnot = pyq.CNOT(1, 2)
ops = [rx, cry, rz, cnot]
n_qubits = 3
circ = pyq.QuantumCircuit(n_qubits, ops)
obs = pyq.QuantumCircuit(n_qubits, [pyq.Z(0)])

@@ -60,7 +65,7 @@ def test_adjoint_diff() -> None:

assert len(grad_ad) == len(grad_adjoint)
for i in range(len(grad_ad)):
assert torch.allclose(grad_ad[i], grad_adjoint[i])
assert torch.allclose(grad_ad[i], grad_adjoint[i], atol=GRADCHECK_ATOL)


@pytest.mark.parametrize("dtype", [torch.complex64, torch.complex128])
@@ -155,7 +160,7 @@ def test_adjoint_duplicate_params() -> None:
grad_adjoint = torch.autograd.grad(
exp_adjoint, tuple(values.values()), torch.ones_like(exp_adjoint)
)[0]
assert torch.allclose(grad_ad, grad_adjoint)
assert torch.allclose(grad_ad, grad_adjoint, atol=GRADCHECK_ATOL)


@pytest.mark.parametrize("fn", [pyq.X, pyq.Z, pyq.Y])
@@ -310,3 +315,74 @@ def test_sample_run() -> None:
assert torch.allclose(wf, product_state("1100"))
assert torch.allclose(pyq.QuantumCircuit(4, [pyq.I(0)]).run("1100"), wf)
assert "1100" in samples[0]


# TODO: delete this test once the first test above also covers GPSR
@pytest.mark.parametrize("n_qubits", [3, 4, 5])
def test_all_diff(n_qubits: int) -> None:
rx = pyq.RX(0, param_name="theta_0")
rz = pyq.RZ(2, param_name="theta_1")
cnot = pyq.CNOT(1, 2)
ops = [rx, rz, cnot]
circ = pyq.QuantumCircuit(n_qubits, ops)
obs = pyq.QuantumCircuit(n_qubits, [pyq.Z(0)])

theta_0_value = torch.pi / 2
theta_1_value = torch.pi

state = pyq.zero_state(n_qubits)

theta_0 = torch.tensor([theta_0_value], requires_grad=True)

theta_1 = torch.tensor([theta_1_value], requires_grad=True)

values = {"theta_0": theta_0, "theta_1": theta_1}

exp_ad = expectation(circ, state, values, obs, DiffMode.AD)
exp_adjoint = expectation(circ, state, values, obs, DiffMode.ADJOINT)
exp_gpsr = expectation(circ, state, values, obs, DiffMode.GPSR)

grad_ad = torch.autograd.grad(
exp_ad, tuple(values.values()), torch.ones_like(exp_ad)
)

grad_adjoint = torch.autograd.grad(
exp_adjoint, tuple(values.values()), torch.ones_like(exp_adjoint)
)

grad_gpsr = torch.autograd.grad(
exp_gpsr, tuple(values.values()), torch.ones_like(exp_gpsr)
)

assert len(grad_ad) == len(grad_adjoint) == len(grad_gpsr)

for i in range(len(grad_ad)):
assert torch.allclose(
grad_ad[i], grad_adjoint[i], atol=GRADCHECK_ATOL
) and torch.allclose(grad_ad[i], grad_gpsr[i], atol=GRADCHECK_ATOL)


@pytest.mark.xfail(raises=ValueError)
@pytest.mark.parametrize("gate_type", ["scale", "hamevo"])
def test_compatibility_gpsr(gate_type: str) -> None:

if gate_type == "scale":
seq_gate = pyq.Sequence([pyq.X(0)])
scale = pyq.Scale(seq_gate, "theta_0")
ops = [scale]
else:
hamevo = pyq.HamiltonianEvolution(pyq.Sequence([pyq.X(0)]), "theta_0", (0,))

ops = [hamevo]

circ = pyq.QuantumCircuit(1, ops)
obs = pyq.QuantumCircuit(1, [pyq.Z(0)])
state = pyq.zero_state(1)

param_value = torch.pi / 2
values = {"theta_0": torch.tensor([param_value], requires_grad=True)}
exp_gpsr = expectation(circ, state, values, obs, DiffMode.GPSR)

grad_gpsr = torch.autograd.grad(
exp_gpsr, tuple(values.values()), torch.ones_like(exp_gpsr)
)
