From 1c208ebc266cdb7f5d5af81d3e2e178d49a283b0 Mon Sep 17 00:00:00 2001 From: raulconchello Date: Wed, 5 Oct 2022 17:05:30 +0800 Subject: [PATCH 01/36] implement some optimization methods from pennylane --- .../optimization_methods_pennylane.py | 90 +++++++++++++++++++ openqaoa/optimizers/training_vqa.py | 22 ++++- 2 files changed, 111 insertions(+), 1 deletion(-) create mode 100644 openqaoa/optimizers/optimization_methods_pennylane.py diff --git a/openqaoa/optimizers/optimization_methods_pennylane.py b/openqaoa/optimizers/optimization_methods_pennylane.py new file mode 100644 index 000000000..627f3d8fd --- /dev/null +++ b/openqaoa/optimizers/optimization_methods_pennylane.py @@ -0,0 +1,90 @@ + + +from importlib.metadata import requires +from operator import ne +import pennylane as pl +import inspect +from scipy.optimize import OptimizeResult +import numpy as np + +def pennylane_optimizer(fun, x0, args=(), maxfev=None, method = 'vgd', qfim=None, + nums_frequency=None, spectra=None, shifts=None, + maxiter=100, tol=10**(-6), jac=None, callback=None, **options): + + def cost(params, **k): + return fun(np.array(params), **k) + + available_methods_dict = { + 'adagrad': pl.AdagradOptimizer, + 'adam': pl.AdamOptimizer, + 'vgd': pl.GradientDescentOptimizer, + 'momentum': pl.MomentumOptimizer, + 'nesterov_momentum': pl.NesterovMomentumOptimizer, + 'natural_grad_descent': pl.QNGOptimizer, + 'rmsprop': pl.RMSPropOptimizer, + 'rotosolve': pl.RotosolveOptimizer, + 'spsa': pl.QNSPSAOptimizer, + } + + optimizer = available_methods_dict[method] + arguments = inspect.signature(optimizer).parameters.keys() + options_keys = list(options.keys()) + + print(arguments) + + for key in options_keys: + if key not in arguments: options.pop(key) + + optimizer = optimizer(**options) + print(options, optimizer) + + + bestx = pl.numpy.array(x0, requires_grad=True) + besty = cost(x0) + funcalls = 1 # tracks no. of function evals. + niter = 0 + improved = True + stop = False + + testx = np.copy(bestx) + testy = np.real(besty) + while improved and not stop and niter < maxiter: + improved = False + niter += 1 + print(niter) + + # compute step + if qfim: #natural_grad_descent + testx, testy = optimizer.step_and_cost(cost, bestx, grad_fn=jac, metric_tensor_fn=qfim) + elif jac: #adagrad, adam, vgd, momentum, nesterov_momentum, rmsprop + testx, testy = optimizer.step_and_cost(cost, bestx, grad_fn=jac) + elif method=='rotosolve': + testx, testy = optimizer.step_and_cost( + cost, bestx, + nums_frequency={'params': {(i,):1 for i in range(bestx.size)}} if not nums_frequency else nums_frequency, + spectra=spectra, + shifts=shifts, + full_output=False, + ) + else: #spsa + testx, testy = optimizer.step_and_cost(cost, bestx) + + # check if stable + if np.abs(besty-testy) < tol and niter > 2: + improved = False + + else: + besty = testy + bestx = testx + improved = True + + if callback is not None: + callback(bestx) + if maxfev is not None and funcalls >= maxfev: + stop = True + break + + return OptimizeResult(fun=besty, x=np.array(bestx), nit=niter, + nfev=funcalls, success=(niter > 1)) + + diff --git a/openqaoa/optimizers/training_vqa.py b/openqaoa/optimizers/training_vqa.py index c2fb322b3..e655905ff 100644 --- a/openqaoa/optimizers/training_vqa.py +++ b/openqaoa/optimizers/training_vqa.py @@ -26,6 +26,7 @@ from ..basebackend import VQABaseBackend from ..qaoa_parameters.baseparams import QAOAVariationalBaseParams from . import optimization_methods as om +from . 
import optimization_methods_pennylane as ompl from .logger_vqa import Logger from .result import Result @@ -472,7 +473,12 @@ class CustomScipyGradientOptimizer(OptimizeVQA): """ CUSTOM_GRADIENT_OPTIMIZERS = ['vgd', 'newton', - 'rmsprop', 'natural_grad_descent', 'spsa'] + 'rmsprop', 'natural_grad_descent', 'spsa', + 'pennylane', + 'pennylane adagrad', 'pennylane adam', 'pennylane vgd', + 'pennylane momentum', 'pennylane nesterov_momentum', + 'pennylane natural_grad_descent', 'pennylane rmsprop', + 'pennylane rotosolve', 'pennylane spsa'] def __init__(self, vqa_object: Type[VQABaseBackend], @@ -562,6 +568,8 @@ def optimize(self): : The optimized return object from the ``scipy.optimize`` package the result is assigned to the attribute ``opt_result`` ''' + #pennylane_methods = {'pl_adagrad': ompl.adagrad, 'pl_adam': ompl.adam, 'pl_vgd': ompl.grad_descent, 'pl_momentum': ompl.momentum} + if self.method == 'vgd': method = om.grad_descent elif self.method == 'newton': @@ -575,6 +583,18 @@ def optimize(self): elif self.method == 'spsa': print("Warning : SPSA is an experimental feature.") method = om.SPSA + elif self.method.lower().split()[0] == 'pennylane': + print("Warning : PennyLane") + method = ompl.pennylane_optimizer + + if len(self.method.split()) > 1: + self.options['method'] = self.method.lower().split()[1] + + if self.options['method'] == 'natural_grad_descent': + self.options['qfim'] = qfim(self.vqa_object, self.variational_params, self.log) + elif self.options['method'] in ['spsa', 'rotosolve']: + self.jac = None + #method = pennylane_methods[self.method] try: if self.hess == None: From 90e68ebc39b38b36ffc5561217e3234c4fbb1f70 Mon Sep 17 00:00:00 2001 From: raulconchello Date: Thu, 6 Oct 2022 17:04:49 +0800 Subject: [PATCH 02/36] adding the spsa pennylane optimizer --- openqaoa/optimizers/optimization_methods_pennylane.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/openqaoa/optimizers/optimization_methods_pennylane.py b/openqaoa/optimizers/optimization_methods_pennylane.py index 627f3d8fd..a71a56e84 100644 --- a/openqaoa/optimizers/optimization_methods_pennylane.py +++ b/openqaoa/optimizers/optimization_methods_pennylane.py @@ -7,6 +7,8 @@ from scipy.optimize import OptimizeResult import numpy as np +import matplotlib.pyplot as plt + def pennylane_optimizer(fun, x0, args=(), maxfev=None, method = 'vgd', qfim=None, nums_frequency=None, spectra=None, shifts=None, maxiter=100, tol=10**(-6), jac=None, callback=None, **options): @@ -23,7 +25,7 @@ def cost(params, **k): 'natural_grad_descent': pl.QNGOptimizer, 'rmsprop': pl.RMSPropOptimizer, 'rotosolve': pl.RotosolveOptimizer, - 'spsa': pl.QNSPSAOptimizer, + 'spsa': pl.SPSAOptimizer, } optimizer = available_methods_dict[method] @@ -34,10 +36,10 @@ def cost(params, **k): for key in options_keys: if key not in arguments: options.pop(key) + if 'maxiter' in arguments: options['maxiter'] = maxiter optimizer = optimizer(**options) print(options, optimizer) - bestx = pl.numpy.array(x0, requires_grad=True) besty = cost(x0) @@ -66,7 +68,7 @@ def cost(params, **k): shifts=shifts, full_output=False, ) - else: #spsa + else: #spsa testx, testy = optimizer.step_and_cost(cost, bestx) # check if stable @@ -83,7 +85,7 @@ def cost(params, **k): if maxfev is not None and funcalls >= maxfev: stop = True break - + return OptimizeResult(fun=besty, x=np.array(bestx), nit=niter, nfev=funcalls, success=(niter > 1)) From 9d5439a24c81270bf8b78de64c1c80a851f95841 Mon Sep 17 00:00:00 2001 From: raulconchello Date: Wed, 12 Oct 2022 
10:33:35 +0800
Subject: [PATCH 03/36] Documentation for pennylane optimizers

---
 .../optimization_methods_pennylane.py         | 85 ++++++++++++++-----
 openqaoa/optimizers/training_vqa.py           | 10 +--
 2 files changed, 70 insertions(+), 25 deletions(-)

diff --git a/openqaoa/optimizers/optimization_methods_pennylane.py b/openqaoa/optimizers/optimization_methods_pennylane.py
index a71a56e84..b631ed7a4 100644
--- a/openqaoa/optimizers/optimization_methods_pennylane.py
+++ b/openqaoa/optimizers/optimization_methods_pennylane.py
@@ -9,14 +9,62 @@
 
 import matplotlib.pyplot as plt
 
-def pennylane_optimizer(fun, x0, args=(), maxfev=None, method = 'vgd', qfim=None,
-                        nums_frequency=None, spectra=None, shifts=None,
-                        maxiter=100, tol=10**(-6), jac=None, callback=None, **options):
-
-    def cost(params, **k):
-        return fun(np.array(params), **k)
-
-    available_methods_dict = {
+def pennylane_optimizer(fun, x0, args=(), maxfev=None, method='vgd', qfim=None,
+                        maxiter=100, tol=10**(-6), jac=None, callback=None,
+                        nums_frequency=None, spectra=None, shifts=None, **options):
+
+    '''
+    Minimize a function `fun` using some pennylane method.
+    To check available methods look at the available_methods_dict variable.
+    Read https://docs.pennylane.ai/en/stable/introduction/interfaces.html#optimizers
+
+    PARAMETERS
+    ----------
+    fun : callable
+        Function to minimize
+    x0 : ndarray
+        Initial guess.
+    args : sequence, optional
+        Arguments to pass to `fun`.
+    maxfev : int, optional
+        Maximum number of function evaluations.
+    method : string, optional
+        Optimizer method to compute the steps.
+    qfim : callable, optional (required for natural_grad_descent)
+        Callable Fubini-Study metric tensor
+    maxiter : int, optional
+        Maximum number of iterations.
+    tol : float
+        Tolerance before the optimizer terminates; if `tol` is larger than the difference between two steps, terminate optimization.
+    jac : callable, optional (required for all methods but rotosolve and spsa)
+        Callable gradient function.
+    callback : callable, optional
+        Called after each iteration, as ``callback(xk)``, where ``xk`` is the
+        current parameter vector.
+    options : dict, optional
+        Dictionary where keys are the arguments for the optimizers object, and
+        the values are the values to pass to these arguments
+
+
+    (read https://docs.pennylane.ai/en/stable/code/api/pennylane.RotosolveOptimizer.html#pennylane.RotosolveOptimizer.step)
+    nums_frequency : dict[dict], required for rotosolve
+        The number of frequencies in the fun per parameter.
+    spectra : dict[dict], required for rotosolve
+        Frequency spectra in the objective_fn per parameter.
+    shifts : dict[dict], required for rotosolve
+        Shift angles for the reconstruction per parameter.
+
+
+    RETURNS
+    -------
+    OptimizeResult : OptimizeResult
+        Scipy OptimizeResult object.
+    '''
+
+    def cost(params, **k): # define a function to convert the params list from pennylane to numpy
+        return fun(np.array(params), *k)
+
+    available_methods_dict = { # optimizers implemented
         'adagrad': pl.AdagradOptimizer,
         'adam': pl.AdamOptimizer,
         'vgd': pl.GradientDescentOptimizer,
@@ -28,21 +76,21 @@ def cost(params, **k):
         'spsa': pl.SPSAOptimizer,
     }
 
-    optimizer = available_methods_dict[method]
+    optimizer = available_methods_dict[method] # define the optimizer
+
+    #get optimizer arguments
     arguments = inspect.signature(optimizer).parameters.keys()
     options_keys = list(options.keys())
 
-    print(arguments)
-
+    #check which values of the options dict can be passed to the optimizer (pop the others)
    for key in options_keys:
        if key not in arguments: options.pop(key)
    if 'maxiter' in arguments: options['maxiter'] = maxiter
 
-    optimizer = optimizer(**options)
-    print(options, optimizer)
+    optimizer = optimizer(**options) #pass the arguments
 
    bestx = pl.numpy.array(x0, requires_grad=True)
-    besty = cost(x0)
+    besty = cost(x0, *args)
    funcalls = 1  # tracks no. of function evals.
    niter = 0
    improved = True
@@ -53,23 +101,22 @@ def cost(params, **k):
    while improved and not stop and niter < maxiter:
        improved = False
        niter += 1
-        print(niter)
 
        # compute step
        if qfim: #natural_grad_descent
-            testx, testy = optimizer.step_and_cost(cost, bestx, grad_fn=jac, metric_tensor_fn=qfim)
+            testx, testy = optimizer.step_and_cost(cost, bestx, *args, grad_fn=jac, metric_tensor_fn=qfim)
        elif jac: #adagrad, adam, vgd, momentum, nesterov_momentum, rmsprop
-            testx, testy = optimizer.step_and_cost(cost, bestx, grad_fn=jac)
+            testx, testy = optimizer.step_and_cost(cost, bestx, *args, grad_fn=jac)
        elif method=='rotosolve':
            testx, testy = optimizer.step_and_cost(
-                cost, bestx,
+                cost, bestx, *args,
                nums_frequency={'params': {(i,):1 for i in range(bestx.size)}} if not nums_frequency else nums_frequency,
                spectra=spectra,
                shifts=shifts,
                full_output=False,
            )
        else: #spsa
-            testx, testy = optimizer.step_and_cost(cost, bestx)
+            testx, testy = optimizer.step_and_cost(cost, bestx, *args)
 
        # check if stable
        if np.abs(besty-testy) < tol and niter > 2:
diff --git a/openqaoa/optimizers/training_vqa.py b/openqaoa/optimizers/training_vqa.py
index e655905ff..96b80c963 100644
--- a/openqaoa/optimizers/training_vqa.py
+++ b/openqaoa/optimizers/training_vqa.py
@@ -470,6 +470,7 @@ class CustomScipyGradientOptimizer(OptimizeVQA):
        * optimizer_options
 
            * Dictionary of optimiser-specific arguments, defaults to ``None``
+            * Used also for the pennylane optimizers (and step function) arguments
 
    """
    CUSTOM_GRADIENT_OPTIMIZERS = ['vgd', 'newton',
@@ -568,7 +569,6 @@ def optimize(self):
        :
            The optimized return object from the ``scipy.optimize`` package the result is assigned to the attribute ``opt_result``
        '''
-        #pennylane_methods = {'pl_adagrad': ompl.adagrad, 'pl_adam': ompl.adam, 'pl_vgd': ompl.grad_descent, 'pl_momentum': ompl.momentum}
 
        if self.method == 'vgd':
            method = om.grad_descent
@@ -583,18 +583,16 @@ def optimize(self):
        elif self.method == 'spsa':
            print("Warning : SPSA is an experimental feature.")
            method = om.SPSA
-        elif self.method.lower().split()[0] == 'pennylane':
-            print("Warning : PennyLane")
+        elif self.method.lower().split()[0] == 'pennylane': # check if we are using a pennylane optimizer
            method = ompl.pennylane_optimizer
 
-            if len(self.method.split()) > 1:
-                self.options['method'] = self.method.lower().split()[1]
+            if len(self.method.split()) > 1: # check if we are not using the default (vgd)
+                self.options['method'] = 
self.method.lower().split()[1]
 
            if self.options['method'] == 'natural_grad_descent':
                self.options['qfim'] = qfim(self.vqa_object, self.variational_params, self.log)
            elif self.options['method'] in ['spsa', 'rotosolve']:
                self.jac = None
-            #method = pennylane_methods[self.method]
 
        try:
            if self.hess == None:
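
[A note on usage, not part of the patch series: because `pennylane_optimizer` follows the scipy custom-method convention documented in this patch and returns a `scipy.optimize.OptimizeResult`, it can be handed straight to `scipy.optimize.minimize` as a callable `method`, which is how `training_vqa.py` dispatches to it. A minimal sketch, assuming this branch is installed; the toy cost and gradient are illustrative stand-ins, and the `method` key inside `options` is the keyword as of this patch (a later commit renames it to `pennylane_method`).]

import numpy as np
from scipy.optimize import minimize
from openqaoa.optimizers import optimization_methods_pennylane as ompl

def toy_cost(x):   # stand-in for a real QAOA cost function
    return float(np.sum(x ** 2))

def toy_grad(x):   # analytic gradient, passed through as `jac`
    return 2 * x

# scipy forwards each entry of `options` to the callable method as a keyword
# argument, so 'method', 'stepsize' and 'maxiter' all reach pennylane_optimizer
res = minimize(toy_cost, np.array([0.5, -0.3]), method=ompl.pennylane_optimizer,
               jac=toy_grad, options={'method': 'adam', 'stepsize': 0.1, 'maxiter': 20})
print(res.x, res.fun)
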
From a331046d8eac026a25145fdc286767c8df979c88 Mon Sep 17 00:00:00 2001
From: raulconchello
Date: Wed, 12 Oct 2022 10:44:01 +0800
Subject: [PATCH 04/36] Documentation for pennylane optimizers

---
 openqaoa/optimizers/optimization_methods_pennylane.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/openqaoa/optimizers/optimization_methods_pennylane.py b/openqaoa/optimizers/optimization_methods_pennylane.py
index b631ed7a4..9009c3bf6 100644
--- a/openqaoa/optimizers/optimization_methods_pennylane.py
+++ b/openqaoa/optimizers/optimization_methods_pennylane.py
@@ -43,7 +43,9 @@ def pennylane_optimizer(fun, x0, args=(), maxfev=None, method='vgd', qfim=None,
        current parameter vector.
    options : dict, optional
        Dictionary where keys are the arguments for the optimizers object, and
-        the values are the values to pass to these arguments
+        the values are the values to pass to these arguments.
+        To know all the possible arguments read
+        https://docs.pennylane.ai/en/stable/introduction/interfaces.html#optimizers.
 
 
    (read https://docs.pennylane.ai/en/stable/code/api/pennylane.RotosolveOptimizer.html#pennylane.RotosolveOptimizer.step)

From 3054af6bda077933f16266d359848a10b04a9817 Mon Sep 17 00:00:00 2001
From: raulconchello
Date: Wed, 12 Oct 2022 10:45:06 +0800
Subject: [PATCH 05/36] Added pennylane dependency in setup.py

---
 setup.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 213ad4305..7f289a06e 100644
--- a/setup.py
+++ b/setup.py
@@ -17,7 +17,8 @@
    "matplotlib>=3.4.3, <3.5.0",
    "qiskit>=0.36.1",
    "pyquil>=3.1.0",
-    "docplex>=2.23.1"
+    "docplex>=2.23.1",
+    "pennylane>=0.26.0"
 ]
 
 requirements_docs = [

From 985674af0cea51d4def12597ba9cd37d7b1b70da Mon Sep 17 00:00:00 2001
From: raulconchello
Date: Wed, 12 Oct 2022 11:15:36 +0800
Subject: [PATCH 06/36] Added a test for pennylane optimizers

---
 tests/test_optimizers_pennylane.py | 48 ++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 tests/test_optimizers_pennylane.py

diff --git a/tests/test_optimizers_pennylane.py b/tests/test_optimizers_pennylane.py
new file mode 100644
index 000000000..31c66caaf
--- /dev/null
+++ b/tests/test_optimizers_pennylane.py
@@ -0,0 +1,48 @@
+import warnings
+import unittest
+
+import networkx as nx
+from openqaoa.workflows.optimizer import QAOA
+from openqaoa.devices import create_device
+from openqaoa.problems.problem import MaximumCut
+from openqaoa.optimizers.training_vqa import CustomScipyGradientOptimizer
+
+
+#create a problem
+nodes = 4
+edge_probability = 0.6
+g = nx.generators.fast_gnp_random_graph(n=nodes,p=edge_probability)
+maxcut_prob = MaximumCut(g)
+maxcut_qubo = maxcut_prob.get_qubo_problem()
+
+
+class TestPennylaneOptimizers(unittest.TestCase):
+
+    def _run_method(self, method):
+        q = QAOA()
+        device = create_device(location='local', name='qiskit.statevector_simulator')
+        q.set_device(device)
+
+
+        q.set_circuit_properties(p=2, param_type='standard', init_type='rand', mixer_hamiltonian='x')
+        q.set_backend_properties(prepend_state=None, append_state=None)
+        q.set_classical_optimizer(method=method, maxiter=4, optimizer_options = {'blocking':False, 'resamplings': 0},
+                                  optimization_progress=True, cost_progress=True, parameter_log=True, jac='finite_difference')
+
+        q.compile(maxcut_qubo)
+        q.optimize()
+
+    def test_pennylane_optimizers(self):
+        list_optimizers = CustomScipyGradientOptimizer.CUSTOM_GRADIENT_OPTIMIZERS
+
+        for opt in list_optimizers:
+            if opt.split()[0] == "pennylane":
+                self._run_method(opt)
+
+
+
+
+if __name__ == "__main__":
+    with warnings.catch_warnings():
+        warnings.simplefilter('ignore', category=PendingDeprecationWarning)
+        unittest.main()

From 7e2a022ec6f4845ae838aa7ff398a9c5d35521e3 Mon Sep 17 00:00:00 2001
From: raulconchello
Date: Wed, 12 Oct 2022 11:24:09 +0800
Subject: [PATCH 07/36] Documentation for pennylane optimizers test

---
 tests/test_optimizers_pennylane.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/test_optimizers_pennylane.py b/tests/test_optimizers_pennylane.py
index 31c66caaf..ba415866a 100644
--- a/tests/test_optimizers_pennylane.py
+++ b/tests/test_optimizers_pennylane.py
@@ -19,6 +19,7 @@ class TestPennylaneOptimizers(unittest.TestCase):
 
    def _run_method(self, method):
+        " function to run the test for any method "
        q = QAOA()
        device = create_device(location='local', name='qiskit.statevector_simulator')
        q.set_device(device)
@@ -33,6 +34,7 @@ def _run_method(self, method):
        q.optimize()
 
    def test_pennylane_optimizers(self):
+        " function to run the tests for pennylane optimizers "
        list_optimizers = CustomScipyGradientOptimizer.CUSTOM_GRADIENT_OPTIMIZERS
 
        for opt in list_optimizers:

From 27b0ef7f91772208754c27a779458ccc24d6435d Mon Sep 17 00:00:00 2001
From: raulconchello
Date: Thu, 13 Oct 2022 11:17:38 +0800
Subject: [PATCH 08/36] The proposed changes have been modified

---
 .../optimization_methods_pennylane.py         | 55 +++++++++++++------
 openqaoa/optimizers/training_vqa.py           | 24 ++++----
 2 files changed, 48 insertions(+), 31 deletions(-)

diff --git a/openqaoa/optimizers/optimization_methods_pennylane.py b/openqaoa/optimizers/optimization_methods_pennylane.py
index 9009c3bf6..b33a2c8d1 100644
--- a/openqaoa/optimizers/optimization_methods_pennylane.py
+++ b/openqaoa/optimizers/optimization_methods_pennylane.py
@@ -1,13 +1,42 @@
+# Copyright 2022 Entropica Labs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Function to implement pennylane optimization algorithms.
+Read https://docs.pennylane.ai/en/stable/introduction/interfaces.html#optimizers
+Only those that don't require a pennylane backend have been implemented.
+As with the custom optimization methods, Scipy's `minimize` is used; this extends the available scipy methods. 
+""" - -from importlib.metadata import requires -from operator import ne import pennylane as pl import inspect from scipy.optimize import OptimizeResult import numpy as np -import matplotlib.pyplot as plt +AVAILABLE_OPTIMIZERS = { # optimizers implemented + 'adagrad': pl.AdagradOptimizer, + 'adam': pl.AdamOptimizer, + 'vgd': pl.GradientDescentOptimizer, + 'momentum': pl.MomentumOptimizer, + 'nesterov_momentum': pl.NesterovMomentumOptimizer, + 'natural_grad_descent': pl.QNGOptimizer, + 'rmsprop': pl.RMSPropOptimizer, + 'rotosolve': pl.RotosolveOptimizer, + 'spsa': pl.SPSAOptimizer, + } + + def pennylane_optimizer(fun, x0, args=(), maxfev=None, method='vgd', qfim=None, maxiter=100, tol=10**(-6), jac=None, callback=None, @@ -66,19 +95,8 @@ def pennylane_optimizer(fun, x0, args=(), maxfev=None, method='vgd', qfim=None, def cost(params, **k): # define a function to convert the params list from pennylane to numpy return fun(np.array(params), *k) - available_methods_dict = { # optimizers implemented - 'adagrad': pl.AdagradOptimizer, - 'adam': pl.AdamOptimizer, - 'vgd': pl.GradientDescentOptimizer, - 'momentum': pl.MomentumOptimizer, - 'nesterov_momentum': pl.NesterovMomentumOptimizer, - 'natural_grad_descent': pl.QNGOptimizer, - 'rmsprop': pl.RMSPropOptimizer, - 'rotosolve': pl.RotosolveOptimizer, - 'spsa': pl.SPSAOptimizer, - } - optimizer = available_methods_dict[method] # define the optimizer + optimizer = AVAILABLE_OPTIMIZERS[method] # define the optimizer #get optimizer arguments arguments = inspect.signature(optimizer).parameters.keys() @@ -102,7 +120,6 @@ def cost(params, **k): # define a function to convert the params list from penny testy = np.real(besty) while improved and not stop and niter < maxiter: improved = False - niter += 1 # compute step if qfim: #natural_grad_descent @@ -121,7 +138,7 @@ def cost(params, **k): # define a function to convert the params list from penny testx, testy = optimizer.step_and_cost(cost, bestx, *args) # check if stable - if np.abs(besty-testy) < tol and niter > 2: + if np.abs(besty-testy) < tol and niter > 1: improved = False else: @@ -134,6 +151,8 @@ def cost(params, **k): # define a function to convert the params list from penny if maxfev is not None and funcalls >= maxfev: stop = True break + + niter += 1 return OptimizeResult(fun=besty, x=np.array(bestx), nit=niter, nfev=funcalls, success=(niter > 1)) diff --git a/openqaoa/optimizers/training_vqa.py b/openqaoa/optimizers/training_vqa.py index 96b80c963..70ed0a1bb 100644 --- a/openqaoa/optimizers/training_vqa.py +++ b/openqaoa/optimizers/training_vqa.py @@ -474,12 +474,11 @@ class CustomScipyGradientOptimizer(OptimizeVQA): """ CUSTOM_GRADIENT_OPTIMIZERS = ['vgd', 'newton', - 'rmsprop', 'natural_grad_descent', 'spsa', - 'pennylane', - 'pennylane adagrad', 'pennylane adam', 'pennylane vgd', - 'pennylane momentum', 'pennylane nesterov_momentum', - 'pennylane natural_grad_descent', 'pennylane rmsprop', - 'pennylane rotosolve', 'pennylane spsa'] + 'rmsprop', 'natural_grad_descent', 'spsa', + 'pennylane_adagrad', 'pennylane_adam', 'pennylane_vgd', + 'pennylane_momentum', 'pennylane_nesterov_momentum', + 'pennylane_natural_grad_descent', 'pennylane_rmsprop', + 'pennylane_rotosolve', 'pennylane_spsa'] def __init__(self, vqa_object: Type[VQABaseBackend], @@ -583,16 +582,15 @@ def optimize(self): elif self.method == 'spsa': print("Warning : SPSA is an experimental feature.") method = om.SPSA - elif self.method.lower().split()[0] == 'pennylane': # check if we are using a pennylane optimizer + elif 
self.method.lower().split('_')[0] == 'pennylane': # check if we are using a pennylane optimizer method = ompl.pennylane_optimizer - if len(self.method.split()) > 1: # check if we are not using the default (vgd) - self.options['method'] = self.method.lower().split()[1] + self.options['method'] = self.method.lower().replace("pennylane_", "") - if self.options['method'] == 'natural_grad_descent': - self.options['qfim'] = qfim(self.vqa_object, self.variational_params, self.log) - elif self.options['method'] in ['spsa', 'rotosolve']: - self.jac = None + if self.options['method'] == 'natural_grad_descent': + self.options['qfim'] = qfim(self.vqa_object, self.variational_params, self.log) + if self.options['method'] in ['spsa', 'rotosolve']: + self.jac = None try: if self.hess == None: From ed96166c99239430170d576338aa332b180f7055 Mon Sep 17 00:00:00 2001 From: raulconchello Date: Thu, 13 Oct 2022 14:21:56 +0800 Subject: [PATCH 09/36] codecov added --- codecov.yml | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 codecov.yml diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 000000000..e03bbc8e5 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,8 @@ +coverage: + status: + patch: + default: + target: 30% + project: + default: + target: 80% \ No newline at end of file From 115b5766c38767cbe5d8fdf18f5f6f0225bf8012 Mon Sep 17 00:00:00 2001 From: raulconchello Date: Fri, 21 Oct 2022 10:16:04 +0800 Subject: [PATCH 10/36] Documentation updated --- docs/requirements.txt | 1 + docs/source/optimizers.rst | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/docs/requirements.txt b/docs/requirements.txt index bb4c9fe51..7955230ac 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -7,3 +7,4 @@ sphinx>=4.5.0 sphinx-autodoc-typehints==1.18.1 sphinx-rtd-theme==1.0.0 ipython==8.2.0 +pennylane>=0.26.0 diff --git a/docs/source/optimizers.rst b/docs/source/optimizers.rst index b64def0a4..f3f49d0d9 100644 --- a/docs/source/optimizers.rst +++ b/docs/source/optimizers.rst @@ -29,6 +29,12 @@ Optimization Methods :show-inheritance: :inherited-members: +.. automodule:: openqaoa.optimizers.optimization_methods_pennylane + :members: + :undoc-members: + :show-inheritance: + :inherited-members: + Derivate functions ------------------ .. 
automodule:: openqaoa.derivative_functions

From 828a787502b51b76c5ac10ce5200ba1bc1cc9d4e Mon Sep 17 00:00:00 2001
From: raulconchello
Date: Fri, 28 Oct 2022 15:04:55 +0800
Subject: [PATCH 11/36] Solving bug in optimizers pennylane tests

---
 tests/test_optimizers_pennylane.py | 30 +++++++++++++-----------------
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/tests/test_optimizers_pennylane.py b/tests/test_optimizers_pennylane.py
index ba415866a..7bfa3f8cb 100644
--- a/tests/test_optimizers_pennylane.py
+++ b/tests/test_optimizers_pennylane.py
@@ -4,16 +4,14 @@
 import networkx as nx
 from openqaoa.workflows.optimizer import QAOA
 from openqaoa.devices import create_device
-from openqaoa.problems.problem import MaximumCut
+from openqaoa.problems.problem import MinimumVertexCover
 from openqaoa.optimizers.training_vqa import CustomScipyGradientOptimizer
 
 
 #create a problem
-nodes = 4
-edge_probability = 0.6
-g = nx.generators.fast_gnp_random_graph(n=nodes,p=edge_probability)
-maxcut_prob = MaximumCut(g)
-maxcut_qubo = maxcut_prob.get_qubo_problem()
+g = nx.circulant_graph(4, [1])
+problem = MinimumVertexCover(g, field =1.0, penalty=10)
+qubo_problem = problem.get_qubo_problem()
 
 
 class TestPennylaneOptimizers(unittest.TestCase):
 
    def _run_method(self, method):
        " function to run the test for any method "
        q = QAOA()
-        device = create_device(location='local', name='qiskit.statevector_simulator')
-        q.set_device(device)
-
-
-        q.set_circuit_properties(p=2, param_type='standard', init_type='rand', mixer_hamiltonian='x')
-        q.set_backend_properties(prepend_state=None, append_state=None)
-        q.set_classical_optimizer(method=method, maxiter=4, optimizer_options = {'blocking':False, 'resamplings': 0},
-                                  optimization_progress=True, cost_progress=True, parameter_log=True, jac='finite_difference')
-
-        q.compile(maxcut_qubo)
+        q.set_classical_optimizer(method=method, maxiter=3, jac='finite_difference')
+        q.compile(qubo_problem)
        q.optimize()
 
+        assert len(q.results.most_probable_states['solutions_bitstrings'][0]) > 0
+
    def test_pennylane_optimizers(self):
        " function to run the tests for pennylane optimizers "
        list_optimizers = CustomScipyGradientOptimizer.CUSTOM_GRADIENT_OPTIMIZERS
 
+        i = 0
        for opt in list_optimizers:
-            if opt.split()[0] == "pennylane":
+            if opt.split('_')[0] == "pennylane":
                self._run_method(opt)
+                i += 1
+
+        assert i == sum([1 for opt in list_optimizers if "pennylane" in opt])

From 1394b967a49285f256efcd9490e508340dac7480 Mon Sep 17 00:00:00 2001
From: raulconchello
Date: Fri, 28 Oct 2022 15:05:18 +0800
Subject: [PATCH 12/36] Documentation

---
 .../optimization_methods_pennylane.py         | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/openqaoa/optimizers/optimization_methods_pennylane.py b/openqaoa/optimizers/optimization_methods_pennylane.py
index b33a2c8d1..65de3b947 100644
--- a/openqaoa/optimizers/optimization_methods_pennylane.py
+++ b/openqaoa/optimizers/optimization_methods_pennylane.py
@@ -13,9 +13,9 @@
 # limitations under the License.
 
 """
-Function to implement pennylane optimization algorithms.
+A set of functions to implement pennylane optimization algorithms.
 Read https://docs.pennylane.ai/en/stable/introduction/interfaces.html#optimizers
-Only those that don't require a pennylane backend have been implemented.
+Optimisers requiring a pennylane backend haven't been implemented yet.
 As with the custom optimization methods, Scipy's `minimize` is used; this 
extends the available scipy methods.
 """
@@ -73,17 +73,17 @@ def pennylane_optimizer(fun, x0, args=(), maxfev=None, method='vgd', qfim=None,
    options : dict, optional
        Dictionary where keys are the arguments for the optimizers object, and
        the values are the values to pass to these arguments.
-        To know all the possible arguments read
-        https://docs.pennylane.ai/en/stable/introduction/interfaces.html#optimizers.
-
-
-    (read https://docs.pennylane.ai/en/stable/code/api/pennylane.RotosolveOptimizer.html#pennylane.RotosolveOptimizer.step)
-    nums_frequency : dict[dict], required for rotosolve
+        To know all the possible options see https://docs.pennylane.ai/en/stable/introduction/interfaces.html#optimizers.
+    nums_frequency : dict[dict], optional
+        It is required for the rotosolve method
        The number of frequencies in the fun per parameter.
-    spectra : dict[dict], required for rotosolve
+    spectra : dict[dict], optional
+        It is required for the rotosolve method
        Frequency spectra in the objective_fn per parameter.
-    shifts : dict[dict], required for rotosolve
+    shifts : dict[dict], optional
+        It is required for the rotosolve method
        Shift angles for the reconstruction per parameter.
+        Read https://docs.pennylane.ai/en/stable/code/api/pennylane.RotosolveOptimizer.html#pennylane.RotosolveOptimizer.step for more information.
 
 
    RETURNS

From f870c2f04b639d399dc5e0db48982a24c65efdfd Mon Sep 17 00:00:00 2001
From: raulconchello
Date: Fri, 28 Oct 2022 15:06:37 +0800
Subject: [PATCH 13/36] Add custom optimizers in ALLOWED_MINIMIZATION_METHODS

---
 openqaoa/workflows/parameters/qaoa_parameters.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/openqaoa/workflows/parameters/qaoa_parameters.py b/openqaoa/workflows/parameters/qaoa_parameters.py
index 6826974c2..f2591803a 100644
--- a/openqaoa/workflows/parameters/qaoa_parameters.py
+++ b/openqaoa/workflows/parameters/qaoa_parameters.py
@@ -20,13 +20,14 @@
 from openqaoa.devices import SUPPORTED_LOCAL_SIMULATORS
 from .parameters import Parameters
 from scipy.optimize._minimize import MINIMIZE_METHODS
+from openqaoa.optimizers.training_vqa import CustomScipyGradientOptimizer
 
 
 ALLOWED_PARAM_TYPES = ['standard', 'standard_w_bias', 'extended', 'fourier',
                       'fourier_extended', 'fourier_w_bias', 'annealing']
 ALLOWED_INIT_TYPES = ['rand', 'ramp', 'custom']
 ALLOWED_MIXERS = ['x', 'xy']
-ALLOWED_MINIMIZATION_METHODS = MINIMIZE_METHODS
+ALLOWED_MINIMIZATION_METHODS = MINIMIZE_METHODS + CustomScipyGradientOptimizer.CUSTOM_GRADIENT_OPTIMIZERS
 
 ALLOWED_QVM_DEVICES = ['Aspen-11', 'Aspen-M-1']
 ALLOWED_QVM_DEVICES.extend(f'{n}q-qvm' for n in range(2, 80))
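
[A usage sketch, not part of the patch series: with the whitelist extended, a PennyLane optimizer can now be selected by name from the workflow layer, the same way the tests in this PR do it. The problem instance below is arbitrary, and gradient-based methods still need a `jac` specification.]

import networkx as nx
from openqaoa.workflows.optimizer import QAOA
from openqaoa.problems.problem import MinimumVertexCover

# the same toy problem the PR's tests build
qubo = MinimumVertexCover(nx.circulant_graph(4, [1]), field=1.0, penalty=10).get_qubo_problem()

q = QAOA()
# any 'pennylane_*' method string now passes the ALLOWED_MINIMIZATION_METHODS check
q.set_classical_optimizer(method='pennylane_adam', maxiter=5, jac='finite_difference')
q.compile(qubo)
q.optimize()
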
From a5ebaffb450dfc32104a517fb4a9b373313cc769 Mon Sep 17 00:00:00 2001
From: raulconchello
Date: Mon, 31 Oct 2022 15:15:39 +0800
Subject: [PATCH 14/36] Change in step computation depending on the method

---
 openqaoa/optimizers/optimization_methods_pennylane.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/openqaoa/optimizers/optimization_methods_pennylane.py b/openqaoa/optimizers/optimization_methods_pennylane.py
index 65de3b947..21fb2f989 100644
--- a/openqaoa/optimizers/optimization_methods_pennylane.py
+++ b/openqaoa/optimizers/optimization_methods_pennylane.py
@@ -121,12 +121,12 @@ def cost(params, **k): # define a function to convert the params list from penny
    while improved and not stop and niter < maxiter:
        improved = False
 
-        # compute step
-        if qfim: #natural_grad_descent
+        # compute step (depends on the optimizer)
+        if method in ['natural_grad_descent']:
            testx, testy = optimizer.step_and_cost(cost, bestx, *args, grad_fn=jac, metric_tensor_fn=qfim)
-        elif jac: #adagrad, adam, vgd, momentum, nesterov_momentum, rmsprop
+        if method in ['adagrad', 'adam', 'vgd', 'momentum', 'nesterov_momentum', 'rmsprop']:
            testx, testy = optimizer.step_and_cost(cost, bestx, *args, grad_fn=jac)
-        elif method=='rotosolve':
+        if method in ['rotosolve']:
            testx, testy = optimizer.step_and_cost(
                cost, bestx, *args,
                nums_frequency={'params': {(i,):1 for i in range(bestx.size)}} if not nums_frequency else nums_frequency,
@@ -134,7 +134,7 @@ def cost(params, **k): # define a function to convert the params list from penny
                shifts=shifts,
                full_output=False,
            )
-        else: #spsa
+        if method in ['spsa']:
            testx, testy = optimizer.step_and_cost(cost, bestx, *args)
 
        # check if stable

From eb3a47e184522dd06751275e431ce1baa4c7ded3 Mon Sep 17 00:00:00 2001
From: Raul Conchello Vendrell
Date: Tue, 8 Nov 2022 11:11:59 +0800
Subject: [PATCH 15/36] Making training_vqa (for pennylane) more readable

---
 .../optimizers/optimization_methods_pennylane.py | 14 +++++++-------
 openqaoa/optimizers/training_vqa.py              |  6 +++---
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/openqaoa/optimizers/optimization_methods_pennylane.py b/openqaoa/optimizers/optimization_methods_pennylane.py
index 21fb2f989..4f92fec98 100644
--- a/openqaoa/optimizers/optimization_methods_pennylane.py
+++ b/openqaoa/optimizers/optimization_methods_pennylane.py
@@ -38,7 +38,7 @@
 
 
 
-def pennylane_optimizer(fun, x0, args=(), maxfev=None, method='vgd', qfim=None,
+def pennylane_optimizer(fun, x0, args=(), maxfev=None, pennylane_method='vgd', qfim=None,
                        maxiter=100, tol=10**(-6), jac=None, callback=None,
                        nums_frequency=None, spectra=None, shifts=None, **options):
 
@@ -57,7 +57,7 @@ def pennylane_optimizer(fun, x0, args=(), maxfev=None, pennylane_method='vgd',
        Arguments to pass to `fun`.
    maxfev : int, optional
        Maximum number of function evaluations.
-    method : string, optional
+    pennylane_method : string, optional
        Optimizer method to compute the steps. 
qfim : callable, optional (required for natural_grad_descent) Callable Fubini-Study metric tensor @@ -96,7 +96,7 @@ def cost(params, **k): # define a function to convert the params list from penny return fun(np.array(params), *k) - optimizer = AVAILABLE_OPTIMIZERS[method] # define the optimizer + optimizer = AVAILABLE_OPTIMIZERS[pennylane_method] # define the optimizer #get optimizer arguments arguments = inspect.signature(optimizer).parameters.keys() @@ -122,11 +122,11 @@ def cost(params, **k): # define a function to convert the params list from penny improved = False # compute step (depends on the optimizer) - if method in ['natural_grad_descent']: + if pennylane_method in ['natural_grad_descent']: testx, testy = optimizer.step_and_cost(cost, bestx, *args, grad_fn=jac, metric_tensor_fn=qfim) - if method in ['adagrad', 'adam', 'vgd', 'momentum', 'nesterov_momentum', 'rmsprop']: + if pennylane_method in ['adagrad', 'adam', 'vgd', 'momentum', 'nesterov_momentum', 'rmsprop']: testx, testy = optimizer.step_and_cost(cost, bestx, *args, grad_fn=jac) - if method in ['rotosolve']: + if pennylane_method in ['rotosolve']: testx, testy = optimizer.step_and_cost( cost, bestx, *args, nums_frequency={'params': {(i,):1 for i in range(bestx.size)}} if not nums_frequency else nums_frequency, @@ -134,7 +134,7 @@ def cost(params, **k): # define a function to convert the params list from penny shifts=shifts, full_output=False, ) - if method in ['spsa']: + if pennylane_method in ['spsa']: testx, testy = optimizer.step_and_cost(cost, bestx, *args) # check if stable diff --git a/openqaoa/optimizers/training_vqa.py b/openqaoa/optimizers/training_vqa.py index 3a1f62d7d..fc07bfe86 100644 --- a/openqaoa/optimizers/training_vqa.py +++ b/openqaoa/optimizers/training_vqa.py @@ -640,11 +640,11 @@ def optimize(self): elif self.method.lower().split('_')[0] == 'pennylane': # check if we are using a pennylane optimizer method = ompl.pennylane_optimizer - self.options['method'] = self.method.lower().replace("pennylane_", "") + self.options['pennylane_method'] = self.method.lower().replace("pennylane_", "") - if self.options['method'] == 'natural_grad_descent': + if self.options['pennylane_method'] == 'natural_grad_descent': self.options['qfim'] = qfim(self.vqa_object, self.variational_params, self.log) - if self.options['method'] in ['spsa', 'rotosolve']: + if self.options['pennylane_method'] in ['spsa', 'rotosolve']: self.jac = None try: From 1e714c7c7490e97d11cd87b8b8c8102220024fed Mon Sep 17 00:00:00 2001 From: Raul Conchello Vendrell Date: Tue, 8 Nov 2022 15:12:43 +0800 Subject: [PATCH 16/36] Better tests for the pennylane optimizers --- tests/test_optimizers_pennylane.py | 210 +++++++++++++++++++++++++++-- 1 file changed, 198 insertions(+), 12 deletions(-) diff --git a/tests/test_optimizers_pennylane.py b/tests/test_optimizers_pennylane.py index 7bfa3f8cb..cd654ae7f 100644 --- a/tests/test_optimizers_pennylane.py +++ b/tests/test_optimizers_pennylane.py @@ -1,43 +1,229 @@ import warnings import unittest +import numpy as np import networkx as nx +import pennylane as pl +import copy +import inspect + from openqaoa.workflows.optimizer import QAOA from openqaoa.devices import create_device from openqaoa.problems.problem import MinimumVertexCover from openqaoa.optimizers.training_vqa import CustomScipyGradientOptimizer +from openqaoa.optimizers.optimization_methods_pennylane import AVAILABLE_OPTIMIZERS +from openqaoa.derivative_functions import derivative +from openqaoa.optimizers.logger_vqa import Logger +from 
openqaoa.qaoa_parameters import create_qaoa_variational_params, QAOACircuitParams, PauliOp, Hamiltonian
+from openqaoa.utilities import X_mixer_hamiltonian
+from openqaoa.backends.qaoa_backend import get_qaoa_backend
+from openqaoa.optimizers import get_optimizer
+from openqaoa.qfim import qfim as Qfim
+from openqaoa.problems.problem import QUBO
+
+#list of optimizers to test
+list_optimizers = CustomScipyGradientOptimizer.CUSTOM_GRADIENT_OPTIMIZERS
 
 #create a problem
 g = nx.circulant_graph(4, [1])
 problem = MinimumVertexCover(g, field =1.0, penalty=10)
-qubo_problem = problem.get_qubo_problem()
+qubo_problem_1 = problem.get_qubo_problem()
+qubo_problem_2 = QUBO.random_instance(5)
+qubo_problem_3 = QUBO.random_instance(6)
 
 
 class TestPennylaneOptimizers(unittest.TestCase):
 
+    def setUp(self):
+
+        self.log = Logger({'func_evals':
+                               {
+                                   'history_update_bool': False,
+                                   'best_update_string': 'HighestOnly'
+                               },
+                           'jac_func_evals':
+                               {
+                                   'history_update_bool': False,
+                                   'best_update_string': 'HighestOnly'
+                               },
+                           'qfim_func_evals':
+                               {
+                                   'history_update_bool': False,
+                                   'best_update_string': 'HighestOnly'
+                               }
+                           },
+                          {
+                              'root_nodes': ['func_evals', 'jac_func_evals',
+                                             'qfim_func_evals'],
+                              'best_update_structure': []
+                          })
+
+        self.log.log_variables({'func_evals': 0, 'jac_func_evals': 0, 'qfim_func_evals': 0})
+
+    def _run_method_workflows(self, method, problem):
+        " helper function to run the test for any method using workflows"
        q = QAOA()
        q.set_classical_optimizer(method=method, maxiter=3, jac='finite_difference')
-        q.compile(qubo_problem)
+        q.compile(problem)
        q.optimize()
 
        assert len(q.results.most_probable_states['solutions_bitstrings'][0]) > 0
 
+    def _run_method_manual(self, method, problem):
+        " helper function to run the test for any method using manual mode"
+
+        cost_hamil = problem.hamiltonian
+        mixer_hamil = X_mixer_hamiltonian(n_qubits=problem.n)
+        circuit_params = QAOACircuitParams(cost_hamil, mixer_hamil, p=2)
+        device = create_device('local','vectorized')
+        backend_obj_vectorized = get_qaoa_backend(circuit_params,device)
+        variate_params = create_qaoa_variational_params(circuit_params, 'standard', 'ramp')
+        niter = 5
+        grad_stepsize = 0.0001
+        stepsize = 0.001
+
+        # declare needed functions
+        jac = derivative(backend_obj_vectorized, variate_params, self.log,
+                         'gradient', 'finite_difference',
+                         {'stepsize': grad_stepsize})
+        qfim = Qfim(backend_obj_vectorized, variate_params, self.log)
+
+
+        # Optimize
+        vector_optimizer = get_optimizer(backend_obj_vectorized, variate_params, optimizer_dict={
+            'method': method, 'jac': jac, 'maxiter': niter, 'qfim': qfim,
+            'optimizer_options' : {'stepsize': stepsize}})
+        vector_optimizer()
+
+        # saving the results
+        results = vector_optimizer.qaoa_result
+
+        assert len(results.most_probable_states['solutions_bitstrings'][0]) == problem.n
+
+    def test_pennylane_optimizers_workflows(self):
+        " function to run the tests for pennylane optimizers, workflows"
 
        i = 0
-        for opt in list_optimizers:
-            if opt.split('_')[0] == "pennylane":
-                self._run_method(opt)
-                i += 1
+        for problem in [qubo_problem_3, qubo_problem_2, qubo_problem_1]:
+            for opt in list_optimizers:
+                if opt.split('_')[0] == "pennylane":
+                    self._run_method_workflows(opt, problem)
+                    i += 1
 
-        assert i == sum([1 for opt in list_optimizers if "pennylane" in opt])
+        assert i == 3*sum([1 for opt in list_optimizers if "pennylane" in opt])
+
+    def test_pennylane_optimizers_manual(self):
+        " function to run the tests for pennylane optimizers, manual mode"
+
+        i = 0
+        for problem in [qubo_problem_3, qubo_problem_2, qubo_problem_1]:
+            for opt in list_optimizers:
+                if opt.split('_')[0] == "pennylane":
+                    self._run_method_manual(opt, problem)
+                    i += 1
+
+        assert i == 3*sum([1 for opt in list_optimizers if "pennylane" in opt])
+
+    def _pennylane_step(self, params_array, cost, optimizer, method, jac, qfim):
+        " helper function to run a step of the pennylane optimizer"
+        params_array = pl.numpy.array(params_array, requires_grad=True)
+        if method in ['natural_grad_descent']:
+            x, y = optimizer.step_and_cost(cost, params_array, grad_fn=jac, metric_tensor_fn=qfim)
+        if method in ['adagrad', 'adam', 'vgd', 'momentum', 'nesterov_momentum', 'rmsprop']:
+            x, y = optimizer.step_and_cost(cost, params_array, grad_fn=jac)
+        if method in ['rotosolve']:
+            x, y = optimizer.step_and_cost(
+                cost, params_array,
+                nums_frequency={'params': {(i,):1 for i in range(params_array.size)}},
+                # spectra=spectra,
+                # shifts=shifts,
+                # full_output=False,
+            )
+        if method in ['spsa']:
+            x, y = optimizer.step_and_cost(cost, params_array)
+
+        return x, y
 
+    def test_step_and_cost(self):
+        " function to run the tests for steps of pennylane optimizers "
+        # define some problem
+        cost_hamil = Hamiltonian([PauliOp('ZZ', (0, 1)), PauliOp('ZZ', (1, 2)), PauliOp(
+            'ZZ', (0, 3)), PauliOp('Z', (2,)), PauliOp('Z', (1,))], [1, 1.1, 1.5, 2, -0.8], 0.8)
+        mixer_hamil = X_mixer_hamiltonian(n_qubits=4)
+        circuit_params = QAOACircuitParams(cost_hamil, mixer_hamil, p=2)
+        device = create_device('local','vectorized')
+        backend_obj_vectorized = get_qaoa_backend(circuit_params,device)
+        variate_params = create_qaoa_variational_params(circuit_params, 'standard', 'ramp')
+        niter = 5
+        grad_stepsize = 0.0001
+        stepsize = 0.001
+
+        # declare needed functions
+        jac = derivative(backend_obj_vectorized, variate_params, self.log,
+                         'gradient', 'finite_difference',
+                         {'stepsize': grad_stepsize})
+        qfim = Qfim(backend_obj_vectorized, variate_params, self.log)
+        def cost(params):
+            variate_params.update_from_raw(params)
+            return np.real(backend_obj_vectorized.expectation(variate_params))
+
+        i = 0
+        for method in list_optimizers:
+
+            if method.split('_')[0] == "pennylane":
+                pennylane_method = method.replace('pennylane_', '')
+
+                # copy the parameters
+                x0 = copy.deepcopy(variate_params.raw().copy())
+
+                # Optimize with the implemented optimizer in OpenQAOA
+                vector_optimizer = get_optimizer(backend_obj_vectorized, variate_params, optimizer_dict={
+                    'method': method, 'jac': jac, 'maxiter': niter, 'qfim': qfim,
+                    'optimizer_options' : {'stepsize': stepsize}})
+                vector_optimizer()
+
+                # formatting the data
+                y_opt = vector_optimizer.qaoa_result.intermediate['intermediate cost'][1:4]
+                if pennylane_method in ['rotosolve']: y_opt = vector_optimizer.qaoa_result.intermediate['intermediate cost'][4:40:12]
+
+                # get optimizer to try
+                optimizer = AVAILABLE_OPTIMIZERS[pennylane_method]
+                #get optimizer arguments
+                arguments = inspect.signature(optimizer).parameters.keys()
+
+                #check if stepsize is in the optimizer arguments
+                options = {}
+                if 'stepsize' in arguments: options['stepsize'] = stepsize
+                if 'maxiter' in arguments: options['maxiter'] = niter
+
+                #pass the argument to the optimizer
+                optimizer = optimizer(**options)
+
+                # reinitialize variables
+                variate_params.update_from_raw(x0)
+                x0 = 
variate_params.raw().copy() + y0 = cost(x0) + + # compute steps (depends on the optimizer) + x1, y1 = self._pennylane_step(x0, cost, optimizer, pennylane_method, jac, qfim) + x2, y2 = self._pennylane_step(x1, cost, optimizer, pennylane_method, jac, qfim) + x3, y3 = self._pennylane_step(x2, cost, optimizer, pennylane_method, jac, qfim) + + # list of results + y = [y1, y2, y3] + + # check that the results are ok + if pennylane_method in ['spsa']: + assert np.sum(np.abs(np.array(y)) >= 0) == 3 + else: + for yi, y_opt_i in zip(y, y_opt): + assert np.isclose(yi, y_opt_i, rtol=0.001, atol=0.001) + + i += 1 + + assert i == sum([1 for opt in list_optimizers if "pennylane" in opt]) if __name__ == "__main__": From 3724e2f542626a79b457396012e7b43528a87fe8 Mon Sep 17 00:00:00 2001 From: Raul Conchello Vendrell Date: Tue, 8 Nov 2022 15:13:06 +0800 Subject: [PATCH 17/36] Requirements --- docs/requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/requirements.txt b/docs/requirements.txt index 7955230ac..707e2c861 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -7,4 +7,6 @@ sphinx>=4.5.0 sphinx-autodoc-typehints==1.18.1 sphinx-rtd-theme==1.0.0 ipython==8.2.0 +pandas>=1.3.5 +amazon-braket-sdk==1.23.0 pennylane>=0.26.0 From 5e04b2a1dd787d965a3773e35914b04347d798b8 Mon Sep 17 00:00:00 2001 From: Raul Conchello Vendrell Date: Mon, 14 Nov 2022 15:52:45 +0800 Subject: [PATCH 18/36] Adding CustomScipyPennyLaneOptimizer --- openqaoa/optimizers/qaoa_optimizer.py | 8 +- openqaoa/optimizers/training_vqa.py | 159 +++++++++++++++++- .../workflows/parameters/qaoa_parameters.py | 4 +- tests/test_optimizers_pennylane.py | 123 +++++++------- tests/test_workflows.py | 13 +- 5 files changed, 233 insertions(+), 74 deletions(-) diff --git a/openqaoa/optimizers/qaoa_optimizer.py b/openqaoa/optimizers/qaoa_optimizer.py index 5f600c7ce..fd4c008c7 100644 --- a/openqaoa/optimizers/qaoa_optimizer.py +++ b/openqaoa/optimizers/qaoa_optimizer.py @@ -13,7 +13,7 @@ # limitations under the License. 
from ..qaoa_parameters.baseparams import QAOAVariationalBaseParams -from .training_vqa import ScipyOptimizer, CustomScipyGradientOptimizer +from .training_vqa import ScipyOptimizer, CustomScipyGradientOptimizer,CustomScipyPennyLaneOptimizer from ..basebackend import VQABaseBackend @@ -24,7 +24,8 @@ def available_optimizers(): optimizers = { 'scipy': ScipyOptimizer.SCIPY_METHODS, - 'custom_scipy_gradient': CustomScipyGradientOptimizer.CUSTOM_GRADIENT_OPTIMIZERS + 'custom_scipy_gradient': CustomScipyGradientOptimizer.CUSTOM_GRADIENT_OPTIMIZERS, + 'custom_scipy_pennylane': CustomScipyPennyLaneOptimizer.PENNYLANE_OPTIMIZERS } return optimizers @@ -55,7 +56,8 @@ def get_optimizer(vqa_object: VQABaseBackend, """ SUPPORTED_OPTIMIZERS = { 'scipy': ScipyOptimizer, - 'custom_scipy_gradient': CustomScipyGradientOptimizer + 'custom_scipy_gradient': CustomScipyGradientOptimizer, + 'custom_scipy_pennylane': CustomScipyPennyLaneOptimizer } method = optimizer_dict['method'].lower() diff --git a/openqaoa/optimizers/training_vqa.py b/openqaoa/optimizers/training_vqa.py index 6e6f0c5e6..bffe1e7a6 100644 --- a/openqaoa/optimizers/training_vqa.py +++ b/openqaoa/optimizers/training_vqa.py @@ -525,11 +525,7 @@ class CustomScipyGradientOptimizer(OptimizeVQA): """ CUSTOM_GRADIENT_OPTIMIZERS = ['vgd', 'newton', - 'rmsprop', 'natural_grad_descent', 'spsa', - 'pennylane_adagrad', 'pennylane_adam', 'pennylane_vgd', - 'pennylane_momentum', 'pennylane_nesterov_momentum', - 'pennylane_natural_grad_descent', 'pennylane_rmsprop', - 'pennylane_rotosolve', 'pennylane_spsa'] + 'rmsprop', 'natural_grad_descent', 'spsa'] def __init__(self, vqa_object: Type[VQABaseBackend], @@ -657,3 +653,156 @@ def optimize(self): finally: self.results_dictionary() return self + + +class CustomScipyPennyLaneOptimizer(OptimizeVQA): + """ + Python custom scipy optimization with pennylane optimizers for the VQA class. + + .. Tip:: + Using bounds may result in lower optimization performance + + Parameters + ---------- + vqa_object: + Backend object of class VQABaseBackend which contains information on the backend used to perform computations, and the VQA circuit. + + variational_params: + Object of class QAOAVariationalBaseParams, which contains information on the circuit to be executed, the type of parametrisation, and the angles of the VQA circuit. + + optimizer_dict: + * jac + + * gradient as ``Callable``, if defined else ``None`` + + * hess + + * hessian as ``Callable``, if defined else ``None`` + + * bounds + + * parameter bounds while training, defaults to ``None`` + + * constraints + + * Linear/Non-Linear constraints (only for COBYLA, SLSQP and trust-constr) + + * tol + + * Tolerance for termination + + * maxiters + + * sets ``maxiters = 100`` by default if not specified. + + * maxfev + + * sets ``maxfev = 100`` by default if not specified. 
+
+        * optimizer_options
+
+            * Dictionary of optimiser-specific arguments, defaults to ``None``
+            * Used also for the pennylane optimizers (and step function) arguments
+
+    """
+    PENNYLANE_OPTIMIZERS = ['pennylane_adagrad', 'pennylane_adam', 'pennylane_vgd',
+                            'pennylane_momentum', 'pennylane_nesterov_momentum',
+                            'pennylane_natural_grad_descent', 'pennylane_rmsprop',
+                            'pennylane_rotosolve', 'pennylane_spsa']
+
+    def __init__(self,
+                 vqa_object: Type[VQABaseBackend],
+                 variational_params: Type[QAOAVariationalBaseParams],
+                 optimizer_dict: dict):
+
+        super().__init__(vqa_object, variational_params, optimizer_dict)
+
+        self.vqa_object = vqa_object
+        self._validate_and_set_params(optimizer_dict)
+
+    def _validate_and_set_params(self, optimizer_dict):
+        """
+        Verify that the specified arguments are valid for the particular optimizer.
+        """
+
+        if self.method not in CustomScipyPennyLaneOptimizer.PENNYLANE_OPTIMIZERS:
+            raise ValueError(
+                f"Please choose from the supported methods: {CustomScipyPennyLaneOptimizer.PENNYLANE_OPTIMIZERS}")
+
+        jac = optimizer_dict.get('jac', None)
+        jac_options = optimizer_dict.get('jac_options', None)
+
+        if jac is None or not isinstance(jac, (Callable, str)):
+            raise ValueError(
+                "Please specify either a string or provide callable gradient in order to use gradient based methods")
+        else:
+            if isinstance(jac, str):
+                self.jac = derivative(self.vqa_object, self.variational_params, self.log, 'gradient', jac, jac_options)
+            else:
+                self.jac = jac
+
+        constraints = optimizer_dict.get('constraints', ())
+        if constraints == () or isinstance(constraints, LinearConstraint) or isinstance(constraints, NonlinearConstraint):
+            self.constraints = constraints
+        else:
+            raise ValueError(
+                f"Constraints for Scipy optimization should be of type {LinearConstraint} or {NonlinearConstraint}")
+
+        bounds = optimizer_dict.get('bounds', None)
+        if bounds is None or isinstance(bounds, Bounds):
+            self.bounds = bounds
+        else:
+            raise ValueError(
+                f"Bounds for Scipy optimization should be of type {Bounds}")
+
+        self.options = optimizer_dict.get('optimizer_options', {})
+        self.options["maxiter"] = optimizer_dict.get('maxiter', None)
+        if optimizer_dict.get('maxfev') is not None:
+            self.options["maxfev"] = optimizer_dict.get('maxfev', None)
+
+        self.tol = optimizer_dict.get('tol', None)
+
+        return self
+
+    def __repr__(self):
+        """
+        Overview of the instantiated optimizer/trainer.
+        """
+        maxiter = self.options["maxiter"]
+        string = f"Optimizer for VQA of type: {type(self.vqa).__base__.__name__} \n"
+        string += f"Backend: {type(self.vqa).__name__} \n"
+        string += f"Method: {str(self.method).upper()} with Max Iterations: {maxiter}\n"
+
+        return string
+
+    def optimize(self):
+        '''
+        Main method which implements the optimization process using ``scipy.minimize``.
+
+        Returns
+        -------
+        :
+            The optimized return object from the ``scipy.optimize`` package the result is assigned to the attribute ``opt_result``
+        '''
+
+        #set the optimizer function
+        method = ompl.pennylane_optimizer
+
+        # set the method to be used for the optimization
+        self.options['pennylane_method'] = self.method.replace("pennylane_", "")
+
+        if self.options['pennylane_method'] == 'natural_grad_descent':
+            self.options['qfim'] = qfim(self.vqa_object, self.variational_params, self.log)
+        elif self.options['pennylane_method'] in ['spsa', 'rotosolve']:
+            self.jac = None
+
+        try:
+            result = minimize(self.optimize_this, x0=self.initial_params, method=method,
+                              jac=self.jac, tol=self.tol, constraints=self.constraints,
+                              options=self.options, bounds=self.bounds)
+        except Exception as e:
+            print("The optimization has been terminated early. Most likely due to a connection error. You can retrieve results from the optimization runs that were completed through the .results_information method.")
+            raise e
+        finally:
+            self.results_dictionary()
+        return self
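
[A usage sketch, not part of the patch series: since the new class mirrors `CustomScipyGradientOptimizer`, it can be driven through `get_optimizer` much like the manual-mode test in this PR. The vectorized backend, `ramp` initialisation and step size below are illustrative choices only.]

from openqaoa.devices import create_device
from openqaoa.backends.qaoa_backend import get_qaoa_backend
from openqaoa.qaoa_parameters import QAOACircuitParams, create_qaoa_variational_params
from openqaoa.utilities import X_mixer_hamiltonian
from openqaoa.optimizers import get_optimizer
from openqaoa.problems.problem import QUBO

qubo = QUBO.random_instance(5)
circuit_params = QAOACircuitParams(qubo.hamiltonian, X_mixer_hamiltonian(n_qubits=qubo.n), p=2)
backend = get_qaoa_backend(circuit_params, create_device('local', 'vectorized'))
params = create_qaoa_variational_params(circuit_params, 'standard', 'ramp')

# 'pennylane_*' method strings are routed to CustomScipyPennyLaneOptimizer by get_optimizer
opt = get_optimizer(backend, params, optimizer_dict={
    'method': 'pennylane_vgd', 'jac': 'finite_difference', 'maxiter': 5,
    'optimizer_options': {'stepsize': 0.01}})
opt()
print(opt.qaoa_result.most_probable_states)
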
diff --git a/openqaoa/workflows/parameters/qaoa_parameters.py b/openqaoa/workflows/parameters/qaoa_parameters.py
index ba0143b23..1b7b470f8 100644
--- a/openqaoa/workflows/parameters/qaoa_parameters.py
+++ b/openqaoa/workflows/parameters/qaoa_parameters.py
@@ -23,7 +23,7 @@
 from openqaoa.devices import SUPPORTED_LOCAL_SIMULATORS
 from .parameters import Parameters
 from scipy.optimize._minimize import MINIMIZE_METHODS
-from openqaoa.optimizers.training_vqa import CustomScipyGradientOptimizer
+from openqaoa.optimizers.training_vqa import CustomScipyGradientOptimizer, CustomScipyPennyLaneOptimizer
 
 
@@ -31,7 +31,7 @@
                       'fourier_extended', 'fourier_w_bias', 'annealing']
 ALLOWED_INIT_TYPES = ['rand', 'ramp', 'custom']
 ALLOWED_MIXERS = ['x', 'xy']
-ALLOWED_MINIMIZATION_METHODS = MINIMIZE_METHODS + CustomScipyGradientOptimizer.CUSTOM_GRADIENT_OPTIMIZERS
+ALLOWED_MINIMIZATION_METHODS = MINIMIZE_METHODS + CustomScipyGradientOptimizer.CUSTOM_GRADIENT_OPTIMIZERS + CustomScipyPennyLaneOptimizer.PENNYLANE_OPTIMIZERS
 
 ALLOWED_QVM_DEVICES = ['Aspen-11', 'Aspen-M-1']
 ALLOWED_QVM_DEVICES.extend(f'{n}q-qvm' for n in range(2, 80))
diff --git a/tests/test_optimizers_pennylane.py b/tests/test_optimizers_pennylane.py
index cd654ae7f..6711d4b6f 100644
--- a/tests/test_optimizers_pennylane.py
+++ b/tests/test_optimizers_pennylane.py
@@ -10,7 +10,7 @@
 from openqaoa.workflows.optimizer import QAOA
 from openqaoa.devices import create_device
 from openqaoa.problems.problem import MinimumVertexCover
-from openqaoa.optimizers.training_vqa import CustomScipyGradientOptimizer
+from openqaoa.optimizers.training_vqa import CustomScipyPennyLaneOptimizer
 from openqaoa.optimizers.optimization_methods_pennylane import AVAILABLE_OPTIMIZERS
 from openqaoa.derivative_functions import derivative
 from openqaoa.optimizers.logger_vqa import Logger
@@ -22,8 +22,8 @@
 from openqaoa.problems.problem import QUBO
 
 
-#list of optimizers to test
-list_optimizers = CustomScipyGradientOptimizer.CUSTOM_GRADIENT_OPTIMIZERS
+#list of optimizers to test, pennylane optimizers
+list_optimizers = CustomScipyPennyLaneOptimizer.PENNYLANE_OPTIMIZERS
 
 #create a problem
 g = nx.circulant_graph(4, [1])
 problem = MinimumVertexCover(g, field =1.0, penalty=10)
@@ -107,11 +107,10 @@ def test_pennylane_optimizers_workflows(self):
 
        i = 0
        for problem in [qubo_problem_3, qubo_problem_2, qubo_problem_1]:
            for opt in list_optimizers:
-                if opt.split('_')[0] == "pennylane":
-                    self._run_method_workflows(opt, problem)
-                    i += 1
+                self._run_method_workflows(opt, problem)
+                i += 1
 
-        assert i 
== 3*sum([1 for opt in list_optimizers if "pennylane" in opt]) + assert i == 3*len(list_optimizers) def test_pennylane_optimizers_manual(self): " function to run the tests for pennylane optimizers, manual mode" @@ -119,11 +118,10 @@ def test_pennylane_optimizers_manual(self): i = 0 for problem in [qubo_problem_3, qubo_problem_2, qubo_problem_1]: for opt in list_optimizers: - if opt.split('_')[0] == "pennylane": - self._run_method_manual(opt, problem) - i += 1 + self._run_method_manual(opt, problem) + i += 1 - assert i == 3*sum([1 for opt in list_optimizers if "pennylane" in opt]) + assert i == 3*len(list_optimizers) def _pennylane_step(self, params_array, cost, optimizer, method, jac, qfim): " helper function to run a setp of the pennylane optimizer" @@ -172,58 +170,57 @@ def cost(params): i = 0 for method in list_optimizers: - if method.split('_')[0] == "pennylane": - pennylane_method = method.replace('pennylane_', '') - - # copy the parameters - x0 = copy.deepcopy(variate_params.raw().copy()) - - # Optimize with the implemented optimizer in OpenQAOA - vector_optimizer = get_optimizer(backend_obj_vectorized, variate_params, optimizer_dict={ - 'method': method, 'jac': jac, 'maxiter': niter, 'qfim': qfim, - 'optimizer_options' : {'stepsize': stepsize}}) - vector_optimizer() - - # formatting the data - y_opt = vector_optimizer.qaoa_result.intermediate['intermediate cost'][1:4] - if pennylane_method in ['rotosolve']: y_opt = vector_optimizer.qaoa_result.intermediate['intermediate cost'][4:40:12] - - # get optimizer to try - optimizer = AVAILABLE_OPTIMIZERS[pennylane_method] - #get optimizer arguments - arguments = inspect.signature(optimizer).parameters.keys() - - #check if stepsize is in the optimizer arguments - options = {} - if 'stepsize' in arguments: options['stepsize'] = stepsize - if 'maxiter' in arguments: options['maxiter'] = niter - - #pass the argument to the optimizer - optimizer = optimizer(**options) - - # reinitialize variables - variate_params.update_from_raw(x0) - x0 = variate_params.raw().copy() - y0 = cost(x0) - - # compute steps (depends on the optimizer) - x1, y1 = self._pennylane_step(x0, cost, optimizer, pennylane_method, jac, qfim) - x2, y2 = self._pennylane_step(x1, cost, optimizer, pennylane_method, jac, qfim) - x3, y3 = self._pennylane_step(x2, cost, optimizer, pennylane_method, jac, qfim) - - # list of results - y = [y1, y2, y3] - - # check that the results are ok - if pennylane_method in ['spsa']: - assert np.sum(np.abs(np.array(y)) >= 0) == 3 - else: - for yi, y_opt_i in zip(y, y_opt): - assert np.isclose(yi, y_opt_i, rtol=0.001, atol=0.001) - - i += 1 - - assert i == sum([1 for opt in list_optimizers if "pennylane" in opt]) + pennylane_method = method.replace('pennylane_', '') + + # copy the parameters + x0 = copy.deepcopy(variate_params.raw().copy()) + + # Optimize with the implemented optimizer in OpenQAOA + vector_optimizer = get_optimizer(backend_obj_vectorized, variate_params, optimizer_dict={ + 'method': method, 'jac': jac, 'maxiter': niter, 'qfim': qfim, + 'optimizer_options' : {'stepsize': stepsize}}) + vector_optimizer() + + # formatting the data + y_opt = vector_optimizer.qaoa_result.intermediate['intermediate cost'][1:4] + if pennylane_method in ['rotosolve']: y_opt = vector_optimizer.qaoa_result.intermediate['intermediate cost'][4:40:12] + + # get optimizer to try + optimizer = AVAILABLE_OPTIMIZERS[pennylane_method] + #get optimizer arguments + arguments = inspect.signature(optimizer).parameters.keys() + + #check if stepsize is in the optimizer 
arguments + options = {} + if 'stepsize' in arguments: options['stepsize'] = stepsize + if 'maxiter' in arguments: options['maxiter'] = niter + + #pass the argument to the optimizer + optimizer = optimizer(**options) + + # reinitialize variables + variate_params.update_from_raw(x0) + x0 = variate_params.raw().copy() + y0 = cost(x0) + + # compute steps (depends on the optimizer) + x1, y1 = self._pennylane_step(x0, cost, optimizer, pennylane_method, jac, qfim) + x2, y2 = self._pennylane_step(x1, cost, optimizer, pennylane_method, jac, qfim) + x3, y3 = self._pennylane_step(x2, cost, optimizer, pennylane_method, jac, qfim) + + # list of results + y = [y1, y2, y3] + + # check that the results are ok + if pennylane_method in ['spsa']: + assert np.sum(np.abs(np.array(y)) >= 0) == 3 + else: + for yi, y_opt_i in zip(y, y_opt): + assert np.isclose(yi, y_opt_i, rtol=0.001, atol=0.001) + + i += 1 + + assert i == len(list_optimizers) if __name__ == "__main__": diff --git a/tests/test_workflows.py b/tests/test_workflows.py index 47f19808f..a077fdcee 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -26,7 +26,7 @@ from openqaoa.backends.simulators.qaoa_qiskit_sim import QAOAQiskitBackendShotBasedSimulator, QAOAQiskitBackendStatevecSimulator from openqaoa.backends.simulators.qaoa_vectorized import QAOAvectorizedBackendSimulator from openqaoa.optimizers.qaoa_optimizer import available_optimizers -from openqaoa.optimizers.training_vqa import ScipyOptimizer, CustomScipyGradientOptimizer +from openqaoa.optimizers.training_vqa import ScipyOptimizer, CustomScipyGradientOptimizer, CustomScipyPennyLaneOptimizer import unittest import networkx as nw import pytest @@ -580,6 +580,7 @@ def test_set_classical_optimizer_method_selectors(self): self.assertEqual(isinstance(q.optimizer, ScipyOptimizer), True) self.assertEqual(isinstance(q.optimizer, CustomScipyGradientOptimizer), False) + self.assertEqual(isinstance(q.optimizer, CustomScipyPennyLaneOptimizer), False) for each_method in available_optimizers()['custom_scipy_gradient']: q = QAOA() @@ -589,6 +590,16 @@ def test_set_classical_optimizer_method_selectors(self): self.assertEqual(isinstance(q.optimizer, ScipyOptimizer), False) self.assertEqual(isinstance(q.optimizer, CustomScipyGradientOptimizer), True) + self.assertEqual(isinstance(q.optimizer, CustomScipyPennyLaneOptimizer), False) + + for each_method in available_optimizers()['custom_scipy_pennylane']: + q = QAOA() + q.set_classical_optimizer(method = each_method, jac='grad_spsa') + q.compile(problem = qubo_problem) + + self.assertEqual(isinstance(q.optimizer, ScipyOptimizer), False) + self.assertEqual(isinstance(q.optimizer, CustomScipyGradientOptimizer), False) + self.assertEqual(isinstance(q.optimizer, CustomScipyPennyLaneOptimizer), True) class TestingRQAOA(unittest.TestCase): """ From 6d5581838cdc93865c1dd7f2ca2ab0e1555481f7 Mon Sep 17 00:00:00 2001 From: Raul Conchello Vendrell Date: Tue, 15 Nov 2022 13:55:58 +0800 Subject: [PATCH 19/36] Debugging --- openqaoa/optimizers/training_vqa.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/openqaoa/optimizers/training_vqa.py b/openqaoa/optimizers/training_vqa.py index bffe1e7a6..aada58bb6 100644 --- a/openqaoa/optimizers/training_vqa.py +++ b/openqaoa/optimizers/training_vqa.py @@ -521,7 +521,6 @@ class CustomScipyGradientOptimizer(OptimizeVQA): * optimizer_options * Dictionary of optimiser-specific arguments, defaults to ``None`` - * Used also for the pennylande optimizers (and step function) arguments """ 
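The manual-mode test earlier in this patch drives each PennyLane optimizer through its ``step_and_cost`` interface. A minimal self-contained sketch of one such step (assuming the ``pennylane`` package is installed, with a toy cost function standing in for the QAOA expectation value):

    import pennylane as pl
    from pennylane import numpy as pnp

    def cost(params):
        # Toy stand-in for the QAOA cost; any differentiable float-valued function works.
        return pnp.sum(pnp.cos(params))

    params = pnp.array([0.1, 0.2], requires_grad=True)
    opt = pl.GradientDescentOptimizer(stepsize=0.1)
    # One optimizer step: returns the updated parameters and the cost *before* the step.
    params, previous_cost = opt.step_and_cost(cost, params)

Gradient-based methods also accept an explicit ``grad_fn``, and ``QNGOptimizer`` additionally takes a ``metric_tensor_fn``, which is why the ``_pennylane_step`` helper above dispatches on the method name.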
CUSTOM_GRADIENT_OPTIMIZERS = ['vgd', 'newton', @@ -629,15 +628,6 @@ def optimize(self): elif self.method == 'spsa': print("Warning : SPSA is an experimental feature.") method = om.SPSA - elif self.method.lower().split('_')[0] == 'pennylane': # check if we are using a pennylane optimizer - method = ompl.pennylane_optimizer - - self.options['pennylane_method'] = self.method.lower().replace("pennylane_", "") - - if self.options['pennylane_method'] == 'natural_grad_descent': - self.options['qfim'] = qfim(self.vqa_object, self.variational_params, self.log) - if self.options['pennylane_method'] in ['spsa', 'rotosolve']: - self.jac = None try: if self.hess == None: From 1ce7470d615fc840156fa9ff759055854ea6f6b5 Mon Sep 17 00:00:00 2001 From: Raul Conchello Vendrell Date: Tue, 15 Nov 2022 17:28:01 +0800 Subject: [PATCH 20/36] PennyLaneOptimizer --- openqaoa/optimizers/qaoa_optimizer.py | 6 +++--- openqaoa/optimizers/training_vqa.py | 6 +++--- openqaoa/workflows/parameters/qaoa_parameters.py | 4 ++-- tests/test_optimizers_pennylane.py | 4 ++-- tests/test_workflows.py | 8 ++++---- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/openqaoa/optimizers/qaoa_optimizer.py b/openqaoa/optimizers/qaoa_optimizer.py index fd4c008c7..665c230e3 100644 --- a/openqaoa/optimizers/qaoa_optimizer.py +++ b/openqaoa/optimizers/qaoa_optimizer.py @@ -13,7 +13,7 @@ # limitations under the License. from ..qaoa_parameters.baseparams import QAOAVariationalBaseParams -from .training_vqa import ScipyOptimizer, CustomScipyGradientOptimizer,CustomScipyPennyLaneOptimizer +from .training_vqa import ScipyOptimizer, CustomScipyGradientOptimizer, PennyLaneOptimizer from ..basebackend import VQABaseBackend @@ -25,7 +25,7 @@ def available_optimizers(): optimizers = { 'scipy': ScipyOptimizer.SCIPY_METHODS, 'custom_scipy_gradient': CustomScipyGradientOptimizer.CUSTOM_GRADIENT_OPTIMIZERS, - 'custom_scipy_pennylane': CustomScipyPennyLaneOptimizer.PENNYLANE_OPTIMIZERS + 'custom_scipy_pennylane': PennyLaneOptimizer.PENNYLANE_OPTIMIZERS } return optimizers @@ -57,7 +57,7 @@ def get_optimizer(vqa_object: VQABaseBackend, SUPPORTED_OPTIMIZERS = { 'scipy': ScipyOptimizer, 'custom_scipy_gradient': CustomScipyGradientOptimizer, - 'custom_scipy_pennylane': CustomScipyPennyLaneOptimizer + 'custom_scipy_pennylane': PennyLaneOptimizer } method = optimizer_dict['method'].lower() diff --git a/openqaoa/optimizers/training_vqa.py b/openqaoa/optimizers/training_vqa.py index aada58bb6..99ba5d36b 100644 --- a/openqaoa/optimizers/training_vqa.py +++ b/openqaoa/optimizers/training_vqa.py @@ -645,7 +645,7 @@ def optimize(self): return self -class CustomScipyPennyLaneOptimizer(OptimizeVQA): +class PennyLaneOptimizer(OptimizeVQA): """ Python custom scipy optimization with pennylane optimizers for the VQA class. @@ -715,9 +715,9 @@ def _validate_and_set_params(self, optimizer_dict): Verify that the specified arguments are valid for the particular optimizer. 
""" - if self.method not in CustomScipyPennyLaneOptimizer.PENNYLANE_OPTIMIZERS: + if self.method not in PennyLaneOptimizer.PENNYLANE_OPTIMIZERS: raise ValueError( - f"Please choose from the supported methods: {CustomScipyPennyLaneOptimizer.PENNYLANE_OPTIMIZERS}") + f"Please choose from the supported methods: {PennyLaneOptimizer.PENNYLANE_OPTIMIZERS}") jac = optimizer_dict.get('jac', None) jac_options = optimizer_dict.get('jac_options', None) diff --git a/openqaoa/workflows/parameters/qaoa_parameters.py b/openqaoa/workflows/parameters/qaoa_parameters.py index 1b7b470f8..72819f8e9 100644 --- a/openqaoa/workflows/parameters/qaoa_parameters.py +++ b/openqaoa/workflows/parameters/qaoa_parameters.py @@ -23,7 +23,7 @@ from openqaoa.devices import SUPPORTED_LOCAL_SIMULATORS from .parameters import Parameters from scipy.optimize._minimize import MINIMIZE_METHODS -from openqaoa.optimizers.training_vqa import CustomScipyGradientOptimizer, CustomScipyPennyLaneOptimizer +from openqaoa.optimizers.training_vqa import CustomScipyGradientOptimizer, PennyLaneOptimizer @@ -31,7 +31,7 @@ 'fourier_extended', 'fourier_w_bias', 'annealing'] ALLOWED_INIT_TYPES = ['rand', 'ramp', 'custom'] ALLOWED_MIXERS = ['x', 'xy'] -ALLOWED_MINIMIZATION_METHODS = MINIMIZE_METHODS + CustomScipyGradientOptimizer.CUSTOM_GRADIENT_OPTIMIZERS + CustomScipyPennyLaneOptimizer.PENNYLANE_OPTIMIZERS +ALLOWED_MINIMIZATION_METHODS = MINIMIZE_METHODS + CustomScipyGradientOptimizer.CUSTOM_GRADIENT_OPTIMIZERS + PennyLaneOptimizer.PENNYLANE_OPTIMIZERS ALLOWED_QVM_DEVICES = ['Aspen-11', 'Aspen-M-1'] ALLOWED_QVM_DEVICES.extend(f'{n}q-qvm' for n in range(2, 80)) diff --git a/tests/test_optimizers_pennylane.py b/tests/test_optimizers_pennylane.py index 6711d4b6f..e672b0609 100644 --- a/tests/test_optimizers_pennylane.py +++ b/tests/test_optimizers_pennylane.py @@ -10,7 +10,7 @@ from openqaoa.workflows.optimizer import QAOA from openqaoa.devices import create_device from openqaoa.problems.problem import MinimumVertexCover -from openqaoa.optimizers.training_vqa import CustomScipyPennyLaneOptimizer +from openqaoa.optimizers.training_vqa import PennyLaneOptimizer from openqaoa.optimizers.optimization_methods_pennylane import AVAILABLE_OPTIMIZERS from openqaoa.derivative_functions import derivative from openqaoa.optimizers.logger_vqa import Logger @@ -23,7 +23,7 @@ #list of optimizers to test, pennylane optimizers -list_optimizers = CustomScipyPennyLaneOptimizer.PENNYLANE_OPTIMIZERS +list_optimizers = PennyLaneOptimizer.PENNYLANE_OPTIMIZERS #create a problem g = nx.circulant_graph(4, [1]) diff --git a/tests/test_workflows.py b/tests/test_workflows.py index a077fdcee..2934ad042 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -26,7 +26,7 @@ from openqaoa.backends.simulators.qaoa_qiskit_sim import QAOAQiskitBackendShotBasedSimulator, QAOAQiskitBackendStatevecSimulator from openqaoa.backends.simulators.qaoa_vectorized import QAOAvectorizedBackendSimulator from openqaoa.optimizers.qaoa_optimizer import available_optimizers -from openqaoa.optimizers.training_vqa import ScipyOptimizer, CustomScipyGradientOptimizer, CustomScipyPennyLaneOptimizer +from openqaoa.optimizers.training_vqa import ScipyOptimizer, CustomScipyGradientOptimizer, PennyLaneOptimizer import unittest import networkx as nw import pytest @@ -580,7 +580,7 @@ def test_set_classical_optimizer_method_selectors(self): self.assertEqual(isinstance(q.optimizer, ScipyOptimizer), True) self.assertEqual(isinstance(q.optimizer, CustomScipyGradientOptimizer), False) - 
self.assertEqual(isinstance(q.optimizer, CustomScipyPennyLaneOptimizer), False) + self.assertEqual(isinstance(q.optimizer, PennyLaneOptimizer), False) for each_method in available_optimizers()['custom_scipy_gradient']: q = QAOA() @@ -590,7 +590,7 @@ def test_set_classical_optimizer_method_selectors(self): self.assertEqual(isinstance(q.optimizer, ScipyOptimizer), False) self.assertEqual(isinstance(q.optimizer, CustomScipyGradientOptimizer), True) - self.assertEqual(isinstance(q.optimizer, CustomScipyPennyLaneOptimizer), False) + self.assertEqual(isinstance(q.optimizer, PennyLaneOptimizer), False) for each_method in available_optimizers()['custom_scipy_pennylane']: q = QAOA() @@ -599,7 +599,7 @@ def test_set_classical_optimizer_method_selectors(self): self.assertEqual(isinstance(q.optimizer, ScipyOptimizer), False) self.assertEqual(isinstance(q.optimizer, CustomScipyGradientOptimizer), False) - self.assertEqual(isinstance(q.optimizer, CustomScipyPennyLaneOptimizer), True) + self.assertEqual(isinstance(q.optimizer, PennyLaneOptimizer), True) class TestingRQAOA(unittest.TestCase): """ From 3a6d3204b610c6522a168f7084b66bd8d5685059 Mon Sep 17 00:00:00 2001 From: Raul Conchello Vendrell Date: Tue, 15 Nov 2022 18:12:55 +0800 Subject: [PATCH 21/36] Creating PennyLane folder --- openqaoa/optimizers/pennylane/FILE.txt | 0 openqaoa/optimizers/pennylane/__init__.py | 22 +++ .../optimization_methods_pennylane.py | 0 .../pennylane_optimizers/gradient_descent.py | 146 ++++++++++++++++++ openqaoa/optimizers/training_vqa.py | 2 +- tests/test_optimizers_pennylane.py | 2 +- 6 files changed, 170 insertions(+), 2 deletions(-) create mode 100644 openqaoa/optimizers/pennylane/FILE.txt create mode 100644 openqaoa/optimizers/pennylane/__init__.py rename openqaoa/optimizers/{ => pennylane}/optimization_methods_pennylane.py (100%) create mode 100644 openqaoa/optimizers/pennylane/pennylane_optimizers/gradient_descent.py diff --git a/openqaoa/optimizers/pennylane/FILE.txt b/openqaoa/optimizers/pennylane/FILE.txt new file mode 100644 index 000000000..e69de29bb diff --git a/openqaoa/optimizers/pennylane/__init__.py b/openqaoa/optimizers/pennylane/__init__.py new file mode 100644 index 000000000..40ab3aeb9 --- /dev/null +++ b/openqaoa/optimizers/pennylane/__init__.py @@ -0,0 +1,22 @@ +# Copyright 2022 Entropica Labs +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
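The workflow tests above exercise the optimizer selectors end to end. Roughly, the user-facing flow looks like the following sketch (the ``MinimumVertexCover`` arguments and the exact ``maxiter`` keyword are assumptions based on the tests in this series):

    import networkx as nx
    from openqaoa.workflows.optimizer import QAOA
    from openqaoa.problems.problem import MinimumVertexCover

    g = nx.circulant_graph(4, [1])
    qubo = MinimumVertexCover(g, field=1.0, penalty=10).get_qubo_problem()

    q = QAOA()
    # A 'pennylane_*' method string is routed to the PennyLane optimizer class;
    # the prefix is stripped and the remainder selects the concrete optimizer.
    q.set_classical_optimizer(method='pennylane_vgd', jac='grad_spsa', maxiter=10)
    q.compile(problem=qubo)
    q.optimize()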
+ +""" +Optimizer directory for the classical optimization loop for QAOA + +Currently supports: + ScipyOptimizers (both gradient-free and gradient-based) +""" + +from .optimization_methods_pennylane import * \ No newline at end of file diff --git a/openqaoa/optimizers/optimization_methods_pennylane.py b/openqaoa/optimizers/pennylane/optimization_methods_pennylane.py similarity index 100% rename from openqaoa/optimizers/optimization_methods_pennylane.py rename to openqaoa/optimizers/pennylane/optimization_methods_pennylane.py diff --git a/openqaoa/optimizers/pennylane/pennylane_optimizers/gradient_descent.py b/openqaoa/optimizers/pennylane/pennylane_optimizers/gradient_descent.py new file mode 100644 index 000000000..632424b81 --- /dev/null +++ b/openqaoa/optimizers/pennylane/pennylane_optimizers/gradient_descent.py @@ -0,0 +1,146 @@ +# Copyright 2018-2021 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Gradient descent optimizer""" + +from pennylane._grad import grad as get_gradient + + +class GradientDescentOptimizer: + r"""Basic gradient-descent optimizer. + + Base class for other gradient-descent-based optimizers. + + A step of the gradient descent optimizer computes the new values via the rule + + .. math:: + + x^{(t+1)} = x^{(t)} - \eta \nabla f(x^{(t)}). + + where :math:`\eta` is a user-defined hyperparameter corresponding to step size. + + Args: + stepsize (float): the user-defined hyperparameter :math:`\eta` + """ + + def __init__(self, stepsize=0.01): + self.stepsize = stepsize + + def step_and_cost(self, objective_fn, *args, grad_fn=None, **kwargs): + """Update trainable arguments with one step of the optimizer and return the corresponding + objective function value prior to the step. + + Args: + objective_fn (function): the objective function for optimization + *args : variable length argument list for objective function + grad_fn (function): optional gradient function of the + objective function with respect to the variables ``*args``. + If ``None``, the gradient function is computed automatically. + Must return a ``tuple[array]`` with the same number of elements as ``*args``. + Each array of the tuple should have the same shape as the corresponding argument. + **kwargs : variable length of keyword arguments for the objective function + + Returns: + tuple[list [array], float]: the new variable values :math:`x^{(t+1)}` and the objective + function output prior to the step. + If single arg is provided, list [array] is replaced by array. + """ + + g, forward = self.compute_grad(objective_fn, args, kwargs, grad_fn=grad_fn) + new_args = self.apply_grad(g, args) + + if forward is None: + forward = objective_fn(*args, **kwargs) + + # unwrap from list if one argument, cleaner return + if len(new_args) == 1: + return new_args[0], forward + return new_args, forward + + def step(self, objective_fn, *args, grad_fn=None, **kwargs): + """Update trainable arguments with one step of the optimizer. 
+ + Args: + objective_fn (function): the objective function for optimization + *args : Variable length argument list for objective function + grad_fn (function): optional gradient function of the + objective function with respect to the variables ``x``. + If ``None``, the gradient function is computed automatically. + Must return a ``tuple[array]`` with the same number of elements as ``*args``. + Each array of the tuple should have the same shape as the corresponding argument. + **kwargs : variable length of keyword arguments for the objective function + + Returns: + list [array]: the new variable values :math:`x^{(t+1)}`. + If single arg is provided, list [array] is replaced by array. + """ + + g, _ = self.compute_grad(objective_fn, args, kwargs, grad_fn=grad_fn) + new_args = self.apply_grad(g, args) + + # unwrap from list if one argument, cleaner return + if len(new_args) == 1: + return new_args[0] + + return new_args + + @staticmethod + def compute_grad(objective_fn, args, kwargs, grad_fn=None): + r"""Compute gradient of the objective function at the given point and return it along with + the objective function forward pass (if available). + + Args: + objective_fn (function): the objective function for optimization + args (tuple): tuple of NumPy arrays containing the current parameters for the + objective function + kwargs (dict): keyword arguments for the objective function + grad_fn (function): optional gradient function of the objective function with respect to + the variables ``args``. If ``None``, the gradient function is computed automatically. + Must return the same shape of tuple [array] as the autograd derivative. + + Returns: + tuple (array): NumPy array containing the gradient :math:`\nabla f(x^{(t)})` and the + objective function output. If ``grad_fn`` is provided, the objective function + will not be evaluated and instead ``None`` will be returned. + """ + g = get_gradient(objective_fn) if grad_fn is None else grad_fn + grad = g(*args, **kwargs) + forward = getattr(g, "forward", None) + + num_trainable_args = sum(getattr(arg, "requires_grad", False) for arg in args) + grad = (grad,) if num_trainable_args == 1 else grad + + return grad, forward + + def apply_grad(self, grad, args): + r"""Update the variables to take a single optimization step. Flattens and unflattens + the inputs to maintain nested iterables as the parameters of the optimization. + + Args: + grad (tuple [array]): the gradient of the objective + function at point :math:`x^{(t)}`: :math:`\nabla f(x^{(t)})` + args (tuple): the current value of the variables :math:`x^{(t)}` + + Returns: + list [array]: the new values :math:`x^{(t+1)}` + """ + args_new = list(args) + + trained_index = 0 + for index, arg in enumerate(args): + if getattr(arg, "requires_grad", False): + args_new[index] = arg - self.stepsize * grad[trained_index] + + trained_index += 1 + + return args_new diff --git a/openqaoa/optimizers/training_vqa.py b/openqaoa/optimizers/training_vqa.py index 99ba5d36b..b6b03cf79 100644 --- a/openqaoa/optimizers/training_vqa.py +++ b/openqaoa/optimizers/training_vqa.py @@ -26,7 +26,7 @@ from ..basebackend import VQABaseBackend from ..qaoa_parameters.baseparams import QAOAVariationalBaseParams from . import optimization_methods as om -from . 
import optimization_methods_pennylane as ompl +from .pennylane import optimization_methods_pennylane as ompl from .logger_vqa import Logger from .result import Result diff --git a/tests/test_optimizers_pennylane.py b/tests/test_optimizers_pennylane.py index e672b0609..95a8b7ef4 100644 --- a/tests/test_optimizers_pennylane.py +++ b/tests/test_optimizers_pennylane.py @@ -11,7 +11,7 @@ from openqaoa.devices import create_device from openqaoa.problems.problem import MinimumVertexCover from openqaoa.optimizers.training_vqa import PennyLaneOptimizer -from openqaoa.optimizers.optimization_methods_pennylane import AVAILABLE_OPTIMIZERS +from openqaoa.optimizers.pennylane.optimization_methods_pennylane import AVAILABLE_OPTIMIZERS from openqaoa.derivative_functions import derivative from openqaoa.optimizers.logger_vqa import Logger from openqaoa.qaoa_parameters import create_qaoa_variational_params, QAOACircuitParams, PauliOp, Hamiltonian From 5557073e2920f8f6b2fb2d83dcba5c44ce97c9b0 Mon Sep 17 00:00:00 2001 From: Raul Conchello Vendrell Date: Wed, 16 Nov 2022 11:34:34 +0800 Subject: [PATCH 22/36] Removing the PennyLane requirement --- docs/requirements.txt | 1 - openqaoa/optimizers/pennylane/__init__.py | 18 +- .../optimizers/pennylane/fourier/__init__.py | 17 + .../pennylane/fourier/reconstruct.py | 667 +++++++++++++ .../optimizers/pennylane/math/__init__.py | 131 +++ .../pennylane/math/is_independent.py | 382 +++++++ .../pennylane/math/matrix_manipulation.py | 258 +++++ .../pennylane/math/multi_dispatch.py | 871 ++++++++++++++++ openqaoa/optimizers/pennylane/math/quantum.py | 940 ++++++++++++++++++ .../pennylane/math/single_dispatch.py | 681 +++++++++++++ openqaoa/optimizers/pennylane/math/utils.py | 451 +++++++++ .../optimizers/pennylane/numpy/__init__.py | 103 ++ openqaoa/optimizers/pennylane/numpy/fft.py | 21 + openqaoa/optimizers/pennylane/numpy/linalg.py | 21 + openqaoa/optimizers/pennylane/numpy/random.py | 59 ++ openqaoa/optimizers/pennylane/numpy/tensor.py | 313 ++++++ .../optimizers/pennylane/numpy/wrapper.py | 154 +++ .../optimization_methods_pennylane.py | 20 +- .../pennylane/pennylane_optimizers/adagrad.py | 93 ++ .../pennylane/pennylane_optimizers/adam.py | 133 +++ .../pennylane_optimizers/gradient_descent.py | 3 +- .../pennylane_optimizers/momentum.py | 84 ++ .../pennylane_optimizers/nesterov_momentum.py | 75 ++ .../pennylane_optimizers/rms_prop.py | 89 ++ .../pennylane_optimizers/rotosolve.py | 667 +++++++++++++ .../pennylane/pennylane_optimizers/spsa.py | 295 ++++++ openqaoa/optimizers/training_vqa.py | 19 +- setup.py | 4 +- 28 files changed, 6544 insertions(+), 26 deletions(-) create mode 100644 openqaoa/optimizers/pennylane/fourier/__init__.py create mode 100644 openqaoa/optimizers/pennylane/fourier/reconstruct.py create mode 100644 openqaoa/optimizers/pennylane/math/__init__.py create mode 100644 openqaoa/optimizers/pennylane/math/is_independent.py create mode 100644 openqaoa/optimizers/pennylane/math/matrix_manipulation.py create mode 100644 openqaoa/optimizers/pennylane/math/multi_dispatch.py create mode 100644 openqaoa/optimizers/pennylane/math/quantum.py create mode 100644 openqaoa/optimizers/pennylane/math/single_dispatch.py create mode 100644 openqaoa/optimizers/pennylane/math/utils.py create mode 100644 openqaoa/optimizers/pennylane/numpy/__init__.py create mode 100644 openqaoa/optimizers/pennylane/numpy/fft.py create mode 100644 openqaoa/optimizers/pennylane/numpy/linalg.py create mode 100644 openqaoa/optimizers/pennylane/numpy/random.py create mode 100644 
openqaoa/optimizers/pennylane/numpy/tensor.py create mode 100644 openqaoa/optimizers/pennylane/numpy/wrapper.py create mode 100644 openqaoa/optimizers/pennylane/pennylane_optimizers/adagrad.py create mode 100644 openqaoa/optimizers/pennylane/pennylane_optimizers/adam.py create mode 100644 openqaoa/optimizers/pennylane/pennylane_optimizers/momentum.py create mode 100644 openqaoa/optimizers/pennylane/pennylane_optimizers/nesterov_momentum.py create mode 100644 openqaoa/optimizers/pennylane/pennylane_optimizers/rms_prop.py create mode 100644 openqaoa/optimizers/pennylane/pennylane_optimizers/rotosolve.py create mode 100644 openqaoa/optimizers/pennylane/pennylane_optimizers/spsa.py diff --git a/docs/requirements.txt b/docs/requirements.txt index 707e2c861..a58ec2ad5 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -9,4 +9,3 @@ sphinx-rtd-theme==1.0.0 ipython==8.2.0 pandas>=1.3.5 amazon-braket-sdk==1.23.0 -pennylane>=0.26.0 diff --git a/openqaoa/optimizers/pennylane/__init__.py b/openqaoa/optimizers/pennylane/__init__.py index 40ab3aeb9..2aef9d078 100644 --- a/openqaoa/optimizers/pennylane/__init__.py +++ b/openqaoa/optimizers/pennylane/__init__.py @@ -19,4 +19,20 @@ ScipyOptimizers (both gradient-free and gradient-based) """ -from .optimization_methods_pennylane import * \ No newline at end of file +# from .optimization_methods_pennylane import * +from .pennylane_optimizers.adagrad import * +from .pennylane_optimizers.adam import * +from .pennylane_optimizers.gradient_descent import * +from .pennylane_optimizers.momentum import * +from .pennylane_optimizers.nesterov_momentum import * +from .pennylane_optimizers.rms_prop import * +from .pennylane_optimizers.rotosolve import * +from .pennylane_optimizers.spsa import * +from . import numpy +from . import math +from . import fourier + +#empty class to be used as a placeholder for the QNode class from PennyLane +class QNode: + def __init__(self): + pass diff --git a/openqaoa/optimizers/pennylane/fourier/__init__.py b/openqaoa/optimizers/pennylane/fourier/__init__.py new file mode 100644 index 000000000..21e23f47c --- /dev/null +++ b/openqaoa/optimizers/pennylane/fourier/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2018-2021 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This module contains functions to analyze the Fourier representation +of quantum circuits.""" +import warnings +from .reconstruct import reconstruct diff --git a/openqaoa/optimizers/pennylane/fourier/reconstruct.py b/openqaoa/optimizers/pennylane/fourier/reconstruct.py new file mode 100644 index 000000000..a78ce4b1c --- /dev/null +++ b/openqaoa/optimizers/pennylane/fourier/reconstruct.py @@ -0,0 +1,667 @@ +# Copyright 2018-2021 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Contains a function that computes the Fourier series of +a quantum expectation value.""" +from functools import wraps +from inspect import signature +import warnings + +import numpy as np +from autoray import numpy as anp +from openqaoa.optimizers import pennylane as qml # changed from the original code + + +def _reconstruct_equ(fun, num_frequency, x0=None, f0=None, interface=None): + r"""Reconstruct a univariate Fourier series with consecutive integer + frequencies, using trigonometric interpolation and equidistant shifts. + + This technique is based on + `Dirichlet kernels `_, see + `Vidal and Theis (2018) `_ or + `Wierichs et al. (2022) `_. + + Args: + fun (callable): Univariate finite Fourier series to reconstruct. + It must have signature ``float -> float`` . + num_frequency (int): Number of integer frequencies in ``fun``. + All integer frequencies below ``num_frequency`` are assumed + to be present in ``fun`` as well; if they are not, the output + is correct but the reconstruction could have been performed + with fewer evaluations of ``fun`` . + x0 (float): Center to which to shift the reconstruction. + The points at which ``fun`` is evaluated are *not* affected + by ``x0`` . + f0 (float): Value of ``fun`` at zero; providing ``f0`` saves one + evaluation of ``fun``. + interface (str): Which auto-differentiation framework to use as + interface. This determines in which interface the output + reconstructed function is intended to be used. + + Returns: + callable: Reconstructed Fourier series with ``num_frequency`` frequencies. + This function is a purely classical function. Furthermore, it is fully + differentiable. + """ + if not abs(int(num_frequency)) == num_frequency: + raise ValueError(f"num_frequency must be a non-negative integer, got {num_frequency}") + + a = (num_frequency + 0.5) / np.pi + b = 0.5 / np.pi + + shifts_pos = qml.math.arange(1, num_frequency + 1) / a + shifts_neg = -shifts_pos[::-1] + shifts = qml.math.concatenate([shifts_neg, [0.0], shifts_pos]) + shifts = anp.asarray(shifts, like=interface) + f0 = fun(0.0) if f0 is None else f0 + evals = ( + list(map(fun, shifts[:num_frequency])) + [f0] + list(map(fun, shifts[num_frequency + 1 :])) + ) + evals = anp.asarray(evals, like=interface) + + x0 = anp.asarray(np.float64(0.0), like=interface) if x0 is None else x0 + + def _reconstruction(x): + """Univariate reconstruction based on equidistant shifts and Dirichlet kernels. + The derivatives of ``sinc`` are not well-implemented in TensorFlow and Autograd; + use the Fourier transform reconstruction if this derivative is needed. + """ + _x = x - x0 - shifts + return qml.math.tensordot( + qml.math.sinc(a * _x) / qml.math.sinc(b * _x), + evals, + axes=[[0], [0]], + ) + + return _reconstruction + + +_warn_text_f0_ignored = ( + "The provided value of the function at zero will be ignored due to the " + "provided shift values. This may lead to additional evaluations of the " + "function to be reconstructed."
+) + + +def _reconstruct_gen(fun, spectrum, shifts=None, x0=None, f0=None, interface=None): + r"""Reconstruct a univariate (real-valued) Fourier series with given spectrum. + + Args: + fun (callable): Univariate finite Fourier series to reconstruct. + It must have signature ``float -> float`` . + spectrum (Collection): Frequency spectrum of the Fourier series; + non-positive frequencies are ignored. + shifts (Sequence): Shift angles at which to evaluate ``fun`` for the reconstruction. + Chosen equidistantly within the interval :math:`[0, 2\pi/f_\text{max}]` + if ``shifts=None`` , where :math:`f_\text{max}` is the biggest + frequency in ``spectrum``. + x0 (float): Center to which to shift the reconstruction. + The points at which ``fun`` is evaluated are *not* affected + by ``x0`` . + f0 (float): Value of ``fun`` at zero; If :math:`0` is among the ``shifts`` + and ``f0`` is provided, one evaluation of ``fun`` is saved. + interface (str): Which auto-differentiation framework to use as + interface. This determines in which interface the output + reconstructed function is intended to be used. + + Returns: + callable: Reconstructed Fourier series with :math:`R` frequencies in ``spectrum`` . + This function is a purely classical function. Furthermore, it is fully differentiable. + """ + # pylint: disable=unused-argument, too-many-arguments + + have_f0 = f0 is not None + have_shifts = shifts is not None + + spectrum = anp.asarray(spectrum, like=interface) + spectrum = spectrum[spectrum > 0] + f_max = qml.math.max(spectrum) + + # If no shifts are provided, choose equidistant ones + if not have_shifts: + R = qml.math.shape(spectrum)[0] + shifts = qml.math.arange(-R, R + 1) * 2 * np.pi / (f_max * (2 * R + 1)) * R + zero_idx = R + need_f0 = True + elif have_f0: + zero_idx = qml.math.where(qml.math.isclose(shifts, qml.math.zeros_like(shifts[0]))) + zero_idx = zero_idx[0][0] if (len(zero_idx) > 0 and len(zero_idx[0]) > 0) else None + need_f0 = zero_idx is not None + + # Take care of shifts close to zero if f0 was provided + if have_f0 and need_f0: + # Only one shift may be zero at a time + shifts = qml.math.concatenate( + [shifts[zero_idx : zero_idx + 1], shifts[:zero_idx], shifts[zero_idx + 1 :]] + ) + shifts = anp.asarray(shifts, like=interface) + evals = anp.asarray([f0] + list(map(fun, shifts[1:])), like=interface) + else: + shifts = anp.asarray(shifts, like=interface) + if have_f0 and not need_f0: + warnings.warn(_warn_text_f0_ignored) + evals = anp.asarray(list(map(fun, shifts)), like=interface) + + L = len(shifts) + # Construct the coefficient matrix case by case + C1 = qml.math.ones((L, 1)) + C2 = qml.math.cos(qml.math.tensordot(shifts, spectrum, axes=0)) + C3 = qml.math.sin(qml.math.tensordot(shifts, spectrum, axes=0)) + C = qml.math.hstack([C1, C2, C3]) + + # Solve the system of linear equations + cond = qml.math.linalg.cond(C) + if cond > 1e8: + warnings.warn( + f"The condition number of the Fourier transform matrix is very large: {cond}.", + UserWarning, + ) + W = qml.math.linalg.solve(C, evals) + + # Extract the Fourier coefficients + R = (L - 1) // 2 + a0 = W[0] + a = anp.asarray(W[1 : R + 1], like=interface) + b = anp.asarray(W[R + 1 :], like=interface) + + x0 = anp.asarray(np.float64(0.0), like=interface) if x0 is None else x0 + # Construct the Fourier series + def _reconstruction(x): + """Univariate reconstruction based on arbitrary shifts.""" + x = x - x0 + return ( + a0 + + qml.math.tensordot(qml.math.cos(spectrum * x), a, axes=[[0], [0]]) + + qml.math.tensordot(qml.math.sin(spectrum 
* x), b, axes=[[0], [0]]) + ) + + return _reconstruction + + +def _parse_ids(ids, info_dict): + """Parse different formats of ``ids`` into the right dictionary format, + potentially using the information in ``info_dict`` to complete it. + """ + if ids is None: + # Infer all id information from info_dict + return {outer_key: inner_dict.keys() for outer_key, inner_dict in info_dict.items()} + if isinstance(ids, str): + # ids only provides a single argument name but no parameter indices + return {ids: info_dict[ids].keys()} + if not isinstance(ids, dict): + # ids only provides argument names but no parameter indices + return {_id: info_dict[_id].keys() for _id in ids} + + return ids + + +def _parse_shifts(shifts, R, arg_name, par_idx, atol, need_f0): + """Processes shifts for a single reconstruction and determines + whether the function at the reconstruction point, ``f0``, will be + needed. + """ + # pylint: disable=too-many-arguments + _shifts = shifts.get(arg_name) + if _shifts is not None: + _shifts = _shifts.get(par_idx) + if _shifts is not None: + # Check whether the _shifts have the correct size + if len(_shifts) != 2 * R + 1: + raise ValueError( + f"The number of provided shifts ({len(_shifts)}) does not fit to the " + f"number of frequencies (2R+1={2*R+1}) for parameter {par_idx} in " + f"argument {arg_name}." + ) + if any(qml.math.isclose(_shifts, qml.math.zeros_like(_shifts), rtol=0, atol=atol)): + # If 0 is among the shifts, f0 is needed + return _shifts, True + # If 0 is not among the shifts, f0 is not needed + return _shifts, (False or need_f0) + # If no shifts are given, f0 is needed always + return _shifts, True + + +def _prepare_jobs(ids, nums_frequency, spectra, shifts, atol): + r"""For inputs to reconstruct, determine how the given information yields + function reconstruction tasks and collect them into a dictionary ``jobs``. + Also determine whether the function at zero is needed. + + Args: + ids (dict or Sequence or str): Indices for the QNode parameters with respect to which + the QNode should be reconstructed as a univariate function, per QNode argument. + Each key of the dict, entry of the list, or the single ``str`` has to be the name + of an argument of ``qnode`` . + If a ``dict`` , the values of ``ids`` have to contain the parameter indices + for the respective array-valued QNode argument represented by the key. + These indices always are tuples, i.e. ``()`` for scalar and ``(i,)`` for + one-dimensional arguments. + If a ``list`` , the parameter indices are inferred from ``nums_frequency`` if + given or ``spectra`` else. + If ``None``, all keys present in ``nums_frequency`` / ``spectra`` are considered. + nums_frequency (dict[dict]): Numbers of integer frequencies -- and biggest + frequency -- per QNode parameter. The keys have to be argument names of ``qnode`` + and the inner dictionaries have to be mappings from parameter indices to the + respective integer number of frequencies. If the QNode frequencies are not contiguous + integers, the argument ``spectra`` should be used to save evaluations of ``qnode`` . + Takes precedence over ``spectra`` and leads to usage of equidistant shifts. + spectra (dict[dict]): Frequency spectra per QNode parameter. + The keys have to be argument names of ``qnode`` and the inner dictionaries have to + be mappings from parameter indices to the respective frequency spectrum for that + parameter. Ignored if ``nums_frequency!=None``. + shifts (dict[dict]): Shift angles for the reconstruction per QNode parameter. 
+ The keys have to be argument names of ``qnode`` and the inner dictionaries have to + be mappings from parameter indices to the respective shift angles to be used for that + parameter. For :math:`R` non-zero frequencies, there must be :math:`2R+1` shifts + given. Ignored if ``nums_frequency!=None``. + atol (float): Absolute tolerance used to analyze shifts lying close to 0. + + Returns: + dict[dict]: Indices for the QNode parameters with respect to which the QNode + will be reconstructed. Cast to the dictionary structure explained above. + If the input ``ids`` was a dictionary, it is returned unmodified. + callable: The reconstruction method to use, one out of two internal methods. + dict[dict[dict]]: Keyword arguments for the reconstruction method specifying + how to carry out the reconstruction. The outer-most keys are QNode argument + names, the middle keys are parameter indices like the inner keys of + ``nums_frequency`` or ``spectra`` and the inner-most dictionary contains the + keyword arguments, i.e. the keys are keyword argument names for the + reconstruction method + bool: Whether any of the reconstruction jobs will require the evaluation + of the function at the position of reconstruction itself. + """ + if nums_frequency is None: + if spectra is None: + raise ValueError("Either nums_frequency or spectra must be given.") + + ids = _parse_ids(ids, spectra) + + if shifts is None: + shifts = {} + + need_f0 = False + recon_fn = _reconstruct_gen + + jobs = {} + + # If no shifts are provided, compute them + for arg_name, inner_dict in ids.items(): + _jobs = {} + + for par_idx in inner_dict: + + # Determine spectrum and number of frequencies, discounting for 0 + _spectrum = spectra[arg_name][par_idx] + R = len(_spectrum) - 1 + _shifts, need_f0 = _parse_shifts(shifts, R, arg_name, par_idx, atol, need_f0) + + # Store job + if R > 0: + _jobs[par_idx] = {"shifts": _shifts, "spectrum": _spectrum} + else: + # R=0 belongs to a constant function + _jobs[par_idx] = None + + jobs[arg_name] = _jobs + + else: + jobs = {} + need_f0 = True + + ids = _parse_ids(ids, nums_frequency) + + recon_fn = _reconstruct_equ + + for arg_name, inner_dict in ids.items(): + _jobs = {} + + for par_idx in inner_dict: + _num_frequency = nums_frequency[arg_name][par_idx] + _jobs[par_idx] = {"num_frequency": _num_frequency} if _num_frequency > 0 else None + + jobs[arg_name] = _jobs + + return ids, recon_fn, jobs, need_f0 + + +def reconstruct(qnode, ids=None, nums_frequency=None, spectra=None, shifts=None): + r"""Reconstruct an expectation value QNode along a single parameter direction. + This means we restrict the QNode to vary only one parameter, a univariate restriction. + For common quantum gates, such restrictions are finite Fourier series with known + frequency spectra. Thus they may be reconstructed using Dirichlet kernels or + a non-uniform Fourier transform. + + Args: + qnode (pennylane.QNode): Quantum node to be reconstructed, representing a + circuit that outputs an expectation value. + ids (dict or Sequence or str): Indices for the QNode parameters with respect to which + the QNode should be reconstructed as a univariate function, per QNode argument. + Each key of the dict, entry of the list, or the single ``str`` has to be the name + of an argument of ``qnode`` . + If a ``dict`` , the values of ``ids`` have to contain the parameter indices + for the respective array-valued QNode argument represented by the key. 
+ These indices always are tuples, i.e., ``()`` for scalar and ``(i,)`` for + one-dimensional arguments. + If a ``list`` , the parameter indices are inferred from ``nums_frequency`` if + given or ``spectra`` else. + If ``None``, all keys present in ``nums_frequency`` / ``spectra`` are considered. + nums_frequency (dict[dict]): Numbers of integer frequencies -- and biggest + frequency -- per QNode parameter. The keys have to be argument names of ``qnode`` + and the inner dictionaries have to be mappings from parameter indices to the + respective integer number of frequencies. If the QNode frequencies are not contiguous + integers, the argument ``spectra`` should be used to save evaluations of ``qnode`` . + Takes precedence over ``spectra`` and leads to usage of equidistant shifts. + spectra (dict[dict]): Frequency spectra per QNode parameter. + The keys have to be argument names of ``qnode`` and the inner dictionaries have to + be mappings from parameter indices to the respective frequency spectrum for that + parameter. Ignored if ``nums_frequency!=None``. + shifts (dict[dict]): Shift angles for the reconstruction per QNode parameter. + The keys have to be argument names of ``qnode`` and the inner dictionaries have to + be mappings from parameter indices to the respective shift angles to be used for that + parameter. For :math:`R` non-zero frequencies, there must be :math:`2R+1` shifts + given. Ignored if ``nums_frequency!=None``. + + Returns: + function: Function which accepts the same arguments as the QNode and one additional + keyword argument ``f0`` to provide the QNode value at the given arguments. + When called, this function will return a dictionary of dictionaries, + formatted like ``nums_frequency`` or ``spectra`` , + that contains the univariate reconstructions per QNode parameter. + + For each provided ``id`` in ``ids``, the QNode is restricted to varying the single QNode + parameter corresponding to the ``id`` . This univariate function is then reconstructed + via a Fourier transform or Dirichlet kernels, depending on the provided input. + Either the frequency ``spectra`` of the QNode with respect to its input parameters or + the numbers of frequencies, ``nums_frequency`` , per parameter must be provided. + + For quantum-circuit specific details, we refer the reader to + `Vidal and Theis (2018) `__ , + `Vidal and Theis (2020) `__ , + `Schuld, Sweke and Meyer (2021) `__ , + and + `Wierichs, Izaac, Wang and Lin (2022) `__ . + An introduction to the concept of quantum circuits as Fourier series can also be found in + the + `Quantum models as Fourier series `__ + and + `General parameter-shift rules `__ + demos as well as the + :mod:`qml.fourier ` module docstring. + + **Example** + + Consider the following QNode: + + .. code-block:: python + + dev = qml.device("default.qubit", wires=2) + + @qml.qnode(dev) + def circuit(x, Y): + qml.RX(x, wires=0) + qml.RY(Y[0], wires=0) + qml.RY(Y[1], wires=1) + qml.CNOT(wires=[0, 1]) + qml.RY(5* Y[1], wires=1) + return qml.expval(qml.PauliZ(0) @ qml.PauliZ(1)) + + x = 0.4 + Y = np.array([1.9, -0.5]) + f = 2.3 + + circuit_value = circuit(x, Y) + + It has three variational parameters ``x`` (a scalar) and two entries of ``Y`` + (an array-like). 
+ A reconstruction job could then be with respect to the two entries of ``Y``, + which enter the circuit with one and six integer frequencies, respectively + (see the additional examples below for details on how to obtain the frequency + spectrum if it is not known): + + >>> nums_frequency = {"Y": {(0,): 1, (1,): 6}} + >>> with qml.Tracker(circuit.device) as tracker: + ... rec = qml.fourier.reconstruct(circuit, {"Y": [(0,), (1,)]}, nums_frequency)(x, Y) + >>> rec.keys() + dict_keys(['Y']) + >>> print(*rec["Y"].items(), sep="\n") + ((0,), <function _reconstruct_equ.<locals>._reconstruction at 0x7fbd685aee50>) + ((1,), <function _reconstruct_equ.<locals>._reconstruction at 0x7fbd6866eee0>) + >>> recon_Y0 = rec["Y"][(0,)] + >>> recon_Y1 = rec["Y"][(1,)] + >>> np.isclose(recon_Y0(Y[0]), circuit_value) + True + >>> np.isclose(recon_Y1(Y[1]+1.3), circuit(x, Y+np.eye(2)[1]*1.3)) + True + + We successfully reconstructed the dependence on the two entries of ``Y`` , + keeping ``x`` and the respective other entry in ``Y`` at their initial values. + Let us also see how many executions of the device were used to obtain the + reconstructions: + + >>> tracker.totals + {'executions': 15} + + The example above made use of the fact that we already knew the frequency spectra of the + QNode of interest. However, this is in general not the case and we may need + to compute the spectrum first. This can be done with + :func:`.fourier.qnode_spectrum` : + + >>> spectra = qml.fourier.qnode_spectrum(circuit)(x, Y) + >>> spectra.keys() + dict_keys(['x', 'Y']) + >>> spectra["x"] + {(): [-1.0, 0.0, 1.0]} + >>> print(*spectra["Y"].items(), sep="\n") + ((0,), [-1.0, 0.0, 1.0]) + ((1,), [-6.0, -5.0, -4.0, -1.0, 0.0, 1.0, 4.0, 5.0, 6.0]) + + For more detailed explanations, usage details and additional examples, see + the usage details section below. + + .. details:: + :title: Usage Details + + **Input formatting** + + As described briefly above, the essential inputs to ``reconstruct`` that provide information + about the QNode are given as dictionaries of dictionaries, where the outer keys reference + the argument names of ``qnode`` and the inner keys reference the parameter indices within + each array-valued QNode argument. These parameter indices always are tuples, so that + for scalar-valued QNode parameters, the parameter index is ``()`` by convention and the + ``i`` -th parameter of a one-dimensional array can be accessed via ``(i,)`` . + For example, providing ``nums_frequency`` + + - for a scalar argument: ``nums_frequency = {"x": {(): 4}}`` + - for a one-dimensional argument: ``nums_frequency = {"Y": {(0,): 2, (1,): 9, (4,): 1}}`` + - for a three-dimensional argument: ``nums_frequency = {"Z": {(0, 2, 5): 2, (1, 1, 4): 1}}`` + + This applies to ``nums_frequency`` , ``spectra`` , and ``shifts`` . + + Note that the information provided in ``nums_frequency`` / ``spectra`` is essential for + the correctness of the reconstruction. + + On the other hand, the input format for ``ids`` is flexible and allows a collection of + parameter indices for each QNode argument name (as a ``dict`` ), a collection of argument + names (as a ``list``, ``set``, ``tuple`` or similar), or a single argument name + (as a ``str`` ) to be defined. For ``ids=None`` , all argument names contained in + ``nums_frequency`` -- or ``spectra`` if ``nums_frequency`` is not used -- are considered. + For inputs that do not specify parameter indices per QNode argument name (all formats but + ``dict`` ), these parameter indices are inferred from ``nums_frequency`` / ``spectra`` . 
+ + **Reconstruction cost** + + The reconstruction cost -- in terms of calls to ``qnode`` -- depends on the number of + frequencies given via ``nums_frequency`` or ``spectra`` . A univariate reconstruction + for :math:`R` frequencies takes :math:`2R+1` evaluations. If multiple univariate + reconstructions are performed at the same point with various numbers of frequencies + :math:`R_k` , the cost is :math:`1+2\sum_k R_k` if the shift :math:`0` is used in all + of them. This is in particular the case if ``nums_frequency`` or ``spectra`` with + ``shifts=None`` is used. + + If the number of frequencies is too large or the given frequency spectrum contains + more frequencies than the spectrum of ``qnode`` , the reconstruction is performed suboptimally + but remains correct. + For integer-valued spectra with gaps, the equidistant reconstruction is thus suboptimal + and the non-equidistant method should be used (also see the examples below). + + **Numerical stability** + + In general, the reconstruction with equidistant shifts for equidistant frequencies + (used if ``nums_frequency`` is provided) is more stable numerically than the more + general Fourier reconstruction (used if ``nums_frequency=None`` ). + If the system of equations to be solved in the Fourier transform is + ill-conditioned, a warning is raised as the output might become unstable. + Examples for this are shift values or frequencies that lie very close to each other. + + **Differentiability** + + The returned scalar functions are differentiable in all interfaces with respect + to their scalar input variable. They expect these inputs to be in the same + interface as the one used by the QNode. More advanced differentiability, for example + of the reconstructions with respect to QNode properties, is not supported + reliably yet. + + .. warning:: + + When using ``TensorFlow`` or ``Autograd`` *and* ``nums_frequency`` , + the reconstructed functions are not differentiable at the point of + reconstruction. One workaround for this is to use ``spectra`` as + input instead and to thereby use the Fourier transform instead of + Dirichlet kernels. Alternatively, the original QNode evaluation can + be used. + + **More examples** + + Consider the QNode from the example above, now with an additional, tunable frequency + ``f`` for the Pauli-X rotation that is controlled by ``x`` : + + .. code-block:: python + + @qml.qnode(dev) + def circuit(x, Y, f=1.0): + qml.RX(f * x, wires=0) + qml.RY(Y[0], wires=0) + qml.RY(Y[1], wires=1) + qml.CNOT(wires=[0, 1]) + qml.RY(5 * Y[1], wires=1) + return qml.expval(qml.PauliZ(0) @ qml.PauliZ(1)) + + f = 2.3 + + circuit_value = circuit(x, Y) + + + We repeat the reconstruction job for the dependence on ``Y[1]`` . + Note that even though information about ``Y[0]`` is contained in ``nums_frequency`` , + ``ids`` determines which reconstructions are performed. + + >>> with qml.Tracker(circuit.device) as tracker: + ... rec = qml.fourier.reconstruct(circuit, {"Y": [(1,)]}, nums_frequency)(x, Y) + >>> tracker.totals + {'executions': 13} + + As expected, we required :math:`2R+1=2\cdot 6+1=13` circuit executions. However, not + all frequencies below :math:`f_\text{max}=6` are present in the circuit, so that + a reconstruction using knowledge of the full frequency spectrum will be cheaper: + + >>> spectra = {"Y": {(1,): [0., 1., 4., 5., 6.]}} + >>> with tracker: + ... 
rec = qml.fourier.reconstruct(circuit, {"Y": [(1,)]}, None, spectra)(x, Y) + >>> tracker.totals + {'executions': 9} + + We again obtain the full univariate dependence on ``Y[1]`` but with considerably + fewer executions on the quantum device. + Once we have obtained the classical function that describes the dependence, no + additional circuit evaluations are performed: + + >>> with tracker: + ... for Y1 in np.arange(-np.pi, np.pi, 20): + ... rec["Y"][(1,)](-2.1) + >>> tracker.totals + {} + + If we want to reconstruct the dependence of ``circuit`` on ``x`` , we cannot use + ``nums_frequency`` if ``f`` is not an integer. One could rescale ``x`` to obtain + the frequency :math:`1` again, or directly use ``spectra`` . We will combine the + latter with another reconstruction with respect to ``Y[0]`` : + + >>> spectra = {"x": {(): [0., f]}, "Y": {(0,): [0., 1.]}} + >>> with tracker: + ... rec = qml.fourier.reconstruct(circuit, None, None, spectra)(x, Y, f=f) + >>> tracker.totals + {'executions': 5} + >>> recon_x = rec["x"][()] + >>> np.isclose(recon_x(x+0.5), circuit(x+0.5, Y, f=f)) + True + + Note that by convention, the parameter index for a scalar variable is ``()`` and + that the frequency :math:`0` always needs to be included in the spectra. + Furthermore, we here skipped the input ``ids`` so that the reconstruction + was performed for all keys in ``spectra`` . + The reconstruction with a single non-zero frequency + costs three evaluations of ``circuit`` for each of ``x`` and ``Y[0]`` . Performing + both reconstructions at the same position allowed us to save one of the + evaluations and reduce the number of calls to :math:`5`. + + """ + # pylint: disable=cell-var-from-loop, unused-argument + + atol = 1e-8 + ids, recon_fn, jobs, need_f0 = _prepare_jobs(ids, nums_frequency, spectra, shifts, atol) + sign_fn = qnode.func if isinstance(qnode, qml.QNode) else qnode + arg_names = list(signature(sign_fn).parameters.keys()) + arg_idx_from_names = {arg_name: i for i, arg_name in enumerate(arg_names)} + + @wraps(qnode) + def wrapper(*args, f0=None, **kwargs): + if f0 is None and need_f0: + f0 = qnode(*args, **kwargs) + + interface = qml.math.get_interface(args[0]) + + def constant_fn(x): + """Univariate reconstruction of a constant Fourier series.""" + return f0 + + # Carry out the reconstruction jobs + reconstructions = {} + for arg_name, inner_dict in jobs.items(): + _reconstructions = {} + arg_idx = arg_idx_from_names[arg_name] + + for par_idx, job in inner_dict.items(): + if job is None: + _reconstructions[par_idx] = constant_fn + else: + if len(qml.math.shape(args[arg_idx])) == 0: + shift_vec = qml.math.ones_like(args[arg_idx]) + x0 = args[arg_idx] + else: + shift_vec = qml.math.zeros_like(args[arg_idx]) + shift_vec = qml.math.scatter_element_add(shift_vec, par_idx, 1.0) + x0 = args[arg_idx][par_idx] + + def _univariate_fn(x): + new_arg = args[arg_idx] + shift_vec * x + new_args = args[:arg_idx] + (new_arg,) + args[arg_idx + 1 :] + return qnode(*new_args, **kwargs) + + _reconstructions[par_idx] = recon_fn( + _univariate_fn, **job, x0=x0, f0=f0, interface=interface + ) + + reconstructions[arg_name] = _reconstructions + + return reconstructions + + return wrapper diff --git a/openqaoa/optimizers/pennylane/math/__init__.py b/openqaoa/optimizers/pennylane/math/__init__.py new file mode 100644 index 000000000..f138fb6c4 --- /dev/null +++ b/openqaoa/optimizers/pennylane/math/__init__.py @@ -0,0 +1,131 @@ +# Copyright 2018-2021 Xanadu Quantum Technologies Inc. 
+ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This package contains unified functions for framework-agnostic tensor and array +manipulation. Given the input tensor-like object, the call is dispatched +to the corresponding array manipulation framework, allowing for end-to-end +differentiation to be preserved. + +.. warning:: + + These functions are experimental, and only a subset of common functionality is supported. + Furthermore, the names and behaviour of these functions may differ from similar + functions in common frameworks; please refer to the function docstrings for more details. + +The following frameworks are currently supported: + +* NumPy +* Autograd +* TensorFlow +* PyTorch +* JAX +""" +import autoray as ar + +from .multi_dispatch import ( + _multi_dispatch, + multi_dispatch, + array, + block_diag, + concatenate, + diag, + dot, + einsum, + eye, + frobenius_inner_product, + get_trainable_indices, + ones_like, + scatter, + scatter_element_add, + stack, + tensordot, + unwrap, + where, + add, + iscomplex, + expm, +) + +from .quantum import cov_matrix, marginal_prob +from .quantum import reduced_dm, vn_entropy, mutual_info, sqrt_matrix, fidelity, relative_entropy + +from .utils import ( + allclose, + allequal, + cast, + cast_like, + in_backprop, + is_abstract, + convert_like, + get_interface, + requires_grad, +) + +from .is_independent import is_independent + +from .matrix_manipulation import expand_matrix, reduce_matrices + +sum = ar.numpy.sum +toarray = ar.numpy.to_numpy +T = ar.numpy.transpose + + +# small constant for numerical stability that the user can modify +eps = 1e-14 + + +def __getattr__(name): + return getattr(ar.numpy, name) + + +__all__ = [ + "_multi_dispatch", + "multi_dispatch", + "allclose", + "allequal", + "array", + "block_diag", + "cast", + "cast_like", + "concatenate", + "convert_like", + "cov_matrix", + "diag", + "dot", + "einsum", + "eye", + "fidelity", + "frobenius_inner_product", + "get_interface", + "get_trainable_indices", + "in_backprop", + "is_abstract", + "is_independent", + "marginal_prob", + "mutual_info", + "ones_like", + "reduced_dm", + "relative_entropy", + "requires_grad", + "sqrt_matrix", + "scatter_element_add", + "stack", + "tensordot", + "unwrap", + "vn_entropy", + "where", + "add", + "iscomplex", + "expand_matrix", +] diff --git a/openqaoa/optimizers/pennylane/math/is_independent.py b/openqaoa/optimizers/pennylane/math/is_independent.py new file mode 100644 index 000000000..3d57fe7c1 --- /dev/null +++ b/openqaoa/optimizers/pennylane/math/is_independent.py @@ -0,0 +1,382 @@ +# Copyright 2018-2021 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This file contains the is_independent function that checks if
+a function is independent of its arguments for the interfaces
+
+* Autograd
+* JAX
+* TensorFlow
+* PyTorch
+"""
+import warnings
+
+import numpy as np
+from autograd.tracer import isbox, new_box, trace_stack
+from autograd.core import VJPNode
+
+# changed from the original pennylane code; imported as ``pnp`` (as upstream
+# PennyLane does) so that it does not shadow the vanilla-NumPy ``np`` import
+# above and so that ``pnp.array`` in ``_get_random_args`` below resolves
+from openqaoa.optimizers.pennylane import numpy as pnp
+
+
+def _autograd_is_indep_analytic(func, *args, **kwargs):
+    """Test analytically whether a function is independent of its arguments
+    using Autograd.
+
+    Args:
+        func (callable): Function to test for independence
+        args (tuple): Arguments for the function with respect to which
+            to test for independence
+        kwargs (dict): Keyword arguments for the function at which
+            (but not with respect to which) to test for independence
+
+    Returns:
+        bool: Whether the function seems to not depend on its ``args``
+            analytically. That is, an output of ``True`` means that the
+            ``args`` do *not* feed into the output.
+
+    In Autograd, we test this by sending a ``Box`` through the function and
+    testing whether the output is again a ``Box`` and on the same trace as
+    the input ``Box``. This means that we can trace actual *independence*
+    of the output from the input, not only whether the passed function is
+    constant.
+    The code is adapted from ``autograd.tracer.py::trace``.
+    """
+    # pylint: disable=protected-access
+    node = VJPNode.new_root()
+    with trace_stack.new_trace() as t:
+        start_box = new_box(args, t, node)
+        end_box = func(*start_box, **kwargs)
+
+    if type(end_box) in [tuple, list]:
+        if any(isbox(_end) and _end._trace == start_box._trace for _end in end_box):
+            return False
+    elif isinstance(end_box, np.ndarray):
+        if end_box.ndim == 0:
+            end_box = [end_box.item()]
+        if any(isbox(_end) and _end._trace == start_box._trace for _end in end_box):
+            return False
+    else:
+        if isbox(end_box) and end_box._trace == start_box._trace:
+            return False
+    return True
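+
+
+# Illustrative example (editor's sketch, not part of the original PennyLane
+# source): a function whose output ignores its argument is flagged as
+# independent, e.g. ``_autograd_is_indep_analytic(lambda x: 1.0, 0.5)`` returns
+# ``True``, while ``_autograd_is_indep_analytic(lambda x: 2.0 * x, 0.5)``
+# returns ``False``, since the output box stays on the input's trace.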
+
+
+def _jax_is_indep_analytic(func, *args, **kwargs):
+    """Test analytically whether a function is independent of its arguments
+    using JAX.
+
+    Args:
+        func (callable): Function to test for independence
+        args (tuple): Arguments for the function with respect to which
+            to test for independence
+        kwargs (dict): Keyword arguments for the function at which
+            (but not with respect to which) to test for independence
+
+    Returns:
+        bool: Whether the function seems to not depend on its ``args``
+            analytically. That is, an output of ``True`` means that the
+            ``args`` do *not* feed into the output.
+
+    In JAX, we test this by constructing the VJP of the passed function
+    and inspecting its signature.
+    The first argument of the output of ``jax.vjp`` is a ``Partial``.
+    If *any* processing happens to any input, the arguments of that
+    ``Partial`` are unequal to ``((),)``.
+    Functions that depend on the input in a trivial manner, i.e., without
+    processing it, will go undetected by this. Therefore we also
+    test the arguments of the *function* of the above ``Partial``.
+    The first of these arguments is a list of tuples and if the
+    first entry of the first tuple is not ``None``, the input arguments
+    are detected to actually feed into the output.
+
+    .. warning::
+
+        This is an experimental function and unknown edge
+        cases may exist for this two-stage test.
+    """
+    import jax  # pylint: disable=import-outside-toplevel
+
+    mapped_func = lambda *_args: func(*_args, **kwargs)  # pylint: disable=unnecessary-lambda
+    _vjp = jax.vjp(mapped_func, *args)[1]
+    if _vjp.args[0].args != ((),):
+        return False
+    if _vjp.args[0].func.args[0][0][0] is not None:
+        return False
+
+    return True
+
+
+def _tf_is_indep_analytic(func, *args, **kwargs):
+    """Test analytically whether a function is independent of its arguments
+    using TensorFlow.
+
+    Args:
+        func (callable): Function to test for independence
+        args (tuple): Arguments for the function with respect to which
+            to test for independence
+        kwargs (dict): Keyword arguments for the function at which
+            (but not with respect to which) to test for independence
+
+    Returns:
+        bool: Whether the function seems to not depend on its ``args``
+            analytically. That is, an output of ``True`` means that the
+            ``args`` do *not* feed into the output.
+
+    In TensorFlow, we test this by computing the Jacobian of the output(s)
+    with respect to the arguments. If the Jacobian is ``None``, the output(s)
+    is/are independent.
+
+    .. note::
+
+        Of all interfaces, this is currently the most robust for the
+        ``is_independent`` functionality.
+    """
+    import tensorflow as tf  # pylint: disable=import-outside-toplevel
+
+    with tf.GradientTape(persistent=True) as tape:
+        out = func(*args, **kwargs)
+
+    if isinstance(out, tuple):
+        jac = [tape.jacobian(_out, args) for _out in out]
+        return all(all(__jac is None for __jac in _jac) for _jac in jac)
+
+    jac = tape.jacobian(out, args)
+    return all(_jac is None for _jac in jac)
+
+
+def _get_random_args(args, interface, num, seed, bounds):
+    r"""Generate random arguments of a given structure.
+
+    Args:
+        args (tuple): Original input arguments
+        interface (str): Interface of the QNode into which the arguments will be fed
+        num (int): Number of random argument sets to generate
+        seed (int): Seed for random generation
+        bounds (tuple[int]): Range within which to sample the random parameters.
+
+    Returns:
+        list[tuple]: List of length ``num`` with each entry being a random instance
+        of arguments like ``args``.
+
+    This function generates ``num`` many tuples of random arguments in the given range
+    that have the same shapes as ``args``.
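+
+    A minimal added example for the NumPy branch (an editor's sketch, not from
+    the upstream docstring; the sampled values depend on ``seed`` but the
+    returned structure does not):
+
+    >>> args = (np.zeros(3),)
+    >>> rnd_args = _get_random_args(args, "numpy", 2, 42, (-1, 1))
+    >>> len(rnd_args), rnd_args[0][0].shape
+    (2, (3,))
+    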
+ """ + width = bounds[1] - bounds[0] + if interface == "tf": + import tensorflow as tf # pylint: disable=import-outside-toplevel + + tf.random.set_seed(seed) + rnd_args = [] + for _ in range(num): + _args = (tf.random.uniform(tf.shape(_arg)) * width + bounds[0] for _arg in args) + _args = tuple( + tf.Variable(_arg) if isinstance(arg, tf.Variable) else _arg + for _arg, arg in zip(_args, args) + ) + rnd_args.append(_args) + elif interface == "torch": + import torch # pylint: disable=import-outside-toplevel + + torch.random.manual_seed(seed) + rnd_args = [ + tuple(torch.rand(np.shape(arg)) * width + bounds[0] for arg in args) for _ in range(num) + ] + else: + np.random.seed(seed) + rnd_args = [ + tuple(np.random.random(np.shape(arg)) * width + bounds[0] for arg in args) + for _ in range(num) + ] + if interface == "autograd": + + # Mark the arguments as trainable with Autograd + rnd_args = [tuple(pnp.array(a, requires_grad=True) for a in arg) for arg in rnd_args] + + return rnd_args + + +def _is_indep_numerical(func, interface, args, kwargs, num_pos, seed, atol, rtol, bounds): + """Test whether a function returns the same output at random positions. + + Args: + func (callable): Function to be tested + interface (str): Interface used by ``func`` + args (tuple): Positional arguments with respect to which to test + kwargs (dict): Keyword arguments for ``func`` at which to test; + the ``kwargs`` are kept fixed in this test. + num_pos (int): Number of random positions to test + seed (int): Seed for random number generator + atol (float): Absolute tolerance for comparing the outputs + rtol (float): Relative tolerance for comparing the outputs + bounds (tuple[int, int]): Limits of the range from which to sample + + Returns: + bool: Whether ``func`` returns the same output at the randomly + chosen points. + """ + + # pylint:disable=too-many-arguments + + rnd_args = _get_random_args(args, interface, num_pos, seed, bounds) + original_output = func(*args, **kwargs) + is_tuple_valued = isinstance(original_output, tuple) + for _rnd_args in rnd_args: + new_output = func(*_rnd_args, **kwargs) + if is_tuple_valued: + if not all( + np.allclose(new, orig, atol=atol, rtol=rtol) + for new, orig in zip(new_output, original_output) + ): + return False + else: + if not np.allclose(new_output, original_output, atol=atol, rtol=rtol): + return False + + return True + + +def is_independent( + func, + interface, + args, + kwargs=None, + num_pos=5, + seed=9123, + atol=1e-6, + rtol=0, + bounds=(-np.pi, np.pi), +): + """Test whether a function is independent of its input arguments, + both numerically and analytically. + + Args: + func (callable): Function to be tested + interface (str): Autodiff framework used by ``func``. Must correspond to one + of the supported PennyLane interface strings, such as ``"autograd"``, + ``"tf"``, ``"torch"``, ``"jax"``. + args (tuple): Positional arguments with respect to which to test + kwargs (dict): Keyword arguments for ``func`` at which to test; + the keyword arguments are kept fixed in this test. + num_pos (int): Number of random positions to test + seed (int): Seed for the random number generator + atol (float): Absolute tolerance for comparing the outputs + rtol (float): Relative tolerance for comparing the outputs + bounds (tuple[float]): 2-tuple containing limits of the range from which to sample + + Returns: + bool: Whether ``func`` returns the same output at randomly + chosen points and is numerically independent of its arguments. + + .. 
warning:: + + This function is experimental. + As such, it might yield wrong results and might behave + slightly differently in distinct autodifferentiation frameworks + for some edge cases. + For example, a currently known edge case are piecewise + functions that use classical control and simultaneously + return (almost) constant output, such as + + .. code-block:: python + + def func(x): + if abs(x) <1e-5: + return x + else: + return 0. * x + + The analytic and numeric tests used are as follows. + + - The analytic test performed depends on the provided ``interface``, + both in its method and its degree of reliability. + + - For the numeric test, the function is evaluated at a series of random positions, + and the outputs numerically compared to verify that the output + is constant. + + .. warning :: + + Currently, no analytic test is available for the PyTorch interface. + When using PyTorch, a warning will be raised and only the + numeric test is performed. + + .. note :: + + Due to the structure of ``is_independent``, it is possible that it + errs on the side of reporting a dependent function to be independent + (a false positive). However, reporting an independent function to be + dependent (a false negative) is *highly* unlikely. + + **Example** + + Consider the (linear) function + + .. code-block:: python + + def lin(x, weights=None): + return np.dot(x, weights) + + This function clearly depends on ``x``. We may check for this via + + .. code-block:: pycon + + >>> x = np.array([0.2, 9.1, -3.2], requires_grad=True) + >>> weights = np.array([1.1, -0.7, 1.8], requires_grad=True) + >>> qml.math.is_independent(lin, "autograd", (x,), {"weights": weights}) + False + + However, the Jacobian will not depend on ``x`` because ``lin`` is a + linear function: + + .. code-block:: pycon + + >>> jac = qml.jacobian(lin) + >>> qml.math.is_independent(jac, "autograd", (x,), {"weights": weights}) + True + + Note that a function ``f = lambda x: 0.0 * x`` will be counted as *dependent* on ``x`` + because it does depend on ``x`` *functionally*, even if the value is constant for all ``x``. + This means that ``is_independent`` is a stronger test than simply verifying functions + have constant output. + """ + + # pylint:disable=too-many-arguments + + if not interface in {"autograd", "jax", "tf", "torch", "tensorflow"}: + raise ValueError(f"Unknown interface: {interface}") + + kwargs = kwargs or {} + + if interface == "autograd": + if not _autograd_is_indep_analytic(func, *args, **kwargs): + return False + + if interface == "jax": + if not _jax_is_indep_analytic(func, *args, **kwargs): + return False + + if interface in ("tf", "tensorflow"): + if not _tf_is_indep_analytic(func, *args, **kwargs): + return False + + if interface == "torch": + warnings.warn( + "The function is_independent is only available numerically for the PyTorch interface." + " Make sure that sampling positions and evaluating the function at these positions" + " is a sufficient test, or change the interface." + ) + + return _is_indep_numerical(func, interface, args, kwargs, num_pos, seed, atol, rtol, bounds) diff --git a/openqaoa/optimizers/pennylane/math/matrix_manipulation.py b/openqaoa/optimizers/pennylane/math/matrix_manipulation.py new file mode 100644 index 000000000..f8a342b8d --- /dev/null +++ b/openqaoa/optimizers/pennylane/math/matrix_manipulation.py @@ -0,0 +1,258 @@ +# Copyright 2018-2022 Xanadu Quantum Technologies Inc. 
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This module contains methods to expand the matrix representation of an operator
+to a higher Hilbert space with re-ordered wires."""
+import copy
+from functools import reduce
+from typing import Generator, Tuple
+
+import numpy as np
+from scipy.sparse import csr_matrix, eye, issparse, kron
+
+from openqaoa.optimizers import pennylane as qml  # changed from the original code
+Wires = None
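+# Editorial note (added, not in the original source): ``Wires`` is stubbed to
+# ``None`` because the vendored subset does not include ``pennylane.wires``;
+# the name only appears in type annotations below, so a runtime placeholder
+# suffices.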
+
+
+def expand_matrix(base_matrix, wires, wire_order=None, sparse_format="csr"):
+    """Re-express a base matrix acting on a subspace defined by a set of wire labels
+    according to a global wire order.
+
+    Args:
+        base_matrix (tensor_like): base matrix to expand
+        wires (Iterable): wires determining the subspace that base matrix acts on; a base matrix of
+            dimension :math:`2^n` acts on a subspace of :math:`n` wires
+        wire_order (Iterable): global wire order, which has to contain all wire labels in ``wires``, but can also
+            contain additional labels
+        sparse_format (str): if the base matrix is a scipy sparse matrix then this is the string representing the
+            preferred scipy sparse matrix format to cast the expanded matrix to
+
+    Returns:
+        tensor_like: expanded matrix
+
+    **Example**
+
+    If the wire order is ``None`` or identical to ``wires``, the original matrix gets returned:
+
+    >>> base_matrix = np.array([[1, 2, 3, 4],
+    ...                         [5, 6, 7, 8],
+    ...                         [9, 10, 11, 12],
+    ...                         [13, 14, 15, 16]])
+    >>> print(expand_matrix(base_matrix, wires=[0, 2], wire_order=[0, 2]))
+    [[ 1  2  3  4]
+     [ 5  6  7  8]
+     [ 9 10 11 12]
+     [13 14 15 16]]
+    >>> print(expand_matrix(base_matrix, wires=[0, 2]))
+    [[ 1  2  3  4]
+     [ 5  6  7  8]
+     [ 9 10 11 12]
+     [13 14 15 16]]
+
+    If the wire order is a permutation of ``wires``, the entries of the base matrix get permuted:
+
+    >>> print(expand_matrix(base_matrix, wires=[0, 2], wire_order=[2, 0]))
+    [[ 1  3  2  4]
+     [ 9 11 10 12]
+     [ 5  7  6  8]
+     [13 15 14 16]]
+
+    If the wire order contains wire labels not found in ``wires``, the matrix gets expanded:
+
+    >>> print(expand_matrix(base_matrix, wires=[0, 2], wire_order=[0, 1, 2]))
+    [[ 1  2  0  0  3  4  0  0]
+     [ 5  6  0  0  7  8  0  0]
+     [ 0  0  1  2  0  0  3  4]
+     [ 0  0  5  6  0  0  7  8]
+     [ 9 10  0  0 11 12  0  0]
+     [13 14  0  0 15 16  0  0]
+     [ 0  0  9 10  0  0 11 12]
+     [ 0  0 13 14  0  0 15 16]]
+
+    The method works with tensors from all autodifferentiation frameworks, for example:
+
+    >>> base_matrix_torch = torch.tensor([[1., 2.],
+    ...                                   [3., 4.]], requires_grad=True)
+    >>> res = expand_matrix(base_matrix_torch, wires=["b"], wire_order=["a", "b"])
+    >>> type(res)
+    torch.Tensor
+    >>> res.requires_grad
+    True
+
+    The method works with scipy sparse matrices, for example:
+
+    >>> from scipy import sparse
+    >>> mat = sparse.csr_matrix([[0, 1], [1, 0]])
+    >>> qml.math.expand_matrix(mat, wires=[1], wire_order=[0,1]).toarray()
+    array([[0., 1., 0., 0.],
+           [1., 0., 0., 0.],
+           [0., 0., 0., 1.],
+           [0., 0., 1., 0.]])
+
+    """
+
+    if (wire_order is None) or (wire_order == wires):
+        return base_matrix
+
+    interface = qml.math.get_interface(base_matrix)  # pylint: disable=protected-access
+    if interface == "scipy" and issparse(base_matrix):
+        return _sparse_expand_matrix(base_matrix, wires, wire_order, format=sparse_format)
+
+    wire_order = qml.wires.Wires(wire_order)
+    n = len(wires)
+    shape = qml.math.shape(base_matrix)
+    batch_dim = shape[0] if len(shape) == 3 else None
+
+    # operator's wire positions relative to wire ordering
+    op_wire_pos = wire_order.indices(wires)
+
+    identity = qml.math.reshape(
+        qml.math.eye(2 ** len(wire_order), like=interface), [2] * (len(wire_order) * 2)
+    )
+    # The first axis entries are range(n, 2n) for batch_dim=None and range(n+1, 2n+1) else
+    axes = (list(range(-n, 0)), op_wire_pos)
+
+    # reshape op.matrix()
+    op_matrix_interface = qml.math.convert_like(base_matrix, identity)
+    shape = [batch_dim] + [2] * (n * 2) if batch_dim else [2] * (n * 2)
+    mat_op_reshaped = qml.math.reshape(op_matrix_interface, shape)
+    mat_tensordot = qml.math.tensordot(
+        mat_op_reshaped, qml.math.cast_like(identity, mat_op_reshaped), axes
+    )
+
+    unused_idxs = [idx for idx in range(len(wire_order)) if idx not in op_wire_pos]
+    # permute matrix axes to match wire ordering
+    perm = op_wire_pos + unused_idxs
+    sources = wire_order.indices(wire_order)
+    if batch_dim:
+        perm = [p + 1 for p in perm]
+        sources = [s + 1 for s in sources]
+
+    mat = qml.math.moveaxis(mat_tensordot, sources, perm)
+    shape = [batch_dim] + [2 ** len(wire_order)] * 2 if batch_dim else [2 ** len(wire_order)] * 2
+    mat = qml.math.reshape(mat, shape)
+
+    return mat
+
+
+def reduce_matrices(
+    mats_and_wires_gen: Generator[Tuple[np.ndarray, Wires], None, None], reduce_func: callable
+) -> Tuple[np.ndarray, Wires]:
+    """Apply the given ``reduce_func`` cumulatively to the items of the ``mats_and_wires_gen``
+    generator, from left to right, so as to reduce the sequence to a tuple containing a single
+    matrix and the wires it acts on.
+ + Args: + mats_and_wires_gen (Generator): generator of tuples containing the matrix and the wires of + each operator + reduce_func (callable): function used to reduce the sequence of operators + + Returns: + Tuple[tensor, Wires]: a tuple containing the reduced matrix and the wires it acts on + """ + + def expand_and_reduce(op1_tuple: Tuple[np.ndarray, Wires], op2_tuple: Tuple[np.ndarray, Wires]): + mat1, wires1 = op1_tuple + mat2, wires2 = op2_tuple + expanded_wires = wires1 + wires2 + mat1 = qml.math.expand_matrix(mat1, wires1, wire_order=expanded_wires) + mat2 = qml.math.expand_matrix(mat2, wires2, wire_order=expanded_wires) + return reduce_func(mat1, mat2), expanded_wires + + reduced_mat, final_wires = reduce(expand_and_reduce, mats_and_wires_gen) + + return reduced_mat, final_wires + + +def _local_sparse_swap_mat(i, n, format="csr"): + """Helper function which generates the sparse matrix of SWAP + for qubits: i <--> i+1 with final shape (2**n, 2**n).""" + assert i < n - 1 + swap_mat = csr_matrix([[1, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 0, 1]]) + + j = i + 1 # i is the index of the qubit, j is the number of qubits prior to and include qubit i + return kron( + kron(eye(2 ** (j - 1)), swap_mat), eye(2 ** (n - (j + 1))), format=format + ) # (j - 1) + 2 + (n - (j+1)) = n + + +def _sparse_swap_mat(i, j, n, format="csr"): + """Helper function which generates the sparse matrix of SWAP + for qubits: i <--> j with final shape (2**n, 2**n).""" + assert i < n and j < n + if i == j: + return eye(2**n, format=format) + + (small_i, big_j) = (i, j) if i < j else (j, i) + store_swaps = [ + _local_sparse_swap_mat(index, n, format=format) for index in range(small_i, big_j) + ] + + res = eye(2**n, format=format) + for mat in store_swaps: # swap i --> j + res @= mat + + for mat in store_swaps[-2::-1]: # bring j --> old_i + res @= mat + + return res + + +def _sparse_expand_matrix(base_matrix, wires, wire_order, format="csr"): + """Re-express a sparse base matrix acting on a subspace defined by a set of wire labels + according to a global wire order. 
+
+    Args:
+        base_matrix (scipy.sparse.spmatrix): base matrix to expand
+        wires (Iterable): wires determining the subspace that base matrix acts on; a base matrix of
+            dimension :math:`2^n` acts on a subspace of :math:`n` wires
+        wire_order (Iterable): global wire order, which has to contain all wire labels in ``wires``, but can also
+            contain additional labels
+        format (str): string representing the preferred scipy sparse matrix format to cast the
+            expanded matrix to
+
+    Returns:
+        tensor_like: expanded matrix
+    """
+    n_wires = len(wires)
+    n_total_wires = len(wire_order)
+
+    if isinstance(wires, qml.wires.Wires):
+        expanded_wires = wires.tolist()
+    else:
+        expanded_wires = list(copy.copy(wires))
+
+    for wire in wire_order:
+        if wire not in wires:
+            expanded_wires.append(wire)
+
+    num_missing_wires = n_total_wires - n_wires
+    if num_missing_wires > 0:
+        expanded_matrix = kron(
+            base_matrix, eye(2**num_missing_wires, format=format), format=format
+        )  # added missing wires at the end
+    else:
+        expanded_matrix = copy.copy(base_matrix)
+
+    U = eye(2**n_total_wires, format=format)
+    for i in range(n_total_wires):
+        if expanded_wires[i] != wire_order[i]:
+            j = expanded_wires.index(wire_order[i])  # location of correct wire
+            U = U @ _sparse_swap_mat(
+                i, j, n_total_wires, format=format
+            )  # swap incorrect wire for correct wire
+
+            expanded_wires[i], expanded_wires[j] = expanded_wires[j], expanded_wires[i]
+
+    expanded_matrix = U.T @ expanded_matrix @ U
+    return expanded_matrix.asformat(format)
diff --git a/openqaoa/optimizers/pennylane/math/multi_dispatch.py b/openqaoa/optimizers/pennylane/math/multi_dispatch.py
new file mode 100644
index 000000000..9502ca498
--- /dev/null
+++ b/openqaoa/optimizers/pennylane/math/multi_dispatch.py
@@ -0,0 +1,871 @@
+# Copyright 2018-2021 Xanadu Quantum Technologies Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Multiple dispatch functions"""
+# pylint: disable=import-outside-toplevel,too-many-return-statements
+import warnings
+from collections.abc import Sequence
+import functools
+
+from autograd.numpy.numpy_boxes import ArrayBox
+from autoray import numpy as np
+from numpy import ndarray
+
+from . import single_dispatch  # pylint:disable=unused-import
+from .utils import cast, get_interface, requires_grad
+
+
+# pylint:disable=redefined-outer-name
+def array(*args, like=None, **kwargs):
+    """Creates an array or tensor object of the target framework.
+
+    This method preserves the Torch device used.
+
+    Returns:
+        tensor_like: the tensor_like object of the framework
+    """
+    res = np.array(*args, like=like, **kwargs)
+    if like is not None and get_interface(like) == "torch":
+        res = res.to(device=like.device)
+    return res
+
+
+def eye(*args, like=None, **kwargs):
+    """Creates an identity array or tensor object of the target framework.
+
+    This method preserves the Torch device used.
+
+    Returns:
+        tensor_like: the tensor_like object of the framework
+    """
+    res = np.eye(*args, like=like, **kwargs)
+    if like is not None and get_interface(like) == "torch":
+        res = res.to(device=like.device)
+    return res
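+
+
+# Added usage note (editor's sketch, not in the original source): ``array`` and
+# ``eye`` accept a reference tensor via ``like`` to select the framework, e.g.,
+# assuming torch is installed, ``eye(2, like=torch.tensor([1.0]))`` returns a
+# Torch identity matrix and, for GPU reference tensors, stays on the same device.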
+
+
+def _multi_dispatch(values):
+    """Determines the correct framework to dispatch to given a
+    sequence of tensor-like objects.
+
+    Args:
+        values (Sequence[tensor_like]): a sequence of tensor-like objects
+
+    Returns:
+        str: the name of the interface
+
+    To determine the framework to dispatch to, the following rules
+    are applied:
+
+    * Tensors that are incompatible (such as Torch and TensorFlow tensors)
+      cannot both be present.
+
+    * Autograd tensors *may* be present alongside Torch and TensorFlow tensors,
+      but Torch and TensorFlow take precedence; the autograd arrays will
+      be treated as non-differentiable NumPy arrays. A warning will be raised
+      suggesting that vanilla NumPy be used instead.
+
+    * Vanilla NumPy arrays and SciPy sparse matrices can be used alongside other tensor objects;
+      they will always be treated as non-differentiable constants.
+    """
+    if "resource_variable" in getattr(values, "__module__", tuple()):
+        values = np.asarray(values)
+
+    interfaces = {get_interface(v) for v in values}
+
+    if len(set(interfaces) - {"numpy", "scipy", "autograd"}) > 1:
+        # contains multiple non-autograd interfaces
+        raise ValueError("Tensors contain mixed types; cannot determine dispatch library")
+
+    non_numpy_scipy_interfaces = set(interfaces) - {"numpy", "scipy"}
+
+    if len(non_numpy_scipy_interfaces) > 1:
+        # contains autograd and another interface
+        warnings.warn(
+            f"Contains tensors of types {non_numpy_scipy_interfaces}; dispatch will prioritize "
+            "TensorFlow and PyTorch over autograd. Consider replacing Autograd with vanilla NumPy.",
+            UserWarning,
+        )
+
+    if "tensorflow" in interfaces:
+        return "tensorflow"
+
+    if "torch" in interfaces:
+        return "torch"
+
+    if "autograd" in interfaces:
+        return "autograd"
+
+    if "jax" in interfaces:
+        return "jax"
+
+    return "numpy"
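+
+
+# Added example (editor's sketch, not in the original source): per the
+# precedence rules above, mixing frameworks dispatches to the dominant one,
+# e.g. ``_multi_dispatch([np.ones(2), torch.ones(2)])`` returns ``"torch"``.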
+
+
+def multi_dispatch(argnum=None, tensor_list=None):
+    r"""Decorator to dispatch arguments handled by the interface.
+
+    This helps simplify definitions of new functions inside PennyLane. We can
+    decorate the function, indicating the arguments that are tensors handled
+    by the interface:
+
+
+    >>> @qml.math.multi_dispatch(argnum=[0, 1])
+    ... def some_function(tensor1, tensor2, option, like):
+    ...     # the interface string is stored in `like`.
+    ...     ...
+
+
+    Args:
+        argnum (list[int]): A list of integers indicating the indices
+            to dispatch (i.e., the arguments that are tensors handled by an interface).
+            If ``None``, dispatch over all arguments.
+        tensor_lists (list[int]): a list of integers indicating which indices
+            in ``argnum`` are expected to be lists of tensors. If an argument
+            marked as tensor list is not a ``tuple`` or ``list``, it is treated
+            as if it was not marked as tensor list. If ``None``, this option is ignored.
+
+    Returns:
+        func: A wrapped version of the function, which will automatically attempt
+        to dispatch to the correct autodifferentiation framework for the requested
+        arguments. Note that the ``like`` argument will be optional, but can be provided
+        if an explicit override is needed.
+
+    .. seealso:: :func:`pennylane.math.multi_dispatch._multi_dispatch`
+
+    .. note::
+        This decorator makes the interface argument "like" optional as it utilizes
+        the utility function `_multi_dispatch` to automatically detect the appropriate
+        interface based on the tensor types.
+
+    **Examples**
+
+    We can redefine external functions to be suitable for PennyLane. Here, we
+    redefine Autoray's ``stack`` function.
+
+    >>> stack = multi_dispatch(argnum=0, tensor_list=0)(autoray.numpy.stack)
+
+    We can also use the ``multi_dispatch`` decorator to dispatch
+    arguments of more elaborate custom functions. Here is an example
+    of a ``custom_function`` that
+    computes :math:`c \\sum_i (v_i)^T v_i`, where :math:`v_i` are vectors in ``values`` and
+    :math:`c` is a fixed ``coefficient``. Note how ``argnum=0`` only points to the first argument ``values``,
+    how ``tensor_list=0`` indicates that said first argument is a list of vectors, and that ``coefficient`` is not
+    dispatched.
+
+    >>> @math.multi_dispatch(argnum=0, tensor_list=0)
+    >>> def custom_function(values, like, coefficient=10):
+    >>>     # values is a list of vectors
+    >>>     # like can force the interface (optional)
+    >>>     if like == "tensorflow":
+    >>>         # add interface-specific handling if necessary
+    >>>     return coefficient * np.sum([math.dot(v,v) for v in values])
+
+    We can then run
+
+    >>> values = [np.array([1, 2, 3]) for _ in range(5)]
+    >>> custom_function(values)
+    700
+
+    """
+
+    def decorator(fn):
+        @functools.wraps(fn)
+        def wrapper(*args, **kwargs):
+            argnums = argnum if argnum is not None else list(range(len(args)))
+            tensor_lists = tensor_list if tensor_list is not None else []
+
+            if not isinstance(argnums, Sequence):
+                argnums = [argnums]
+            if not isinstance(tensor_lists, Sequence):
+                tensor_lists = [tensor_lists]
+
+            dispatch_args = []
+
+            for a in argnums:
+                # Only use extend if the marked argument really
+                # is a (native) python Sequence
+                if a in tensor_lists and isinstance(args[a], (list, tuple)):
+                    dispatch_args.extend(args[a])
+                else:
+                    dispatch_args.append(args[a])
+
+            interface = kwargs.pop("like", None)
+            interface = interface or _multi_dispatch(dispatch_args)
+            kwargs["like"] = interface
+
+            return fn(*args, **kwargs)
+
+        return wrapper
+
+    return decorator
+
+
+@multi_dispatch(argnum=[0], tensor_list=[0])
+def block_diag(values, like=None):
+    """Combine a sequence of 2D tensors to form a block diagonal tensor.
+
+    Args:
+        values (Sequence[tensor_like]): Sequence of 2D arrays/tensors to form
+            the block diagonal tensor.
+
+    Returns:
+        tensor_like: the block diagonal tensor
+
+    **Example**
+
+    >>> t = [
+    ...     np.array([[1, 2], [3, 4]]),
+    ...     torch.tensor([[1, 2, 3], [-1, -6, -3]]),
+    ...     torch.tensor(5)
+    ... ]
+    >>> qml.math.block_diag(t)
+    tensor([[ 1,  2,  0,  0,  0,  0],
+            [ 3,  4,  0,  0,  0,  0],
+            [ 0,  0,  1,  2,  3,  0],
+            [ 0,  0, -1, -6, -3,  0],
+            [ 0,  0,  0,  0,  0,  5]])
+    """
+    values = np.coerce(values, like=like)
+    return np.block_diag(values, like=like)
+
+
+@multi_dispatch(argnum=[0], tensor_list=[0])
+def concatenate(values, axis=0, like=None):
+    """Concatenate a sequence of tensors along the specified axis.
+
+    .. warning::
+
+        Tensors that are incompatible (such as Torch and TensorFlow tensors)
+        cannot both be present.
+
+    Args:
+        values (Sequence[tensor_like]): Sequence of tensor-like objects to
+            concatenate. The objects must have the same shape, except in the dimension corresponding
+            to axis (the first, by default).
+        axis (int): The axis along which the input tensors are concatenated. If axis is None,
+            tensors are flattened before use. Default is 0.
+
+    Returns:
+        tensor_like: The concatenated tensor.
+
+    **Example**
+
+    >>> x = tf.constant([0.6, 0.1, 0.6])
+    >>> y = tf.Variable([0.1, 0.2, 0.3])
+    >>> z = np.array([5., 8., 101.])
+    >>> concatenate([x, y, z])
+
+    """
+
+    if like == "torch":
+        import torch
+
+        device = (
+            "cuda"
+            if any(t.device.type == "cuda" for t in values if isinstance(t, torch.Tensor))
+            else "cpu"
+        )
+
+        if axis is None:
+            # flatten and then concatenate zero'th dimension
+            # to reproduce numpy's behaviour
+            values = [
+                np.flatten(torch.as_tensor(t, device=torch.device(device)))  # pragma: no cover
+                for t in values
+            ]
+            axis = 0
+        else:
+            values = [
+                torch.as_tensor(t, device=torch.device(device)) for t in values  # pragma: no cover
+            ]
+
+    if like == "tensorflow" and axis is None:
+        # flatten and then concatenate zero'th dimension
+        # to reproduce numpy's behaviour
+        values = [np.flatten(np.array(t)) for t in values]
+        axis = 0
+
+    return np.concatenate(values, axis=axis, like=like)
+
+
+@multi_dispatch(argnum=[0], tensor_list=[0])
+def diag(values, k=0, like=None):
+    """Construct a diagonal tensor from a list of scalars.
+
+    Args:
+        values (tensor_like or Sequence[scalar]): sequence of numeric values that
+            make up the diagonal
+        k (int): The diagonal in question. ``k=0`` corresponds to the main diagonal.
+            Use ``k>0`` for diagonals above the main diagonal, and ``k<0`` for
+            diagonals below the main diagonal.
+
+    Returns:
+        tensor_like: the 2D diagonal tensor
+
+    **Example**
+
+    >>> x = [1., 2., tf.Variable(3.)]
+    >>> qml.math.diag(x)
+
+    >>> y = tf.Variable([0.65, 0.2, 0.1])
+    >>> qml.math.diag(y, k=-1)
+
+    >>> z = torch.tensor([0.1, 0.2])
+    >>> qml.math.diag(z, k=1)
+    tensor([[0.0000, 0.1000, 0.0000],
+            [0.0000, 0.0000, 0.2000],
+            [0.0000, 0.0000, 0.0000]])
+    """
+    if isinstance(values, (list, tuple)):
+        values = np.stack(np.coerce(values, like=like), like=like)
+
+    return np.diag(values, k=k, like=like)
+
+
+@multi_dispatch(argnum=[0, 1])
+def dot(tensor1, tensor2, like=None):
+    """Returns the matrix or dot product of two tensors.
+
+    * If both tensors are 0-dimensional, elementwise multiplication
+      is performed and a 0-dimensional scalar returned.
+
+    * If both tensors are 1-dimensional, the dot product is returned.
+
+    * If the first array is 2-dimensional and the second array 1-dimensional,
+      the matrix-vector product is returned.
+
+    * If both tensors are 2-dimensional, the matrix product is returned.
+
+    * Finally, if the first array is N-dimensional and the second array
+      M-dimensional, a sum product over the last dimension of the first array,
+      and the second-to-last dimension of the second array is returned.
+
+    Args:
+        tensor1 (tensor_like): input tensor
+        tensor2 (tensor_like): input tensor
+
+    Returns:
+        tensor_like: the matrix or dot product of two tensors
+    """
+    x, y = np.coerce([tensor1, tensor2], like=like)
+
+    if like == "torch":
+        if x.ndim == 0 and y.ndim == 0:
+            return x * y
+
+        if x.ndim <= 2 and y.ndim <= 2:
+            return x @ y
+
+        return np.tensordot(x, y, axes=[[-1], [-2]], like=like)
+
+    if like == "tensorflow":
+        if len(np.shape(x)) == 0 and len(np.shape(y)) == 0:
+            return x * y
+
+        if len(np.shape(y)) == 1:
+            return np.tensordot(x, y, axes=[[-1], [0]], like=like)
+
+        if len(np.shape(x)) == 2 and len(np.shape(y)) == 2:
+            return x @ y
+
+        return np.tensordot(x, y, axes=[[-1], [-2]], like=like)
+
+    return np.dot(x, y, like=like)
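+
+
+# Added example (editor's sketch, not in the original source): ``dot`` picks the
+# product by rank, e.g. ``dot(np.array([[1., 2.], [3., 4.]]), np.array([5., 6.]))``
+# evaluates the matrix-vector product and returns ``array([17., 39.])``.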
+
+
+@multi_dispatch(argnum=[0, 1])
+def tensordot(tensor1, tensor2, axes=None, like=None):
+    """Returns the tensor product of two tensors.
+    In general ``axes`` specifies either the set of axes for both
+    tensors that are contracted (with the first/second entry of ``axes``
+    giving all axis indices for the first/second tensor) or --- if it is
+    an integer --- the number of last/first axes of the first/second
+    tensor to contract over.
+    There are some non-obvious special cases:
+
+    * If both tensors are 0-dimensional, ``axes`` must be 0,
+      and a 0-dimensional scalar is returned containing the simple product.
+
+    * If both tensors are 1-dimensional and ``axes=0``, the outer product
+      is returned.
+
+    * Products between a non-0-dimensional and a 0-dimensional tensor are not
+      supported in all interfaces.
+
+    Args:
+        tensor1 (tensor_like): input tensor
+        tensor2 (tensor_like): input tensor
+        axes (int or list[list[int]]): Axes to contract over, see detail description.
+
+    Returns:
+        tensor_like: the tensor product of the two input tensors
+    """
+    tensor1, tensor2 = np.coerce([tensor1, tensor2], like=like)
+    return np.tensordot(tensor1, tensor2, axes=axes, like=like)
+
+
+@multi_dispatch(argnum=[0], tensor_list=[0])
+def get_trainable_indices(values, like=None):
+    """Returns a set containing the trainable indices of a sequence of
+    values.
+
+    Args:
+        values (Iterable[tensor_like]): Sequence of tensor-like objects to inspect
+
+    Returns:
+        set[int]: Set containing the indices of the trainable tensor-like objects
+        within the input sequence.
+
+    **Example**
+
+    >>> def cost_fn(params):
+    ...     print("Trainable:", qml.math.get_trainable_indices(params))
+    ...     return np.sum(np.sin(params[0] * params[1]))
+    >>> values = [np.array([0.1, 0.2], requires_grad=True),
+    ...           np.array([0.5, 0.2], requires_grad=False)]
+    >>> cost_fn(values)
+    Trainable: {0}
+    tensor(0.0899685, requires_grad=True)
+    """
+    trainable = requires_grad
+    trainable_params = set()
+
+    if like == "jax":
+        import jax
+
+        if not any(isinstance(v, jax.core.Tracer) for v in values):
+            # No JAX tracing is occurring; treat all `DeviceArray` objects as trainable.
+
+            # pylint: disable=function-redefined,unused-argument
+            def trainable(p, **kwargs):
+                return isinstance(p, jax.numpy.DeviceArray)
+
+        else:
+            # JAX tracing is occurring; use the default behaviour (only traced arrays
+            # are treated as trainable). This is required to ensure that
+            # `jax.grad(func, argnums=...)` works correctly, as the argnums argument
+            # determines which parameters are traced arrays.
+            trainable = requires_grad
+
+    for idx, p in enumerate(values):
+        if trainable(p, interface=like):
+            trainable_params.add(idx)
+
+    return trainable_params
+
+
+def ones_like(tensor, dtype=None):
+    """Returns a tensor of all ones with the same shape and dtype
+    as the input tensor.
+
+    Args:
+        tensor (tensor_like): input tensor
+        dtype (str, np.dtype, None): The desired output datatype of the array. If not provided, the dtype of
+            ``tensor`` is used. This argument can be any supported NumPy dtype representation, including
+            a string (``"float64"``), a ``np.dtype`` object (``np.dtype("float64")``), or
+            a dtype class (``np.float64``). If ``tensor`` is not a NumPy array, the
+            **equivalent** dtype in the dispatched framework is used.
+ + Returns: + tensor_like: an all-ones tensor with the same shape and + size as ``tensor`` + + **Example** + + >>> x = torch.tensor([1., 2.]) + >>> ones_like(x) + tensor([1, 1]) + >>> y = tf.Variable([[0], [5]]) + >>> ones_like(y, dtype=np.complex128) + + """ + if dtype is not None: + return cast(np.ones_like(tensor), dtype) + + return np.ones_like(tensor) + + +@multi_dispatch(argnum=[0], tensor_list=[0]) +def stack(values, axis=0, like=None): + """Stack a sequence of tensors along the specified axis. + + .. warning:: + + Tensors that are incompatible (such as Torch and TensorFlow tensors) + cannot both be present. + + Args: + values (Sequence[tensor_like]): Sequence of tensor-like objects to + stack. Each object in the sequence must have the same size in the given axis. + axis (int): The axis along which the input tensors are stacked. ``axis=0`` corresponds + to vertical stacking. + + Returns: + tensor_like: The stacked array. The stacked array will have one additional dimension + compared to the unstacked tensors. + + **Example** + + >>> x = tf.constant([0.6, 0.1, 0.6]) + >>> y = tf.Variable([0.1, 0.2, 0.3]) + >>> z = np.array([5., 8., 101.]) + >>> stack([x, y, z]) + + """ + values = np.coerce(values, like=like) + return np.stack(values, axis=axis, like=like) + + +def einsum(indices, *operands, like=None): + """Evaluates the Einstein summation convention on the operands. + + Args: + indices (str): Specifies the subscripts for summation as comma separated list of + subscript labels. An implicit (classical Einstein summation) calculation is + performed unless the explicit indicator ‘->’ is included as well as subscript + labels of the precise output form. + operands (tuple[tensor_like]): The tensors for the operation. + + Returns: + tensor_like: The calculation based on the Einstein summation convention. + + **Examples** + + >>> a = np.arange(25).reshape(5,5) + >>> b = np.arange(5) + >>> c = np.arange(6).reshape(2,3) + + Trace of a matrix: + + >>> qml.math.einsum('ii', a) + 60 + + Extract the diagonal (requires explicit form): + + >>> qml.math.einsum('ii->i', a) + array([ 0, 6, 12, 18, 24]) + + Sum over an axis (requires explicit form): + + >>> qml.math.einsum('ij->i', a) + array([ 10, 35, 60, 85, 110]) + + Compute a matrix transpose, or reorder any number of axes: + + >>> np.einsum('ij->ji', c) + array([[0, 3], + [1, 4], + [2, 5]]) + + Matrix vector multiplication: + + >>> np.einsum('ij,j', a, b) + array([ 30, 80, 130, 180, 230]) + """ + if like is None: + like = _multi_dispatch(operands) + operands = np.coerce(operands, like=like) + return np.einsum(indices, *operands, like=like) + + +def where(condition, x=None, y=None): + """Returns elements chosen from x or y depending on a boolean tensor condition, + or the indices of entries satisfying the condition. + + The input tensors ``condition``, ``x``, and ``y`` must all be broadcastable to the same shape. + + Args: + condition (tensor_like[bool]): A boolean tensor. Where ``True`` , elements from + ``x`` will be chosen, otherwise ``y``. If ``x`` and ``y`` are ``None`` the + indices where ``condition==True`` holds will be returned. + x (tensor_like): values from which to choose if the condition evaluates to ``True`` + y (tensor_like): values from which to choose if the condition evaluates to ``False`` + + Returns: + tensor_like or tuple[tensor_like]: If ``x is None`` and ``y is None``, a tensor + or tuple of tensors with the indices where ``condition`` is ``True`` . 
+ Else, a tensor with elements from ``x`` where the ``condition`` is ``True``, + and ``y`` otherwise. In this case, the output tensor has the same shape as + the input tensors. + + **Example with three arguments** + + >>> a = torch.tensor([0.6, 0.23, 0.7, 1.5, 1.7], requires_grad=True) + >>> b = torch.tensor([-1., -2., -3., -4., -5.], requires_grad=True) + >>> math.where(a < 1, a, b) + tensor([ 0.6000, 0.2300, 0.7000, -4.0000, -5.0000], grad_fn=) + + .. warning:: + + The output format for ``x=None`` and ``y=None`` follows the respective + interface and differs between TensorFlow and all other interfaces: + For TensorFlow, the output is a tensor with shape + ``(num_true, len(condition.shape))`` where ``num_true`` is the number + of entries in ``condition`` that are ``True`` . + The entry at position ``(i, j)`` is the ``j`` th entry of the ``i`` th + index. + For all other interfaces, the output is a tuple of tensor-like objects, + with the ``j`` th object indicating the ``j`` th entries of all indices. + Also see the examples below. + + **Example with single argument** + + For Torch, Autograd, JAX and NumPy, the output formatting is as follows: + + >>> a = [[0.6, 0.23, 1.7],[1.5, 0.7, -0.2]] + >>> math.where(torch.tensor(a) < 1) + (tensor([0, 0, 1, 1]), tensor([0, 1, 1, 2])) + + This is not a single tensor-like object but corresponds to the shape + ``(2, 4)`` . For TensorFlow, on the other hand: + + >>> math.where(tf.constant(a) < 1) + tf.Tensor( + [[0 0] + [0 1] + [1 1] + [1 2]], shape=(4, 2), dtype=int64) + + As we can see, the dimensions are swapped and the output is a single Tensor. + Note that the number of dimensions of the output does *not* depend on the input + shape, it is always two-dimensional. + + """ + if x is None and y is None: + interface = _multi_dispatch([condition]) + res = np.where(condition, like=interface) + + if interface == "tensorflow": + return np.transpose(np.stack(res)) + + return res + + interface = _multi_dispatch([condition, x, y]) + res = np.where(condition, x, y, like=interface) + + return res + + +@multi_dispatch(argnum=[0, 1]) +def frobenius_inner_product(A, B, normalize=False, like=None): + r"""Frobenius inner product between two matrices. + + .. math:: + + \langle A, B \rangle_F = \sum_{i,j=1}^n A_{ij} B_{ij} = \operatorname{tr} (A^T B) + + The Frobenius inner product is equivalent to the Hilbert-Schmidt inner product for + matrices with real-valued entries. + + Args: + A (tensor_like[float]): First matrix, assumed to be a square array. + B (tensor_like[float]): Second matrix, assumed to be a square array. + normalize (bool): If True, divide the inner product by the Frobenius norms of A and B. + + Returns: + float: Frobenius inner product of A and B + + **Example** + + >>> A = np.random.random((3,3)) + >>> B = np.random.random((3,3)) + >>> qml.math.frobenius_inner_product(A, B) + 3.091948202943376 + """ + A, B = np.coerce([A, B], like=like) + + inner_product = np.sum(A * B) + + if normalize: + norm = np.sqrt(np.sum(A * A) * np.sum(B * B)) + inner_product = inner_product / norm + + return inner_product + + +@multi_dispatch(argnum=[1]) +def scatter(indices, array, new_dims, like=None): + """Scatters an array into a tensor of shape new_dims according to indices. + + This operation is similar to scatter_element_add, except that the tensor + is zero-initialized. 
Calling scatter(indices, array, new_dims) is identical + to calling scatter_element_add(np.zeros(new_dims), indices, array) + + Args: + indices (tensor_like[int]): Indices to update + array (tensor_like[float]): Values to assign to the new tensor + new_dims (int or tuple[int]): The shape of the new tensor + like (str): Manually chosen interface to dispatch to. + Returns: + tensor_like[float]: The tensor with the values modified the given indices. + + **Example** + + >>> indices = np.array([4, 3, 1, 7]) + >>> updates = np.array([9, 10, 11, 12]) + >>> shape = 8 + >>> qml.math.scatter(indices, updates, shape) + array([ 0, 11, 0, 10, 9, 0, 0, 12]) + """ + return np.scatter(indices, array, new_dims, like=like) + + +@multi_dispatch(argnum=[0, 2]) +def scatter_element_add(tensor, index, value, like=None): + """In-place addition of a multidimensional value over various + indices of a tensor. + + Args: + tensor (tensor_like[float]): Tensor to add the value to + index (tuple or list[tuple]): Indices to which to add the value + value (float or tensor_like[float]): Value to add to ``tensor`` + like (str): Manually chosen interface to dispatch to. + Returns: + tensor_like[float]: The tensor with the value added at the given indices. + + **Example** + + >>> tensor = torch.tensor([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]) + >>> index = (1, 2) + >>> value = -3.1 + >>> qml.math.scatter_element_add(tensor, index, value) + tensor([[ 0.1000, 0.2000, 0.3000], + [ 0.4000, 0.5000, -2.5000]]) + + If multiple indices are given, in the form of a list of tuples, the + ``k`` th tuple is interpreted to contain the ``k`` th entry of all indices: + + >>> indices = [(1, 0), (2, 1)] # This will modify the entries (1, 2) and (0, 1) + >>> values = torch.tensor([10, 20]) + >>> qml.math.scatter_element_add(tensor, indices, values) + tensor([[ 0.1000, 20.2000, 0.3000], + [ 0.4000, 0.5000, 10.6000]]) + """ + if len(np.shape(tensor)) == 0 and index == (): + return tensor + value + + return np.scatter_element_add(tensor, index, value, like=like) + + +def unwrap(values, max_depth=None): + """Unwrap a sequence of objects to NumPy arrays. + + Note that tensors on GPUs will automatically be copied + to the CPU. + + Args: + values (Sequence[tensor_like]): sequence of tensor-like objects to unwrap + max_depth (int): Positive integer indicating the depth of unwrapping to perform + for nested tensor-objects. This argument only applies when unwrapping + Autograd ``ArrayBox`` objects. + + **Example** + + >>> values = [np.array([0.1, 0.2]), torch.tensor(0.1, dtype=torch.float64), torch.tensor([0.5, 0.2])] + >>> math.unwrap(values) + [array([0.1, 0.2]), 0.1, array([0.5, 0.2], dtype=float32)] + + This function will continue to work during backpropagation: + + >>> def cost_fn(params): + ... unwrapped_params = math.unwrap(params) + ... print("Unwrapped:", [(i, type(i)) for i in unwrapped_params]) + ... 
return np.sum(np.sin(params))
+    >>> params = np.array([0.1, 0.2, 0.3])
+    >>> grad = autograd.grad(cost_fn)(params)
+    Unwrapped: [(0.1, <class 'float'>), (0.2, <class 'float'>), (0.3, <class 'float'>)]
+    >>> print(grad)
+    [0.99500417 0.98006658 0.95533649]
+    """
+    res = []
+
+    for t in values:
+        if isinstance(t, ArrayBox):
+            a = np.to_numpy(t, max_depth=max_depth)
+        else:
+            a = np.to_numpy(t)
+
+        if isinstance(a, ndarray) and not a.shape:
+            # if NumPy array is scalar, convert to a Python float
+            res.append(a.tolist())
+        else:
+            res.append(a)
+
+    return res
+
+
+def add(*args, **kwargs):
+    """Add arguments element-wise."""
+    try:
+        return np.add(*args, **kwargs)
+    except TypeError:
+        # catch arg1 = torch, arg2 = numpy error
+        # works fine with opposite order
+        return np.add(args[1], args[0], *args[2:], **kwargs)
+
+
+@multi_dispatch()
+def iscomplex(tensor, like=None):
+    """Return True if the tensor has a non-zero complex component."""
+    if like == "tensorflow":
+        import tensorflow as tf
+
+        imag_tensor = tf.math.imag(tensor)
+        return tf.math.count_nonzero(imag_tensor) > 0
+
+    if like == "torch":
+        import torch
+
+        if torch.is_complex(tensor):
+            imag_tensor = torch.imag(tensor)
+            return torch.count_nonzero(imag_tensor) > 0
+        return False
+
+    return np.iscomplex(tensor)
+
+
+@multi_dispatch()
+def expm(tensor, like=None):
+    """Compute the matrix exponential of an array :math:`e^{X}`.
+
+    .. note::
+        This function is not differentiable with Autograd, as it
+        relies on the scipy implementation.
+    """
+    if like == "torch":
+        return tensor.matrix_exp()
+    if like == "jax":
+        from jax.scipy.linalg import expm as jax_expm
+
+        return jax_expm(tensor)
+    if like == "tensorflow":
+        import tensorflow as tf
+
+        return tf.linalg.expm(tensor)
+    from scipy.linalg import expm as scipy_expm
+
+    return scipy_expm(tensor)
diff --git a/openqaoa/optimizers/pennylane/math/quantum.py b/openqaoa/optimizers/pennylane/math/quantum.py
new file mode 100644
index 000000000..fa03d7431
--- /dev/null
+++ b/openqaoa/optimizers/pennylane/math/quantum.py
@@ -0,0 +1,940 @@
+# Copyright 2018-2022 Xanadu Quantum Technologies Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Differentiable quantum functions"""
+# pylint: disable=import-outside-toplevel
+import itertools
+import functools
+
+from string import ascii_letters as ABC
+from autoray import numpy as np
+from numpy import float64
+
+from openqaoa.optimizers import pennylane as qml  # changed from the original code
+
+from . import single_dispatch  # pylint:disable=unused-import
+from .multi_dispatch import diag, dot, scatter_element_add, einsum, get_interface
+from .utils import is_abstract, allclose, cast, convert_like, cast_like
+
+ABC_ARRAY = np.array(list(ABC))
+
+
+def cov_matrix(prob, obs, wires=None, diag_approx=False):
+    """Calculate the covariance matrix of a list of commuting observables, given
+    the joint probability distribution of the system in the shared eigenbasis.
+
+    .. 
note:: + This method only works for **commuting observables.** + If the probability distribution is the result of a quantum circuit, + the quantum state must be rotated into the shared + eigenbasis of the list of observables before measurement. + + Args: + prob (tensor_like): probability distribution + obs (list[.Observable]): a list of observables for which + to compute the covariance matrix + diag_approx (bool): if True, return the diagonal approximation + wires (.Wires): The wire register of the system. If not provided, + it is assumed that the wires are labelled with consecutive integers. + + Returns: + tensor_like: the covariance matrix of size ``(len(obs), len(obs))`` + + **Example** + + Consider the following ansatz and observable list: + + >>> obs_list = [qml.PauliX(0) @ qml.PauliZ(1), qml.PauliY(2)] + >>> ansatz = qml.templates.StronglyEntanglingLayers + + We can construct a QNode to output the probability distribution in the shared eigenbasis of the + observables: + + .. code-block:: python + + dev = qml.device("default.qubit", wires=3) + + @qml.qnode(dev, interface="autograd") + def circuit(weights): + ansatz(weights, wires=[0, 1, 2]) + # rotate into the basis of the observables + for o in obs_list: + o.diagonalizing_gates() + return qml.probs(wires=[0, 1, 2]) + + We can now compute the covariance matrix: + + >>> shape = qml.templates.StronglyEntanglingLayers.shape(n_layers=2, n_wires=3) + >>> weights = np.random.random(shape, requires_grad=True) + >>> cov = qml.math.cov_matrix(circuit(weights), obs_list) + >>> cov + array([[0.98707611, 0.03665537], + [0.03665537, 0.99998377]]) + + Autodifferentiation is fully supported using all interfaces. + Here we use autograd: + + >>> cost_fn = lambda weights: qml.math.cov_matrix(circuit(weights), obs_list)[0, 1] + >>> qml.grad(cost_fn)(weights)[0] + array([[[ 4.94240914e-17, -2.33786398e-01, -1.54193959e-01], + [-3.05414996e-17, 8.40072236e-04, 5.57884080e-04], + [ 3.01859411e-17, 8.60411436e-03, 6.15745204e-04]], + [[ 6.80309533e-04, -1.23162742e-03, 1.08729813e-03], + [-1.53863193e-01, -1.38700657e-02, -1.36243323e-01], + [-1.54665054e-01, -1.89018172e-02, -1.56415558e-01]]]) + """ + variances = [] + + # diagonal variances + for i, o in enumerate(obs): + eigvals = cast(o.eigvals(), dtype=float64) + w = o.wires.labels if wires is None else wires.indices(o.wires) + p = marginal_prob(prob, w) + + res = dot(eigvals**2, p) - (dot(eigvals, p)) ** 2 + variances.append(res) + + cov = diag(variances) + + if diag_approx: + return cov + + for i, j in itertools.combinations(range(len(obs)), r=2): + o1 = obs[i] + o2 = obs[j] + + o1wires = o1.wires.labels if wires is None else wires.indices(o1.wires) + o2wires = o2.wires.labels if wires is None else wires.indices(o2.wires) + shared_wires = set(o1wires + o2wires) + + l1 = cast(o1.eigvals(), dtype=float64) + l2 = cast(o2.eigvals(), dtype=float64) + l12 = cast(np.kron(l1, l2), dtype=float64) + + p1 = marginal_prob(prob, o1wires) + p2 = marginal_prob(prob, o2wires) + p12 = marginal_prob(prob, shared_wires) + + res = dot(l12, p12) - dot(l1, p1) * dot(l2, p2) + + cov = scatter_element_add(cov, [i, j], res) + cov = scatter_element_add(cov, [j, i], res) + + return cov + + +def marginal_prob(prob, axis): + """Compute the marginal probability given a joint probability distribution expressed as a tensor. + Each random variable corresponds to a dimension. + + If the distribution arises from a quantum circuit measured in computational basis, each dimension + corresponds to a wire. 
For example, for a 2-qubit quantum circuit `prob[0, 1]` is the probability of measuring the
+    first qubit in state 0 and the second in state 1.
+
+    Args:
+        prob (tensor_like): 1D tensor of probabilities. This tensor should be of size
+            ``(2**N,)`` for some integer value ``N``.
+        axis (list[int]): the axis for which to calculate the marginal
+            probability distribution
+
+    Returns:
+        tensor_like: the marginal probabilities, of
+        size ``(2**len(axis),)``
+
+    **Example**
+
+    >>> x = tf.Variable([1, 0, 0, 1.], dtype=tf.float64) / np.sqrt(2)
+    >>> marginal_prob(x, axis=[0, 1])
+
+    >>> marginal_prob(x, axis=[0])
+
+    """
+    prob = np.flatten(prob)
+    num_wires = int(np.log2(len(prob)))
+
+    if num_wires == len(axis):
+        return prob
+
+    inactive_wires = tuple(set(range(num_wires)) - set(axis))
+    prob = np.reshape(prob, [2] * num_wires)
+    prob = np.sum(prob, axis=inactive_wires)
+    return np.flatten(prob)
+
+
+def _density_matrix_from_matrix(density_matrix, indices, check_state=False):
+    """Compute the density matrix from a state represented with a density matrix.
+
+
+    Args:
+        density_matrix (tensor_like): 2D density matrix tensor. This tensor should be of size
+            ``(2**N, 2**N)`` for some integer number of wires ``N``.
+        indices (list(int)): List of indices in the considered subsystem.
+        check_state (bool): If True, the function will check the state validity (shape and norm).
+
+    Returns:
+        tensor_like: Density matrix of size ``(2**len(wires), 2**len(wires))``
+
+    **Example**
+
+    >>> x = np.array([[1, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]])
+    >>> _density_matrix_from_matrix(x, indices=[0])
+    [[1.+0.j 0.+0.j]
+     [0.+0.j 0.+0.j]]
+
+    >>> y = [[0.5, 0, 0.5, 0], [0, 0, 0, 0], [0.5, 0, 0.5, 0], [0, 0, 0, 0]]
+    >>> _density_matrix_from_matrix(y, indices=[0])
+    [[0.5+0.j 0.5+0.j]
+     [0.5+0.j 0.5+0.j]]
+
+    >>> _density_matrix_from_matrix(y, indices=[1])
+    [[1.+0.j 0.+0.j]
+     [0.+0.j 0.+0.j]]
+
+    >>> z = tf.Variable([[1, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], dtype=tf.complex128)
+    >>> _density_matrix_from_matrix(z, indices=[1])
+    tf.Tensor(
+    [[1.+0.j 0.+0.j]
+     [0.+0.j 0.+0.j]], shape=(2, 2), dtype=complex128)
+
+
+    """
+    shape = density_matrix.shape[0]
+    num_indices = int(np.log2(shape))
+
+    if check_state:
+        _check_density_matrix(density_matrix)
+
+    consecutive_indices = list(range(0, num_indices))
+
+    # Return the full density matrix if all the wires are given
+    if tuple(indices) == tuple(consecutive_indices):
+        return density_matrix
+
+    traced_wires = [x for x in consecutive_indices if x not in indices]
+    density_matrix = _partial_trace(density_matrix, traced_wires)
+    return density_matrix
+
+
+def _partial_trace(density_matrix, indices):
+    """Compute the reduced density matrix by tracing out the provided indices.
+
+    Args:
+        density_matrix (tensor_like): 2D density matrix tensor. This tensor should be of size
+            ``(2**N, 2**N)`` for some integer number of wires ``N``.
+        indices (list(int)): List of indices to be traced.
+ + Returns: + tensor_like: (reduced) Density matrix of size ``(2**len(wires), 2**len(wires))`` + + **Example** + + >>> x = np.array([[1, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]) + >>> _partial_trace(x, indices=[0]) + [[1.+0.j 0.+0.j] + [0.+0.j 0.+0.j]] + + + >>> x = tf.Variable([[1, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], dtype=tf.complex128) + >>> _partial_trace(x, indices=[1]) + tf.Tensor( + [[1.+0.j 0.+0.j] + [0.+0.j 0.+0.j]], shape=(2, 2), dtype=complex128) + """ + # Autograd does not support same indices sum in backprop + if get_interface(density_matrix) == "autograd": + density_matrix = _partial_trace_autograd(density_matrix, indices) + return density_matrix + + # Dimension and reshape + shape = density_matrix.shape[0] + num_indices = int(np.log2(shape)) + rho_dim = 2 * num_indices + + density_matrix = np.reshape(density_matrix, [2] * 2 * num_indices) + indices = np.sort(indices) + + # For loop over wires + for i, target_index in enumerate(indices): + target_index = target_index - i + state_indices = ABC[: rho_dim - 2 * i] + state_indices = list(state_indices) + + target_letter = state_indices[target_index] + state_indices[target_index + num_indices - i] = target_letter + state_indices = "".join(state_indices) + + einsum_indices = f"{state_indices}" + density_matrix = einsum(einsum_indices, density_matrix) + + number_wires_sub = num_indices - len(indices) + reduced_density_matrix = np.reshape( + density_matrix, (2**number_wires_sub, 2**number_wires_sub) + ) + return reduced_density_matrix + + +def _partial_trace_autograd(density_matrix, indices): + """Compute the reduced density matrix for autograd interface by tracing out the provided indices with the use + of projectors as same subscripts indices are not supported in autograd backprop. 
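+
+    As a rough plain-NumPy sketch of the projector trick (illustrative only; the
+    general implementation below builds the einsum string per traced wire):
+
+    .. code-block:: python
+
+        import numpy as np
+
+        # rho = 0.5|00><00| + 0.5|11><11|, reshaped to one axis per row/column wire
+        rho = np.reshape(np.diag([0.5, 0.0, 0.0, 0.5]), (2, 2, 2, 2))
+        kraus = np.reshape(np.eye(2), (2, 1, 2))      # contracts the row index of wire 0
+        kraus_dg = np.transpose(kraus, (0, 2, 1))     # contracts the column index of wire 0
+        # the same summation index 'g' appears on both sides, implementing sum_k <k|rho|k>
+        reduced = np.einsum("gea,abcd,gcf->ebfd", kraus, rho, kraus_dg)
+        reduced = np.reshape(reduced, (2, 2))         # array([[0.5, 0. ], [0. , 0.5]])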
+    """
+    # Dimension and reshape
+    shape = density_matrix.shape[0]
+    num_indices = int(np.log2(shape))
+    rho_dim = 2 * num_indices
+    density_matrix = np.reshape(density_matrix, [2] * 2 * num_indices)
+
+    kraus = cast(np.eye(2), density_matrix.dtype)
+
+    kraus = np.reshape(kraus, (2, 1, 2))
+    kraus_dagger = np.asarray([np.conj(np.transpose(k)) for k in kraus])
+
+    kraus = convert_like(kraus, density_matrix)
+    kraus_dagger = convert_like(kraus_dagger, density_matrix)
+    # For loop over wires
+    for target_wire in indices:
+        # Tensor indices of density matrix
+        state_indices = ABC[:rho_dim]
+        # row indices of the quantum state affected by this operation
+        row_wires_list = [target_wire]
+        row_indices = "".join(ABC_ARRAY[row_wires_list].tolist())
+        # column indices are shifted by the number of wires
+        col_wires_list = [w + num_indices for w in row_wires_list]
+        col_indices = "".join(ABC_ARRAY[col_wires_list].tolist())
+        # indices in einsum must be replaced with new ones
+        num_partial_trace_wires = 1
+        new_row_indices = ABC[rho_dim : rho_dim + num_partial_trace_wires]
+        new_col_indices = ABC[
+            rho_dim + num_partial_trace_wires : rho_dim + 2 * num_partial_trace_wires
+        ]
+        # index for summation over Kraus operators
+        kraus_index = ABC[
+            rho_dim + 2 * num_partial_trace_wires : rho_dim + 2 * num_partial_trace_wires + 1
+        ]
+        # new state indices replace row and column indices with new ones
+        new_state_indices = functools.reduce(
+            lambda old_string, idx_pair: old_string.replace(idx_pair[0], idx_pair[1]),
+            zip(col_indices + row_indices, new_col_indices + new_row_indices),
+            state_indices,
+        )
+        # index mapping for einsum, e.g., 'iga,abcdef,idh->gbchef'
+        einsum_indices = (
+            f"{kraus_index}{new_row_indices}{row_indices}, {state_indices},"
+            f"{kraus_index}{col_indices}{new_col_indices}->{new_state_indices}"
+        )
+        density_matrix = einsum(einsum_indices, kraus, density_matrix, kraus_dagger)
+
+    number_wires_sub = num_indices - len(indices)
+    reduced_density_matrix = np.reshape(
+        density_matrix, (2**number_wires_sub, 2**number_wires_sub)
+    )
+    return reduced_density_matrix
+
+
+def _density_matrix_from_state_vector(state, indices, check_state=False):
+    """Compute the density matrix from a state vector.
+
+    Args:
+        state (tensor_like): 1D tensor state vector. This tensor should be of size ``(2**N,)`` for some integer value ``N``.
+        indices (list(int)): List of indices in the considered subsystem.
+        check_state (bool): If True, the function will check the state validity (shape and norm).
+ + Returns: + tensor_like: Density matrix of size ``(2**len(indices), 2**len(indices))`` + + **Example** + + >>> x = np.array([1, 0, 0, 0]) + >>> _density_matrix_from_state_vector(x, indices=[0]) + [[1.+0.j 0.+0.j] + [0.+0.j 0.+0.j]] + + >>> y = [1, 0, 1, 0] / np.sqrt(2) + >>> _density_matrix_from_state_vector(y, indices=[0]) + [[0.5+0.j 0.5+0.j] + [0.5+0.j 0.5+0.j]] + + >>> _density_matrix_from_state_vector(y, indices=[1]) + [[1.+0.j 0.+0.j] + [0.+0.j 0.+0.j]] + + >>> z = tf.Variable([1, 0, 0, 0], dtype=tf.complex128) + >>> _density_matrix_from_state_vector(z, indices=[1]) + tf.Tensor( + [[1.+0.j 0.+0.j] + [0.+0.j 0.+0.j]], shape=(2, 2), dtype=complex128) + + """ + len_state = np.shape(state)[0] + + # Check the format and norm of the state vector + if check_state: + _check_state_vector(state) + + # Get dimension of the quantum system and reshape + num_indices = int(np.log2(len_state)) + consecutive_wires = list(range(num_indices)) + state = np.reshape(state, [2] * num_indices) + + # Get the system to be traced + traced_system = [x for x in consecutive_wires if x not in indices] + + # Return the reduced density matrix by using numpy tensor product + density_matrix = np.tensordot(state, np.conj(state), axes=(traced_system, traced_system)) + density_matrix = np.reshape(density_matrix, (2 ** len(indices), 2 ** len(indices))) + + return density_matrix + + +def reduced_dm(state, indices, check_state=False, c_dtype="complex128"): + """Compute the reduced density matrix from a state vector or a density matrix. It supports all interfaces (Numpy, + Autograd, Torch, Tensorflow and Jax). + + Args: + state (tensor_like): ``(2**N)`` state vector or ``(2**N, 2**N)`` density matrix. + indices (Sequence(int)): List of indices in the considered subsystem. + check_state (bool): If True, the function will check the state validity (shape and norm). + c_dtype (str): Complex floating point precision type. + + Returns: + tensor_like: Reduced density matrix of size ``(2**len(indices), 2**len(indices))`` + + **Example** + + >>> x = [1, 0, 1, 0] / np.sqrt(2) + >>> reduced_dm(x, indices=[0]) + [[0.5+0.j 0.5+0.j] + [0.5+0.j 0.5+0.j]] + + >>> reduced_dm(x, indices=[1]) + [[1.+0.j 0.+0.j] + [0.+0.j 0.+0.j]] + + >>> y = tf.Variable([1, 0, 0, 0], dtype=tf.complex128) + >>> reduced_dm(y, indices=[1]) + tf.Tensor( + [[1.+0.j 0.+0.j] + [0.+0.j 0.+0.j]], shape=(2, 2), dtype=complex128) + + >>> z = [[0.5, 0, 0.0, 0.5], [0, 0, 0, 0], [0, 0, 0, 0], [0.5, 0, 0, 0.5]] + >>> reduced_dm(z, indices=[0]) + [[0.5+0.j 0.0+0.j] + [0.0+0.j 0.5+0.j]] + + >>> reduced_dm(z, indices=[1]) + [[1.+0.j 0.+0.j] + [0.+0.j 0.+0.j]] + + >>> y_mat_tf = tf.Variable([[1, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], dtype=tf.complex128) + >>> reduced_dm(y_mat_tf, indices=[1]) + tf.Tensor( + [[1.+0.j 0.+0.j] + [0.+0.j 0.+0.j]], shape=(2, 2), dtype=complex128) + + .. seealso:: :func:`pennylane.qinfo.transforms.reduced_dm` and :func:`pennylane.density_matrix` + """ + # Cast as a c_dtype array + state = cast(state, dtype=c_dtype) + len_state = state.shape[0] + # State vector + if state.shape == (len_state,): + density_matrix = _density_matrix_from_state_vector(state, indices, check_state) + return density_matrix + + density_matrix = _density_matrix_from_matrix(state, indices, check_state) + + return density_matrix + + +def vn_entropy(state, indices, base=None, check_state=False, c_dtype="complex128"): + r"""Compute the Von Neumann entropy from a state vector or density matrix on a given subsystem. 
It supports all
+    interfaces (Numpy, Autograd, Torch, Tensorflow and Jax).
+
+    .. math::
+        S( \rho ) = -\text{Tr}( \rho \log ( \rho ))
+
+    Args:
+        state (tensor_like): ``(2**N)`` state vector or ``(2**N, 2**N)`` density matrix.
+        indices (list(int)): List of indices in the considered subsystem.
+        base (float): Base for the logarithm. If None, the natural logarithm is used.
+        check_state (bool): If True, the function will check the state validity (shape and norm).
+        c_dtype (str): Complex floating point precision type.
+
+    Returns:
+        float: Von Neumann entropy of the considered subsystem.
+
+    **Example**
+
+    The entropy of a subsystem for any state vector can be obtained. Here is an example for the
+    maximally entangled state, where the subsystem entropy is maximal (by default the natural
+    logarithm is used).
+
+    >>> x = [1, 0, 0, 1] / np.sqrt(2)
+    >>> vn_entropy(x, indices=[0])
+    0.6931472
+
+    The logarithm base can be switched to 2, for example:
+
+    >>> vn_entropy(x, indices=[0], base=2)
+    1.0
+
+    The entropy can be obtained by providing a quantum state as a density matrix, for example:
+
+    >>> y = [[1/2, 0, 0, 1/2], [0, 0, 0, 0], [0, 0, 0, 0], [1/2, 0, 0, 1/2]]
+    >>> vn_entropy(y, indices=[0])
+    0.6931472
+
+    .. seealso:: :func:`pennylane.qinfo.transforms.vn_entropy` and :func:`pennylane.vn_entropy`
+    """
+    density_matrix = reduced_dm(state, indices, check_state, c_dtype)
+    entropy = _compute_vn_entropy(density_matrix, base)
+
+    return entropy
+
+
+def _compute_vn_entropy(density_matrix, base=None):
+    """Compute the Von Neumann entropy from a density matrix.
+
+    Args:
+        density_matrix (tensor_like): ``(2**N, 2**N)`` tensor density matrix for an integer `N`.
+        base (float, int): Base for the logarithm. If None, the natural logarithm is used.
+
+    Returns:
+        float: Von Neumann entropy of the density matrix.
+
+    **Example**
+
+    >>> x = [[1/2, 0], [0, 1/2]]
+    >>> _compute_vn_entropy(x)
+    0.6931472
+
+    >>> x = [[1/2, 0], [0, 1/2]]
+    >>> _compute_vn_entropy(x, base=2)
+    1.0
+
+    """
+    # Change the logarithm base if necessary
+    if base:
+        div_base = np.log(base)
+    else:
+        div_base = 1
+
+    evs = qml.math.eigvalsh(density_matrix)
+    evs = qml.math.where(evs > 0, evs, 1.0)
+    entropy = qml.math.entr(evs) / div_base
+
+    return entropy
+
+
+# pylint: disable=too-many-arguments
+def mutual_info(state, indices0, indices1, base=None, check_state=False, c_dtype="complex128"):
+    r"""Compute the mutual information between two subsystems given a state:
+
+    .. math::
+
+        I(A, B) = S(\rho^A) + S(\rho^B) - S(\rho^{AB})
+
+    where :math:`S` is the von Neumann entropy.
+
+    The mutual information is a measure of correlation between two subsystems.
+    More specifically, it quantifies the amount of information obtained about
+    one system by measuring the other system. It supports all interfaces
+    (Numpy, Autograd, Torch, Tensorflow and Jax).
+
+    Each state can be given as a state vector in the computational basis, or
+    as a density matrix.
+
+    Args:
+        state (tensor_like): ``(2**N)`` state vector or ``(2**N, 2**N)`` density matrix.
+        indices0 (list[int]): List of indices in the first subsystem.
+        indices1 (list[int]): List of indices in the second subsystem.
+        base (float): Base for the logarithm. If None, the natural logarithm is used.
+        check_state (bool): If True, the function will check the state validity (shape and norm).
+        c_dtype (str): Complex floating point precision type.
+ + Returns: + float: Mutual information between the subsystems + + **Examples** + + The mutual information between subsystems for a state vector can be returned as follows: + + >>> x = np.array([1, 0, 0, 1]) / np.sqrt(2) + >>> qml.math.mutual_info(x, indices0=[0], indices1=[1]) + 1.3862943611198906 + + It is also possible to change the log basis. + + >>> qml.math.mutual_info(x, indices0=[0], indices1=[1], base=2) + 2.0 + + Similarly the quantum state can be provided as a density matrix: + + >>> y = np.array([[1/2, 1/2, 0, 1/2], [1/2, 0, 0, 0], [0, 0, 0, 0], [1/2, 0, 0, 1/2]]) + >>> qml.math.mutual_info(y, indices0=[0], indices1=[1]) + 0.4682351577408206 + + .. seealso:: :func:`~.math.vn_entropy`, :func:`pennylane.qinfo.transforms.mutual_info` and :func:`pennylane.mutual_info` + """ + + # the subsystems cannot overlap + if len([index for index in indices0 if index in indices1]) > 0: + raise ValueError("Subsystems for computing mutual information must not overlap.") + + # Cast to a complex array + state = cast(state, dtype=c_dtype) + + state_shape = state.shape + if len(state_shape) > 0: + len_state = state_shape[0] + if state_shape in [(len_state,), (len_state, len_state)]: + return _compute_mutual_info( + state, indices0, indices1, base=base, check_state=check_state, c_dtype=c_dtype + ) + + raise ValueError("The state is not a state vector or a density matrix.") + + +# pylint: disable=too-many-arguments +def _compute_mutual_info( + state, indices0, indices1, base=None, check_state=False, c_dtype="complex128" +): + """Compute the mutual information between the subsystems.""" + all_indices = sorted([*indices0, *indices1]) + vn_entropy_1 = vn_entropy( + state, indices=indices0, base=base, check_state=check_state, c_dtype=c_dtype + ) + vn_entropy_2 = vn_entropy( + state, indices=indices1, base=base, check_state=check_state, c_dtype=c_dtype + ) + vn_entropy_12 = vn_entropy( + state, indices=all_indices, base=base, check_state=check_state, c_dtype=c_dtype + ) + + return vn_entropy_1 + vn_entropy_2 - vn_entropy_12 + + +def fidelity(state0, state1, check_state=False, c_dtype="complex128"): + r"""Compute the fidelity for two states (a state can be a state vector or a density matrix) acting on quantum + systems with the same size. + + The fidelity for two mixed states given by density matrices :math:`\rho` and :math:`\sigma` + is defined as + + .. math:: + F( \rho , \sigma ) = \text{Tr}( \sqrt{\sqrt{\rho} \sigma \sqrt{\rho}})^2 + + If one of the states is pure, say :math:`\rho=\ket{\psi}\bra{\psi}`, then the expression + for fidelity simplifies to + + .. math:: + F( \ket{\psi} , \sigma ) = \bra{\psi} \sigma \ket{\psi} + + Finally, if both states are pure, :math:`\sigma=\ket{\phi}\bra{\phi}`, then the + fidelity is simply + + .. math:: + F( \ket{\psi} , \ket{\phi}) = \left|\braket{\psi, \phi}\right|^2 + + .. note:: + It supports all interfaces (Numpy, Autograd, Torch, Tensorflow and Jax). The second state is coerced + to the type and dtype of the first state. The fidelity is returned in the type of the interface of the + first state. + + Args: + state0 (tensor_like): 1D state vector or 2D density matrix + state1 (tensor_like): 1D state vector or 2D density matrix + check_state (bool): If True, the function will check the validity of both states; it checks (shape, norm) for + state vectors or (shape, trace, positive-definitiveness) for density matrices. + c_dtype (str): Complex floating point precision type. + + Returns: + float: Fidelity between the two quantum states. 
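+
+    .. note::
+        A quick sanity check of the pure-state formula: for
+        :math:`\ket{\psi} = \ket{0}` and
+        :math:`\ket{\phi} = (\ket{0} + \ket{1})/\sqrt{2}`, the overlap is
+        :math:`\braket{\psi, \phi} = 1/\sqrt{2}`, so the fidelity is
+        :math:`\left|1/\sqrt{2}\right|^2 = 0.5`.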
+ + **Example** + + Two state vectors can be used as arguments and the fidelity (overlap) is returned, e.g.: + + >>> state0 = [0.98753537-0.14925137j, 0.00746879-0.04941796j] + >>> state1 = [0.99500417+0.j, 0.09983342+0.j] + >>> qml.math.fidelity(state0, state1) + 0.9905158135644924 + + Alternatively one can give a state vector and a density matrix as arguments, e.g.: + + >>> state0 = [0, 1] + >>> state1 = [[0, 0], [0, 1]] + >>> qml.math.fidelity(state0, state1) + 1.0 + + It also works with two density matrices, e.g.: + + >>> state0 = [[1, 0], [0, 0]] + >>> state1 = [[0, 0], [0, 1]] + >>> qml.math.fidelity(state0, state1) + 0.0 + + .. seealso:: :func:`pennylane.qinfo.transforms.fidelity` + + """ + # Cast as a c_dtype array + state0 = cast(state0, dtype=c_dtype) + len_state0 = state0.shape[0] + + # Cannot be cast_like if jit + if not is_abstract(state0): + state1 = cast_like(state1, state0) + + len_state1 = state1.shape[0] + + if check_state: + if state0.shape == (len_state0,): + _check_state_vector(state0) + else: + _check_density_matrix(state0) + + if state1.shape == (len_state1,): + _check_state_vector(state1) + else: + _check_density_matrix(state1) + + # Get dimension of the quantum system and reshape + num_indices0 = int(np.log2(len_state0)) + num_indices1 = int(np.log2(len_state1)) + + if num_indices0 != num_indices1: + raise qml.QuantumFunctionError("The two states must have the same number of wires.") + + # Two pure states, squared overlap + if state1.shape == (len_state1,) and state0.shape == (len_state0,): + overlap = np.tensordot(state0, np.transpose(np.conj(state1)), axes=1) + overlap = np.linalg.norm(overlap) ** 2 + return overlap + # First state mixed, second state pure + if state1.shape == (len_state1,) and state0.shape != (len_state0,): + overlap = np.tensordot(state0, np.transpose(np.conj(state1)), axes=1) + overlap = np.tensordot(state1, overlap, axes=1) + overlap = np.real(overlap) + return overlap + # First state pure, second state mixed + if state0.shape == (len_state0,) and state1.shape != (len_state1,): + overlap = np.tensordot(state1, np.transpose(np.conj(state0)), axes=1) + overlap = np.tensordot(state0, overlap, axes=1) + overlap = np.real(overlap) + return overlap + # Two mixed states + fid = _compute_fidelity(state0, state1) + return fid + + +def sqrt_matrix(density_matrix): + r"""Compute the square root matrix of a density matrix where :math:`\rho = \sqrt{\rho} \times \sqrt{\rho}` + Args: + density_matrix (tensor_like): 2D density matrix of the quantum system. + Returns: + (tensor_like): Square root of the density matrix. + """ + evs, vecs = qml.math.linalg.eigh(density_matrix) + evs = np.real(evs) + evs = qml.math.where(evs > 0.0, evs, 0.0) + if not is_abstract(evs): + evs = qml.math.cast_like(evs, vecs) + return vecs @ qml.math.diag(np.sqrt(evs)) @ np.conj(np.transpose(vecs)) + + +def _compute_fidelity(density_matrix0, density_matrix1): + r"""Compute the fidelity for two density matrices with the same number of wires. + + .. 
math::
+        F( \rho , \sigma ) = \text{Tr}( \sqrt{\sqrt{\rho} \sigma \sqrt{\rho}})^2
+    """
+    # Compute sqrt(rho); the linear algebra dispatches to the relevant interface
+    sqrt_mat = qml.math.sqrt_matrix(density_matrix0)
+
+    # sqrt(rho) * sigma * sqrt(rho)
+    sqrt_mat_sqrt = sqrt_mat @ density_matrix1 @ sqrt_mat
+
+    # extract eigenvalues
+    evs = qml.math.eigvalsh(sqrt_mat_sqrt)
+    evs = np.real(evs)
+    evs = qml.math.where(evs > 0.0, evs, 0.0)
+
+    trace = (qml.math.sum(qml.math.sqrt(evs))) ** 2
+
+    return trace
+
+
+def _compute_relative_entropy(rho, sigma, base=None):
+    r"""
+    Compute the quantum relative entropy of density matrix rho with respect to sigma.
+
+    .. math::
+        S(\rho\,\|\,\sigma)=-\text{Tr}(\rho\log\sigma)-S(\rho)=\text{Tr}(\rho\log\rho)-\text{Tr}(\rho\log\sigma)
+        =\text{Tr}(\rho(\log\rho-\log\sigma))
+
+    where :math:`S` is the von Neumann entropy.
+    """
+    if base:
+        div_base = np.log(base)
+    else:
+        div_base = 1
+
+    evs_rho, u_rho = qml.math.linalg.eigh(rho)
+    evs_sig, u_sig = qml.math.linalg.eigh(sigma)
+
+    # cast all eigenvalues to real
+    evs_rho, evs_sig = np.real(evs_rho), np.real(evs_sig)
+
+    # zero eigenvalues need to be treated very carefully here
+    # we use the convention that 0 * log(0) = 0
+    evs_sig = qml.math.where(evs_sig == 0, 0.0, evs_sig)
+    rho_nonzero_mask = qml.math.where(evs_rho == 0.0, False, True)
+
+    ent = qml.math.entr(qml.math.where(rho_nonzero_mask, evs_rho, 1.0))
+
+    # the matrix of inner products between eigenvectors of rho and eigenvectors
+    # of sigma; this is a doubly stochastic matrix
+    rel = np.abs(qml.math.dot(np.transpose(np.conj(u_rho)), u_sig)) ** 2
+
+    rel = qml.math.sum(qml.math.where(rel == 0.0, 0.0, np.log(evs_sig) * rel), axis=1)
+    rel = -qml.math.sum(qml.math.where(rho_nonzero_mask, evs_rho * rel, 0.0))
+
+    return (rel - ent) / div_base
+
+
+def relative_entropy(state0, state1, base=None, check_state=False, c_dtype="complex128"):
+    r"""
+    Compute the quantum relative entropy of one state with respect to another.
+
+    .. math::
+        S(\rho\,\|\,\sigma)=-\text{Tr}(\rho\log\sigma)-S(\rho)=\text{Tr}(\rho\log\rho)-\text{Tr}(\rho\log\sigma)
+        =\text{Tr}(\rho(\log\rho-\log\sigma))
+
+    Roughly speaking, quantum relative entropy is a measure of distinguishability between two
+    quantum states. It is the quantum mechanical analog of the classical relative entropy
+    (the Kullback-Leibler divergence).
+
+    Each state can be given as a state vector in the computational basis or
+    as a density matrix.
+
+    Args:
+        state0 (tensor_like): ``(2**N)`` state vector or ``(2**N, 2**N)`` density matrix.
+        state1 (tensor_like): ``(2**N)`` state vector or ``(2**N, 2**N)`` density matrix.
+        base (float): Base for the logarithm. If None, the natural logarithm is used.
+        check_state (bool): If True, the function will check the state validity (shape and norm).
+        c_dtype (str): Complex floating point precision type.
+ + Returns: + float: Quantum relative entropy of state0 with respect to state1 + + **Examples** + + The relative entropy between two equal states is always zero: + + >>> x = np.array([1, 0]) + >>> qml.math.relative_entropy(x, x) + 0.0 + + and the relative entropy between two non-equal pure states is always infinity: + + >>> y = np.array([1, 1]) / np.sqrt(2) + >>> qml.math.relative_entropy(x, y) + inf + + The quantum states can be provided as density matrices, allowing for computation + of relative entropy between mixed states: + + >>> rho = np.array([[0.3, 0], [0, 0.7]]) + >>> sigma = np.array([[0.5, 0], [0, 0.5]]) + >>> qml.math.relative_entropy(rho, sigma) + tensor(0.08228288, requires_grad=True) + + It is also possible to change the log base: + + >>> qml.math.relative_entropy(rho, sigma, base=2) + tensor(0.1187091, requires_grad=True) + + .. seealso:: :func:`pennylane.qinfo.transforms.relative_entropy` + """ + # Cast as a c_dtype array + state0 = cast(state0, dtype=c_dtype) + len_state0 = state0.shape[0] + + # Cannot be cast_like if jit + if not is_abstract(state0): + state1 = cast_like(state1, state0) + + len_state1 = state1.shape[0] + + if check_state: + if state0.shape == (len_state0,): + _check_state_vector(state0) + else: + _check_density_matrix(state0) + + if state1.shape == (len_state1,): + _check_state_vector(state1) + else: + _check_density_matrix(state1) + + # Get dimension of the quantum system and reshape + num_indices0 = int(np.log2(len_state0)) + num_indices1 = int(np.log2(len_state1)) + + if num_indices0 != num_indices1: + raise qml.QuantumFunctionError("The two states must have the same number of wires.") + + if state0.shape == (len_state0,): + state0 = qml.math.outer(state0, np.conj(state0)) + + if state1.shape == (len_state1,): + state1 = qml.math.outer(state1, np.conj(state1)) + + return _compute_relative_entropy(state0, state1, base=base) + + +def _check_density_matrix(density_matrix): + """Check the shape, the trace and the positive semi-definitiveness of a matrix.""" + shape = density_matrix.shape[0] + if ( + len(density_matrix.shape) != 2 + or density_matrix.shape[0] != density_matrix.shape[1] + or not np.log2(shape).is_integer() + ): + raise ValueError("Density matrix must be of shape (2**N, 2**N).") + # Check trace + trace = np.trace(density_matrix) + if not is_abstract(trace): + if not allclose(trace, 1.0, atol=1e-10): + raise ValueError("The trace of the density matrix should be one.") + # Check if the matrix is Hermitian + conj_trans = np.transpose(np.conj(density_matrix)) + if not allclose(density_matrix, conj_trans): + raise ValueError("The matrix is not Hermitian.") + # Check if positive semi-definite + evs = np.linalg.eigvalsh(density_matrix) + evs = np.real(evs) + evs_non_negative = [ev for ev in evs if ev >= 0.0] + if len(evs) != len(evs_non_negative): + raise ValueError("The matrix is not positive semi-definite.") + + +def _check_state_vector(state_vector): + """Check the shape and the norm of a state vector.""" + len_state = state_vector.shape[0] + # Check format + if len(np.shape(state_vector)) != 1 or not np.log2(len_state).is_integer(): + raise ValueError("State vector must be of length 2**wires.") + # Check norm + norm = np.linalg.norm(state_vector, ord=2) + if not is_abstract(norm): + if not allclose(norm, 1.0, atol=1e-10): + raise ValueError("Sum of amplitudes-squared does not equal one.") diff --git a/openqaoa/optimizers/pennylane/math/single_dispatch.py b/openqaoa/optimizers/pennylane/math/single_dispatch.py new file mode 100644 index 
000000000..fcac2d6d0 --- /dev/null +++ b/openqaoa/optimizers/pennylane/math/single_dispatch.py @@ -0,0 +1,681 @@ +# Copyright 2018-2021 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Autoray registrations""" +# pylint:disable=protected-access,import-outside-toplevel,wrong-import-position, disable=unnecessary-lambda +from importlib import import_module +import numbers + +import autoray as ar +import numpy as np +import semantic_version + + +def _i(name): + """Convenience function to import PennyLane + interfaces via a string pattern""" + if name == "tf": + return import_module("tensorflow") + + if name == "qml": + return import_module("pennylane") + + return import_module(name) + + +# -------------------------------- SciPy --------------------------------- # +# the following is required to ensure that SciPy sparse Hamiltonians passed to +# qml.SparseHamiltonian are not automatically 'unwrapped' to dense NumPy arrays. +ar.register_function("scipy", "to_numpy", lambda x: x) + +ar.register_function("scipy", "shape", np.shape) +ar.register_function("scipy", "conj", np.conj) +ar.register_function("scipy", "transpose", np.transpose) +ar.register_function("scipy", "ndim", np.ndim) + + +# -------------------------------- NumPy --------------------------------- # +from scipy.linalg import block_diag as _scipy_block_diag + +ar.register_function("numpy", "flatten", lambda x: x.flatten()) +ar.register_function("numpy", "coerce", lambda x: x) +ar.register_function("numpy", "block_diag", lambda x: _scipy_block_diag(*x)) +ar.register_function("builtins", "block_diag", lambda x: _scipy_block_diag(*x)) +ar.register_function("numpy", "gather", lambda x, indices: x[np.array(indices)]) +ar.register_function("numpy", "unstack", list) + +ar.register_function("builtins", "unstack", list) + + +def _scatter_numpy(indices, array, shape): + new_array = np.zeros(shape, dtype=array.dtype.type) + new_array[indices] = array + return new_array + + +def _scatter_element_add_numpy(tensor, index, value): + """In-place addition of a multidimensional value over various + indices of a tensor.""" + new_tensor = tensor.copy() + new_tensor[tuple(index)] += value + return new_tensor + + +ar.register_function("numpy", "scatter", _scatter_numpy) +ar.register_function("numpy", "scatter_element_add", _scatter_element_add_numpy) +ar.register_function("numpy", "eigvalsh", np.linalg.eigvalsh) +ar.register_function("numpy", "entr", lambda x: -np.sum(x * np.log(x))) + + +def _cond(pred, true_fn, false_fn, args): + if pred: + return true_fn(*args) + + return false_fn(*args) + + +ar.register_function("numpy", "cond", _cond) +ar.register_function("builtins", "cond", _cond) + +# -------------------------------- Autograd --------------------------------- # + + +# When autoray inspects PennyLane NumPy tensors, they will be associated with +# the 'pennylane' module, and not autograd. Set an alias so it understands this is +# simply autograd. 
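+#
+# A quick illustration of what these aliases achieve (hypothetical REPL
+# session; assumes PennyLane and autoray are installed):
+#
+#     >>> import autoray as ar
+#     >>> from pennylane import numpy as pnp
+#     >>> ar.infer_backend(pnp.tensor([1.0, 2.0], requires_grad=True))
+#     'autograd'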
+ar.autoray._BACKEND_ALIASES["pennylane"] = "autograd" + +# When dispatching to autograd, ensure that autoray will instead call +# qml.numpy rather than autograd.numpy, to take into account our autograd modification. +ar.autoray._MODULE_ALIASES["autograd"] = "pennylane.numpy" + +ar.register_function("autograd", "flatten", lambda x: x.flatten()) +ar.register_function("autograd", "coerce", lambda x: x) +ar.register_function("autograd", "gather", lambda x, indices: x[np.array(indices)]) +ar.register_function("autograd", "unstack", list) + + +def _block_diag_autograd(tensors): + """Autograd implementation of scipy.linalg.block_diag""" + _np = _i("qml").numpy + tensors = [t.reshape((1, len(t))) if len(t.shape) == 1 else t for t in tensors] + rsizes, csizes = _np.array([t.shape for t in tensors]).T + all_zeros = [[_np.zeros((rsize, csize)) for csize in csizes] for rsize in rsizes] + + res = _np.hstack([tensors[0], *all_zeros[0][1:]]) + for i, t in enumerate(tensors[1:], start=1): + row = _np.hstack([*all_zeros[i][:i], t, *all_zeros[i][i + 1 :]]) + res = _np.vstack([res, row]) + + return res + + +ar.register_function("autograd", "block_diag", _block_diag_autograd) + + +def _unwrap_arraybox(arraybox, max_depth=None, _n=0): + if max_depth is not None and _n == max_depth: + return arraybox + + val = getattr(arraybox, "_value", arraybox) + + if hasattr(val, "_value"): + return _unwrap_arraybox(val, max_depth=max_depth, _n=_n + 1) + + return val + + +def _to_numpy_autograd(x, max_depth=None, _n=0): + if hasattr(x, "_value"): + # Catches the edge case where the data is an Autograd arraybox, + # which only occurs during backpropagation. + return _unwrap_arraybox(x, max_depth=max_depth, _n=_n) + + return x.numpy() + + +ar.register_function("autograd", "to_numpy", _to_numpy_autograd) + + +def _scatter_element_add_autograd(tensor, index, value): + """In-place addition of a multidimensional value over various + indices of a tensor. 
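For example (an illustrative
+    doctest; assumes ``from pennylane import numpy as pnp``):
+
+    >>> t = pnp.zeros((2, 2))
+    >>> _scatter_element_add_autograd(t, ([0, 1], [1, 0]), pnp.array([1.0, 2.0]))
+    tensor([[0., 1.],
+            [2., 0.]], requires_grad=True)
+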
Since Autograd doesn't support indexing + assignment, we have to be clever and use ravel_multi_index.""" + pnp = _i("qml").numpy + size = tensor.size + flat_index = pnp.ravel_multi_index(index, tensor.shape) + if pnp.isscalar(flat_index): + flat_index = [flat_index] + if pnp.isscalar(value) or len(pnp.shape(value)) == 0: + value = [value] + t = [0] * size + for _id, val in zip(flat_index, value): + t[_id] = val + return tensor + pnp.array(t).reshape(tensor.shape) + + +ar.register_function("autograd", "scatter_element_add", _scatter_element_add_autograd) + + +def _take_autograd(tensor, indices, axis=None): + indices = _i("qml").numpy.asarray(indices) + + if axis is None: + return tensor.flatten()[indices] + + fancy_indices = [slice(None)] * axis + [indices] + return tensor[tuple(fancy_indices)] + + +ar.register_function("autograd", "take", _take_autograd) +ar.register_function("autograd", "eigvalsh", lambda x: _i("autograd").numpy.linalg.eigh(x)[0]) +ar.register_function( + "autograd", "entr", lambda x: -_i("autograd").numpy.sum(x * _i("autograd").numpy.log(x)) +) + +ar.register_function("autograd", "diagonal", lambda x, *args: _i("qml").numpy.diag(x)) +ar.register_function("autograd", "cond", _cond) + + +# -------------------------------- TensorFlow --------------------------------- # + + +ar.autoray._SUBMODULE_ALIASES["tensorflow", "angle"] = "tensorflow.math" +ar.autoray._SUBMODULE_ALIASES["tensorflow", "arcsin"] = "tensorflow.math" +ar.autoray._SUBMODULE_ALIASES["tensorflow", "arccos"] = "tensorflow.math" +ar.autoray._SUBMODULE_ALIASES["tensorflow", "arctan"] = "tensorflow.math" +ar.autoray._SUBMODULE_ALIASES["tensorflow", "arctan2"] = "tensorflow.math" +ar.autoray._SUBMODULE_ALIASES["tensorflow", "diag"] = "tensorflow.linalg" +ar.autoray._SUBMODULE_ALIASES["tensorflow", "kron"] = "tensorflow.experimental.numpy" +ar.autoray._SUBMODULE_ALIASES["tensorflow", "moveaxis"] = "tensorflow.experimental.numpy" +ar.autoray._SUBMODULE_ALIASES["tensorflow", "sinc"] = "tensorflow.experimental.numpy" +ar.autoray._SUBMODULE_ALIASES["tensorflow", "isclose"] = "tensorflow.experimental.numpy" +ar.autoray._SUBMODULE_ALIASES["tensorflow", "atleast_1d"] = "tensorflow.experimental.numpy" + +ar.autoray._FUNC_ALIASES["tensorflow", "arcsin"] = "asin" +ar.autoray._FUNC_ALIASES["tensorflow", "arccos"] = "acos" +ar.autoray._FUNC_ALIASES["tensorflow", "arctan"] = "atan" +ar.autoray._FUNC_ALIASES["tensorflow", "arctan2"] = "atan2" +ar.autoray._FUNC_ALIASES["tensorflow", "diag"] = "diag" + +ar.register_function( + "tensorflow", "asarray", lambda x, **kwargs: _i("tf").convert_to_tensor(x, **kwargs) +) +ar.register_function( + "tensorflow", + "hstack", + lambda *args, **kwargs: _i("tf").experimental.numpy.hstack(*args), +) + +ar.register_function("tensorflow", "flatten", lambda x: _i("tf").reshape(x, [-1])) +ar.register_function("tensorflow", "shape", lambda x: tuple(x.shape)) +ar.register_function( + "tensorflow", + "sqrt", + lambda x: _i("tf").math.sqrt( + _i("tf").cast(x, "float64") if x.dtype.name in ("int64", "int32") else x + ), +) + + +def _round_tf(tensor, decimals=0): + """Implement a TensorFlow version of np.round""" + tf = _i("tf") + tol = 10**decimals + return tf.round(tensor * tol) / tol + + +ar.register_function("tensorflow", "round", _round_tf) + + +def _ndim_tf(tensor): + try: + return _i("tf").experimental.numpy.ndim(tensor) + except AttributeError: + return len(tensor.shape) + + +ar.register_function("tensorflow", "ndim", _ndim_tf) + + +def _take_tf(tensor, indices, axis=None): + """Implement a 
TensorFlow version of np.take""" + tf = _i("tf") + + if isinstance(indices, numbers.Number): + indices = [indices] + + indices = tf.convert_to_tensor(indices) + + if np.any(indices < 0): + # Unlike NumPy, TensorFlow doesn't support negative indices. + dim_length = tf.size(tensor).numpy() if axis is None else tf.shape(tensor)[axis] + indices = tf.where(indices >= 0, indices, indices + dim_length) + + if axis is None: + # Unlike NumPy, if axis=None TensorFlow defaults to the first + # dimension rather than flattening the array. + data = tf.reshape(tensor, [-1]) + return tf.gather(data, indices) + + return tf.gather(tensor, indices, axis=axis) + + +ar.register_function("tensorflow", "take", _take_tf) + + +def _coerce_types_tf(tensors): + """Coerce the dtypes of a list of tensors so that they + all share the same dtype, without any reduction in information.""" + tf = _i("tf") + tensors = [tf.convert_to_tensor(t) for t in tensors] + dtypes = {i.dtype for i in tensors} + + if len(dtypes) == 1: + return tensors + + complex_priority = [tf.complex64, tf.complex128] + float_priority = [tf.float16, tf.float32, tf.float64] + int_priority = [tf.int8, tf.int16, tf.int32, tf.int64] + + complex_type = [i for i in complex_priority if i in dtypes] + float_type = [i for i in float_priority if i in dtypes] + int_type = [i for i in int_priority if i in dtypes] + + cast_type = complex_type or float_type or int_type + cast_type = list(cast_type)[-1] + + return [tf.cast(t, cast_type) for t in tensors] + + +ar.register_function("tensorflow", "coerce", _coerce_types_tf) + + +def _block_diag_tf(tensors): + """TensorFlow implementation of scipy.linalg.block_diag""" + tf = _i("tf") + int_dtype = None + + if tensors[0].dtype in (tf.int32, tf.int64): + int_dtype = tensors[0].dtype + tensors = [tf.cast(t, tf.float32) for t in tensors] + + linop_blocks = [tf.linalg.LinearOperatorFullMatrix(block) for block in tensors] + linop_block_diagonal = tf.linalg.LinearOperatorBlockDiag(linop_blocks) + res = linop_block_diagonal.to_dense() + + if int_dtype is None: + return res + + return tf.cast(res, int_dtype) + + +ar.register_function("tensorflow", "block_diag", _block_diag_tf) + + +def _scatter_tf(indices, array, new_dims): + import tensorflow as tf + + indices = np.expand_dims(indices, 1) + return tf.scatter_nd(indices, array, new_dims) + + +def _scatter_element_add_tf(tensor, index, value): + """In-place addition of a multidimensional value over various + indices of a tensor.""" + import tensorflow as tf + + if not isinstance(index[0], int): + index = tuple(zip(*index)) + indices = tf.expand_dims(index, 0) + value = tf.cast(tf.expand_dims(value, 0), tensor.dtype) + return tf.tensor_scatter_nd_add(tensor, indices, value) + + +ar.register_function("tensorflow", "scatter", _scatter_tf) +ar.register_function("tensorflow", "scatter_element_add", _scatter_element_add_tf) + + +def _transpose_tf(a, axes=None): + import tensorflow as tf + + return tf.transpose(a, perm=axes) + + +ar.register_function("tensorflow", "transpose", _transpose_tf) +ar.register_function("tensorflow", "diagonal", lambda x, *args: _i("tf").linalg.diag_part(x)) +ar.register_function("tensorflow", "outer", lambda a, b: _i("tf").tensordot(a, b, axes=0)) + +# for some reason Autoray modifies the default behaviour, so we change it back here +ar.register_function("tensorflow", "where", lambda *args, **kwargs: _i("tf").where(*args, **kwargs)) + + +def _eigvalsh_tf(density_matrix): + evs = _i("tf").linalg.eigvalsh(density_matrix) + evs = _i("tf").math.real(evs) + return 
evs + + +ar.register_function("tensorflow", "eigvalsh", _eigvalsh_tf) +ar.register_function( + "tensorflow", "entr", lambda x: -_i("tf").math.reduce_sum(x * _i("tf").math.log(x)) +) + + +def _kron_tf(a, b): + import tensorflow as tf + + a_shape = a.shape + b_shape = b.shape + + if len(a_shape) == 1: + a = a[:, tf.newaxis] + b = b[tf.newaxis, :] + return tf.reshape(a * b, (a_shape[0] * b_shape[0],)) + + a = a[:, tf.newaxis, :, tf.newaxis] + b = b[tf.newaxis, :, tf.newaxis, :] + return tf.reshape(a * b, (a_shape[0] * b_shape[0], a_shape[1] * b_shape[1])) + + +ar.register_function("tensorflow", "kron", _kron_tf) + + +def _cond_tf(pred, true_fn, false_fn, args): + import tensorflow as tf + + return tf.cond(pred, lambda: true_fn(*args), lambda: false_fn(*args)) + + +ar.register_function("tensorflow", "cond", _cond_tf) + + +ar.register_function( + "tensorflow", + "vander", + lambda *args, **kwargs: _i("tf").experimental.numpy.vander(*args, **kwargs), +) + + +# -------------------------------- Torch --------------------------------- # + +ar.autoray._FUNC_ALIASES["torch", "unstack"] = "unbind" + + +def _to_numpy_torch(x): + if getattr(x, "is_conj", False) and x.is_conj(): + x = x.resolve_conj() + + return x.detach().cpu().numpy() + + +ar.register_function("torch", "to_numpy", _to_numpy_torch) + + +def _asarray_torch(x, dtype=None, **kwargs): + import torch + + dtype_map = { + np.int8: torch.int8, + np.int16: torch.int16, + np.int32: torch.int32, + np.int64: torch.int64, + np.float16: torch.float16, + np.float32: torch.float32, + np.float64: torch.float64, + np.complex64: torch.complex64, + np.complex128: torch.complex128, + } + + if dtype in dtype_map: + return torch.as_tensor(x, dtype=dtype_map[dtype], **kwargs) + + return torch.as_tensor(x, dtype=dtype, **kwargs) + + +ar.register_function("torch", "asarray", _asarray_torch) +ar.register_function("torch", "diag", lambda x, k=0: _i("torch").diag(x, diagonal=k)) +ar.register_function("torch", "expand_dims", lambda x, axis: _i("torch").unsqueeze(x, dim=axis)) +ar.register_function("torch", "shape", lambda x: tuple(x.shape)) +ar.register_function("torch", "gather", lambda x, indices: x[indices]) +ar.register_function("torch", "equal", lambda x, y: _i("torch").eq(x, y)) + +ar.register_function( + "torch", + "sqrt", + lambda x: _i("torch").sqrt( + x.to(_i("torch").float64) if x.dtype in (_i("torch").int64, _i("torch").int32) else x + ), +) + +ar.autoray._SUBMODULE_ALIASES["torch", "arctan2"] = "torch" +ar.autoray._FUNC_ALIASES["torch", "arctan2"] = "atan2" + + +def _round_torch(tensor, decimals=0): + """Implement a Torch version of np.round""" + torch = _i("torch") + tol = 10**decimals + return torch.round(tensor * tol) / tol + + +ar.register_function("torch", "round", _round_torch) + + +def _take_torch(tensor, indices, axis=None): + """Torch implementation of np.take""" + torch = _i("torch") + + if not isinstance(indices, torch.Tensor): + indices = torch.as_tensor(indices) + + if axis is None: + return tensor.flatten()[indices] + + if indices.ndim == 1: + if (indices < 0).any(): + # index_select doesn't allow negative indices + dim_length = tensor.size()[0] if axis is None else tensor.shape[axis] + indices = torch.where(indices >= 0, indices, indices + dim_length) + + return torch.index_select(tensor, dim=axis, index=indices) + + fancy_indices = [slice(None)] * axis + [indices] + return tensor[fancy_indices] + + +ar.register_function("torch", "take", _take_torch) + + +def _coerce_types_torch(tensors): + """Coerce a list of tensors to all have the same 
dtype + without any loss of information.""" + torch = _i("torch") + + # Extract existing set devices, if any + device_set = set(t.device for t in tensors if isinstance(t, torch.Tensor)) + if len(device_set) > 1: + device_names = ", ".join(str(d) for d in device_set) + raise RuntimeError( + f"Expected all tensors to be on the same device, but found at least two devices, {device_names}!" + ) + + device = device_set.pop() if len(device_set) == 1 else None + tensors = [torch.as_tensor(t, device=device) for t in tensors] + + dtypes = {i.dtype for i in tensors} + + if len(dtypes) == 1: + return tensors + + complex_priority = [torch.complex64, torch.complex128] + float_priority = [torch.float16, torch.float32, torch.float64] + int_priority = [torch.int8, torch.int16, torch.int32, torch.int64] + + complex_type = [i for i in complex_priority if i in dtypes] + float_type = [i for i in float_priority if i in dtypes] + int_type = [i for i in int_priority if i in dtypes] + + cast_type = complex_type or float_type or int_type + cast_type = list(cast_type)[-1] + + return [t.to(cast_type) for t in tensors] + + +ar.register_function("torch", "coerce", _coerce_types_torch) + + +def _block_diag_torch(tensors): + """Torch implementation of scipy.linalg.block_diag""" + torch = _i("torch") + sizes = np.array([t.shape for t in tensors]) + shape = np.sum(sizes, axis=0).tolist() + res = torch.zeros(shape, dtype=tensors[0].dtype) + + # get the diagonal indices at which new block + # diagonals need to be inserted + p = np.cumsum(sizes, axis=0) + + # converted the diagonal indices to row and column indices + ridx, cidx = np.stack([p - sizes, p]).T + + for t, r, c in zip(tensors, ridx, cidx): + row = np.arange(*r).reshape(-1, 1) + col = np.arange(*c).reshape(1, -1) + res[row, col] = t + + return res + + +ar.register_function("torch", "block_diag", _block_diag_torch) + + +def _scatter_torch(indices, tensor, new_dimensions): + import torch + + new_tensor = torch.zeros(new_dimensions, dtype=tensor.dtype, device=tensor.device) + new_tensor[indices] = tensor + return new_tensor + + +def _scatter_element_add_torch(tensor, index, value): + """In-place addition of a multidimensional value over various + indices of a tensor. 
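For example (an illustrative
+    doctest; assumes ``import torch``):
+
+    >>> t = torch.zeros((2, 2))
+    >>> _scatter_element_add_torch(t, ([0, 1], [1, 0]), torch.tensor([1.0, 2.0]))
+    tensor([[0., 1.],
+            [2., 0.]])
+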
Note that Torch only supports index assignments + on non-leaf nodes; if the node is a leaf, we must clone it first.""" + if tensor.is_leaf: + tensor = tensor.clone() + tensor[tuple(index)] += value + return tensor + + +ar.register_function("torch", "scatter", _scatter_torch) +ar.register_function("torch", "scatter_element_add", _scatter_element_add_torch) + + +def _sort_torch(tensor): + """Update handling of sort to return only values not indices.""" + sorted_tensor = _i("torch").sort(tensor) + return sorted_tensor.values + + +ar.register_function("torch", "sort", _sort_torch) + + +def _tensordot_torch(tensor1, tensor2, axes): + torch = _i("torch") + if not semantic_version.match(">=1.10.0", torch.__version__) and axes == 0: + return torch.outer(tensor1, tensor2) + return torch.tensordot(tensor1, tensor2, axes) + + +ar.register_function("torch", "tensordot", _tensordot_torch) + + +def _ndim_torch(tensor): + return tensor.dim() + + +ar.register_function("torch", "ndim", _ndim_torch) + +ar.register_function("torch", "eigvalsh", lambda x: _i("torch").linalg.eigvalsh(x)) +ar.register_function("torch", "entr", lambda x: _i("torch").sum(_i("torch").special.entr(x))) + + +def _sum_torch(tensor, axis=None, keepdims=False, dtype=None): + import torch + + if axis is None: + return torch.sum(tensor, dtype=dtype) + + if not isinstance(axis, int) and len(axis) == 0: + return tensor + + return torch.sum(tensor, dim=axis, keepdim=keepdims, dtype=dtype) + + +ar.register_function("torch", "sum", _sum_torch) +ar.register_function("torch", "cond", _cond) + + +# -------------------------------- JAX --------------------------------- # + + +def _to_numpy_jax(x): + from jax.errors import TracerArrayConversionError + + try: + return np.array(getattr(x, "val", x)) + except TracerArrayConversionError as e: + raise ValueError( + "Converting a JAX array to a NumPy array not supported when using the JAX JIT." + ) from e + + +ar.register_function("jax", "flatten", lambda x: x.flatten()) +ar.register_function( + "jax", + "take", + lambda x, indices, axis=None: _i("jax").numpy.take( + x, np.array(indices), axis=axis, mode="wrap" + ), +) +ar.register_function("jax", "coerce", lambda x: x) +ar.register_function("jax", "to_numpy", _to_numpy_jax) +ar.register_function("jax", "block_diag", lambda x: _i("jax").scipy.linalg.block_diag(*x)) +ar.register_function("jax", "gather", lambda x, indices: x[np.array(indices)]) + + +def _scatter_jax(indices, array, new_dimensions): + from jax import numpy as jnp + + new_array = jnp.zeros(new_dimensions, dtype=array.dtype.type) + new_array = new_array.at[indices].set(array) + return new_array + + +ar.register_function("jax", "scatter", _scatter_jax) +ar.register_function( + "jax", + "scatter_element_add", + lambda x, index, value: x.at[tuple(index)].add(value), +) +ar.register_function("jax", "unstack", list) +# pylint: disable=unnecessary-lambda +ar.register_function("jax", "eigvalsh", lambda x: _i("jax").numpy.linalg.eigvalsh(x)) +ar.register_function("jax", "entr", lambda x: _i("jax").numpy.sum(_i("jax").scipy.special.entr(x))) + +ar.register_function( + "jax", + "cond", + lambda pred, true_fn, false_fn, args: _i("jax").lax.cond(pred, true_fn, false_fn, *args), +) diff --git a/openqaoa/optimizers/pennylane/math/utils.py b/openqaoa/optimizers/pennylane/math/utils.py new file mode 100644 index 000000000..391af13fe --- /dev/null +++ b/openqaoa/optimizers/pennylane/math/utils.py @@ -0,0 +1,451 @@ +# Copyright 2018-2021 Xanadu Quantum Technologies Inc. 
+ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Utility functions""" +# pylint: disable=import-outside-toplevel +from autograd.numpy.numpy_boxes import ArrayBox +import autoray as ar +from autoray import numpy as np +import numpy as _np + +from . import single_dispatch # pylint:disable=unused-import + + +def allequal(tensor1, tensor2, **kwargs): + """Returns True if two tensors are element-wise equal along a given axis. + + This function is equivalent to calling ``np.all(tensor1 == tensor2, **kwargs)``, + but allows for ``tensor1`` and ``tensor2`` to differ in type. + + Args: + tensor1 (tensor_like): tensor to compare + tensor2 (tensor_like): tensor to compare + **kwargs: Accepts any keyword argument that is accepted by ``np.all``, + such as ``axis``, ``out``, and ``keepdims``. See the `NumPy documentation + `__ for + more details. + + Returns: + ndarray, bool: If ``axis=None``, a logical AND reduction is applied to all elements + and a boolean will be returned, indicating if all elements evaluate to ``True``. Otherwise, + a boolean NumPy array will be returned. + + **Example** + + >>> a = torch.tensor([1, 2]) + >>> b = np.array([1, 2]) + >>> allequal(a, b) + True + """ + t1 = ar.to_numpy(tensor1) + t2 = ar.to_numpy(tensor2) + return np.all(t1 == t2, **kwargs) + + +def allclose(a, b, rtol=1e-05, atol=1e-08, **kwargs): + """Wrapper around np.allclose, allowing tensors ``a`` and ``b`` + to differ in type""" + try: + # Some frameworks may provide their own allclose implementation. + # Try and use it if available. + res = np.allclose(a, b, rtol=rtol, atol=atol, **kwargs) + except (TypeError, AttributeError, ImportError, RuntimeError): + # Otherwise, convert the input to NumPy arrays. + # + # TODO: replace this with a bespoke, framework agnostic + # low-level implementation to avoid the NumPy conversion: + # + # np.abs(a - b) <= atol + rtol * np.abs(b) + # + t1 = ar.to_numpy(a) + t2 = ar.to_numpy(b) + res = np.allclose(t1, t2, rtol=rtol, atol=atol, **kwargs) + + return res + + +allclose.__doc__ = _np.allclose.__doc__ + + +def cast(tensor, dtype): + """Casts the given tensor to a new type. + + Args: + tensor (tensor_like): tensor to cast + dtype (str, np.dtype): Any supported NumPy dtype representation; this can be + a string (``"float64"``), a ``np.dtype`` object (``np.dtype("float64")``), or + a dtype class (``np.float64``). If ``tensor`` is not a NumPy array, the + **equivalent** dtype in the dispatched framework is used. 
+ + Returns: + tensor_like: a tensor with the same shape and values as ``tensor`` and the + same dtype as ``dtype`` + + **Example** + + We can use NumPy dtype specifiers: + + >>> x = torch.tensor([1, 2]) + >>> cast(x, np.float64) + tensor([1., 2.], dtype=torch.float64) + + We can also use strings: + + >>> x = tf.Variable([1, 2]) + >>> cast(x, "complex128") + + """ + if isinstance(tensor, (list, tuple)): + tensor = np.asarray(tensor) + + if not isinstance(dtype, str): + try: + dtype = np.dtype(dtype).name + except (AttributeError, TypeError, ImportError): + dtype = getattr(dtype, "name", dtype) + + return ar.astype(tensor, ar.to_backend_dtype(dtype, like=ar.infer_backend(tensor))) + + +def cast_like(tensor1, tensor2): + """Casts a tensor to the same dtype as another. + + Args: + tensor1 (tensor_like): tensor to cast + tensor2 (tensor_like): tensor with corresponding dtype to cast to + + Returns: + tensor_like: a tensor with the same shape and values as ``tensor1`` and the + same dtype as ``tensor2`` + + **Example** + + >>> x = torch.tensor([1, 2]) + >>> y = torch.tensor([3., 4.]) + >>> cast_like(x, y) + tensor([1., 2.]) + """ + if not is_abstract(tensor2): + dtype = ar.to_numpy(tensor2).dtype.type + else: + dtype = tensor2.dtype + return cast(tensor1, dtype) + + +def convert_like(tensor1, tensor2): + """Convert a tensor to the same type as another. + + Args: + tensor1 (tensor_like): tensor to convert + tensor2 (tensor_like): tensor with corresponding type to convert to + + Returns: + tensor_like: a tensor with the same shape, values, and dtype as ``tensor1`` and the + same type as ``tensor2``. + + **Example** + + >>> x = np.array([1, 2]) + >>> y = tf.Variable([3, 4]) + >>> convert_like(x, y) + + """ + interface = get_interface(tensor2) + + if interface == "torch": + dev = tensor2.device + return np.asarray(tensor1, device=dev, like=interface) + + return np.asarray(tensor1, like=interface) + + +def get_interface(tensor): + """Returns the name of the package that any array/tensor manipulations + will dispatch to. The returned strings correspond to those used for PennyLane + :doc:`interfaces `. + + Args: + tensor (tensor_like): tensor input + + Returns: + str: name of the interface + + **Example** + + >>> x = torch.tensor([1., 2.]) + >>> get_interface(x) + 'torch' + >>> from pennylane import numpy as np + >>> x = np.array([4, 5], requires_grad=True) + >>> get_interface(x) + 'autograd' + """ + namespace = tensor.__class__.__module__.split(".")[0] + + if namespace in ("pennylane", "autograd"): + return "autograd" + + res = ar.infer_backend(tensor) + + if res == "builtins": + return "numpy" + + return res + + +def is_abstract(tensor, like=None): + """Returns True if the tensor is considered abstract. + + Abstract arrays have no internal value, and are used primarily when + tracing Python functions, for example, in order to perform just-in-time + (JIT) compilation. + + Abstract tensors most commonly occur within a function that has been + decorated using ``@tf.function`` or ``@jax.jit``. + + .. note:: + + Currently Autograd tensors and Torch tensors will always return ``False``. + This is because: + + - Autograd does not provide JIT compilation, and + + - ``@torch.jit.script`` is not currently compatible with QNodes. + + Args: + tensor (tensor_like): input tensor + like (str): The name of the interface. Will be determined automatically + if not provided. + + Returns: + bool: whether the tensor is abstract or not + + **Example** + + Consider the following JAX function: + + .. 
code-block:: python
+
+        import jax
+        from jax import numpy as jnp
+
+        def function(x):
+            print("Value:", x)
+            print("Abstract:", qml.math.is_abstract(x))
+            return jnp.sum(x ** 2)
+
+    When we execute it, we see that the tensor is not abstract; it has known value:
+
+    >>> x = jnp.array([0.5, 0.1])
+    >>> function(x)
+    Value: [0.5, 0.1]
+    Abstract: False
+    DeviceArray(0.26, dtype=float32)
+
+    However, if we use the ``@jax.jit`` decorator, the tensor will now be abstract:
+
+    >>> x = jnp.array([0.5, 0.1])
+    >>> jax.jit(function)(x)
+    Value: Traced<ShapedArray(float32[2])>with<DynamicJaxprTrace(level=0/1)>
+    Abstract: True
+    DeviceArray(0.26, dtype=float32)
+
+    Note that JAX uses an abstract *shaped* array, so although we won't be able to
+    include conditionals within our function that depend on the value of the tensor,
+    we *can* include conditionals that depend on the shape of the tensor.
+
+    Similarly, consider the following TensorFlow function:
+
+    .. code-block:: python
+
+        import tensorflow as tf
+
+        def function(x):
+            print("Value:", x)
+            print("Abstract:", qml.math.is_abstract(x))
+            return tf.reduce_sum(x ** 2)
+
+    >>> x = tf.Variable([0.5, 0.1])
+    >>> function(x)
+    Value: <tf.Variable 'Variable:0' shape=(2,) dtype=float32, numpy=array([0.5, 0.1], dtype=float32)>
+    Abstract: False
+    <tf.Tensor: shape=(), dtype=float32, numpy=0.26>
+
+    If we apply the ``@tf.function`` decorator, the tensor will now be abstract:
+
+    >>> tf.function(function)(x)
+    Value: <tf.Tensor 'x:0' shape=(2,) dtype=float32>
+    Abstract: True
+    <tf.Tensor: shape=(), dtype=float32, numpy=0.26>
+    """
+    interface = like or get_interface(tensor)
+
+    if interface == "jax":
+        import jax
+        from jax.interpreters.partial_eval import DynamicJaxprTracer
+
+        if isinstance(
+            tensor, (jax.interpreters.ad.JVPTracer, jax.interpreters.batching.BatchTracer)
+        ):
+            # Tracer objects will be used when computing gradients or applying transforms.
+            # If the value of the tracer is known, it will contain a ConcreteArray.
+            # Otherwise, it will be abstract.
+            return not isinstance(tensor.aval, jax.core.ConcreteArray)
+
+        return isinstance(tensor, DynamicJaxprTracer)
+
+    if interface == "tensorflow":
+        import tensorflow as tf
+        from tensorflow.python.framework.ops import EagerTensor
+
+        return not isinstance(tf.convert_to_tensor(tensor), EagerTensor)
+
+    # Autograd does not have a JIT
+
+    # QNodes do not currently support TorchScript:
+    # NotSupportedError: Compiled functions can't take variable number of arguments or
+    # use keyword-only arguments with defaults.
+    return False
+
+
+def requires_grad(tensor, interface=None):
+    """Returns True if the tensor is considered trainable.
+
+    .. warning::
+
+        The implementation depends on the contained tensor type, and
+        may be context dependent.
+
+        For example, Torch tensors and PennyLane tensors track trainability
+        as a property of the tensor itself. TensorFlow, on the other hand,
+        only tracks trainability if being watched by a gradient tape.
+
+    Args:
+        tensor (tensor_like): input tensor
+        interface (str): The name of the interface. Will be determined automatically
+            if not provided.
+
+    **Example**
+
+    Calling this function on a PennyLane NumPy array:
+
+    >>> x = np.array([1., 5.], requires_grad=True)
+    >>> requires_grad(x)
+    True
+    >>> x.requires_grad = False
+    >>> requires_grad(x)
+    False
+
+    PyTorch has similar behaviour.
+
+    With TensorFlow, the output is dependent on whether the tensor
+    is currently being watched by a gradient tape:
+
+    >>> x = tf.Variable([0.6, 0.1])
+    >>> requires_grad(x)
+    False
+    >>> with tf.GradientTape() as tape:
+    ...     print(requires_grad(x))
+    True
+
+    While TensorFlow constants are by default not trainable, they can be
+    manually watched by the gradient tape:
+
+    >>> x = tf.constant([0.6, 0.1])
+    >>> with tf.GradientTape() as tape:
+    ...     
print(requires_grad(x)) + False + >>> with tf.GradientTape() as tape: + ... tape.watch([x]) + ... print(requires_grad(x)) + True + """ + interface = interface or get_interface(tensor) + + if interface == "tensorflow": + import tensorflow as tf + + try: + from tensorflow.python.eager.tape import should_record_backprop + except ImportError: # pragma: no cover + from tensorflow.python.eager.tape import ( + should_record as should_record_backprop, + ) + + return should_record_backprop([tf.convert_to_tensor(tensor)]) + + if interface == "autograd": + if isinstance(tensor, ArrayBox): + return True + + return getattr(tensor, "requires_grad", False) + + if interface == "torch": + return getattr(tensor, "requires_grad", False) + + if interface == "numpy": + return False + + if interface == "jax": + import jax + + return isinstance(tensor, jax.core.Tracer) + + raise ValueError(f"Argument {tensor} is an unknown object") + + +def in_backprop(tensor, interface=None): + """Returns True if the tensor is considered to be in a backpropagation environment, it works for Autograd, + Tensorflow and Jax. It is not only checking the differentiability of the tensor like :func:`~.requires_grad`, but + rather checking if the gradient is actually calculated. + + Args: + tensor (tensor_like): input tensor + interface (str): The name of the interface. Will be determined automatically + if not provided. + + **Example** + + >>> x = tf.Variable([0.6, 0.1]) + >>> requires_grad(x) + False + >>> with tf.GradientTape() as tape: + ... print(requires_grad(x)) + True + + .. seealso:: :func:`~.requires_grad` + """ + interface = interface or get_interface(tensor) + + if interface == "tensorflow": + import tensorflow as tf + + try: + from tensorflow.python.eager.tape import should_record_backprop + except ImportError: # pragma: no cover + from tensorflow.python.eager.tape import ( + should_record as should_record_backprop, + ) + + return should_record_backprop([tf.convert_to_tensor(tensor)]) + + if interface == "autograd": + return isinstance(tensor, ArrayBox) + + if interface == "jax": + import jax + + return isinstance(tensor, jax.core.Tracer) + + if interface == "numpy": + return False + + raise ValueError(f"Cannot determine if {tensor} is in backpropagation.") diff --git a/openqaoa/optimizers/pennylane/numpy/__init__.py b/openqaoa/optimizers/pennylane/numpy/__init__.py new file mode 100644 index 000000000..a64c81196 --- /dev/null +++ b/openqaoa/optimizers/pennylane/numpy/__init__.py @@ -0,0 +1,103 @@ +# Copyright 2018-2021 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Overview +-------- + +The PennyLane NumPy subpackage provides a differentiable wrapper around NumPy, that enables +backpropagation through standard NumPy code. + +This version of NumPy **must** be used when using PennyLane with the :doc:`Autograd interface +`: + +>>> from pennylane import numpy as np + +.. 
note::
+
+    If using other interfaces, such as TensorFlow, PyTorch, or JAX, then the
+    PennyLane-provided NumPy should not be used; instead, simply use the standard NumPy import.
+
+This package is a wrapper around ``autograd.numpy``; for details on all available functions,
+please refer to the `Autograd
+docs <https://github.com/HIPS/autograd/blob/master/docs/tutorial.md>`__.
+
+PennyLane additionally extends Autograd with the following classes,
+errors, and functions:
+
+.. autosummary::
+    :toctree: api
+    :nosignatures:
+    :template: autosummary/class_no_inherited.rst
+
+    ~wrap_arrays
+    ~extract_tensors
+    ~tensor_wrapper
+    ~tensor
+    ~NonDifferentiableError
+
+Caveats
+-------
+
+This package is a wrapper around ``autograd.numpy``, and therefore comes with several caveats
+inherited from Autograd:
+
+**Do not use:**
+
+- Assignment to arrays, such as ``A[0, 0] = x``.
+
+..
+
+- Implicit casting of lists to arrays, for example ``A = np.sum([x, y])``.
+  Make sure to explicitly cast to a NumPy array first, i.e.,
+  ``A = np.sum(np.array([x, y]))`` instead.
+
+..
+
+- ``A.dot(B)`` notation. Use ``np.dot(A, B)`` or ``A @ B`` instead.
+
+..
+
+- In-place operations such as ``a += b``. Use ``a = a + b`` instead.
+
+..
+
+- Some ``isinstance`` checks, like ``isinstance(x, np.ndarray)`` or ``isinstance(x, tuple)``,
+  without first doing ``from autograd.builtins import isinstance, tuple``.
+
+For more details, please consult the `Autograd
+docs <https://github.com/HIPS/autograd/blob/master/docs/tutorial.md>`__.
+
+"""
+# pylint: disable=wrong-import-position,wildcard-import,undefined-variable
+from autograd import numpy as _np
+from autograd.numpy import *
+
+from .wrapper import wrap_arrays, extract_tensors, tensor_wrapper
+
+wrap_arrays(_np.__dict__, globals())
+
+# Delete the unwrapped fft, linalg, random modules
+# so that we can re-import our wrapped versions.
+del fft
+del linalg
+del random
+
+from . import fft
+from . import linalg
+from . import random
+
+from .tensor import tensor, NonDifferentiableError, asarray as _asarray
+
+asarray = tensor_wrapper(_asarray)
diff --git a/openqaoa/optimizers/pennylane/numpy/fft.py b/openqaoa/optimizers/pennylane/numpy/fft.py
new file mode 100644
index 000000000..590d7061a
--- /dev/null
+++ b/openqaoa/optimizers/pennylane/numpy/fft.py
@@ -0,0 +1,21 @@
+# Copyright 2018-2021 Xanadu Quantum Technologies Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This package provides a wrapped version of autograd.numpy.fft, such that
+it works with the PennyLane :class:`~.tensor` class.
+"""
+from autograd.numpy import fft as _fft
+from .wrapper import wrap_arrays
+
+wrap_arrays(_fft.__dict__, globals())
diff --git a/openqaoa/optimizers/pennylane/numpy/linalg.py b/openqaoa/optimizers/pennylane/numpy/linalg.py
new file mode 100644
index 000000000..7d0c5d613
--- /dev/null
+++ b/openqaoa/optimizers/pennylane/numpy/linalg.py
@@ -0,0 +1,21 @@
+# Copyright 2018-2021 Xanadu Quantum Technologies Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This package provides a wrapped version of autograd.numpy.linalg, such that +it works with the PennyLane :class:`~.tensor` class. +""" +from autograd.numpy import linalg as _linalg +from .wrapper import wrap_arrays + +wrap_arrays(_linalg.__dict__, globals()) diff --git a/openqaoa/optimizers/pennylane/numpy/random.py b/openqaoa/optimizers/pennylane/numpy/random.py new file mode 100644 index 000000000..68a884149 --- /dev/null +++ b/openqaoa/optimizers/pennylane/numpy/random.py @@ -0,0 +1,59 @@ +# Copyright 2018-2021 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This package provides a wrapped version of autograd.numpy.random, such that +it works with the PennyLane :class:`~.tensor` class. +""" +import semantic_version + +from autograd.numpy import random as _random +from numpy import __version__ as np_version +from numpy.random import MT19937, PCG64, Philox, SFC64 # pylint: disable=unused-import + +from .wrapper import wrap_arrays, tensor_wrapper + +wrap_arrays(_random.__dict__, globals()) + + +np_version_spec = semantic_version.SimpleSpec(">=0.17.0") +if np_version_spec.match(semantic_version.Version(np_version)): + # pylint: disable=too-few-public-methods + # pylint: disable=missing-class-docstring + class Generator(_random.Generator): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + self.__doc__ = "PennyLane wrapped NumPy Generator object\n" + super().__doc__ + + for name in dir(_random.Generator): + if name[0] != "_": + self.__dict__[name] = tensor_wrapper(getattr(super(), name)) + + # pylint: disable=missing-function-docstring + def default_rng(seed=None): + # Mostly copied from NumPy, but uses our Generator instead + + if hasattr(seed, "capsule"): # I changed this line + # We were passed a BitGenerator, so just wrap it up. + return Generator(seed) + if isinstance(seed, Generator): + # Pass through a Generator. + return seed + # Otherwise we need to instantiate a new BitGenerator and Generator as + # normal. + return Generator(PCG64(seed)) + + default_rng.__doc__ = ( + "PennyLane duplicated generator constructor\n" + _random.default_rng.__doc__ + ) diff --git a/openqaoa/optimizers/pennylane/numpy/tensor.py b/openqaoa/optimizers/pennylane/numpy/tensor.py new file mode 100644 index 000000000..8acaa9397 --- /dev/null +++ b/openqaoa/optimizers/pennylane/numpy/tensor.py @@ -0,0 +1,313 @@ +# Copyright 2018-2021 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This module provides the PennyLane :class:`~.tensor` class. +""" +import numpy as onp + +from autograd import numpy as _np +from autograd.extend import primitive, defvjp + +from autograd.tracer import Box +from autograd.numpy.numpy_boxes import ArrayBox +from autograd.numpy.numpy_vspaces import ComplexArrayVSpace, ArrayVSpace +from autograd.core import VSpace + + +__doc__ = "NumPy with automatic differentiation support, provided by Autograd and PennyLane." + +# Hotfix since _np.asarray doesn't have a gradient rule defined. +@primitive +def asarray(vals, *args, **kwargs): + """Gradient supporting autograd asarray""" + if isinstance(vals, (onp.ndarray, _np.ndarray)): + return _np.asarray(vals, *args, **kwargs) + return _np.array(vals, *args, **kwargs) + + +def asarray_gradmaker(ans, *args, **kwargs): + """Gradient maker for asarray""" + del ans, args, kwargs + return lambda g: g + + +defvjp(asarray, asarray_gradmaker, argnums=(0,)) + + +class tensor(_np.ndarray): + """Constructs a PennyLane tensor for use with Autograd QNodes. + + The ``tensor`` class is a subclass of ``numpy.ndarray``, + providing the same multidimensional, homogeneous data-structure + of fixed-size items, with an additional flag to indicate to PennyLane + whether the contained data is differentiable or not. + + .. warning:: + + PennyLane ``tensor`` objects are only used as part of the Autograd QNode + interface. If using another machine learning library such as PyTorch or + TensorFlow, use their built-in ``tf.Variable`` and ``torch.tensor`` classes + instead. + + .. warning:: + + Tensors should be constructed using standard array construction functions + provided as part of PennyLane's NumPy implementation, including + ``np.array``, ``np.zeros`` or ``np.empty``. + + The parameters given here refer to a low-level class + for instantiating tensors. + + + Args: + input_array (array_like): Any data structure in any form that can be converted to + an array. This includes lists, lists of tuples, tuples, tuples of tuples, + tuples of lists and ndarrays. + requires_grad (bool): whether the tensor supports differentiation + + **Example** + + The trainability of a tensor can be set on construction via the + ``requires_grad`` keyword argument, + + >>> from pennylane import numpy as np + >>> x = np.array([0, 1, 2], requires_grad=True) + >>> x + tensor([0, 1, 2], requires_grad=True) + + or in-place by modifying the ``requires_grad`` attribute: + + >>> x.requires_grad = False + tensor([0, 1, 2], requires_grad=False) + + Since tensors are subclasses of ``np.ndarray``, they can be provided as arguments + to any PennyLane-wrapped NumPy function: + + >>> np.sin(x) + tensor([0. 
, 0.84147098, 0.90929743], requires_grad=True) + + When composing functions of multiple tensors, if at least one input tensor is differentiable, + then the output will also be differentiable: + + >>> x = np.array([0, 1, 2], requires_grad=False) + >>> y = np.zeros([3], requires_grad=True) + >>> np.vstack([x, y]) + tensor([[0., 1., 2.], + [0., 0., 0.]], requires_grad=True) + """ + + def __new__(cls, input_array, *args, requires_grad=True, **kwargs): + obj = asarray(input_array, *args, **kwargs) + + if isinstance(obj, onp.ndarray): + obj = obj.view(cls) + obj.requires_grad = requires_grad + + return obj + + def __array_finalize__(self, obj): + # pylint: disable=attribute-defined-outside-init + if obj is None: # pragma: no cover + return + + self.requires_grad = getattr(obj, "requires_grad", None) + + def __repr__(self): + string = super().__repr__() + return string[:-1] + f", requires_grad={self.requires_grad})" + + def __array_wrap__(self, obj): + out_arr = tensor(obj, requires_grad=self.requires_grad) + return super().__array_wrap__(out_arr) + + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + # pylint: disable=no-member,attribute-defined-outside-init + + # unwrap any outputs the ufunc might have + outputs = [i.view(onp.ndarray) for i in kwargs.get("out", ())] + + if outputs: + # Insert the unwrapped outputs into the keyword + # args dictionary, to be passed to ndarray.__array_ufunc__ + outputs = tuple(outputs) + kwargs["out"] = outputs + else: + # If the ufunc has no ouputs, we simply + # create a tuple containing None for all potential outputs. + outputs = (None,) * ufunc.nout + + # unwrap the input arguments to the ufunc + args = [i.unwrap() if hasattr(i, "unwrap") else i for i in inputs] + + # call the ndarray.__array_ufunc__ method to compute the result + # of the vectorized ufunc + res = super().__array_ufunc__(ufunc, method, *args, **kwargs) + + if ufunc.nout == 1: + res = (res,) + + # construct a list of ufunc outputs to return + ufunc_output = [ + (onp.asarray(result) if output is None else output) + for result, output in zip(res, outputs) + ] + + # if any of the inputs were trainable, the output is also trainable + requires_grad = any( + isinstance(x, onp.ndarray) and getattr(x, "requires_grad", True) for x in inputs + ) + + # Iterate through the ufunc outputs and convert each to a PennyLane tensor. + # We also correctly set the requires_grad attribute. + for i in range(len(ufunc_output)): # pylint: disable=consider-using-enumerate + ufunc_output[i] = tensor(ufunc_output[i], requires_grad=requires_grad) + + if len(ufunc_output) == 1: + # the ufunc has a single output so return a single tensor + return ufunc_output[0] + + # otherwise we must return a tuple of tensors + return tuple(ufunc_output) + + def __getitem__(self, *args, **kwargs): + item = super().__getitem__(*args, **kwargs) + + if not isinstance(item, tensor): + item = tensor(item, requires_grad=self.requires_grad) + + return item + + def __hash__(self): + if self.ndim == 0: + # Allowing hashing if the tensor is a scalar. + # We hash both the scalar value *and* the differentiability information, + # to match the behaviour of PyTorch. + return hash((self.item(), self.requires_grad)) + + raise TypeError("unhashable type: 'numpy.tensor'") + + def __reduce__(self): + # Called when pickling the object. + # Numpy ndarray uses __reduce__ instead of __getstate__ to prepare an object for + # pickling. 
self.requires_grad needs to be included in the tuple returned by + # __reduce__ in order to be preserved in the unpickled object. + reduced_obj = super().__reduce__() + # The last (2nd) element of this tuple holds the data. Add requires_grad to this: + full_reduced_data = reduced_obj[2] + (self.requires_grad,) + return (reduced_obj[0], reduced_obj[1], full_reduced_data) + + def __setstate__(self, reduced_obj) -> None: + # Called when unpickling the object. + # Set self.requires_grad with the last element in the tuple returned by __reduce__: + # pylint: disable=attribute-defined-outside-init,no-member + self.requires_grad = reduced_obj[-1] + # And call parent's __setstate__ without this element: + super().__setstate__(reduced_obj[:-1]) + + def unwrap(self): + """Converts the tensor to a standard, non-differentiable NumPy ndarray or Python scalar if + the tensor is 0-dimensional. + + All information regarding differentiability of the tensor will be lost. + + .. warning:: + + The returned array is a new view onto the **same data**. That is, + the tensor and the returned ``ndarray`` share the same underlying storage. + Changes to the tensor object will be reflected within the returned array, + and vice versa. + + **Example** + + >>> from pennylane import numpy as np + >>> x = np.array([1, 2], requires_grad=True) + >>> x + tensor([1, 2], requires_grad=True) + >>> x.unwrap() + array([1, 2]) + + Zero dimensional array are converted to Python scalars: + + >>> x = np.array(1.543, requires_grad=False) + >>> x.unwrap() + 1.543 + >>> type(x.unwrap()) + float + + The underlying data is **not** copied: + + >>> x = np.array([1, 2], requires_grad=True) + >>> y = x.unwrap() + >>> x[0] = 5 + >>> y + array([5, 2]) + >>> y[1] = 7 + >>> x + tensor([5, 7], requires_grad=True) + + + To create a copy, the ``copy()`` method can be used: + + >>> x = np.array([1, 2], requires_grad=True) + >>> y = x.unwrap().copy() + >>> x[0] = 5 + >>> y + array([1, 2]) + """ + if self.ndim == 0: + return self.view(onp.ndarray).item() + + return self.view(onp.ndarray) + + def numpy(self): + """Converts the tensor to a standard, non-differentiable NumPy ndarray or Python scalar if + the tensor is 0-dimensional. + + This method is an alias for :meth:`~.unwrap`. See :meth:`~.unwrap` for more details. + """ + return self.unwrap() + + +class NonDifferentiableError(Exception): + """Exception raised if attempting to differentiate non-trainable + :class:`~.tensor` using Autograd.""" + + +def tensor_to_arraybox(x, *args): + """Convert a :class:`~.tensor` to an Autograd ``ArrayBox``. + + Args: + x (array_like): Any data structure in any form that can be converted to + an array. This includes lists, lists of tuples, tuples, tuples of tuples, + tuples of lists and ndarrays. + + Returns: + autograd.numpy.numpy_boxes.ArrayBox: Autograd ArrayBox instance of the array + + Raises: + NonDifferentiableError: if the provided tensor is non-differentiable + """ + if isinstance(x, tensor): + if x.requires_grad: + return ArrayBox(x, *args) + + raise NonDifferentiableError( + f"{x} is non-differentiable. Set the requires_grad attribute to True." 
+ ) + + return ArrayBox(x, *args) + + +Box.type_mappings[tensor] = tensor_to_arraybox +VSpace.mappings[tensor] = lambda x: ComplexArrayVSpace(x) if onp.iscomplexobj(x) else ArrayVSpace(x) diff --git a/openqaoa/optimizers/pennylane/numpy/wrapper.py b/openqaoa/optimizers/pennylane/numpy/wrapper.py new file mode 100644 index 000000000..dee567dac --- /dev/null +++ b/openqaoa/optimizers/pennylane/numpy/wrapper.py @@ -0,0 +1,154 @@ +# Copyright 2018-2021 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This module provides the PennyLane wrapper functions for modifying NumPy, +such that it accepts the PennyLane :class:`~.tensor` class. +""" +from collections.abc import Sequence +import functools + +from autograd import numpy as _np + +from .tensor import tensor + + +def extract_tensors(x): + """Iterate through an iterable, and extract any PennyLane + tensors that appear. + + Args: + x (.tensor or Sequence): an input tensor or sequence + + Yields: + tensor: the next tensor in the sequence. If the input was a single + tensor, than the tensor is yielded and the iterator completes. + + **Example** + + >>> from pennylane import numpy as np + >>> import numpy as onp + >>> iterator = np.extract_tensors([0.1, np.array(0.1), "string", onp.array(0.5)]) + >>> list(iterator) + [tensor(0.1, requires_grad=True)] + """ + if isinstance(x, tensor): + # If the item is a tensor, return it + yield x + elif isinstance(x, Sequence) and not isinstance(x, (str, bytes)): + # If the item is a sequence, recursively look through its + # elements for tensors. + # NOTE: we choose to branch on Sequence here and not Iterable, + # as NumPy arrays are not Sequences. + for item in x: + yield from extract_tensors(item) + + +def tensor_wrapper(obj): + """Decorator that wraps callable objects and classes so that they both accept + a ``requires_grad`` keyword argument, as well as returning a PennyLane + :class:`~.tensor`. + + Only if the decorated object returns an ``ndarray`` is the + output converted to a :class:`~.tensor`; this avoids superfluous conversion + of scalars and other native-Python types. + + .. note:: + + This wrapper does *not* enable autodifferentiation of the wrapped function, + it merely adds support for :class:`~pennylane.numpy.tensor` output. + + Args: + obj: a callable object or class + + **Example** + + By default, the ``ones`` function provided by Autograd + constructs standard ``ndarray`` objects, and does not + permit a ``requires_grad`` argument: + + >>> from autograd.numpy import ones + >>> ones([2, 2]) + array([[1., 1.], + [1., 1.]]) + >>> ones([2, 2], requires_grad=True) + TypeError: ones() got an unexpected keyword argument 'requires_grad' + + ``tensor_wrapper`` both enables construction of :class:`~pennylane.numpy.tensor` + objects, while also converting the output. 
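# A minimal, self-contained sketch of the mechanism the vendored ``tensor``
# class above relies on: an ``ndarray`` subclass that carries a trainability
# flag through ufunc calls via ``__array_ufunc__``. ``FlaggedArray`` is a
# hypothetical name used only for illustration; it is not part of this patch.
import numpy as onp


class FlaggedArray(onp.ndarray):
    def __new__(cls, data, requires_grad=True):
        obj = onp.asarray(data).view(cls)
        obj.requires_grad = requires_grad
        return obj

    def __array_finalize__(self, obj):
        # called on views and copies; inherit the flag when present
        self.requires_grad = getattr(obj, "requires_grad", None)

    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
        # unwrap flagged inputs, let NumPy compute, then re-wrap the result
        unwrapped = [
            i.view(onp.ndarray) if isinstance(i, FlaggedArray) else i for i in inputs
        ]
        res = super().__array_ufunc__(ufunc, method, *unwrapped, **kwargs)
        # if any input was trainable, the output is trainable too
        rg = any(getattr(i, "requires_grad", False) for i in inputs)
        return FlaggedArray(res, requires_grad=rg)


x = FlaggedArray([0.5, 0.1], requires_grad=False)
y = FlaggedArray([1.0, 2.0], requires_grad=True)
print((x + y).requires_grad)  # True: trainability propagates through the ufunc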
+ + >>> from pennylane import numpy as np + >>> ones = np.tensor_wrapper(ones) + >>> ones([2, 2], requires_grad=True) + tensor([[1., 1.], + [1., 1.]], requires_grad=True) + """ + + @functools.wraps(obj) + def _wrapped(*args, **kwargs): + """Wrapped NumPy function""" + + tensor_kwargs = {} + + if "requires_grad" in kwargs: + tensor_kwargs["requires_grad"] = kwargs.pop("requires_grad") + else: + tensor_args = list(extract_tensors(args)) + + if tensor_args: + # Unless the user specifies otherwise, if all tensors in the argument + # list are non-trainable, the output is also non-trainable. + # Equivalently: if any tensor is trainable, the output is also trainable. + # NOTE: Use of Python's ``any`` results in an infinite recursion, + # and I'm not sure why. Using ``np.any`` works fine. + tensor_kwargs["requires_grad"] = _np.any([i.requires_grad for i in tensor_args]) + + # evaluate the original object + res = obj(*args, **kwargs) + + if isinstance(res, _np.ndarray): + # only if the output of the object is a ndarray, + # then convert to a PennyLane tensor + res = tensor(res, **tensor_kwargs) + + return res + + return _wrapped + + +def wrap_arrays(old, new): + """Loop through an object's symbol table, + wrapping each function with :func:`~pennylane.numpy.tensor_wrapper`. + + This is useful if you would like to wrap **every** function + provided by an imported module. + + Args: + old (dict): The symbol table to be wrapped. Note that + callable classes are ignored; only functions are wrapped. + new (dict): The symbol table that contains the wrapped values. + + .. seealso:: :func:`~pennylane.numpy.tensor_wrapper` + + **Example** + + This function is used to wrap the imported ``autograd.numpy`` + module, to enable all functions to support ``requires_grad`` + arguments, and to output :class:`~pennylane.numpy.tensor` objects: + + >>> from autograd import numpy as _np + >>> wrap_arrays(_np.__dict__, globals()) + """ + for name, obj in old.items(): + if callable(obj) and not isinstance(obj, type): + new[name] = tensor_wrapper(obj) diff --git a/openqaoa/optimizers/pennylane/optimization_methods_pennylane.py b/openqaoa/optimizers/pennylane/optimization_methods_pennylane.py index 4f92fec98..eaa78e612 100644 --- a/openqaoa/optimizers/pennylane/optimization_methods_pennylane.py +++ b/openqaoa/optimizers/pennylane/optimization_methods_pennylane.py @@ -19,7 +19,8 @@ Similarly as with the custom optimization methods Scipy `minimize` is used. Extends available scipy methods. """ -import pennylane as pl +from openqaoa.optimizers.pennylane.pennylane_optimizers.gradient_descent import GradientDescentOptimizer +from openqaoa.optimizers import pennylane as pl import inspect from scipy.optimize import OptimizeResult import numpy as np @@ -30,7 +31,6 @@ 'vgd': pl.GradientDescentOptimizer, 'momentum': pl.MomentumOptimizer, 'nesterov_momentum': pl.NesterovMomentumOptimizer, - 'natural_grad_descent': pl.QNGOptimizer, 'rmsprop': pl.RMSPropOptimizer, 'rotosolve': pl.RotosolveOptimizer, 'spsa': pl.SPSAOptimizer, @@ -38,7 +38,7 @@ -def pennylane_optimizer(fun, x0, args=(), maxfev=None, pennylane_method='vgd', qfim=None, +def pennylane_optimizer(fun, x0, args=(), maxfev=None, pennylane_method='vgd', maxiter=100, tol=10**(-6), jac=None, callback=None, nums_frequency=None, spectra=None, shifts=None, **options): @@ -59,14 +59,12 @@ def pennylane_optimizer(fun, x0, args=(), maxfev=None, pennylane_method='vgd', q Maximum number of function evaluations. pennylane_method : string, optional Optimizer method to compute the steps. 
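# A sketch of how the ``pennylane_optimizer`` defined in the hunk above can be
# driven end to end: SciPy's ``minimize`` accepts a callable ``method`` with
# this signature and forwards ``jac`` plus every ``options`` entry to it.
# The import path assumes this patch is installed; the cost and gradient here
# are toy stand-ins, not an OpenQAOA workflow.
import numpy as np
from scipy.optimize import minimize

from openqaoa.optimizers.pennylane.optimization_methods_pennylane import (
    pennylane_optimizer,
)


def cost(x):
    return np.sum((x - 1.0) ** 2)


def grad(x):
    return 2 * (x - 1.0)


res = minimize(
    cost,
    x0=np.array([0.0, 0.0]),
    method=pennylane_optimizer,  # custom SciPy method, passed as a callable
    jac=grad,
    options={"pennylane_method": "adam", "maxiter": 200, "tol": 1e-6},
)
print(res.x, res.fun)  # should approach [1. 1.] and 0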
-    qfim : callable, optional (required for natural_grad_descent)
-        Callable Fubini-Study metric tensor
     maxiter : int, optional
         Maximum number of iterations.
     tol : float
         Tolerance before the optimizer terminates; if `tol` is larger than
         the difference between two steps, terminate optimization.
-    jac : callable, optinal (required for all methods but rotosolve and spsa)
-        Callable gradient function.
+    jac : callable, optional
+        Callable gradient function. Required for all methods but rotosolve and spsa.
     callback : callable, optional
         Called after each iteration, as ``callback(xk)``, where ``xk`` is the
         current parameter vector.
@@ -122,11 +120,9 @@ def cost(params, **k): # define a function to convert the params list from penny
         improved = False

         # compute step (depends on the optimizer)
-        if pennylane_method in ['natural_grad_descent']:
-            testx, testy = optimizer.step_and_cost(cost, bestx, *args, grad_fn=jac, metric_tensor_fn=qfim)
         if pennylane_method in ['adagrad', 'adam', 'vgd', 'momentum', 'nesterov_momentum', 'rmsprop']:
             testx, testy = optimizer.step_and_cost(cost, bestx, *args, grad_fn=jac)
-        if pennylane_method in ['rotosolve']:
+        elif pennylane_method in ['rotosolve']:
             testx, testy = optimizer.step_and_cost(
                 cost, bestx, *args,
                 nums_frequency={'params': {(i,):1 for i in range(bestx.size)}} if not nums_frequency else nums_frequency,
@@ -134,7 +130,7 @@ def cost(params, **k): # define a function to convert the params list from penny
                 shifts=shifts,
                 full_output=False,
             )
-        if pennylane_method in ['spsa']:
+        elif pennylane_method in ['spsa']:
             testx, testy = optimizer.step_and_cost(cost, bestx, *args)

         # check if stable
diff --git a/openqaoa/optimizers/pennylane/pennylane_optimizers/adagrad.py b/openqaoa/optimizers/pennylane/pennylane_optimizers/adagrad.py
new file mode 100644
index 000000000..b1b1c35c2
--- /dev/null
+++ b/openqaoa/optimizers/pennylane/pennylane_optimizers/adagrad.py
@@ -0,0 +1,93 @@
+# Copyright 2018-2021 Xanadu Quantum Technologies Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Adagrad optimizer"""
+from numpy import sqrt  # changed from the original pennylane code
+from .gradient_descent import GradientDescentOptimizer
+
+
+class AdagradOptimizer(GradientDescentOptimizer):
+    r"""Gradient-descent optimizer with past-gradient-dependent
+    learning rate in each dimension.
+
+    Adagrad adjusts the learning rate for each parameter :math:`x_i`
+    in :math:`x` based on past gradients. We therefore have to consider
+    each parameter update individually,
+
+    .. math::
+        x^{(t+1)}_i = x^{(t)}_i - \eta_i^{(t+1)} \partial_{x_i} f(x^{(t)}),
+
+    where the gradient is replaced by a (scalar) partial derivative.
+
+    The learning rate in step :math:`t` is given by
+
+    .. math::
+        \eta_i^{(t+1)} = \frac{ \eta_{\mathrm{init}} }{ \sqrt{a_i^{(t+1)} + \epsilon } },
+        ~~~ a_i^{(t+1)} = \sum_{k=1}^t (\partial_{x_i} f(x^{(k)}))^2.
+
+    The offset :math:`\epsilon` avoids division by zero.
+
+    :math:`\eta` is the step size, a user defined parameter.
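# The Adagrad update rule from the docstring above, written out with plain
# NumPy for a single parameter vector (an editorial sketch, not patch code):
# the accumulated sum of squared partials divides the step size per dimension.
import numpy as np


def adagrad_step(x, grad, accum, stepsize=0.01, eps=1e-8):
    accum = accum + grad**2  # a_i <- a_i + (df/dx_i)^2
    x_new = x - (stepsize / np.sqrt(accum + eps)) * grad
    return x_new, accum


x, accum = np.array([1.0, -2.0]), np.zeros(2)
for _ in range(100):
    x, accum = adagrad_step(x, 2 * x, accum, stepsize=0.1)  # f(x) = ||x||^2
print(x)  # moves toward the minimum at the origin, with shrinking steps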
+ + Args: + stepsize (float): the user-defined hyperparameter :math:`\eta` + eps (float): offset :math:`\epsilon` added for numerical stability + """ + + def __init__(self, stepsize=0.01, eps=1e-8): + super().__init__(stepsize) + self.eps = eps + self.accumulation = None + + def apply_grad(self, grad, args): + r"""Update the variables in args to take a single optimization step. Flattens and unflattens + the inputs to maintain nested iterables as the parameters of the optimization. + + Args: + grad (tuple[array]): the gradient of the objective + function at point :math:`x^{(t)}`: :math:`\nabla f(x^{(t)})` + args (tuple): the current value of the variables :math:`x^{(t)}` + + Returns: + list: the new values :math:`x^{(t+1)}` + """ + args_new = list(args) + + if self.accumulation is None: + self.accumulation = [0.0] * len(args) + + trained_index = 0 + for index, arg in enumerate(args): + if getattr(arg, "requires_grad", False): + + self._update_accumulation(index, grad[trained_index]) + + coeff = self.stepsize / sqrt(self.accumulation[index] + self.eps) + args_new[index] = arg - coeff * grad[trained_index] + + trained_index += 1 + + return args_new + + def _update_accumulation(self, index, grad): + r"""Update the accumulation at index with gradient. + + Args: + index (int): index of parameter to update. + grad_flat (ndarray): gradient at index + """ + self.accumulation[index] = self.accumulation[index] + grad**2 + + def reset(self): + """Reset optimizer by erasing memory of past steps.""" + self.accumulation = None diff --git a/openqaoa/optimizers/pennylane/pennylane_optimizers/adam.py b/openqaoa/optimizers/pennylane/pennylane_optimizers/adam.py new file mode 100644 index 000000000..2d303c499 --- /dev/null +++ b/openqaoa/optimizers/pennylane/pennylane_optimizers/adam.py @@ -0,0 +1,133 @@ +# Copyright 2018-2021 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Adam optimizer""" +from numpy import sqrt # changed from the original pennylane code +from .gradient_descent import GradientDescentOptimizer + + +class AdamOptimizer(GradientDescentOptimizer): + r"""Gradient-descent optimizer with adaptive learning rate, first and second moment. + + Adaptive Moment Estimation uses a step-dependent learning rate, + a first moment :math:`a` and a second moment :math:`b`, reminiscent of + the momentum and velocity of a particle: + + .. math:: + x^{(t+1)} = x^{(t)} - \eta^{(t+1)} \frac{a^{(t+1)}}{\sqrt{b^{(t+1)}} + \epsilon }, + + where the update rules for the two moments are given by + + .. math:: + a^{(t+1)} &= \beta_1 a^{(t)} + (1-\beta_1) \nabla f(x^{(t)}),\\ + b^{(t+1)} &= \beta_2 b^{(t)} + (1-\beta_2) (\nabla f(x^{(t)}))^{\odot 2},\\ + \eta^{(t+1)} &= \eta \frac{\sqrt{(1-\beta_2^{t+1})}}{(1-\beta_1^{t+1})}. + + Above, :math:`( \nabla f(x^{(t-1)}))^{\odot 2}` denotes the element-wise square operation, + which means that each element in the gradient is multiplied by itself. The hyperparameters + :math:`\beta_1` and :math:`\beta_2` can also be step-dependent. 
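# The Adam update as implemented in this vendored class, where the bias
# correction is folded into a rescaled step size rather than into the moments
# themselves (note this vendored version defaults beta2=0.99). An editorial
# sketch, not patch code:
import numpy as np


def adam_step(x, grad, fm, sm, t, stepsize=0.01, beta1=0.9, beta2=0.99, eps=1e-8):
    t += 1
    fm = beta1 * fm + (1 - beta1) * grad     # first moment
    sm = beta2 * sm + (1 - beta2) * grad**2  # second moment
    eta_t = stepsize * np.sqrt(1 - beta2**t) / (1 - beta1**t)  # rescaled step
    return x - eta_t * fm / (np.sqrt(sm) + eps), fm, sm, t


x, fm, sm, t = np.array([1.0, -2.0]), np.zeros(2), np.zeros(2), 0
for _ in range(500):
    x, fm, sm, t = adam_step(x, 2 * x, fm, sm, t)  # f(x) = ||x||^2
print(x)  # settles near the origin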
Initially, the first and + second moment are zero. + + The shift :math:`\epsilon` avoids division by zero. + + For more details, see `arXiv:1412.6980 `_. + + Args: + stepsize (float): the user-defined hyperparameter :math:`\eta` + beta1 (float): hyperparameter governing the update of the first and second moment + beta2 (float): hyperparameter governing the update of the first and second moment + eps (float): offset :math:`\epsilon` added for numerical stability + + """ + + def __init__(self, stepsize=0.01, beta1=0.9, beta2=0.99, eps=1e-8): + super().__init__(stepsize) + self.beta1 = beta1 + self.beta2 = beta2 + self.eps = eps + self.accumulation = None + + def apply_grad(self, grad, args): + r"""Update the variables args to take a single optimization step. Flattens and unflattens + the inputs to maintain nested iterables as the parameters of the optimization. + + Args: + grad (tuple[ndarray]): the gradient of the objective + function at point :math:`x^{(t)}`: :math:`\nabla f(x^{(t)})` + args (tuple): the current value of the variables :math:`x^{(t)}` + + Returns: + list: the new values :math:`x^{(t+1)}` + """ + args_new = list(args) + + if self.accumulation is None: + self.accumulation = {"fm": [0] * len(args), "sm": [0] * len(args), "t": 0} + + self.accumulation["t"] += 1 + + # Update step size (instead of correcting for bias) + new_stepsize = ( + self.stepsize + * sqrt(1 - self.beta2 ** self.accumulation["t"]) + / (1 - self.beta1 ** self.accumulation["t"]) + ) + + trained_index = 0 + for index, arg in enumerate(args): + if getattr(arg, "requires_grad", False): + + self._update_accumulation(index, grad[trained_index]) + args_new[index] = arg - new_stepsize * self.accumulation["fm"][index] / ( + sqrt(self.accumulation["sm"][index]) + self.eps + ) + + trained_index += 1 + + return args_new + + def _update_accumulation(self, index, grad): + r"""Update the moments. + + Args: + index (int): the index of the argument to update + grad (ndarray): the gradient for that trainable param + """ + # update first moment + self.accumulation["fm"][index] = ( + self.beta1 * self.accumulation["fm"][index] + (1 - self.beta1) * grad + ) + + # update second moment + self.accumulation["sm"][index] = ( + self.beta2 * self.accumulation["sm"][index] + (1 - self.beta2) * grad**2 + ) + + def reset(self): + """Reset optimizer by erasing memory of past steps.""" + self.accumulation = None + + @property + def fm(self): + """Returns estimated first moments of gradient""" + return None if self.accumulation is None else self.accumulation["fm"] + + @property + def sm(self): + """Returns estimated second moments of gradient""" + return None if self.accumulation is None else self.accumulation["sm"] + + @property + def t(self): + """Returns accumulated timesteps""" + return None if self.accumulation is None else self.accumulation["t"] diff --git a/openqaoa/optimizers/pennylane/pennylane_optimizers/gradient_descent.py b/openqaoa/optimizers/pennylane/pennylane_optimizers/gradient_descent.py index 632424b81..a5cd4a222 100644 --- a/openqaoa/optimizers/pennylane/pennylane_optimizers/gradient_descent.py +++ b/openqaoa/optimizers/pennylane/pennylane_optimizers/gradient_descent.py @@ -13,8 +13,7 @@ # limitations under the License. """Gradient descent optimizer""" -from pennylane._grad import grad as get_gradient - +get_gradient = None # changed from the original pennylane code class GradientDescentOptimizer: r"""Basic gradient-descent optimizer. 
diff --git a/openqaoa/optimizers/pennylane/pennylane_optimizers/momentum.py b/openqaoa/optimizers/pennylane/pennylane_optimizers/momentum.py new file mode 100644 index 000000000..5cb25e48f --- /dev/null +++ b/openqaoa/optimizers/pennylane/pennylane_optimizers/momentum.py @@ -0,0 +1,84 @@ +# Copyright 2018-2021 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Momentum optimizer""" +from .gradient_descent import GradientDescentOptimizer + + +class MomentumOptimizer(GradientDescentOptimizer): + r"""Gradient-descent optimizer with momentum. + + The momentum optimizer adds a "momentum" term to gradient descent + which considers the past gradients: + + .. math:: x^{(t+1)} = x^{(t)} - a^{(t+1)}. + + The accumulator term :math:`a` is updated as follows: + + .. math:: a^{(t+1)} = m a^{(t)} + \eta \nabla f(x^{(t)}), + + with user defined parameters: + + * :math:`\eta`: the step size + * :math:`m`: the momentum + + Args: + stepsize (float): user-defined hyperparameter :math:`\eta` + momentum (float): user-defined hyperparameter :math:`m` + """ + + def __init__(self, stepsize=0.01, momentum=0.9): + super().__init__(stepsize) + self.momentum = momentum + self.accumulation = None + + def apply_grad(self, grad, args): + r"""Update the trainable args to take a single optimization step. Flattens and unflattens + the inputs to maintain nested iterables as the parameters of the optimization. + + Args: + grad (tuple [array]): the gradient of the objective + function at point :math:`x^{(t)}`: :math:`\nabla f(x^{(t)})`. + args (tuple): the current value of the variables :math:`x^{(t)}`. + + Returns: + list [array]: the new values :math:`x^{(t+1)}`. + """ + args_new = list(args) + + if self.accumulation is None: + self.accumulation = [0.0] * len(args) + + trained_index = 0 + for index, arg in enumerate(args): + if getattr(arg, "requires_grad", False): + + self._update_accumulation(index, grad[trained_index]) + args_new[index] = arg - self.accumulation[index] + + trained_index += 1 + + return args_new + + def _update_accumulation(self, index, grad): + r"""Update the accumulation. + + Args: + index (int): index of argument to update. + grad (ndarray): gradient at index + """ + self.accumulation[index] = self.momentum * self.accumulation[index] + self.stepsize * grad + + def reset(self): + """Reset optimizer by erasing memory of past steps.""" + self.accumulation = None diff --git a/openqaoa/optimizers/pennylane/pennylane_optimizers/nesterov_momentum.py b/openqaoa/optimizers/pennylane/pennylane_optimizers/nesterov_momentum.py new file mode 100644 index 000000000..c3836fb4e --- /dev/null +++ b/openqaoa/optimizers/pennylane/pennylane_optimizers/nesterov_momentum.py @@ -0,0 +1,75 @@ +# Copyright 2018-2021 Xanadu Quantum Technologies Inc. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Nesterov momentum optimizer"""
+get_gradient = None  # changed from the original pennylane code
+from .momentum import MomentumOptimizer
+
+
+class NesterovMomentumOptimizer(MomentumOptimizer):
+    r"""Gradient-descent optimizer with Nesterov momentum.
+
+    Nesterov Momentum works like the
+    :class:`Momentum optimizer <.pennylane.optimize.MomentumOptimizer>`,
+    but shifts the current input by the momentum term when computing the gradient
+    of the objective function:
+
+    .. math:: a^{(t+1)} = m a^{(t)} + \eta \nabla f(x^{(t)} - m a^{(t)}).
+
+    The user defined parameters are:
+
+    * :math:`\eta`: the step size
+    * :math:`m`: the momentum
+
+    Args:
+        stepsize (float): user-defined hyperparameter :math:`\eta`
+        momentum (float): user-defined hyperparameter :math:`m`
+    """
+
+    def compute_grad(
+        self, objective_fn, args, kwargs, grad_fn=None
+    ):  # pylint: disable=arguments-renamed
+        r"""Compute the gradient of the objective function at the shifted point :math:`(x -
+        m\times\text{accumulation})` and return it along with the objective function forward pass
+        (if available).
+
+        Args:
+            objective_fn (function): the objective function for optimization.
+            args (tuple): tuple of NumPy arrays containing the current values for the
+                objective function.
+            kwargs (dict): keyword arguments for the objective function.
+            grad_fn (function): optional gradient function of the objective function with respect to
+                the variables ``x``. If ``None``, the gradient function is computed automatically.
+                Must return the same shape of tuple [array] as the autograd derivative.
+
+        Returns:
+            tuple [array]: the NumPy array containing the gradient :math:`\nabla f(x^{(t)})` and the
+                objective function output. If ``grad_fn`` is provided, the objective function
+                will not be evaluated and instead ``None`` will be returned.
+        """
+        shifted_args = list(args)
+
+        trainable_indices = [
+            i for i, arg in enumerate(args) if getattr(arg, "requires_grad", False)
+        ]
+
+        if self.accumulation:
+            for index in trainable_indices:
+                shifted_args[index] = args[index] - self.momentum * self.accumulation[index]
+
+        g = get_gradient(objective_fn) if grad_fn is None else grad_fn
+        grad = g(*shifted_args, **kwargs)
+        forward = getattr(g, "forward", None)
+
+        grad = (grad,) if len(trainable_indices) == 1 else grad
+        return grad, forward
diff --git a/openqaoa/optimizers/pennylane/pennylane_optimizers/rms_prop.py b/openqaoa/optimizers/pennylane/pennylane_optimizers/rms_prop.py
new file mode 100644
index 000000000..4d5aa7eb4
--- /dev/null
+++ b/openqaoa/optimizers/pennylane/pennylane_optimizers/rms_prop.py
@@ -0,0 +1,89 @@
+# Copyright 2018-2021 Xanadu Quantum Technologies Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Root mean square propagation optimizer"""
+from numpy import sqrt  # changed from the original pennylane code
+from .adagrad import AdagradOptimizer
+
+
+class RMSPropOptimizer(AdagradOptimizer):
+    r"""Root mean squared propagation optimizer.
+
+    The root mean square propagation optimizer is a modified
+    :class:`Adagrad optimizer <.pennylane.optimize.AdagradOptimizer>`,
+    with a decay of learning rate adaptation.
+
+    Extensions of the Adagrad optimization method generally
+    start the sum :math:`a` over past gradients in the denominator
+    of the learning rate at a finite :math:`t'` with :math:`0 < t' < t`,
+    or decay past gradients to avoid an ever-decreasing learning rate.
+
+    Root Mean Square propagation is such an adaptation, where
+
+    .. math::
+        a_i^{(t+1)} = \gamma a_i^{(t)} + (1-\gamma) (\partial_{x_i} f(x^{(t)}))^2.
+
+    Args:
+        stepsize (float): the user-defined hyperparameter :math:`\eta`
+            used in the Adagrad optimization
+        decay (float): the learning rate decay :math:`\gamma`
+        eps (float): offset :math:`\epsilon` added for numerical stability
+            (see :class:`Adagrad <.pennylane.optimize.AdagradOptimizer>`)
+
+    """
+
+    def __init__(self, stepsize=0.01, decay=0.9, eps=1e-8):
+        super().__init__(stepsize)
+        self.decay = decay
+        self.eps = eps
+
+    def apply_grad(self, grad, args):
+        r"""Update the variables args to take a single optimization step. Flattens and unflattens
+        the inputs to maintain nested iterables as the parameters of the optimization.
+
+        Args:
+            grad (tuple [array]): the gradient of the objective function at
+                point :math:`x^{(t)}`: :math:`\nabla f(x^{(t)})`.
+            args (tuple): the current value of the variables :math:`x^{(t)}`.
+
+        Returns:
+            list [array]: the new values :math:`x^{(t+1)}`
+        """
+        args_new = list(args)
+
+        if self.accumulation is None:
+            self.accumulation = [0.0] * len(args)
+
+        trained_index = 0
+        for index, arg in enumerate(args):
+            if getattr(arg, "requires_grad", False):
+                self._update_accumulation(index, grad[trained_index])
+                args_new[index] = (
+                    arg
+                    - (self.stepsize / sqrt(self.accumulation[index] + self.eps))
+                    * grad[trained_index]
+                )
+                trained_index += 1
+
+        return args_new
+
+    def _update_accumulation(self, index, grad):
+        r"""Update the accumulation with the gradient.
+
+        Args:
+            index (int): index of argument to update.
+            grad (ndarray): gradient at the index.
+        """
+        self.accumulation[index] = (
+            self.decay * self.accumulation[index] + (1 - self.decay) * grad**2
+        )
diff --git a/openqaoa/optimizers/pennylane/pennylane_optimizers/rotosolve.py b/openqaoa/optimizers/pennylane/pennylane_optimizers/rotosolve.py
new file mode 100644
index 000000000..15e812dbd
--- /dev/null
+++ b/openqaoa/optimizers/pennylane/pennylane_optimizers/rotosolve.py
@@ -0,0 +1,667 @@
+# Copyright 2018-2021 Xanadu Quantum Technologies Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
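# RMSProp replaces Adagrad's ever-growing sum with a decaying average of
# squared gradients, as in the RMSPropOptimizer above. An editorial sketch,
# not patch code:
import numpy as np


def rmsprop_step(x, grad, accum, stepsize=0.01, decay=0.9, eps=1e-8):
    accum = decay * accum + (1 - decay) * grad**2  # a_i <- gamma*a_i + (1-gamma)*g_i^2
    return x - (stepsize / np.sqrt(accum + eps)) * grad, accum


x, accum = np.array([1.0, -2.0]), np.zeros(2)
for _ in range(300):
    x, accum = rmsprop_step(x, 2 * x, accum)  # f(x) = ||x||^2
print(x)  # settles near the origin; the decayed accumulator keeps steps alive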
+"""Rotosolve gradient free optimizer""" +# pylint: disable=too-many-branches,cell-var-from-loop + +from inspect import signature +import numpy as np +from scipy.optimize import brute, shgo + +from openqaoa.optimizers import pennylane as qml # changed from the original code + + +def _brute_optimizer(fun, num_steps, bounds=None, **kwargs): + r"""Brute force optimizer, wrapper of scipy.optimize.brute that repeats it + ``num_steps`` times. Signature is as expected by ``RotosolveOptimizer._min_numeric`` + below, returning a scalar minimal position and the function value at that position.""" + Ns = kwargs.pop("Ns") + width = bounds[0][1] - bounds[0][0] + center = (bounds[0][1] + bounds[0][0]) / 2 + for _ in range(num_steps): + range_ = (center - width / 2, center + width / 2) + center, y_min, *_ = brute(fun, ranges=(range_,), full_output=True, Ns=Ns, **kwargs) + # We only ever use this function for 1D optimization + center = center[0] + width /= Ns + + return center, y_min + + +def _shgo_optimizer(fun, **kwargs): + r"""Wrapper for ``scipy.optimize.shgo`` (Simplicial Homology global optimizer). + Signature is as expected by ``RotosolveOptimizer._min_numeric`` below, providing + a scalar minimal position and the function value at that position.""" + opt_res = shgo(fun, **kwargs) + return opt_res.x[0], opt_res.fun + + +def _validate_inputs(requires_grad, args, nums_frequency, spectra): + """Checks that for each trainable argument either the number of + frequencies or the frequency spectrum is given.""" + + if not any(requires_grad.values()): + raise ValueError( + "Found no parameters to optimize. The parameters to optimize " + "have to be marked as trainable." + ) + for arg, (arg_name, _requires_grad) in zip(args, requires_grad.items()): + if _requires_grad: + _nums_frequency = nums_frequency.get(arg_name, {}) + _spectra = spectra.get(arg_name, {}) + all_keys = set(_nums_frequency) | set(_spectra) + + shape = qml.math.shape(arg) + indices = np.ndindex(shape) if len(shape) > 0 else [()] + for par_idx in indices: + if par_idx not in all_keys: + raise ValueError( + "Neither the number of frequencies nor the frequency spectrum " + f"was provided for the entry {par_idx} of argument {arg_name}." + ) + + +def _restrict_to_univariate(fn, arg_idx, par_idx, args, kwargs): + r"""Restrict a function to a univariate function for given argument + and parameter indices. + + Args: + fn (callable): Multivariate function + arg_idx (int): Index of the argument that contains the parameter to restrict + par_idx (tuple[int]): Index of the parameter to restrict to within the argument + args (tuple): Arguments at which to restrict the function. + kwargs (dict): Keyword arguments at which to restrict the function. + + Returns: + callable: Univariate restriction of ``fn``. That is, this callable takes + a single float value as input and has the same return type as ``fn``. + All arguments are set to the given ``args`` and the input value to this + function is added to the marked parameter. + """ + the_arg = args[arg_idx] + if len(qml.math.shape(the_arg)) == 0: + shift_vec = qml.math.ones_like(the_arg) + else: + shift_vec = qml.math.zeros_like(the_arg) + shift_vec = qml.math.scatter_element_add(shift_vec, par_idx, 1.0) + + def _univariate_fn(x): + return fn(*args[:arg_idx], the_arg + shift_vec * x, *args[arg_idx + 1 :], **kwargs) + + return _univariate_fn + + +class RotosolveOptimizer: + r"""Rotosolve gradient-free optimizer. 
+
+    The Rotosolve optimizer minimizes an objective function with respect to the parameters of a
+    quantum circuit without the need for calculating the gradient of the function. The algorithm
+    updates the parameters :math:`\boldsymbol{\theta} = \theta_1, \dots, \theta_D` by
+    separately reconstructing the cost function with respect to each circuit parameter,
+    while keeping all other parameters fixed.
+
+    Args:
+        substep_optimizer (str or callable): Optimizer to use for the substeps of Rotosolve
+            that carries out a univariate (i.e., single-parameter) global optimization.
+            *Only used if there is more than one frequency for a given parameter.*
+            It must take as inputs:
+
+            - A function ``fn`` that maps scalars to scalars,
+
+            - the (keyword) argument ``bounds``, and
+
+            - optional keyword arguments.
+
+            It must return two scalars:
+
+            - The input value ``x_min`` for which ``fn`` is minimal, and
+
+            - the minimal value ``y_min=fn(x_min)`` or ``None``.
+
+            Alternatively, the following optimizers are built-in and can be chosen by
+            passing their name:
+
+            - ``"brute"``: An iterative version of
+              `SciPy's brute force optimizer <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.brute.html>`_.
+              It evaluates the function at ``Ns`` equidistant points across the range
+              :math:`[-\pi, \pi]` and iteratively refines the range around the point
+              with the smallest cost value for ``num_steps`` times.
+
+            - ``"shgo"``: `SciPy's SHGO optimizer <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.shgo.html>`_.
+
+        substep_kwargs (dict): Keyword arguments to be passed to the ``substep_optimizer``
+            callable. For ``substep_optimizer="shgo"``, the original keyword arguments of
+            the SciPy implementation are available, for ``substep_optimizer="brute"`` the
+            keyword arguments ``ranges``, ``Ns`` and ``num_steps`` are useful.
+            *Only used if there is more than one frequency for a given parameter.*
+
+    For each parameter, a purely classical one-dimensional global optimization over the
+    interval :math:`(-\pi,\pi]` is performed, which is replaced automatically by a
+    closed-form expression for the optimal value if the :math:`d\text{th}` parametrized
+    gate has only two eigenvalues. This means that ``substep_optimizer`` and
+    ``substep_kwargs`` will not be used for these parameters.
+    In this case, the optimal value :math:`\theta^*_d` is given analytically by
+
+    .. math::
+
+        \theta^*_d &= \underset{\theta_d}{\text{argmin}}\left\langle H\right\rangle_{\theta_d}\\
+              &= -\frac{\pi}{2} - \text{arctan2}\left(2\left\langle H\right\rangle_{\theta_d=0}
+              - \left\langle H\right\rangle_{\theta_d=\pi/2} - \left\langle H\right\rangle_{\theta_d=-\pi/2},
+              \left\langle H\right\rangle_{\theta_d=\pi/2} - \left\langle H\right\rangle_{\theta_d=-\pi/2}\right),
+
+    where :math:`\left\langle H\right\rangle_{\theta_d}` is the expectation value of the objective function
+    restricted to only depend on the parameter :math:`\theta_d`.
+
+    .. warning::
+
+        The built-in one-dimensional optimizers ``"brute"`` and ``"shgo"`` for the substeps
+        of a Rotosolve optimization step use the interval :math:`(-\pi,\pi]`, rescaled with
+        the inverse smallest frequency as default domain to optimize over. For complicated
+        cost functions, this domain might not be suitable for the substep optimization and
+        an appropriate range should be passed via ``bounds`` in ``substep_kwargs``.
+
+    The algorithm is described in further detail in
+    `Vidal and Theis (2018) <https://arxiv.org/abs/1812.06323>`_,
+    `Nakanishi, Fujii and Todo (2019) <https://arxiv.org/abs/1903.12166>`_,
+    `Parrish et al. (2019) <https://arxiv.org/abs/1904.03206>`_,
+    and
+    `Ostaszewski et al. (2019) <https://arxiv.org/abs/1905.09692>`_,
+    and the reconstruction method used for more general operations is described in
+    `Wierichs et al. (2022) <https://arxiv.org/abs/2107.12390>`_.
+
+    .. warning::
+
+        ``RotosolveOptimizer`` will only update parameters that are *explicitly*
+        marked as trainable.
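# The closed-form single-frequency substep quoted earlier in this docstring
# can be checked directly: for a cost of the form a*cos(theta - phi) + c,
# three evaluations pin down the global minimum. An editorial sketch:
import numpy as np


def rotosolve_substep(univariate_cost):
    h0 = univariate_cost(0.0)
    hp = univariate_cost(np.pi / 2)
    hm = univariate_cost(-np.pi / 2)
    # theta* = -pi/2 - arctan2(2<H>_0 - <H>_{pi/2} - <H>_{-pi/2},
    #                          <H>_{pi/2} - <H>_{-pi/2})
    return -np.pi / 2 - np.arctan2(2 * h0 - hp - hm, hp - hm)


a, phi, c = 0.8, 0.3, 0.1
cost = lambda t: a * np.cos(t - phi) + c
t_star = rotosolve_substep(cost)
print(np.isclose(cost(t_star), c - a))  # True: the analytic minimum is attained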
This can be done via ``requires_grad`` if using Autograd + or PyTorch. ``RotosolveOptimizer`` is not yet implemented to work in a stable + manner with TensorFlow or JAX. + + **Example:** + + Initialize the optimizer and set the number of steps to optimize over. + Recall that the optimization with ``RotosolveOptimizer`` uses global optimization substeps + of univariate functions. The optimization technique for these substeps can be chosen via the + ``substep_optimizer`` and ``substep_kwargs`` keyword arguments. + Here we use the built-in iterative version of + `SciPy's brute force optimizer `_ + with four iterations. + We will run Rotosolve itself for three iterations. + + >>> opt_kwargs = {"num_steps": 4} + >>> opt = qml.optimize.RotosolveOptimizer(substep_optimizer="brute", substep_kwargs=opt_kwargs) + >>> num_steps = 3 + + Next, we create a QNode we wish to optimize: + + .. code-block :: python + + dev = qml.device('default.qubit', wires=3, shots=None) + + @qml.qnode(dev) + def cost_function(rot_param, layer_par, crot_param, rot_weights=None, crot_weights=None): + for i, par in enumerate(rot_param * rot_weights): + qml.RX(par, wires=i) + for w in dev.wires: + qml.RX(layer_par, wires=w) + for i, par in enumerate(crot_param*crot_weights): + qml.CRY(par, wires=[i, (i+1)%3]) + return qml.expval(qml.PauliZ(0) @ qml.PauliZ(1) @ qml.PauliZ(2)) + + This QNode is defined simply by measuring the expectation value of the tensor + product of ``PauliZ`` operators on all qubits. + It takes three parameters: + + - ``rot_param`` controls three Pauli rotations with three parameters, multiplied with ``rot_weights``, + - ``layer_par`` feeds into a layer of rotations with a single parameter, and + - ``crot_param`` feeds three parameters, multiplied with ``crot_weights``, into + three controlled Pauli rotations. + + We also initialize a set of parameters for all these operations, and start with + uniform weights, i.e., all ``rot_weights`` and ``crot_weights`` are set to one. + This means that all frequencies with which the parameters in ``rot_param`` and + ``crot_param`` enter the QNode are integer-valued. + The number of frequencies per parameter are summarized in ``nums_frequency``. + + .. code-block :: python + + init_param = ( + np.array([0.3, 0.2, 0.67], requires_grad=True), + np.array(1.1, requires_grad=True), + np.array([-0.2, 0.1, -2.5], requires_grad=True), + ) + rot_weights = np.ones(3) + crot_weights = np.ones(3) + + nums_frequency = { + "rot_param": {(0,): 1, (1,): 1, (2,): 1}, + "layer_par": {(): 3}, + "crot_param": {(0,): 2, (1,): 2, (2,): 2}, + } + + The keyword argument ``requires_grad`` can be used to determine whether the respective + parameter should be optimized or not, following the behaviour of gradient computations and + gradient-based optimizers when using Autograd or Torch. + With TensorFlow, a ``tf.Variable`` inside a ``tf.GradientTape`` may be used to + mark variables as trainable. + + Now we carry out the optimization. + The minimized cost of the intermediate univariate reconstructions can + be read out via ``full_output``, including the cost *after* the full Rotosolve step: + + >>> param = init_param + >>> cost_rotosolve = [] + >>> for step in range(num_steps): + ... param, cost, sub_cost = opt.step_and_cost( + ... cost_function, + ... *param, + ... nums_frequency=nums_frequency, + ... full_output=True, + ... rot_weights=rot_weights, + ... crot_weights=crot_weights, + ... ) + ... print(f"Cost before step: {cost}") + ... 
print(f"Minimization substeps: {np.round(sub_cost, 6)}") + ... cost_rotosolve.extend(sub_cost) + Cost before step: 0.04200821039253547 + Minimization substeps: [-0.230905 -0.863336 -0.980072 -0.980072 -1. -1. -1. ] + Cost before step: -0.9999999990681161 + Minimization substeps: [-1. -1. -1. -1. -1. -1. -1.] + Cost before step: -0.9999999999999996 + Minimization substeps: [-1. -1. -1. -1. -1. -1. -1.] + + The optimized values for the parameters are now stored in ``param`` + and the optimization behaviour can be assessed by plotting ``cost_rotosolve``, + which include the substeps of the Rotosolve optimization. + The ``full_output`` feature is available for both, ``step`` and ``step_and_cost``. + + In general, the frequencies in a QNode will not be integer-valued, requiring us + to provide the ``RotosolveOptimizer`` not only with the number of frequencies + but their concrete values. For the example QNode above, this happens if the + weights are no longer one: + + >>> rot_weights = np.array([0.4, 0.8, 1.2], requires_grad=False) + >>> crot_weights = np.array([0.5, 1.0, 1.5], requires_grad=False) + >>> spectrum_fn = qml.fourier.qnode_spectrum(cost_function) + >>> spectra = spectrum_fn(*param, rot_weights=rot_weights, crot_weights=crot_weights) + >>> spectra["rot_param"] + {(0,): [-0.4, 0.0, 0.4], (1,): [-0.8, 0.0, 0.8], (2,): [-1.2, 0.0, 1.2]} + >>> spectra["crot_param"] + {(0,): [-0.5, -0.25, 0.0, 0.25, 0.5], (1,): [-1.0, -0.5, 0.0, 0.5, 1.0], (2,): [-1.5, -0.75, 0.0, 0.75, 1.5]} + + We may provide these spectra instead of ``nums_frequency`` to Rotosolve to + enable the optimization of the QNode at these weights: + + >>> param = init_param + >>> for step in range(num_steps): + ... param, cost, sub_cost = opt.step_and_cost( + ... cost_function, + ... *param, + ... spectra=spectra, + ... full_output=True, + ... rot_weights = rot_weights, + ... crot_weights = crot_weights, + ... ) + ... print(f"Cost before step: {cost}") + ... print(f"Minimization substeps: {np.round(sub_cost, 6)}") + Cost before step: 0.09299359486191039 + Minimization substeps: [-0.268008 -0.713209 -0.24993 -0.871989 -0.907672 -0.907892 -0.940474] + Cost before step: -0.9404742138557066 + Minimization substeps: [-0.940474 -1. -1. -1. -1. -1. -1. ] + Cost before step: -1.0 + Minimization substeps: [-1. -1. -1. -1. -1. -1. -1.] + + As we can see, while the optimization got a bit harder and the optimizer takes a bit longer + to converge than previously, Rotosolve was able to adapt to the more complicated + dependence on the input arguments and still found the global minimum successfully. + """ + # pylint: disable=too-few-public-methods + + def __init__(self, substep_optimizer="brute", substep_kwargs=None): + self.substep_kwargs = {} if substep_kwargs is None else substep_kwargs + if substep_optimizer == "brute": + self.substep_optimizer = _brute_optimizer + self.substep_kwargs.setdefault("num_steps", 4) + self.substep_kwargs.setdefault("Ns", 100) + elif substep_optimizer == "shgo": + self.substep_optimizer = _shgo_optimizer + else: + self.substep_optimizer = substep_optimizer + + def step_and_cost( + self, + objective_fn, + *args, + nums_frequency=None, + spectra=None, + shifts=None, + full_output=False, + **kwargs, + ): + r"""Update args with one step of the optimizer and return the corresponding objective + function value prior to the step. Each step includes multiple substeps, one per + parameter. + + Args: + objective_fn (function): the objective function for optimization. 
It should take a + sequence of the values ``*args`` and a list of the gates ``generators`` as inputs, + and return a single value. + *args (Sequence): variable length sequence containing the initial values of the + variables to be optimized over or a single float with the initial value. + nums_frequency (dict[dict]): The number of frequencies in the ``objective_fn`` per + parameter. The keys must correspond to argument names of the objective + function, the values must be dictionaries that map parameter indices (``tuple``) + in the argument to the number of frequencies with which it enters the objective + function (``int``). + The parameter index for a scalar QNode argument is ``()``, for + one-dimensional array QNode arguments, it takes the form ``(i,)`` for the + i-th parameter in the argument. + spectra (dict[dict]): Frequency spectra in the ``objective_fn`` per parameter. + The formatting is the same as for ``nums_frequency``, but the values + of the inner dictionaries must be sequences of frequencies + (``Sequence[float]``). + For each parameter, ``num_frequency`` take precedence over ``spectra``. + shifts (dict[dict]): Shift angles for the reconstruction per QNode parameter. + The keys have to be argument names of ``qnode`` and the inner dictionaries have to + be mappings from parameter indices to the respective shift angles to be used for + that parameter. For :math:`R` non-zero frequencies, there must be :math:`2R+1` + shifts given. Ignored if ``nums_frequency`` gives a number of frequencies + for the respective parameter in the QNode argument. + full_output (bool): whether to return the intermediate minimized energy values from + the univariate optimization substeps. + **kwargs : variable length keyword arguments for the objective function. + + Returns: + list [array] or array: the new variable values :math:`x^{(t+1)}`. + If a single arg is provided, list [array] is replaced by array. + float: the objective function output prior to the step. + list [float]: the intermediate objective values, only returned if + ``full_output=True``. + + The optimization step consists of multiple substeps. + + For each substep, + one of the parameters in one of the QNode arguments is singled out, and the + objective function is considered as univariate function (i.e., function that + depends on a single scalar) of that parameter. + + If ``nums_frequency`` states that there is only a single frequency, or ``spectra`` + only contains one positive frequency, for a parameter, an analytic formula is + used to return the minimum of the univariate restriction. + + For multiple frequencies, :func:`.fourier.reconstruct` is used to reconstruct + the univariate restriction and a numeric minimization is performed instead. + The latter minimization is performed using the ``substep_optimizer`` passed to + ``RotosolveOptimizer`` at initialization. + + .. note:: + + One of ``nums_frequency`` and ``spectra`` must contain information + about each parameter that is to be trained with ``RotosolveOptimizer``. + For each univariate reconstruction, the data in ``nums_frequency`` takes + precedence over the information in ``spectra``. + + """ + # todo: does this signature call cover all cases? 
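+        # Outline of the update loop below: each Rotosolve step freezes all but
+        # one argument and sweeps the scalar parameters inside that argument one
+        # at a time. Each scalar is updated by minimizing the univariate
+        # restriction of the cost: analytically when only a single frequency is
+        # present, otherwise via qml.fourier.reconstruct followed by a numeric
+        # minimization with the configured substep_optimizer.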
+ sign_fn = objective_fn.func if isinstance(objective_fn, qml.QNode) else objective_fn + arg_names = list(signature(sign_fn).parameters.keys()) + requires_grad = { + arg_name: True for arg_name, arg in zip(arg_names, args) # changed from the original code + } + nums_frequency = nums_frequency or {} + spectra = spectra or {} + _validate_inputs(requires_grad, args, nums_frequency, spectra) + + # we will single out one arg to change at a time + # the following hold the arguments not getting updated + before_args = [] + after_args = list(args) + + # Prepare intermediate minimization results cache + if full_output: + y_output = [] + # Compute the very first evaluation in order to be able to cache it + fun_at_zero = objective_fn(*args, **kwargs) + first_substep_in_step = True + + for arg_idx, (arg, arg_name) in enumerate(zip(args, arg_names)): + del after_args[0] + + if not requires_grad[arg_name]: + before_args.append(arg) + continue + shape = qml.math.shape(arg) + indices = np.ndindex(shape) if len(shape) > 0 else [()] + for par_idx in indices: + _fun_at_zero = fun_at_zero if first_substep_in_step else None + # Set a single parameter in a single argument to be reconstructed + num_freq = nums_frequency.get(arg_name, {}).get(par_idx, None) + spectrum = spectra.get(arg_name, {}).get(par_idx, None) + if spectrum is not None: + spectrum = np.array(spectrum) + + if num_freq == 1 or (spectrum is not None and len(spectrum[spectrum > 0])) == 1: + _args = before_args + [arg] + after_args + univariate = _restrict_to_univariate( + objective_fn, arg_idx, par_idx, _args, kwargs + ) + freq = 1.0 if num_freq is not None else spectrum[spectrum > 0][0] + x_min, y_min = self.min_analytic(univariate, freq, _fun_at_zero) + arg = qml.math.scatter_element_add(arg, par_idx, x_min) + + else: + ids = {arg_name: (par_idx,)} + _nums_frequency = ( + {arg_name: {par_idx: num_freq}} if num_freq is not None else None + ) + _spectra = {arg_name: {par_idx: spectrum}} if spectrum is not None else None + + # Set up the reconstruction function + recon_fn = qml.fourier.reconstruct( + objective_fn, ids, _nums_frequency, _spectra, shifts + ) + # Perform the reconstruction + recon = recon_fn(*before_args, arg, *after_args, f0=_fun_at_zero, **kwargs)[ + arg_name + ][par_idx] + if spectrum is None: + spectrum = list(range(num_freq + 1)) + x_min, y_min = self._min_numeric(recon, spectrum) + + # Update the currently treated argument + arg = qml.math.scatter_element_add(arg, par_idx, x_min - arg[par_idx]) + first_substep_in_step = False + + if full_output: + y_output.append(y_min) + + # updating before_args for next argument + before_args.append(arg) + + # All arguments have been updated and/or passed to before_args + args = before_args + # unwrap arguments if only one, backward compatible and cleaner + if len(args) == 1: + args = args[0] + + if full_output: + return args, fun_at_zero, y_output + + return args, fun_at_zero + + def step( + self, + objective_fn, + *args, + nums_frequency=None, + spectra=None, + shifts=None, + full_output=False, + **kwargs, + ): + r"""Update args with one step of the optimizer. Each step includes + multiple substeps, one per parameter. + + Args: + objective_fn (function): the objective function for optimization. It should take a + sequence of the values ``*args`` and a list of the gates ``generators`` as inputs, + and return a single value. + *args (Sequence): variable length sequence containing the initial values of the + variables to be optimized over or a single float with the initial value. 
+ nums_frequency (dict[dict]): The number of frequencies in the ``objective_fn`` per + parameter. The keys must correspond to argument names of the objective + function, the values must be dictionaries that map parameter indices (``tuple``) + in the argument to the number of frequencies with which it enters the objective + function (``int``). + The parameter index for a scalar QNode argument is ``()``, for + one-dimensional array QNode arguments, it takes the form ``(i,)`` for the + i-th parameter in the argument. + spectra (dict[dict]): Frequency spectra in the ``objective_fn`` per parameter. + The formatting is the same as for ``nums_frequency``, but the values + of the inner dictionaries must be sequences of frequencies + (``Sequence[float]``). + For each parameter, ``num_frequency`` take precedence over ``spectra``. + shifts (dict[dict]): Shift angles for the reconstruction per QNode parameter. + The keys have to be argument names of ``qnode`` and the inner dictionaries have to + be mappings from parameter indices to the respective shift angles to be used for + that parameter. For :math:`R` non-zero frequencies, there must be :math:`2R+1` + shifts given. Ignored if ``nums_frequency`` gives a number of frequencies + for the respective parameter in the QNode argument. + full_output (bool): whether to return the intermediate minimized energy values from + the univariate optimization substeps. + **kwargs : variable length keyword arguments for the objective function. + + Returns: + list [array] or array: the new variable values :math:`x^{(t+1)}`. + If a single arg is provided, list [array] is replaced by array. + list [float]: the intermediate objective values, only returned if + ``full_output=True``. + + The optimization step consists of multiple substeps. + + For each substep, + one of the parameters in one of the QNode arguments is singled out, and the + objective function is considered as univariate function (i.e., function that + depends on a single scalar) of that parameter. + + If ``nums_frequency`` states that there is only a single frequency, or ``spectra`` + only contains one positive frequency, for a parameter, an analytic formula is + used to return the minimum of the univariate restriction. + + For multiple frequencies, :func:`.fourier.reconstruct` is used to reconstruct + the univariate restriction and a numeric minimization is performed instead. + The latter minimization is performed using the ``substep_optimizer`` passed to + ``RotosolveOptimizer`` at initialization. + + .. note:: + + One of ``nums_frequency`` and ``spectra`` must contain information + about each parameter that is to be trained with ``RotosolveOptimizer``. + For each univariate reconstruction, the data in ``nums_frequency`` takes + precedence over the information in ``spectra``. + + """ + x_new, _, *y_output = self.step_and_cost( + objective_fn, + *args, + nums_frequency=nums_frequency, + spectra=spectra, + shifts=shifts, + full_output=full_output, + **kwargs, + ) + if full_output: + # For full_output=True, y_output was wrapped in an outer list due + # to the dynamic unpacking + return x_new, y_output[0] + + return x_new + + def _min_numeric(self, objective_fn, spectrum): + r"""Numerically minimize a trigonometric function that depends on a + single parameter. Uses potentially large numbers of function evaluations, + depending on the used substep_optimizer. The optimization method and + options are stored in ``RotosolveOptimizer.substep_optimizer`` + and ``RotosolveOptimizer.substep_kwargs``. 
+
+        Args:
+            objective_fn (callable): Trigonometric function to minimize
+            spectrum (Sequence[float]): Frequency spectrum of ``objective_fn``; the smallest
+                positive frequency is used to derive default bounds for the substep
+                optimizer if no bounds are configured
+
+        Returns:
+            float: Position of the minimum of ``objective_fn``
+            float: Value of the minimum of ``objective_fn``
+
+        The returned position is guaranteed to lie within :math:`(-\pi, \pi]`.
+        """
+        opt_kwargs = self.substep_kwargs.copy()
+        if "bounds" not in self.substep_kwargs:
+            spectrum = qml.math.array(spectrum)
+            half_width = np.pi / qml.math.min(spectrum[spectrum > 0])
+            opt_kwargs["bounds"] = ((-half_width, half_width),)
+
+        x_min, y_min = self.substep_optimizer(objective_fn, **opt_kwargs)
+        if y_min is None:
+            y_min = objective_fn(x_min)
+
+        return x_min, y_min
+
+    @staticmethod
+    def min_analytic(objective_fn, freq, f0):
+        r"""Analytically minimize a trigonometric function that depends on a
+        single parameter and has a single frequency. Uses two or
+        three function evaluations.
+
+        Args:
+            objective_fn (callable): Trigonometric function to minimize
+            freq (float): Frequency :math:`f` in the ``objective_fn``
+            f0 (float): Value of the ``objective_fn`` at zero. Reduces the
+                number of calls to the function from three to two if given.
+
+        Returns:
+            float: Position of the minimum of ``objective_fn``
+            float: Value of the minimum of ``objective_fn``
+
+        The closed-form expression used here was derived in
+        `Vidal & Theis (2018) `__,
+        `Parrish et al (2019) `__ and
+        `Ostaszewski et al (2021) `__.
+        We use the notation of Appendix A of the last of these references,
+        although we allow for an arbitrary frequency instead of restricting
+        to :math:`f=1`.
+        The returned position is guaranteed to lie within :math:`(-\pi/f, \pi/f]`.
+
+        The formula used for the minimization of the :math:`d\text{-th}`
+        parameter, writing :math:`\langle E\rangle_{\theta_d}` for the cost as a
+        function of :math:`\theta_d`, reads
+
+        .. math::
+
+            \theta^*_d &= \underset{\theta_d}{\text{argmin}}\langle E\rangle_{\theta_d}\\
+                &= -\frac{\pi}{2f} - \frac{1}{f}\text{arctan2}\left(2\langle E\rangle_{\theta_d=0}
+                - \langle E\rangle_{\theta_d=\pi/(2f)} - \langle E\rangle_{\theta_d=-\pi/(2f)},
+                \langle E\rangle_{\theta_d=\pi/(2f)} - \langle E\rangle_{\theta_d=-\pi/(2f)}\right).
+
+        """
+        if f0 is None:
+            f0 = objective_fn(0.0)
+        shift = 0.5 * np.pi / freq
+        fp = objective_fn(shift)
+        fm = objective_fn(-shift)
+        C = 0.5 * (fp + fm)
+        B = np.arctan2(2 * f0 - fp - fm, fp - fm)
+        x_min = -shift - B / freq
+        A = np.sqrt((f0 - C) ** 2 + 0.25 * (fp - fm) ** 2)
+        y_min = -A + C
+
+        if x_min <= -2 * shift:
+            x_min = x_min + 4 * shift
+
+        return x_min, y_min
diff --git a/openqaoa/optimizers/pennylane/pennylane_optimizers/spsa.py b/openqaoa/optimizers/pennylane/pennylane_optimizers/spsa.py
new file mode 100644
index 000000000..6dcde9860
--- /dev/null
+++ b/openqaoa/optimizers/pennylane/pennylane_optimizers/spsa.py
@@ -0,0 +1,295 @@
+# Copyright 2018-2022 Xanadu Quantum Technologies Inc.

+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at

+#     http://www.apache.org/licenses/LICENSE-2.0

+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
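As a quick sanity check of the closed-form minimization in ``min_analytic`` above, the sketch below applies it to a toy single-frequency cost :math:`A\cos(f\theta + B) + C`. This is illustrative only; the import path is an assumption based on the package layout introduced in this patch series, and the constants are arbitrary.

.. code-block:: python

    # Hedged sketch: exercise RotosolveOptimizer.min_analytic on a known cosine.
    # Assumption: the vendored class is exported from this package path.
    import numpy as np
    from openqaoa.optimizers.pennylane import RotosolveOptimizer

    A, B, C, f = 2.0, 0.4, -0.3, 1.0

    def g(theta):
        # Single-frequency cost whose minimum lies at theta = pi - B.
        return A * np.cos(f * theta + B) + C

    x_min, y_min = RotosolveOptimizer.min_analytic(g, f, g(0.0))
    assert np.isclose(x_min, np.pi - B)  # argmin, lies within (-pi/f, pi/f]
    assert np.isclose(y_min, -A + C)     # minimum value -A + C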
+"""SPSA optimizer""" + +from openqaoa.optimizers.pennylane import numpy as np # changed from the original pennylane code + + +class SPSAOptimizer: + r"""The Simultaneous Perturbation Stochastic Approximation method (SPSA) + is a stochastic approximation algorithm for optimizing cost functions whose evaluation may involve noise. + + While other gradient-based optimization methods usually attempt to compute + the gradient analytically, SPSA involves approximating gradients at the cost of + evaluating the cost function twice in each iteration step. This cost may result in + a significant decrease in the overall cost of function evaluations for the entire optimization. + It is based on an approximation of the unknown gradient :math:`\hat{g}(\hat{\theta}_{k})` + through a simultaneous perturbation of the input parameters: + + .. math:: + \hat{g}_k(\hat{\theta}_k) = \frac{y(\hat{\theta}_k+c_k\Delta_k)- + y(\hat{\theta}_k-c_k\Delta_k)}{2c_k} \begin{bmatrix} + \Delta_{k1}^{-1} \\ + \Delta_{k2}^{-1} \\ + \vdots \\ + \Delta_{kp}^{-1} + \end{bmatrix}\text{,} + + where + + * :math:`k` is the current iteration step, + * :math:`\hat{\theta}_k` are the input parameters at iteration step :math:`k`, + * :math:`y` is the objective function, + * :math:`c_k=\frac{c}{k^\gamma}` is the gain sequence corresponding to evaluation step size + and it can be controlled with + + * scaling parameter :math:`c` and + * scaling exponent :math:`\gamma` + + * :math:`\Delta_{ki}^{-1} \left(1 \leq i \leq p \right)` are the inverted elements of + random pertubation vector :math:`\Delta_k`. + + :math:`\hat{\theta}_k` is updated to a new set of parameters with + + .. math:: + \hat{\theta}_{k+1} = \hat{\theta}_{k} - a_k\hat{g}_k(\hat{\theta}_k)\text{,} + + where the gain sequences :math:`a_k=\frac{a}{(A+k)^\alpha}` controls parameter update step size. + + The gain sequence :math:`a_k` can be controlled with + + * scaling parameter :math:`a`, + * scaling exponent :math:`\alpha` and + * stability constant :math:`A` + + For more details, see `Spall (1998a) + `_. + + .. note:: + + * One SPSA iteration step of a cost function that involves computing the expectation value of + a Hamiltonian with ``M`` terms requires :math:`2*M` quantum device executions. + * The forward-pass value of the cost function is not computed when stepping the optimizer. + Therefore, in case of using ``step_and_cost`` method instead of ``step``, the number + of executions will include the cost function evaluations. + + + **Examples:** + + For VQE/VQE-like problems, the objective function can be the following: + + >>> coeffs = [0.2, -0.543, 0.4514] + >>> obs = [qml.PauliX(0) @ qml.PauliZ(1), qml.PauliZ(0) @ qml.Hadamard(2), + ... qml.PauliX(3) @ qml.PauliZ(1)] + >>> H = qml.Hamiltonian(coeffs, obs) + >>> num_qubits = 4 + >>> dev = qml.device("default.qubit", wires=num_qubits) + >>> @qml.qnode(dev) + ... def cost(params, num_qubits=1): + ... qml.BasisState(np.array([1, 1, 0, 0]), wires=range(num_qubits)) + ... for i in range(num_qubits): + ... qml.Rot(*params[i], wires=0) + ... qml.CNOT(wires=[2, 3]) + ... qml.CNOT(wires=[2, 0]) + ... qml.CNOT(wires=[3, 1]) + ... return qml.expval(H) + ... + >>> params = np.random.normal(0, np.pi, (num_qubits, 3), requires_grad=True) + + Once constructed, the cost function can be passed directly to the + ``step`` or ``step_and_cost`` function of the optimizer: + + >>> max_iterations = 100 + >>> opt = qml.SPSAOptimizer(maxiter=max_iterations) + >>> for _ in range(max_iterations): + ... 
params, energy = opt.step_and_cost(cost, params, num_qubits=num_qubits) + >>> print(energy) + -0.4294539602541956 + + The algorithm provided by SPSA does not rely on built-in automatic differentiation capabilities of the interface being used + and therefore the optimizer can be used in more complex hybrid classical-quantum workflow with any of the interfaces: + + >>> n_qubits = 1 + >>> max_iterations = 20 + >>> dev = qml.device("default.qubit", wires=n_qubits) + >>> @qml.qnode(dev, interface="tf") + ... def layer_fn_spsa(inputs, weights): + ... qml.AngleEmbedding(inputs, wires=range(n_qubits)) + ... qml.BasicEntanglerLayers(weights, wires=range(n_qubits)) + ... return qml.expval(qml.PauliZ(wires=0)) + ... + >>> opt = qml.SPSAOptimizer(maxiter=max_iterations) + ... def fn(params, tensor_in, tensor_out): + ... with tf.init_scope(): + ... for _ in range(max_iterations): + ... # Some classical steps before the quantum computation + ... params_a, layer_res = opt.step_and_cost(layer_fn_spsa, + ... np.tensor(tensor_in, requires_grad=False), + ... np.tensor(params)) + ... params = params_a[1] + ... tensor_out = layer_res + ... # Some classical steps after the quantum computation + ... return layer_res + ... + >>> tensor_in = tf.Variable([0.27507603]) + >>> tensor_out = tf.Variable([0]) + >>> params = tf.Variable([[3.97507603], + ... [3.12950603], + ... [1.00854038], + ... [1.25907603]]) + >>> loss = fn(params, tensor_in, tensor_out) + >>> print(loss) + tf.Tensor(-0.9995854230771829, shape=(), dtype=float64) + + + + Keyword Args: + maxiter (int): the maximum number of iterations expected to be performed. + Used to determine :math:`A`, if :math:`A` is not supplied, otherwise ignored. + alpha (float): A hyperparameter to calculate :math:`a_k=\frac{a}{(A+k+1)^\alpha}` + for each iteration. Its asymptotically optimal value is 1.0. + gamma (float): An hyperparameter to calculate :math:`c_k=\frac{c}{(k+1)^\gamma}` + for each iteration. Its asymptotically optimal value is 1/6. + c (float): A hyperparameter related to the expected noise. It should be + approximately the standard deviation of the expected noise of the cost function. + A (float): The stability constant; if not provided, set to be 10% of the maximum number + of expected iterations. + a (float): A hyperparameter expected to be small in noisy situations, + its value could be picked using `A`, :math:`\alpha` and :math:`\hat{g_0} (\hat{\theta_0})`. + For more details, see `Spall (1998b) + `_. + """ + # pylint: disable-msg=too-many-arguments + def __init__(self, maxiter=None, alpha=0.602, gamma=0.101, c=0.2, A=None, a=None): + self.a = a + self.A = A + if not maxiter and not A: + raise TypeError("One of the parameters maxiter or A must be provided.") + if not A: + self.A = maxiter * 0.1 + if not a: + self.a = 0.05 * (self.A + 1) ** alpha + self.c = c + self.alpha = alpha + self.gamma = gamma + self.k = 1 + self.ak = self.a / (self.A + 1) ** self.alpha + + def step_and_cost(self, objective_fn, *args, **kwargs): + r"""Update the parameter array :math:`\hat{\theta}_k` with one step of the + optimizer and return the step and the corresponding objective function. The number + of steps stored by the ``k`` attribute of the optimizer is counted internally when calling ``step_and_cost`` and ``cost``. 
+ + Args: + objective_fn (function): the objective function for optimization + *args : variable length argument array for objective function + **kwargs : variable length of keyword arguments for the objective function + + Returns: + tuple[list [array], float]: the new variable values :math:`\hat{\theta}_{k+1}` and the + objective function output prior to the step. + """ + g = self.compute_grad(objective_fn, args, kwargs) + new_args = self.apply_grad(g, args) + + self.k += 1 + + forward = objective_fn(*args, **kwargs) + + # unwrap from list if one argument, cleaner return + if len(new_args) == 1: + return new_args[0], forward + return new_args, forward + + def step(self, objective_fn, *args, **kwargs): + r"""Update trainable arguments with one step of the optimizer. The number + of steps is being counted through calls to ``step_and_cost`` and ``cost``. + + Args: + objective_fn (function): the objective function for optimization + *args : variable length argument array for objective function + **kwargs : variable length of keyword arguments for the objective function + + Returns: + list [array]: the new variable values :math:`\hat{\theta}_{k+1}`. + """ + g = self.compute_grad(objective_fn, args, kwargs) + new_args = self.apply_grad(g, args) + + self.k += 1 + + # unwrap from list if one argument, cleaner return + if len(new_args) == 1: + return new_args[0] + + return new_args + + def compute_grad(self, objective_fn, args, kwargs): + r"""Approximate the gradient of the objective function at the + given point. + + Args: + objective_fn (function): The objective function for optimization + args (tuple): tuple of NumPy array containing the current parameters + for objective function + kwargs (dict): keyword arguments for the objective function + + Returns: + tuple (array): NumPy array containing the gradient + :math:`\hat{g}_k(\hat{\theta}_k)` + """ + ck = self.c / self.k**self.gamma + + delta = [] + thetaplus = list(args) + thetaminus = list(args) + + for index, arg in enumerate(args): + if getattr(arg, "requires_grad", False): + # Use the symmetric Bernoulli distribution to generate + # the coordinates of delta. Note that other distributions + # may also be used (they need to satisfy certain conditions). + # Refer to the paper linked in the class docstring for more info. + di = np.random.choice([-1, 1], size=arg.shape) + multiplier = ck * di + thetaplus[index] = arg + multiplier + thetaminus[index] = arg - multiplier + delta.append(di) + yplus = objective_fn(*thetaplus, **kwargs) + yminus = objective_fn(*thetaminus, **kwargs) + try: + if np.prod(objective_fn.func(*args).shape(objective_fn.device)) > 1: + raise ValueError( + "The objective function must be a scalar function for the gradient " + "to be computed." + ) + except AttributeError: + if yplus.size > 1: + raise ValueError( # pylint: disable=raise-missing-from + "The objective function must be a scalar function for the gradient " + "to be computed." + ) + grad = [(yplus - yminus) / (2 * ck * di) for di in delta] + + return tuple(grad) + + def apply_grad(self, grad, args): + r"""Update the variables to take a single optimization step. 
+ + Args: + grad (tuple [array]): the gradient approximation of the objective + function at point :math:`\hat{\theta}_{k}` + args (tuple): the current value of the variables :math:`\hat{\theta}_{k}` + + Returns: + list [array]: the new values :math:`\hat{\theta}_{k+1}`""" + self.ak = self.a / (self.A + self.k) ** self.alpha + args_new = list(args) + trained_index = 0 + for index, arg in enumerate(args): + if getattr(arg, "requires_grad", False): + args_new[index] = arg - self.ak * grad[trained_index] + trained_index += 1 + + return args_new diff --git a/openqaoa/optimizers/training_vqa.py b/openqaoa/optimizers/training_vqa.py index b6b03cf79..52607345f 100644 --- a/openqaoa/optimizers/training_vqa.py +++ b/openqaoa/optimizers/training_vqa.py @@ -697,8 +697,7 @@ class PennyLaneOptimizer(OptimizeVQA): """ PENNYLANE_OPTIMIZERS = ['pennylane_adagrad', 'pennylane_adam', 'pennylane_vgd', 'pennylane_momentum', 'pennylane_nesterov_momentum', - 'pennylane_natural_grad_descent', 'pennylane_rmsprop', - 'pennylane_rotosolve', 'pennylane_spsa'] + 'pennylane_rmsprop', 'pennylane_rotosolve', 'pennylane_spsa'] def __init__(self, vqa_object: Type[VQABaseBackend], @@ -786,13 +785,13 @@ def optimize(self): elif self.options['pennylane_method'] in ['spsa', 'rotosolve']: self.jac = None - try: - result = minimize(self.optimize_this, x0=self.initial_params, method=method, + # try: + result = minimize(self.optimize_this, x0=self.initial_params, method=method, jac=self.jac, tol=self.tol, constraints=self.constraints, options=self.options, bounds=self.bounds) - except Exception as e: - print("The optimization has been terminated early. Most likely due to a connection error. You can retrieve results from the optimization runs that were completed through the .results_information method.") - raise e - finally: - self.results_dictionary() - return self + # except Exception as e: + # print("The optimization has been terminated early. Most likely due to a connection error. 
You can retrieve results from the optimization runs that were completed through the .results_information method.") + # print(e) + # finally: + self.results_dictionary() + return self diff --git a/setup.py b/setup.py index a903dad83..be6490aed 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,9 @@ "qiskit>=0.36.1", "pyquil>=3.1.0", "docplex>=2.23.1", - "pennylane>=0.26.0" + "autograd~=1.4", + "semantic_version~=2.10", + "autoray==0.3.1" ] requirements_docs = [ From 4473e27372e9bde36ec001f06b32d68ed2b47cf5 Mon Sep 17 00:00:00 2001 From: Raul Conchello Vendrell Date: Wed, 16 Nov 2022 11:35:38 +0800 Subject: [PATCH 23/36] Deleting some prints --- openqaoa/optimizers/pennylane/optimization_methods_pennylane.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/openqaoa/optimizers/pennylane/optimization_methods_pennylane.py b/openqaoa/optimizers/pennylane/optimization_methods_pennylane.py index eaa78e612..8de682bea 100644 --- a/openqaoa/optimizers/pennylane/optimization_methods_pennylane.py +++ b/openqaoa/optimizers/pennylane/optimization_methods_pennylane.py @@ -123,8 +123,6 @@ def cost(params, **k): # define a function to convert the params list from penny if pennylane_method in ['adagrad', 'adam', 'vgd', 'momentum', 'nesterov_momentum', 'rmsprop']: testx, testy = optimizer.step_and_cost(cost, bestx, *args, grad_fn=jac) elif pennylane_method in ['rotosolve']: - print('Rotosolve') - print('s', bestx) testx, testy = optimizer.step_and_cost( cost, bestx, *args, nums_frequency={'params': {(i,):1 for i in range(bestx.size)}} if not nums_frequency else nums_frequency, From d66524d88436e01c1484703d1f3891109c7b3aa4 Mon Sep 17 00:00:00 2001 From: Raul Conchello Vendrell Date: Wed, 16 Nov 2022 13:36:02 +0800 Subject: [PATCH 24/36] Added NOTICE, it specifies code is from PennyLane --- openqaoa/optimizers/pennylane/FILE.txt | 0 openqaoa/optimizers/pennylane/NOTICE | 13 +++++++++++++ .../optimizers/pennylane/fourier/reconstruct.py | 5 ++++- .../optimizers/pennylane/math/is_independent.py | 5 ++++- .../pennylane/math/matrix_manipulation.py | 5 ++++- openqaoa/optimizers/pennylane/math/quantum.py | 5 ++++- .../pennylane/pennylane_optimizers/adagrad.py | 5 ++++- .../pennylane/pennylane_optimizers/adam.py | 5 ++++- .../pennylane_optimizers/gradient_descent.py | 5 ++++- .../pennylane_optimizers/nesterov_momentum.py | 5 ++++- .../pennylane/pennylane_optimizers/rms_prop.py | 5 ++++- .../pennylane/pennylane_optimizers/rotosolve.py | 7 +++++-- .../pennylane/pennylane_optimizers/spsa.py | 5 ++++- 13 files changed, 58 insertions(+), 12 deletions(-) delete mode 100644 openqaoa/optimizers/pennylane/FILE.txt create mode 100644 openqaoa/optimizers/pennylane/NOTICE diff --git a/openqaoa/optimizers/pennylane/FILE.txt b/openqaoa/optimizers/pennylane/FILE.txt deleted file mode 100644 index e69de29bb..000000000 diff --git a/openqaoa/optimizers/pennylane/NOTICE b/openqaoa/optimizers/pennylane/NOTICE new file mode 100644 index 000000000..67fc8ad38 --- /dev/null +++ b/openqaoa/optimizers/pennylane/NOTICE @@ -0,0 +1,13 @@ +Apache HTTP Server +Copyright 2016 The Apache Software Foundation. + +This folder contains code developed in Xanadu Quantum Technologies Inc. +The code in folders + openqaoa.optimizers.pennylane.fourier + openqaoa.optimizers.pennylane.math + openqaoa.optimizers.pennylane.numpy + openqaoa.optimizers.pennylane.pennylane_optimizers +has been taken from the PennyLane library, https://github.com/PennyLaneAI/pennylane, +developed by Xanadu Quantum Technologies Inc. 
+PennyLane is free and open source, released under the Apache License, Version 2.0. +Some lines of code have been modified, which has been indicated in the modified files. diff --git a/openqaoa/optimizers/pennylane/fourier/reconstruct.py b/openqaoa/optimizers/pennylane/fourier/reconstruct.py index a78ce4b1c..f9a522db6 100644 --- a/openqaoa/optimizers/pennylane/fourier/reconstruct.py +++ b/openqaoa/optimizers/pennylane/fourier/reconstruct.py @@ -11,6 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +# Some lines of code have been modified from the original PennyLane source code + """Contains a function that computes the fourier series of a quantum expectation value.""" from functools import wraps @@ -19,7 +22,7 @@ import numpy as np from autoray import numpy as anp -from openqaoa.optimizers import pennylane as qml # changed from the original code +from openqaoa.optimizers import pennylane as qml # changed from the original PennyLane code def _reconstruct_equ(fun, num_frequency, x0=None, f0=None, interface=None): diff --git a/openqaoa/optimizers/pennylane/math/is_independent.py b/openqaoa/optimizers/pennylane/math/is_independent.py index 3d57fe7c1..f69c9e8c8 100644 --- a/openqaoa/optimizers/pennylane/math/is_independent.py +++ b/openqaoa/optimizers/pennylane/math/is_independent.py @@ -11,6 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +# Some lines of code have been modified from the original PennyLane source code + """ This file contains the is_independent function that checks if a function is independent of its arguments for the interfaces @@ -26,7 +29,7 @@ from autograd.tracer import isbox, new_box, trace_stack from autograd.core import VJPNode -from openqaoa.optimizers.pennylane import numpy as np # changed from the original pennylane code +from openqaoa.optimizers.pennylane import numpy as np # changed from the original PennyLane code def _autograd_is_indep_analytic(func, *args, **kwargs): diff --git a/openqaoa/optimizers/pennylane/math/matrix_manipulation.py b/openqaoa/optimizers/pennylane/math/matrix_manipulation.py index f8a342b8d..7252be42b 100644 --- a/openqaoa/optimizers/pennylane/math/matrix_manipulation.py +++ b/openqaoa/optimizers/pennylane/math/matrix_manipulation.py @@ -11,6 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +# Some lines of code have been modified from the original PennyLane source code + """This module contains methods to expand the matrix representation of an operator to a higher hilbert space with re-ordered wires.""" import copy @@ -20,7 +23,7 @@ import numpy as np from scipy.sparse import csr_matrix, eye, issparse, kron -from openqaoa.optimizers import pennylane as qml # changed from the original code +from openqaoa.optimizers import pennylane as qml # changed from the original PennyLane code Wires = None diff --git a/openqaoa/optimizers/pennylane/math/quantum.py b/openqaoa/optimizers/pennylane/math/quantum.py index fa03d7431..177ee12ef 100644 --- a/openqaoa/optimizers/pennylane/math/quantum.py +++ b/openqaoa/optimizers/pennylane/math/quantum.py @@ -11,6 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. + +# Some lines of code have been modified from the original PennyLane source code + """Differentiable quantum functions""" # pylint: disable=import-outside-toplevel import itertools @@ -20,7 +23,7 @@ from autoray import numpy as np from numpy import float64 -from openqaoa.optimizers import pennylane as qml #changed fromt the original code +from openqaoa.optimizers import pennylane as qml # changed from the original PennyLane code from . import single_dispatch # pylint:disable=unused-import from .multi_dispatch import diag, dot, scatter_element_add, einsum, get_interface diff --git a/openqaoa/optimizers/pennylane/pennylane_optimizers/adagrad.py b/openqaoa/optimizers/pennylane/pennylane_optimizers/adagrad.py index b1b1c35c2..80d347419 100644 --- a/openqaoa/optimizers/pennylane/pennylane_optimizers/adagrad.py +++ b/openqaoa/optimizers/pennylane/pennylane_optimizers/adagrad.py @@ -11,8 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +# Some lines of code have been modified from the original PennyLane source code + """Adagrad optimizer""" -from numpy import sqrt # changed from the original pennylane code +from numpy import sqrt # changed from the original PennyLane code from .gradient_descent import GradientDescentOptimizer diff --git a/openqaoa/optimizers/pennylane/pennylane_optimizers/adam.py b/openqaoa/optimizers/pennylane/pennylane_optimizers/adam.py index 2d303c499..0fdb96c15 100644 --- a/openqaoa/optimizers/pennylane/pennylane_optimizers/adam.py +++ b/openqaoa/optimizers/pennylane/pennylane_optimizers/adam.py @@ -11,8 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +# Some lines of code have been modified from the original PennyLane source code + """Adam optimizer""" -from numpy import sqrt # changed from the original pennylane code +from numpy import sqrt # changed from the original PennyLane code from .gradient_descent import GradientDescentOptimizer diff --git a/openqaoa/optimizers/pennylane/pennylane_optimizers/gradient_descent.py b/openqaoa/optimizers/pennylane/pennylane_optimizers/gradient_descent.py index a5cd4a222..9e19ff0be 100644 --- a/openqaoa/optimizers/pennylane/pennylane_optimizers/gradient_descent.py +++ b/openqaoa/optimizers/pennylane/pennylane_optimizers/gradient_descent.py @@ -11,9 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +# Some lines of code have been modified from the original PennyLane source code + """Gradient descent optimizer""" -get_gradient = None # changed from the original pennylane code +get_gradient = None # changed from the original PennyLane code class GradientDescentOptimizer: r"""Basic gradient-descent optimizer. 
diff --git a/openqaoa/optimizers/pennylane/pennylane_optimizers/nesterov_momentum.py b/openqaoa/optimizers/pennylane/pennylane_optimizers/nesterov_momentum.py index c3836fb4e..489adbc73 100644 --- a/openqaoa/optimizers/pennylane/pennylane_optimizers/nesterov_momentum.py +++ b/openqaoa/optimizers/pennylane/pennylane_optimizers/nesterov_momentum.py @@ -11,8 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +# Some lines of code have been modified from the original PennyLane source code + """Nesterov momentum optimizer""" -get_gradient = None # changed from the original pennylane code +get_gradient = None # changed from the original PennyLane code from .momentum import MomentumOptimizer diff --git a/openqaoa/optimizers/pennylane/pennylane_optimizers/rms_prop.py b/openqaoa/optimizers/pennylane/pennylane_optimizers/rms_prop.py index 4d5aa7eb4..5ddd8b22f 100644 --- a/openqaoa/optimizers/pennylane/pennylane_optimizers/rms_prop.py +++ b/openqaoa/optimizers/pennylane/pennylane_optimizers/rms_prop.py @@ -11,8 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +# Some lines of code have been modified from the original PennyLane source code + """Root mean square propagation optimizer""" -from numpy import sqrt # changed from the original pennylane code +from numpy import sqrt # changed from the original PennyLane code from .adagrad import AdagradOptimizer diff --git a/openqaoa/optimizers/pennylane/pennylane_optimizers/rotosolve.py b/openqaoa/optimizers/pennylane/pennylane_optimizers/rotosolve.py index 15e812dbd..7d6c7aa28 100644 --- a/openqaoa/optimizers/pennylane/pennylane_optimizers/rotosolve.py +++ b/openqaoa/optimizers/pennylane/pennylane_optimizers/rotosolve.py @@ -11,6 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +# Some lines of code have been modified from the original PennyLane source code + """Rotosolve gradient free optimizer""" # pylint: disable=too-many-branches,cell-var-from-loop @@ -18,7 +21,7 @@ import numpy as np from scipy.optimize import brute, shgo -from openqaoa.optimizers import pennylane as qml # changed from the original code +from openqaoa.optimizers import pennylane as qml # changed from the original PennyLane code def _brute_optimizer(fun, num_steps, bounds=None, **kwargs): @@ -415,7 +418,7 @@ def step_and_cost( sign_fn = objective_fn.func if isinstance(objective_fn, qml.QNode) else objective_fn arg_names = list(signature(sign_fn).parameters.keys()) requires_grad = { - arg_name: True for arg_name, arg in zip(arg_names, args) # changed from the original code + arg_name: True for arg_name, arg in zip(arg_names, args) # changed from the original PennyLane code } nums_frequency = nums_frequency or {} spectra = spectra or {} diff --git a/openqaoa/optimizers/pennylane/pennylane_optimizers/spsa.py b/openqaoa/optimizers/pennylane/pennylane_optimizers/spsa.py index 6dcde9860..c02bc5554 100644 --- a/openqaoa/optimizers/pennylane/pennylane_optimizers/spsa.py +++ b/openqaoa/optimizers/pennylane/pennylane_optimizers/spsa.py @@ -11,9 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. + +# Some lines of code have been modified from the original PennyLane source code + """SPSA optimizer""" -from openqaoa.optimizers.pennylane import numpy as np # changed from the original pennylane code +from openqaoa.optimizers.pennylane import numpy as np # changed from the original PennyLane code class SPSAOptimizer: From 24a64ddb754e4099d58f6c08c17f27495f74c3fc Mon Sep 17 00:00:00 2001 From: Raul Conchello Vendrell Date: Wed, 16 Nov 2022 13:44:06 +0800 Subject: [PATCH 25/36] Requirements.txt updated --- docs/requirements.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/requirements.txt b/docs/requirements.txt index a58ec2ad5..988290231 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -9,3 +9,6 @@ sphinx-rtd-theme==1.0.0 ipython==8.2.0 pandas>=1.3.5 amazon-braket-sdk==1.23.0 +autograd~=1.4 +semantic_version~=2.10 +autoray==0.3.1 From 3b196fb922d492b87102ff7bc126fd576bb47808 Mon Sep 17 00:00:00 2001 From: Raul Conchello Vendrell Date: Wed, 16 Nov 2022 13:45:06 +0800 Subject: [PATCH 26/36] Docs optimizers --- docs/source/optimizers.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/source/optimizers.rst b/docs/source/optimizers.rst index 6ae99e0d6..d0ac36f68 100644 --- a/docs/source/optimizers.rst +++ b/docs/source/optimizers.rst @@ -21,6 +21,12 @@ SciPy Optimizers :show-inheritance: :inherited-members: +.. autoclass:: openqaoa.optimizers.training_vqa.PennyLaneOptimizer + :members: + :undoc-members: + :show-inheritance: + :inherited-members: + Optimization Methods -------------------- .. automodule:: openqaoa.optimizers.optimization_methods From c3a7fb7368ba79deef6fee51a4e23d2d9e5b4010 Mon Sep 17 00:00:00 2001 From: Raul Conchello Vendrell Date: Wed, 16 Nov 2022 13:53:16 +0800 Subject: [PATCH 27/36] Debugging --- tests/test_optimizers_pennylane.py | 2 +- tests/test_workflows.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_optimizers_pennylane.py b/tests/test_optimizers_pennylane.py index 95a8b7ef4..29f46b0cf 100644 --- a/tests/test_optimizers_pennylane.py +++ b/tests/test_optimizers_pennylane.py @@ -3,7 +3,7 @@ import numpy as np import networkx as nx -import pennylane as pl +import openqaoa.optimizers.pennylane as pl import copy import inspect diff --git a/tests/test_workflows.py b/tests/test_workflows.py index 73a68c777..d9a00ab3c 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -29,7 +29,6 @@ from openqaoa.optimizers.training_vqa import ScipyOptimizer, CustomScipyGradientOptimizer, PennyLaneOptimizer import unittest import networkx as nw -import pytest import numpy as np from openqaoa.problems.problem import MinimumVertexCover, QUBO From 88a6ac5e9eec7fca3124367fd1cc14a3b503cf93 Mon Sep 17 00:00:00 2001 From: Raul Conchello Vendrell Date: Wed, 16 Nov 2022 13:58:05 +0800 Subject: [PATCH 28/36] Debugging docs --- docs/source/optimizers.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/optimizers.rst b/docs/source/optimizers.rst index d0ac36f68..88b1c660a 100644 --- a/docs/source/optimizers.rst +++ b/docs/source/optimizers.rst @@ -35,7 +35,7 @@ Optimization Methods :show-inheritance: :inherited-members: -.. automodule:: openqaoa.optimizers.optimization_methods_pennylane +.. 
automodule:: openqaoa.optimizers.pennylane.optimization_methods_pennylane :members: :undoc-members: :show-inheritance: From 97f7aaef292036400a1ff1b474aed56c40b8540d Mon Sep 17 00:00:00 2001 From: Raul Conchello Vendrell Date: Wed, 16 Nov 2022 14:04:31 +0800 Subject: [PATCH 29/36] Debugging --- openqaoa/optimizers/training_vqa.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/openqaoa/optimizers/training_vqa.py b/openqaoa/optimizers/training_vqa.py index 52607345f..4ab185e81 100644 --- a/openqaoa/optimizers/training_vqa.py +++ b/openqaoa/optimizers/training_vqa.py @@ -785,13 +785,12 @@ def optimize(self): elif self.options['pennylane_method'] in ['spsa', 'rotosolve']: self.jac = None - # try: - result = minimize(self.optimize_this, x0=self.initial_params, method=method, + try: + result = minimize(self.optimize_this, x0=self.initial_params, method=method, jac=self.jac, tol=self.tol, constraints=self.constraints, options=self.options, bounds=self.bounds) - # except Exception as e: - # print("The optimization has been terminated early. Most likely due to a connection error. You can retrieve results from the optimization runs that were completed through the .results_information method.") - # print(e) - # finally: - self.results_dictionary() - return self + except ConnectionError as e: + print("The optimization has been terminated early. Most likely due to a connection error. You can retrieve results from the optimization runs that were completed through the .results_information method.") + finally: + self.results_dictionary() + return self From 98dde8217071e43523a706893af7389bb1448e7f Mon Sep 17 00:00:00 2001 From: Raul Conchello Vendrell Date: Wed, 16 Nov 2022 14:27:00 +0800 Subject: [PATCH 30/36] except Exception as e. In PennyLane opt --- openqaoa/optimizers/training_vqa.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/openqaoa/optimizers/training_vqa.py b/openqaoa/optimizers/training_vqa.py index ab281e099..3c2c573d4 100644 --- a/openqaoa/optimizers/training_vqa.py +++ b/openqaoa/optimizers/training_vqa.py @@ -797,7 +797,10 @@ def optimize(self): jac=self.jac, tol=self.tol, constraints=self.constraints, options=self.options, bounds=self.bounds) except ConnectionError as e: + print(e, '\n') print("The optimization has been terminated early. Most likely due to a connection error. 
You can retrieve results from the optimization runs that were completed through the .results_information method.") + except Exception as e: + raise e finally: self.results_dictionary() From d164b9c8db8bc0b615198e7619ddfbebc21eb10f Mon Sep 17 00:00:00 2001 From: Raul Conchello Vendrell Date: Fri, 18 Nov 2022 15:41:53 +0800 Subject: [PATCH 31/36] Requirements with >= --- docs/requirements.txt | 4 ++-- setup.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 988290231..f47f353a3 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -9,6 +9,6 @@ sphinx-rtd-theme==1.0.0 ipython==8.2.0 pandas>=1.3.5 amazon-braket-sdk==1.23.0 -autograd~=1.4 -semantic_version~=2.10 +autograd>=1.4 +semantic_version>=2.10 autoray==0.3.1 diff --git a/setup.py b/setup.py index be6490aed..9a2ec2d89 100644 --- a/setup.py +++ b/setup.py @@ -20,8 +20,8 @@ "qiskit>=0.36.1", "pyquil>=3.1.0", "docplex>=2.23.1", - "autograd~=1.4", - "semantic_version~=2.10", + "autograd>=1.4", + "semantic_version>=2.10", "autoray==0.3.1" ] From 2aebedb9d2ce85d5879b148333b3f521e3e06a67 Mon Sep 17 00:00:00 2001 From: Raul Conchello Vendrell Date: Fri, 18 Nov 2022 15:43:51 +0800 Subject: [PATCH 32/36] Documentation and copyright --- openqaoa/optimizers/__init__.py | 1 + openqaoa/optimizers/pennylane/NOTICE | 4 ++-- openqaoa/optimizers/pennylane/__init__.py | 6 +----- openqaoa/optimizers/pennylane/fourier/reconstruct.py | 3 ++- openqaoa/optimizers/pennylane/math/__init__.py | 11 ++++++----- openqaoa/optimizers/pennylane/math/is_independent.py | 3 ++- .../optimizers/pennylane/math/matrix_manipulation.py | 3 ++- openqaoa/optimizers/pennylane/math/quantum.py | 3 ++- .../pennylane/pennylane_optimizers/adagrad.py | 3 ++- .../optimizers/pennylane/pennylane_optimizers/adam.py | 3 ++- .../pennylane_optimizers/gradient_descent.py | 3 ++- .../pennylane_optimizers/nesterov_momentum.py | 3 ++- .../pennylane/pennylane_optimizers/rms_prop.py | 3 ++- .../pennylane/pennylane_optimizers/rotosolve.py | 3 ++- .../optimizers/pennylane/pennylane_optimizers/spsa.py | 3 ++- 15 files changed, 32 insertions(+), 23 deletions(-) diff --git a/openqaoa/optimizers/__init__.py b/openqaoa/optimizers/__init__.py index 190fcfd0f..dac853941 100644 --- a/openqaoa/optimizers/__init__.py +++ b/openqaoa/optimizers/__init__.py @@ -17,6 +17,7 @@ Currently supports: ScipyOptimizers (both gradient-free and gradient-based) + PennylaneOptimizers (adagrad, adam, gradient descent, nestrov momentum, rms prop, rotosolve, spsa) """ from .training_vqa import * diff --git a/openqaoa/optimizers/pennylane/NOTICE b/openqaoa/optimizers/pennylane/NOTICE index 67fc8ad38..e22c9e574 100644 --- a/openqaoa/optimizers/pennylane/NOTICE +++ b/openqaoa/optimizers/pennylane/NOTICE @@ -1,5 +1,5 @@ -Apache HTTP Server -Copyright 2016 The Apache Software Foundation. +PennyLane +Copyright 2018-2021 Xanadu Quantum Technologies Inc. This folder contains code developed in Xanadu Quantum Technologies Inc. The code in folders diff --git a/openqaoa/optimizers/pennylane/__init__.py b/openqaoa/optimizers/pennylane/__init__.py index 2aef9d078..9982062ac 100644 --- a/openqaoa/optimizers/pennylane/__init__.py +++ b/openqaoa/optimizers/pennylane/__init__.py @@ -13,13 +13,9 @@ # limitations under the License. 
""" -Optimizer directory for the classical optimization loop for QAOA - -Currently supports: - ScipyOptimizers (both gradient-free and gradient-based) +PennyLane optimizers directory for the classical optimization loop for QAOA """ -# from .optimization_methods_pennylane import * from .pennylane_optimizers.adagrad import * from .pennylane_optimizers.adam import * from .pennylane_optimizers.gradient_descent import * diff --git a/openqaoa/optimizers/pennylane/fourier/reconstruct.py b/openqaoa/optimizers/pennylane/fourier/reconstruct.py index f9a522db6..b1ec6f514 100644 --- a/openqaoa/optimizers/pennylane/fourier/reconstruct.py +++ b/openqaoa/optimizers/pennylane/fourier/reconstruct.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Some lines of code have been modified from the original PennyLane source code +# NOTICE # +# Lines modified by Entropica Labs will bear the comment # changed from the original PennyLane code """Contains a function that computes the fourier series of a quantum expectation value.""" diff --git a/openqaoa/optimizers/pennylane/math/__init__.py b/openqaoa/optimizers/pennylane/math/__init__.py index f138fb6c4..eb2c5b883 100644 --- a/openqaoa/optimizers/pennylane/math/__init__.py +++ b/openqaoa/optimizers/pennylane/math/__init__.py @@ -11,6 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +# # NOTICE # +# Lines modified by Entropica Labs will bear the comment # changed from the original PennyLane code + """ This package contains unified functions for framework-agnostic tensor and array manipulation. Given the input tensor-like object, the call is dispatched @@ -26,11 +30,8 @@ The following frameworks are currently supported: * NumPy -* Autograd -* TensorFlow -* PyTorch -* JAX -""" +""" # changed from the original PennyLane code + import autoray as ar from .multi_dispatch import ( diff --git a/openqaoa/optimizers/pennylane/math/is_independent.py b/openqaoa/optimizers/pennylane/math/is_independent.py index f69c9e8c8..8300d08d8 100644 --- a/openqaoa/optimizers/pennylane/math/is_independent.py +++ b/openqaoa/optimizers/pennylane/math/is_independent.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Some lines of code have been modified from the original PennyLane source code +# NOTICE # +# Lines modified by Entropica Labs will bear the comment # changed from the original PennyLane code """ This file contains the is_independent function that checks if diff --git a/openqaoa/optimizers/pennylane/math/matrix_manipulation.py b/openqaoa/optimizers/pennylane/math/matrix_manipulation.py index 7252be42b..1214a9e2a 100644 --- a/openqaoa/optimizers/pennylane/math/matrix_manipulation.py +++ b/openqaoa/optimizers/pennylane/math/matrix_manipulation.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# Some lines of code have been modified from the original PennyLane source code +# NOTICE # +# Lines modified by Entropica Labs will bear the comment # changed from the original PennyLane code """This module contains methods to expand the matrix representation of an operator to a higher hilbert space with re-ordered wires.""" diff --git a/openqaoa/optimizers/pennylane/math/quantum.py b/openqaoa/optimizers/pennylane/math/quantum.py index 177ee12ef..a2596b461 100644 --- a/openqaoa/optimizers/pennylane/math/quantum.py +++ b/openqaoa/optimizers/pennylane/math/quantum.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Some lines of code have been modified from the original PennyLane source code +# NOTICE # +# Lines modified by Entropica Labs will bear the comment # changed from the original PennyLane code """Differentiable quantum functions""" # pylint: disable=import-outside-toplevel diff --git a/openqaoa/optimizers/pennylane/pennylane_optimizers/adagrad.py b/openqaoa/optimizers/pennylane/pennylane_optimizers/adagrad.py index 80d347419..9230f144c 100644 --- a/openqaoa/optimizers/pennylane/pennylane_optimizers/adagrad.py +++ b/openqaoa/optimizers/pennylane/pennylane_optimizers/adagrad.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Some lines of code have been modified from the original PennyLane source code +# NOTICE # +# Lines modified by Entropica Labs will bear the comment # changed from the original PennyLane code """Adagrad optimizer""" from numpy import sqrt # changed from the original PennyLane code diff --git a/openqaoa/optimizers/pennylane/pennylane_optimizers/adam.py b/openqaoa/optimizers/pennylane/pennylane_optimizers/adam.py index 0fdb96c15..21de6d52a 100644 --- a/openqaoa/optimizers/pennylane/pennylane_optimizers/adam.py +++ b/openqaoa/optimizers/pennylane/pennylane_optimizers/adam.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Some lines of code have been modified from the original PennyLane source code +# NOTICE # +# Lines modified by Entropica Labs will bear the comment # changed from the original PennyLane code """Adam optimizer""" from numpy import sqrt # changed from the original PennyLane code diff --git a/openqaoa/optimizers/pennylane/pennylane_optimizers/gradient_descent.py b/openqaoa/optimizers/pennylane/pennylane_optimizers/gradient_descent.py index 9e19ff0be..e73e4a801 100644 --- a/openqaoa/optimizers/pennylane/pennylane_optimizers/gradient_descent.py +++ b/openqaoa/optimizers/pennylane/pennylane_optimizers/gradient_descent.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Some lines of code have been modified from the original PennyLane source code +# NOTICE # +# Lines modified by Entropica Labs will bear the comment # changed from the original PennyLane code """Gradient descent optimizer""" diff --git a/openqaoa/optimizers/pennylane/pennylane_optimizers/nesterov_momentum.py b/openqaoa/optimizers/pennylane/pennylane_optimizers/nesterov_momentum.py index 489adbc73..a1681fbe0 100644 --- a/openqaoa/optimizers/pennylane/pennylane_optimizers/nesterov_momentum.py +++ b/openqaoa/optimizers/pennylane/pennylane_optimizers/nesterov_momentum.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# Some lines of code have been modified from the original PennyLane source code +# NOTICE # +# Lines modified by Entropica Labs will bear the comment # changed from the original PennyLane code """Nesterov momentum optimizer""" get_gradient = None # changed from the original PennyLane code diff --git a/openqaoa/optimizers/pennylane/pennylane_optimizers/rms_prop.py b/openqaoa/optimizers/pennylane/pennylane_optimizers/rms_prop.py index 5ddd8b22f..8c1327daa 100644 --- a/openqaoa/optimizers/pennylane/pennylane_optimizers/rms_prop.py +++ b/openqaoa/optimizers/pennylane/pennylane_optimizers/rms_prop.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Some lines of code have been modified from the original PennyLane source code +# NOTICE # +# Lines modified by Entropica Labs will bear the comment # changed from the original PennyLane code """Root mean square propagation optimizer""" from numpy import sqrt # changed from the original PennyLane code diff --git a/openqaoa/optimizers/pennylane/pennylane_optimizers/rotosolve.py b/openqaoa/optimizers/pennylane/pennylane_optimizers/rotosolve.py index 7d6c7aa28..74e7577e2 100644 --- a/openqaoa/optimizers/pennylane/pennylane_optimizers/rotosolve.py +++ b/openqaoa/optimizers/pennylane/pennylane_optimizers/rotosolve.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Some lines of code have been modified from the original PennyLane source code +# NOTICE # +# Lines modified by Entropica Labs will bear the comment # changed from the original PennyLane code """Rotosolve gradient free optimizer""" # pylint: disable=too-many-branches,cell-var-from-loop diff --git a/openqaoa/optimizers/pennylane/pennylane_optimizers/spsa.py b/openqaoa/optimizers/pennylane/pennylane_optimizers/spsa.py index c02bc5554..256e8fbbc 100644 --- a/openqaoa/optimizers/pennylane/pennylane_optimizers/spsa.py +++ b/openqaoa/optimizers/pennylane/pennylane_optimizers/spsa.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Some lines of code have been modified from the original PennyLane source code +# NOTICE # +# Lines modified by Entropica Labs will bear the comment # changed from the original PennyLane code """SPSA optimizer""" From 7f8c55d143c282ac39fbdf7c7167754e0468d070 Mon Sep 17 00:00:00 2001 From: Raul Conchello Vendrell Date: Fri, 18 Nov 2022 15:44:34 +0800 Subject: [PATCH 33/36] Cleaning imports of optimization_methods_pennylane --- openqaoa/optimizers/pennylane/optimization_methods_pennylane.py | 1 - 1 file changed, 1 deletion(-) diff --git a/openqaoa/optimizers/pennylane/optimization_methods_pennylane.py b/openqaoa/optimizers/pennylane/optimization_methods_pennylane.py index 8de682bea..622cf7898 100644 --- a/openqaoa/optimizers/pennylane/optimization_methods_pennylane.py +++ b/openqaoa/optimizers/pennylane/optimization_methods_pennylane.py @@ -19,7 +19,6 @@ Similarly as with the custom optimization methods Scipy `minimize` is used. Extends available scipy methods. 
""" -from openqaoa.optimizers.pennylane.pennylane_optimizers.gradient_descent import GradientDescentOptimizer from openqaoa.optimizers import pennylane as pl import inspect from scipy.optimize import OptimizeResult From d478b536f05e014d466f4165c50aa994e133ac33 Mon Sep 17 00:00:00 2001 From: Raul Conchello Vendrell Date: Fri, 18 Nov 2022 16:30:39 +0800 Subject: [PATCH 34/36] Documentation --- openqaoa/optimizers/training_vqa.py | 121 ++++++---------------------- 1 file changed, 26 insertions(+), 95 deletions(-) diff --git a/openqaoa/optimizers/training_vqa.py b/openqaoa/optimizers/training_vqa.py index 3c2c573d4..a3c5e1507 100644 --- a/openqaoa/optimizers/training_vqa.py +++ b/openqaoa/optimizers/training_vqa.py @@ -324,36 +324,14 @@ class ScipyOptimizer(OptimizeVQA): Object of class QAOAVariationalBaseParams, which contains information on the circuit to be executed, the type of parametrisation, and the angles of the VQA circuit. optimizer_dict: - jac: - gradient as `Callable` if defined. else None - - hess: - hessian as `Callable` if defined. else None - - * bounds - - * parameter bounds while training, defaults to ``None`` - - * constraints - - * Linear/Non-Linear constraints (only for COBYLA, SLSQP and trust-constr) - - * tol - - * Tolerance for termination - - * maxiters - - * sets ``maxiters = 100`` by default if not specified. - - * maxfev - - * sets ``maxfev = 100`` by default if not specified. - - * optimizer_options - - * Dictionary of optimiser-specific arguments, defaults to ``None`` - + * 'jac': gradient as ``Callable``, if defined else ``None`` + * 'hess': hessian as ``Callable``, if defined else ``None`` + * 'bounds': parameter bounds while training, defaults to ``None`` + * 'constraints': Linear/Non-Linear constraints (only for COBYLA, SLSQP and trust-constr) + * 'tol': Tolerance for termination + * 'maxiter': sets ``maxiters = 100`` by default if not specified. + * 'maxfev': sets ``maxfev = 100`` by default if not specified. + * 'optimizer_options': dictionary of optimiser-specific arguments, defaults to ``None`` """ GRADIENT_FREE = ['cobyla', 'nelder-mead', 'powell', 'slsqp'] SCIPY_METHODS = MINIMIZE_METHODS @@ -493,37 +471,14 @@ class CustomScipyGradientOptimizer(OptimizeVQA): Object of class QAOAVariationalBaseParams, which contains information on the circuit to be executed, the type of parametrisation, and the angles of the VQA circuit. optimizer_dict: - * jac - - * gradient as ``Callable``, if defined else ``None`` - - * hess - - * hessian as ``Callable``, if defined else ``None`` - - * bounds - - * parameter bounds while training, defaults to ``None`` - - * constraints - - * Linear/Non-Linear constraints (only for COBYLA, SLSQP and trust-constr) - - * tol - - * Tolerance for termination - - * maxiters - - * sets ``maxiters = 100`` by default if not specified. - - * maxfev - - * sets ``maxfev = 100`` by default if not specified. - - * optimizer_options - - * Dictionary of optimiser-specific arguments, defaults to ``None`` + * 'jac': gradient as ``Callable``, if defined else ``None`` + * 'hess': hessian as ``Callable``, if defined else ``None`` + * 'bounds': parameter bounds while training, defaults to ``None`` + * 'constraints': Linear/Non-Linear constraints (only for COBYLA, SLSQP and trust-constr) + * 'tol': Tolerance for termination + * 'maxiter': sets ``maxiters = 100`` by default if not specified. + * 'maxfev': sets ``maxfev = 100`` by default if not specified. 
+        * 'optimizer_options': dictionary of optimiser-specific arguments, defaults to ``None``
     """
 
     CUSTOM_GRADIENT_OPTIMIZERS = ['vgd', 'newton',
@@ -615,7 +570,7 @@ def optimize(self):
         Returns
         -------
         :
-            The optimized return object from the ``scipy.optimize`` package the result is assigned to the attribute ``opt_result``
+            Returns self after the optimization process is completed. The optimized result is assigned to the attribute ``opt_result``
         '''
 
         if self.method == 'vgd':
@@ -668,38 +623,14 @@ class PennyLaneOptimizer(OptimizeVQA):
         Object of class QAOAVariationalBaseParams, which contains information on
         the circuit to be executed, the type of parametrisation, and the angles of the VQA circuit.
     optimizer_dict:
-        * jac
-
-            * gradient as ``Callable``, if defined else ``None``
-
-        * hess
-
-            * hessian as ``Callable``, if defined else ``None``
-
-        * bounds
-
-            * parameter bounds while training, defaults to ``None``
-
-        * constraints
-
-            * Linear/Non-Linear constraints (only for COBYLA, SLSQP and trust-constr)
-
-        * tol
-
-            * Tolerance for termination
-
-        * maxiters
-
-            * sets ``maxiters = 100`` by default if not specified.
-
-        * maxfev
-
-            * sets ``maxfev = 100`` by default if not specified.
-
-        * optimizer_options
-
-            * Dictionary of optimiser-specific arguments, defaults to ``None``
-            * Used also for the pennylande optimizers (and step function) arguments
+        * 'jac': gradient as ``Callable`` if defined, else ``None``
+        * 'hess': hessian as ``Callable`` if defined, else ``None``
+        * 'bounds': parameter bounds while training, defaults to ``None``
+        * 'constraints': Linear/Non-Linear constraints (only for COBYLA, SLSQP and trust-constr)
+        * 'tol': tolerance for termination
+        * 'maxiter': sets ``maxiter = 100`` by default if not specified.
+        * 'maxfev': sets ``maxfev = 100`` by default if not specified.
+        * 'optimizer_options': dictionary of optimiser-specific arguments, defaults to ``None``. Also used to pass arguments to the PennyLane optimizers (and their step function).
     """
 
     PENNYLANE_OPTIMIZERS = ['pennylane_adagrad', 'pennylane_adam', 'pennylane_vgd',
@@ -778,7 +709,7 @@ def optimize(self):
         Returns
         -------
         :
-            The optimized return object from the ``scipy.optimize`` package the result is assigned to the attribute ``opt_result``
+            Returns self after the optimization process is completed. The optimized result is assigned to the attribute ``opt_result``
         '''
 
         #set the optimizer function

From 4fa48d3be0419237739ca586d90e93e172c0699c Mon Sep 17 00:00:00 2001
From: Raul Conchello Vendrell
Date: Fri, 18 Nov 2022 16:49:09 +0800
Subject: [PATCH 35/36] Requirements -> autoray>=0.3.1

---
 docs/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index f47f353a3..ef1650f66 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -11,4 +11,4 @@ pandas>=1.3.5
 amazon-braket-sdk==1.23.0
 autograd>=1.4
 semantic_version>=2.10
-autoray==0.3.1
+autoray>=0.3.1

From 9b76633941f1d8e748d91b58e803c4460944b4d8 Mon Sep 17 00:00:00 2001
From: Raul Conchello Vendrell
Date: Fri, 18 Nov 2022 16:56:54 +0800
Subject: [PATCH 36/36] Requirements -> autoray>=0.3.1

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 9a2ec2d89..877059758 100644
--- a/setup.py
+++ b/setup.py
@@ -22,7 +22,7 @@
     "docplex>=2.23.1",
     "autograd>=1.4",
     "semantic_version>=2.10",
-    "autoray==0.3.1"
+    "autoray>=0.3.1"
 ]
 
 requirements_docs = [
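For reference, the `pennylane_optimizer` wrapper introduced in this series follows the scipy custom-method convention (`fun`, `x0`, `maxiter`, `tol`, `jac`, plus `**options` filtered against the chosen PennyLane optimizer's signature), so it can also be exercised directly. Below is a minimal sketch of such a call. It assumes the post-refactor module path `openqaoa.optimizers.pennylane.optimization_methods_pennylane`; the toy `cost` and `grad` functions are illustrative stand-ins for a real QAOA expectation value and its gradient, not part of the patches.

    import numpy as np
    # Assumed import path after the vendoring refactor in this series.
    from openqaoa.optimizers.pennylane import optimization_methods_pennylane as ompl

    # Toy cost function standing in for a QAOA expectation value.
    def cost(params):
        return float(np.sum(params ** 2))

    # Central finite-difference gradient; gradient-based methods receive
    # it as grad_fn inside step_and_cost.
    def grad(params, eps=1e-7):
        return np.array([(cost(params + eps * e) - cost(params - eps * e)) / (2 * eps)
                         for e in np.eye(params.size)])

    res = ompl.pennylane_optimizer(
        cost,
        x0=np.array([0.5, -0.3]),
        method='vgd',        # selects pl.GradientDescentOptimizer
        jac=grad,
        maxiter=50,          # cap on the outer iteration loop
        tol=1e-6,            # stop once |besty - testy| < tol (after 2 iterations)
        stepsize=0.1,        # kept by the inspect.signature filter and
                             # forwarded to the optimizer's constructor
    )
    print(res.x, res.fun)    # OptimizeResult fields set by the wrapper

Within the workflow classes, the same options arrive through `optimizer_dict` (with per-optimizer constructor arguments under 'optimizer_options'), which is why the wrapper filters `**options` against the optimizer's signature rather than requiring an exact keyword match.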