diff --git a/.gitignore b/.gitignore
index 1b864150..b066019b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -75,3 +75,7 @@
 events.out.tfevents.*
 *.dvi
 *.gv
+
+# mlflow
+mlruns/
+mlartifacts/
diff --git a/docs/tutorials/qml/ml_tools.md b/docs/tutorials/qml/ml_tools.md
index 5efb6cb7..f2d762ab 100644
--- a/docs/tutorials/qml/ml_tools.md
+++ b/docs/tutorials/qml/ml_tools.md
@@ -306,3 +306,129 @@ def train(
 
     return model, optimizer
 ```
+
+## Experiment tracking with mlflow
+
+Qadence allows you to track runs and log hyperparameters, models and plots with [tensorboard](https://pytorch.org/tutorials/recipes/recipes/tensorboard_with_pytorch.html) and [mlflow](https://mlflow.org/). In the following, we demonstrate the integration with mlflow.
+
+### mlflow configuration
+We have control over our tracking configuration by setting environment variables. First, let's look at the tracking URI. For the purpose of this demo we will be working with a local database, similarly to what is described [here](https://mlflow.org/docs/latest/tracking/tutorials/local-database.html):
+```bash
+export MLFLOW_TRACKING_URI=sqlite:///mlruns.db
+```
+
+Qadence can also read the following two environment variables to define the mlflow experiment name and run name:
+```bash
+export MLFLOW_EXPERIMENT=test_experiment
+export MLFLOW_RUN_NAME=run_0
+```
+
+If no tracking URI is provided, mlflow stores run information and artifacts in the local `./mlruns` directory, and if no names are defined, the experiment and run are named with random UUIDs.
+
+### Setup
+Let's do the necessary imports and declare a `DataLoader`. We can already define some hyperparameters here, including the seed for random number generators. mlflow can log hyperparameters of arbitrary type, for example the observable that we want to monitor (`Z` in this case, which is of `qadence.Operation` type).
+
+```python
+import random
+from itertools import count
+
+import numpy as np
+import torch
+from matplotlib import pyplot as plt
+from matplotlib.figure import Figure
+from torch.nn import Module
+from torch.utils.data import DataLoader
+
+from qadence import hea, QuantumCircuit, Z
+from qadence.constructors import feature_map, hamiltonian_factory
+from qadence.ml_tools import train_with_grad, TrainConfig
+from qadence.ml_tools.data import to_dataloader
+from qadence.ml_tools.utils import rand_featureparameters
+from qadence.models import QNN, QuantumModel
+from qadence.types import ExperimentTrackingTool
+
+hyperparams = {
+    "seed": 42,
+    "batch_size": 10,
+    "n_qubits": 2,
+    "ansatz_depth": 1,
+    "observable": Z,
+}
+
+np.random.seed(hyperparams["seed"])
+torch.manual_seed(hyperparams["seed"])
+random.seed(hyperparams["seed"])
+
+
+def dataloader(batch_size: int = 25) -> DataLoader:
+    x = torch.linspace(0, 1, batch_size).reshape(-1, 1)
+    y = torch.cos(x)
+    return to_dataloader(x, y, batch_size=batch_size, infinite=True)
+```
+
+We continue with the regular QNN definition, together with the loss function and optimizer.
+
+```python
+obs = hamiltonian_factory(register=hyperparams["n_qubits"], detuning=hyperparams["observable"])
+
+data = dataloader(hyperparams["batch_size"])
+fm = feature_map(hyperparams["n_qubits"], param="x")
+
+model = QNN(
+    QuantumCircuit(
+        hyperparams["n_qubits"], fm, hea(hyperparams["n_qubits"], hyperparams["ansatz_depth"])
+    ),
+    observable=obs,
+    inputs=["x"],
+)
+
+cnt = count()
+criterion = torch.nn.MSELoss()
+optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
+
+inputs = rand_featureparameters(model, 1)
+
+def loss_fn(model: QuantumModel, data: torch.Tensor) -> tuple[torch.Tensor, dict]:
+    next(cnt)
+    out = model.expectation(inputs)
+    loss = criterion(out, torch.rand(1))
+    return loss, {}
+```
+
+### `TrainConfig` specifications
+Qadence offers different tracking options via `TrainConfig`. Here we use the `ExperimentTrackingTool` type to specify that we want to track the experiment with mlflow. Tracking with tensorboard is also possible. We can then indicate *what* and *how often* we want to track or log. `write_every` controls how often (in number of training iterations) the loss and metrics are logged. Thanks to the `plotting_functions` and `plot_every` arguments, we are also able to plot model-related quantities throughout training. Notice that arbitrary plotting functions can be passed, as long as their signature is the same as that of `plot_fn` below. Finally, the trained model can be logged by setting `log_model=True`. Here is an example of a plotting function and the corresponding training configuration:
+
+```python
+def plot_fn(model: Module, iteration: int) -> tuple[str, Figure]:
+    descr = f"ufa_prediction_epoch_{iteration}.png"
+    fig, ax = plt.subplots()
+    x = torch.linspace(0, 1, 100).reshape(-1, 1)
+    out = model.expectation(x)
+    ax.plot(x.detach().numpy(), out.detach().numpy())
+    return descr, fig
+
+
+config = TrainConfig(
+    folder="mlflow_demonstration",
+    max_iter=10,
+    checkpoint_every=1,
+    plot_every=2,
+    write_every=1,
+    log_model=True,
+    tracking_tool=ExperimentTrackingTool.MLFLOW,
+    hyperparams=hyperparams,
+    plotting_functions=(plot_fn,),
+)
+```
+
+### Training and inspecting
+Model training happens as usual:
+```python
+train_with_grad(model, data, optimizer, config, loss_fn=loss_fn)
+```
+
+After training, we can inspect our experiment via the mlflow UI:
+```bash
+mlflow ui --port 8080 --backend-store-uri sqlite:///mlruns.db
+```
+In this case, since we are running on a local server, we can access the mlflow UI by navigating to http://localhost:8080/.
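+
+Runs can also be inspected programmatically. The snippet below is a minimal sketch: the only qadence-specific piece is the `mlflow_config` attribute that `TrainConfig` populates when tracking with mlflow; everything else uses the standard `MlflowClient` to read back the logged hyperparameters and metrics.
+
+```python
+from mlflow import MlflowClient
+
+# id of the run started by TrainConfig (tracking_tool=ExperimentTrackingTool.MLFLOW)
+run_id = config.mlflow_config.run.info.run_id
+
+# the client resolves MLFLOW_TRACKING_URI, here the local sqlite database
+client = MlflowClient()
+run = client.get_run(run_id)
+
+print(run.data.params)   # logged hyperparameters (mlflow stores them as strings)
+print(run.data.metrics)  # latest value of each logged metric, e.g. the loss
+```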
diff --git a/pyproject.toml b/pyproject.toml index 0de906de..64052033 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,10 +21,11 @@ authors = [ { name = "Smit Chaudhary", email = "smit.chaudhary@pasqal.com" }, { name = "Ignacio Fernández Graña", email = "ignacio.fernandez-grana@pasqal.com" }, { name = "Charles Moussa", email = "charles.moussa@pasqal.com" }, + { name = "Giorgio Tosti Balducci", email = "giorgio.tosti-balducci@pasqal.com" }, ] requires-python = ">=3.9" license = { text = "Apache 2.0" } -version = "1.7.2" +version = "1.7.3" classifiers = [ "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", @@ -83,8 +84,8 @@ horqrux = [ protocols = ["qadence-protocols"] libs = ["qadence-libs"] dlprof = ["nvidia-pyindex", "nvidia-dlprof[pytorch]"] -all = ["pulser", "braket", "visualization", "protocols", "libs"] - +mlflow = ["mlflow"] +all = ["pulser", "braket", "visualization", "protocols", "libs", "mlflow"] [tool.hatch.envs.default] dependencies = [ @@ -102,7 +103,7 @@ dependencies = [ "ruff", "pydocstringformatter", ] -features = ["pulser", "braket", "visualization", "horqrux"] +features = ["pulser", "braket", "visualization", "horqrux", "mlflow"] [tool.hatch.envs.default.scripts] test = "pytest -n auto --cov-report lcov --cov-config=pyproject.toml --cov=qadence --cov=tests --ignore=./tests/test_examples.py {args}" @@ -139,7 +140,7 @@ dependencies = [ "markdown-exec", "mike", ] -features = ["pulser", "braket", "horqrux", "visualization"] +features = ["pulser", "braket", "horqrux", "visualization", "mlflow"] [tool.hatch.envs.docs.scripts] build = "mkdocs build --clean --strict" diff --git a/qadence/ml_tools/config.py b/qadence/ml_tools/config.py index 24dbbfd6..d15ec78b 100644 --- a/qadence/ml_tools/config.py +++ b/qadence/ml_tools/config.py @@ -5,15 +5,25 @@ from dataclasses import dataclass, field, fields from logging import getLogger from pathlib import Path -from typing import Callable, Optional, Type +from typing import Callable, Type +from uuid import uuid4 from sympy import Basic +from torch import Tensor from qadence.blocks.analog import AnalogBlock from qadence.blocks.primitive import ParametricBlock from qadence.operations import RX, AnalogRX from qadence.parameters import Parameter -from qadence.types import AnsatzType, BasisSet, MultivariateStrategy, ReuploadScaling, Strategy +from qadence.types import ( + AnsatzType, + BasisSet, + ExperimentTrackingTool, + LoggablePlotFunction, + MultivariateStrategy, + ReuploadScaling, + Strategy, +) logger = getLogger(__file__) @@ -37,10 +47,14 @@ class TrainConfig: print_every: int = 1000 """Print loss/metrics.""" write_every: int = 50 - """Write tensorboard logs.""" + """Write loss and metrics with the tracking tool.""" checkpoint_every: int = 5000 """Write model/optimizer checkpoint.""" - folder: Optional[Path] = None + plot_every: int = 5000 + """Write figures.""" + log_model: bool = False + """Logs a serialised version of the model.""" + folder: Path | None = None """Checkpoint/tensorboard logs folder.""" create_subfolder_per_run: bool = False """Checkpoint/tensorboard logs stored in subfolder with name `_`. @@ -59,14 +73,38 @@ class TrainConfig: validation loss across previous iterations. 
""" - validation_criterion: Optional[Callable] = None + validation_criterion: Callable | None = None """A boolean function which evaluates a given validation metric is satisfied.""" - trainstop_criterion: Optional[Callable] = None + trainstop_criterion: Callable | None = None """A boolean function which evaluates a given training stopping metric is satisfied.""" batch_size: int = 1 """The batch_size to use when passing a list/tuple of torch.Tensors.""" verbose: bool = True """Whether or not to print out metrics values during training.""" + tracking_tool: ExperimentTrackingTool = ExperimentTrackingTool.TENSORBOARD + """The tracking tool of choice.""" + hyperparams: dict = field(default_factory=dict) + """Hyperparameters to track.""" + plotting_functions: tuple[LoggablePlotFunction, ...] = field(default_factory=tuple) # type: ignore + """Functions for in-train plotting.""" + + # tensorboard only allows for certain types as hyperparameters + _tb_allowed_hyperparams_types: tuple = field( + default=(int, float, str, bool, Tensor), init=False, repr=False + ) + + def _filter_tb_hyperparams(self) -> None: + keys_to_remove = [ + key + for key, value in self.hyperparams.items() + if not isinstance(value, TrainConfig._tb_allowed_hyperparams_types) + ] + if keys_to_remove: + logger.warning( + f"Tensorboard cannot log the following hyperparameters: {keys_to_remove}." + ) + for key in keys_to_remove: + self.hyperparams.pop(key) def __post_init__(self) -> None: if self.folder: @@ -81,6 +119,64 @@ def __post_init__(self) -> None: self.trainstop_criterion = lambda x: x <= self.max_iter if self.validation_criterion is None: self.validation_criterion = lambda *x: False + if self.hyperparams and self.tracking_tool == ExperimentTrackingTool.TENSORBOARD: + self._filter_tb_hyperparams() + if self.tracking_tool == ExperimentTrackingTool.MLFLOW: + self._mlflow_config = MLFlowConfig() + if self.plotting_functions and self.tracking_tool != ExperimentTrackingTool.MLFLOW: + logger.warning("In-training plots are only available with mlflow tracking.") + if not self.plotting_functions and self.tracking_tool == ExperimentTrackingTool.MLFLOW: + logger.warning("Tracking with mlflow, but no plotting functions provided.") + + @property + def mlflow_config(self) -> MLFlowConfig: + if self.tracking_tool == ExperimentTrackingTool.MLFLOW: + return self._mlflow_config + else: + raise AttributeError( + "mlflow_config is available only for with the mlflow tracking tool." + ) + + +class MLFlowConfig: + """ + Configuration for mlflow tracking. + + Example: + + export MLFLOW_TRACKING_URI=tracking_uri + export MLFLOW_EXPERIMENT=experiment_name + export MLFLOW_RUN_NAME=run_name + """ + + def __init__(self) -> None: + import mlflow + + self.tracking_uri: str = os.getenv("MLFLOW_TRACKING_URI", "") + """The URI of the mlflow tracking server. + + An empty string, or a local file path, prefixed with file:/. + Data is stored locally at the provided file (or ./mlruns if empty). + """ + + self.experiment_name: str = os.getenv("MLFLOW_EXPERIMENT", str(uuid4())) + """The name of the experiment. + + If None or empty, a new experiment is created with a random UUID. 
+ """ + + self.run_name: str = os.getenv("MLFLOW_RUN_NAME", str(uuid4())) + """The name of the run.""" + + mlflow.set_tracking_uri(self.tracking_uri) + + # activate existing or create experiment + exp_filter_string = f"name = '{self.experiment_name}'" + if not mlflow.search_experiments(filter_string=exp_filter_string): + mlflow.create_experiment(name=self.experiment_name) + + self.experiment = mlflow.set_experiment(self.experiment_name) + self.run = mlflow.start_run(run_name=self.run_name, nested=False) @dataclass diff --git a/qadence/ml_tools/printing.py b/qadence/ml_tools/printing.py index 002d4c28..047a0a8f 100644 --- a/qadence/ml_tools/printing.py +++ b/qadence/ml_tools/printing.py @@ -1,7 +1,23 @@ from __future__ import annotations +from logging import getLogger +from typing import Any, Callable, Union + +from matplotlib.figure import Figure +from mlflow.models import infer_signature +from torch import Tensor +from torch.nn import Module +from torch.utils.data import DataLoader from torch.utils.tensorboard import SummaryWriter +from qadence.ml_tools.data import DictDataLoader +from qadence.types import ExperimentTrackingTool + +logger = getLogger(__name__) + +PlottingFunction = Callable[[Module, int], tuple[str, Figure]] +InputData = Union[Tensor, dict[str, Tensor]] + def print_metrics(loss: float | None, metrics: dict, iteration: int) -> None: msg = " ".join( @@ -20,5 +36,110 @@ def write_tensorboard( writer.add_scalar(key, arg, iteration) -def log_hyperparams(writer: SummaryWriter, hyperparams: dict, metrics: dict) -> None: +def log_hyperparams_tensorboard(writer: SummaryWriter, hyperparams: dict, metrics: dict) -> None: writer.add_hparams(hyperparams, metrics) + + +def plot_tensorboard( + writer: SummaryWriter, + model: Module, + iteration: int, + plotting_functions: tuple[PlottingFunction], +) -> None: + for pf in plotting_functions: + descr, fig = pf(model, iteration) + writer.add_figure(descr, fig, global_step=iteration) + + +def log_model_tensorboard( + writer: SummaryWriter, + model: Module, + dataloader: Union[None, DataLoader, DictDataLoader], +) -> None: + logger.warning("Model logging is not supported by tensorboard. 
No model will be logged.")
+
+
+def write_mlflow(writer: Any, loss: float | None, metrics: dict, iteration: int) -> None:
+    if loss is not None:
+        writer.log_metrics({"loss": float(loss)}, step=iteration)
+    writer.log_metrics(metrics, step=iteration)  # logs each metric individually
+
+
+def log_hyperparams_mlflow(writer: Any, hyperparams: dict, metrics: dict) -> None:
+    writer.log_params(hyperparams)  # type: ignore
+
+
+def plot_mlflow(
+    writer: Any,
+    model: Module,
+    iteration: int,
+    plotting_functions: tuple[PlottingFunction],
+) -> None:
+    for pf in plotting_functions:
+        descr, fig = pf(model, iteration)
+        writer.log_figure(fig, descr)
+
+
+def log_model_mlflow(
+    writer: Any, model: Module, dataloader: DataLoader | DictDataLoader | None
+) -> None:
+    if dataloader is not None:
+        xs: InputData
+        xs, *_ = next(iter(dataloader))
+        preds = model(xs)
+        if isinstance(xs, Tensor):
+            xs = xs.numpy()
+            preds = preds.detach().numpy()
+        elif isinstance(xs, dict):
+            for key, val in xs.items():
+                xs[key] = val.numpy()
+            for key, val in preds.items():
+                preds[key] = val.detach().numpy()
+        signature = infer_signature(xs, preds)
+    else:
+        signature = None
+    writer.pytorch.log_model(model, artifact_path="model", signature=signature)
+
+
+TRACKER_MAPPING: dict[ExperimentTrackingTool, Callable[..., None]] = {
+    ExperimentTrackingTool.TENSORBOARD: write_tensorboard,
+    ExperimentTrackingTool.MLFLOW: write_mlflow,
+}
+
+LOGGER_MAPPING: dict[ExperimentTrackingTool, Callable[..., None]] = {
+    ExperimentTrackingTool.TENSORBOARD: log_hyperparams_tensorboard,
+    ExperimentTrackingTool.MLFLOW: log_hyperparams_mlflow,
+}
+
+PLOTTER_MAPPING: dict[ExperimentTrackingTool, Callable[..., None]] = {
+    ExperimentTrackingTool.TENSORBOARD: plot_tensorboard,
+    ExperimentTrackingTool.MLFLOW: plot_mlflow,
+}
+
+MODEL_LOGGER_MAPPING: dict[ExperimentTrackingTool, Callable[..., None]] = {
+    ExperimentTrackingTool.TENSORBOARD: log_model_tensorboard,
+    ExperimentTrackingTool.MLFLOW: log_model_mlflow,
+}
+
+
+def write_tracker(
+    *args: Any, tracking_tool: ExperimentTrackingTool = ExperimentTrackingTool.TENSORBOARD
+) -> None:
+    return TRACKER_MAPPING[tracking_tool](*args)
+
+
+def log_tracker(
+    *args: Any, tracking_tool: ExperimentTrackingTool = ExperimentTrackingTool.TENSORBOARD
+) -> None:
+    return LOGGER_MAPPING[tracking_tool](*args)
+
+
+def plot_tracker(
+    *args: Any, tracking_tool: ExperimentTrackingTool = ExperimentTrackingTool.TENSORBOARD
+) -> None:
+    return PLOTTER_MAPPING[tracking_tool](*args)
+
+
+def log_model_tracker(
+    *args: Any, tracking_tool: ExperimentTrackingTool = ExperimentTrackingTool.TENSORBOARD
+) -> None:
+    return MODEL_LOGGER_MAPPING[tracking_tool](*args)
diff --git a/qadence/ml_tools/train_grad.py b/qadence/ml_tools/train_grad.py
index 6ddfb903..89ac6383 100644
--- a/qadence/ml_tools/train_grad.py
+++ b/qadence/ml_tools/train_grad.py
@@ -1,10 +1,17 @@
 from __future__ import annotations
 
+import importlib
 import math
 from logging import getLogger
 from typing import Callable, Union
 
-from rich.progress import BarColumn, Progress, TaskProgressColumn, TextColumn, TimeRemainingColumn
+from rich.progress import (
+    BarColumn,
+    Progress,
+    TaskProgressColumn,
+    TextColumn,
+    TimeRemainingColumn,
+)
 from torch import complex128, float32, float64
 from torch import device as torch_device
 from torch import dtype as torch_dtype
@@ -16,8 +23,15 @@
 from qadence.ml_tools.config import TrainConfig
 from qadence.ml_tools.data import DictDataLoader, data_to_device
 from qadence.ml_tools.optimize_step import optimize_step
-from 
qadence.ml_tools.printing import print_metrics, write_tensorboard +from qadence.ml_tools.printing import ( + log_model_tracker, + log_tracker, + plot_tracker, + print_metrics, + write_tracker, +) from qadence.ml_tools.saveload import load_checkpoint, write_checkpoint +from qadence.types import ExperimentTrackingTool logger = getLogger(__name__) @@ -30,7 +44,6 @@ def train( loss_fn: Callable, device: torch_device = None, optimize_step: Callable = optimize_step, - write_tensorboard: Callable = write_tensorboard, dtype: torch_dtype = None, ) -> tuple[Module, Optimizer]: """Runs the training loop with gradient-based optimizer. @@ -53,10 +66,6 @@ def train( optimize_step: Customizable optimization callback which is called at every iteration.= The function must have the signature `optimize_step(model, optimizer, loss_fn, xs, device="cpu")`. - write_tensorboard: Customizable tensorboard logging callback which is - called every `config.write_every` iterations. The function must have - the signature `write_tensorboard(writer, loss, metrics, iteration)` - (see the example below). dtype: The dtype to use for the data. Example: @@ -122,8 +131,11 @@ def loss_fn(model: torch.nn.Module, data: torch.Tensor) -> tuple[torch.Tensor, d model = model.module.to(device=device, dtype=dtype) else: model = model.to(device=device, dtype=dtype) - # initialize tensorboard - writer = SummaryWriter(config.folder, purge_step=init_iter) + # initialize tracking tool + if config.tracking_tool == ExperimentTrackingTool.TENSORBOARD: + writer = SummaryWriter(config.folder, purge_step=init_iter) + else: + writer = importlib.import_module("mlflow") perform_val = isinstance(config.val_every, int) if perform_val: @@ -166,7 +178,7 @@ def loss_fn(model: torch.nn.Module, data: torch.Tensor) -> tuple[torch.Tensor, d best_val_loss, metrics = loss_fn(model, xs_to_device) metrics["val_loss"] = best_val_loss - write_tensorboard(writer, None, metrics, init_iter) + write_tracker(writer, None, metrics, init_iter, tracking_tool=config.tracking_tool) if config.folder: if config.checkpoint_best_only: @@ -174,6 +186,14 @@ def loss_fn(model: torch.nn.Module, data: torch.Tensor) -> tuple[torch.Tensor, d else: write_checkpoint(config.folder, model, optimizer, init_iter) + plot_tracker( + writer, + model, + init_iter, + config.plotting_functions, + tracking_tool=config.tracking_tool, + ) + except KeyboardInterrupt: logger.info("Terminating training gracefully after the current iteration.") @@ -218,8 +238,18 @@ def loss_fn(model: torch.nn.Module, data: torch.Tensor) -> tuple[torch.Tensor, d print_metrics(loss, metrics, iteration - 1) if iteration % config.write_every == 0: - write_tensorboard(writer, loss, metrics, iteration - 1) + write_tracker( + writer, loss, metrics, iteration, tracking_tool=config.tracking_tool + ) + if iteration % config.plot_every == 0: + plot_tracker( + writer, + model, + iteration, + config.plotting_functions, + tracking_tool=config.tracking_tool, + ) if perform_val: if iteration % config.val_every == 0: xs = next(dl_iter_val) @@ -230,7 +260,9 @@ def loss_fn(model: torch.nn.Module, data: torch.Tensor) -> tuple[torch.Tensor, d if config.folder and config.checkpoint_best_only: write_checkpoint(config.folder, model, optimizer, iteration="best") metrics["val_loss"] = val_loss - write_tensorboard(writer, None, metrics, iteration) + write_tracker( + writer, loss, metrics, iteration, tracking_tool=config.tracking_tool + ) if config.folder: if iteration % config.checkpoint_every == 0 and not config.checkpoint_best_only: @@ -254,10 
+286,23 @@ def loss_fn(model: torch.nn.Module, data: torch.Tensor) -> tuple[torch.Tensor, d except KeyboardInterrupt: logger.info("Terminating training gracefully after the current iteration.") - # Final printing, writing and checkpointing + # Final checkpointing and writing if config.folder and not config.checkpoint_best_only: write_checkpoint(config.folder, model, optimizer, iteration) - write_tensorboard(writer, loss, metrics, iteration) - writer.close() + write_tracker(writer, loss, metrics, iteration, tracking_tool=config.tracking_tool) + + # writing hyperparameters + if config.hyperparams: + log_tracker(writer, config.hyperparams, metrics, tracking_tool=config.tracking_tool) + + # logging the model + if config.log_model: + log_model_tracker(writer, model, dataloader, tracking_tool=config.tracking_tool) + + # close tracker + if config.tracking_tool == ExperimentTrackingTool.TENSORBOARD: + writer.close() + elif config.tracking_tool == ExperimentTrackingTool.MLFLOW: + writer.end_run() return model, optimizer diff --git a/qadence/ml_tools/train_no_grad.py b/qadence/ml_tools/train_no_grad.py index d50250d2..43a62dfe 100644 --- a/qadence/ml_tools/train_no_grad.py +++ b/qadence/ml_tools/train_no_grad.py @@ -1,11 +1,18 @@ from __future__ import annotations +import importlib from logging import getLogger from typing import Callable import nevergrad as ng from nevergrad.optimization.base import Optimizer as NGOptimizer -from rich.progress import BarColumn, Progress, TaskProgressColumn, TextColumn, TimeRemainingColumn +from rich.progress import ( + BarColumn, + Progress, + TaskProgressColumn, + TextColumn, + TimeRemainingColumn, +) from torch import Tensor from torch.nn import Module from torch.utils.data import DataLoader @@ -14,9 +21,16 @@ from qadence.ml_tools.config import TrainConfig from qadence.ml_tools.data import DictDataLoader from qadence.ml_tools.parameters import get_parameters, set_parameters -from qadence.ml_tools.printing import print_metrics, write_tensorboard +from qadence.ml_tools.printing import ( + log_model_tracker, + log_tracker, + plot_tracker, + print_metrics, + write_tracker, +) from qadence.ml_tools.saveload import load_checkpoint, write_checkpoint from qadence.ml_tools.tensors import promote_to_tensor +from qadence.types import ExperimentTrackingTool logger = getLogger(__name__) @@ -42,6 +56,7 @@ def train( dataloader: Dataloader constructed via `dictdataloader` optimizer: The optimizer to use taken from the Nevergrad library. If this is not the case the function will raise an AssertionError + config: `TrainConfig` with additional training options. 
loss_fn: Loss function returning (loss: float, metrics: dict[str, float]) """ init_iter = 0 @@ -63,8 +78,11 @@ def _update_parameters( # TODO: support also Scipy optimizers assert isinstance(optimizer, NGOptimizer), "Use only optimizers from the Nevergrad library" - # initialize tensorboard - writer = SummaryWriter(config.folder, purge_step=init_iter) + # initialize tracking tool + if config.tracking_tool == ExperimentTrackingTool.TENSORBOARD: + writer = SummaryWriter(config.folder, purge_step=init_iter) + else: + writer = importlib.import_module("mlflow") # set optimizer configuration and initial parameters optimizer.budget = config.max_iter @@ -100,7 +118,16 @@ def _update_parameters( print_metrics(loss, metrics, iteration) if iteration % config.write_every == 0: - write_tensorboard(writer, loss, metrics, iteration) + write_tracker(writer, loss, metrics, iteration, tracking_tool=config.tracking_tool) + + if iteration % config.plot_every == 0: + plot_tracker( + writer, + model, + iteration, + config.plotting_functions, + tracking_tool=config.tracking_tool, + ) if config.folder: if iteration % config.checkpoint_every == 0: @@ -109,10 +136,22 @@ def _update_parameters( if iteration >= init_iter + config.max_iter: break - ## Final writing and stuff + # writing hyperparameters + if config.hyperparams: + log_tracker(writer, config.hyperparams, metrics, tracking_tool=config.tracking_tool) + + if config.log_model: + log_model_tracker(writer, model, dataloader, tracking_tool=config.tracking_tool) + + # Final writing and checkpointing if config.folder: write_checkpoint(config.folder, model, optimizer, iteration) - write_tensorboard(writer, loss, metrics, iteration) - writer.close() + write_tracker(writer, loss, metrics, iteration, tracking_tool=config.tracking_tool) + + # close tracker + if config.tracking_tool == ExperimentTrackingTool.TENSORBOARD: + writer.close() + elif config.tracking_tool == ExperimentTrackingTool.MLFLOW: + writer.end_run() return model, optimizer diff --git a/qadence/types.py b/qadence/types.py index d52cce73..5b86c1b9 100644 --- a/qadence/types.py +++ b/qadence/types.py @@ -6,9 +6,11 @@ import numpy as np import sympy +from matplotlib.figure import Figure from numpy.typing import ArrayLike from pyqtorch.utils import SolverType from torch import Tensor, pi +from torch.nn import Module TNumber = Union[int, float, complex, np.int64, np.float64] """Union of python and numpy numeric types.""" @@ -445,3 +447,13 @@ class ObservableTransform: """Use the given values as min and max.""" NONE = "none" """No transformation.""" + + +class ExperimentTrackingTool(StrEnum): + TENSORBOARD = "tensorboard" + """Use the tensorboard experiment tracker.""" + MLFLOW = "mlflow" + """Use the ml-flow experiment tracker.""" + + +LoggablePlotFunction = Callable[[Module, int], tuple[str, Figure]] diff --git a/tests/ml_tools/test_logging.py b/tests/ml_tools/test_logging.py new file mode 100644 index 00000000..d8669341 --- /dev/null +++ b/tests/ml_tools/test_logging.py @@ -0,0 +1,299 @@ +from __future__ import annotations + +import os +import shutil +from itertools import count +from pathlib import Path +from typing import Callable +from urllib.parse import urlparse + +import mlflow +import pytest +import torch +from matplotlib import pyplot as plt +from matplotlib.figure import Figure +from mlflow import MlflowClient +from mlflow.entities import Run +from torch.nn import Module +from torch.optim import Optimizer +from torch.utils.data import DataLoader + +from qadence.ml_tools import TrainConfig, 
train_with_grad +from qadence.ml_tools.data import to_dataloader +from qadence.ml_tools.models import QNN +from qadence.ml_tools.utils import rand_featureparameters +from qadence.model import QuantumModel +from qadence.types import ExperimentTrackingTool + + +def dataloader(batch_size: int = 25) -> DataLoader: + x = torch.linspace(0, 1, batch_size).reshape(-1, 1) + y = torch.cos(x) + return to_dataloader(x, y, batch_size=batch_size, infinite=True) + + +def setup(model: Module) -> tuple[Callable, Optimizer]: + cnt = count() + criterion = torch.nn.MSELoss() + optimizer = torch.optim.Adam(model.parameters(), lr=0.1) + inputs = rand_featureparameters(model, 1) + + def loss_fn(model: QuantumModel, data: torch.Tensor) -> tuple[torch.Tensor, dict]: + next(cnt) + out = model.expectation(inputs) + loss = criterion(out, torch.rand(1)) + return loss, {} + + return loss_fn, optimizer + + +def load_mlflow_model(train_config: TrainConfig) -> None: + run_id = train_config.mlflow_config.run.info.run_id + + mlflow.pytorch.load_model(model_uri=f"runs:/{run_id}/model") + + +def find_mlflow_artifacts_path(run: Run) -> Path: + artifact_uri = run.info.artifact_uri + parsed_uri = urlparse(artifact_uri) + return Path(os.path.abspath(os.path.join(parsed_uri.netloc, parsed_uri.path))) + + +def clean_mlflow_experiment(train_config: TrainConfig) -> None: + experiment_id = train_config.mlflow_config.run.info.experiment_id + client = MlflowClient() + + runs = client.search_runs(experiment_id) + + def clean_artifacts(run: Run) -> None: + local_path = find_mlflow_artifacts_path(run) + shutil.rmtree(local_path) + + for run in runs: + clean_artifacts(run) + + run_id = run.info.run_id + client.delete_run(run_id) + + mlruns_base_dir = "./mlruns" + if os.path.isdir(mlruns_base_dir): + shutil.rmtree(os.path.join(mlruns_base_dir, experiment_id)) + + +def test_hyperparams_logging_mlflow(BasicQuantumModel: QuantumModel, tmp_path: Path) -> None: + model = BasicQuantumModel + + loss_fn, optimizer = setup(model) + + hyperparams = {"max_iter": int(10), "lr": 0.1} + + config = TrainConfig( + folder=tmp_path, + max_iter=hyperparams["max_iter"], # type: ignore + checkpoint_every=1, + write_every=1, + hyperparams=hyperparams, + tracking_tool=ExperimentTrackingTool.MLFLOW, + ) + + train_with_grad(model, None, optimizer, config, loss_fn=loss_fn) + + mlflow_config = config.mlflow_config + experiment_id = mlflow_config.run.info.experiment_id + run_id = mlflow_config.run.info.run_id + + experiment_dir = Path(f"mlruns/{experiment_id}") + hyperparams_files = [experiment_dir / run_id / "params" / key for key in hyperparams.keys()] + + assert all([os.path.isfile(hf) for hf in hyperparams_files]) + + clean_mlflow_experiment(config) + + +def test_hyperparams_logging_tensorboard(BasicQuantumModel: QuantumModel, tmp_path: Path) -> None: + model = BasicQuantumModel + + loss_fn, optimizer = setup(model) + + hyperparams = {"max_iter": int(10), "lr": 0.1} + + config = TrainConfig( + folder=tmp_path, + max_iter=hyperparams["max_iter"], # type: ignore + checkpoint_every=1, + write_every=1, + hyperparams=hyperparams, + tracking_tool=ExperimentTrackingTool.TENSORBOARD, + ) + + train_with_grad(model, None, optimizer, config, loss_fn=loss_fn) + + +def test_model_logging_mlflow_basicQM(BasicQuantumModel: QuantumModel, tmp_path: Path) -> None: + model = BasicQuantumModel + + loss_fn, optimizer = setup(model) + + config = TrainConfig( + folder=tmp_path, + max_iter=10, # type: ignore + checkpoint_every=1, + write_every=1, + log_model=True, + 
tracking_tool=ExperimentTrackingTool.MLFLOW, + ) + + train_with_grad(model, None, optimizer, config, loss_fn=loss_fn) + + load_mlflow_model(config) + + clean_mlflow_experiment(config) + + +def test_model_logging_mlflow_basicQNN(BasicQNN: QNN, tmp_path: Path) -> None: + data = dataloader() + model = BasicQNN + + loss_fn, optimizer = setup(model) + + config = TrainConfig( + folder=tmp_path, + max_iter=10, # type: ignore + checkpoint_every=1, + write_every=1, + log_model=True, + tracking_tool=ExperimentTrackingTool.MLFLOW, + ) + + train_with_grad(model, data, optimizer, config, loss_fn=loss_fn) + + load_mlflow_model(config) + + clean_mlflow_experiment(config) + + +def test_model_logging_mlflow_basicAdjQNN(BasicAdjointQNN: QNN, tmp_path: Path) -> None: + data = dataloader() + model = BasicAdjointQNN + + loss_fn, optimizer = setup(model) + + config = TrainConfig( + folder=tmp_path, + max_iter=10, # type: ignore + checkpoint_every=1, + write_every=1, + log_model=True, + tracking_tool=ExperimentTrackingTool.MLFLOW, + ) + + train_with_grad(model, data, optimizer, config, loss_fn=loss_fn) + + load_mlflow_model(config) + + clean_mlflow_experiment(config) + + +def test_model_logging_tensorboard( + BasicQuantumModel: QuantumModel, tmp_path: Path, caplog: pytest.LogCaptureFixture +) -> None: + model = BasicQuantumModel + + loss_fn, optimizer = setup(model) + + config = TrainConfig( + folder=tmp_path, + max_iter=10, # type: ignore + checkpoint_every=1, + write_every=1, + log_model=True, + tracking_tool=ExperimentTrackingTool.TENSORBOARD, + ) + + train_with_grad(model, None, optimizer, config, loss_fn=loss_fn) + + assert "Model logging is not supported by tensorboard. No model will be logged." in caplog.text + + +def test_plotting_mlflow(BasicQNN: QNN, tmp_path: Path) -> None: + data = dataloader() + model = BasicQNN + + loss_fn, optimizer = setup(model) + + def plot_model(model: QuantumModel, iteration: int) -> tuple[str, Figure]: + descr = f"model_prediction_epoch_{iteration}.png" + fig, ax = plt.subplots() + x = torch.linspace(0, 1, 100).reshape(-1, 1) + out = model.expectation(x) + ax.plot(x.detach().numpy(), out.detach().numpy()) + return descr, fig + + def plot_error(model: QuantumModel, iteration: int) -> tuple[str, Figure]: + descr = f"error_epoch_{iteration}.png" + fig, ax = plt.subplots() + x = torch.linspace(0, 1, 100).reshape(-1, 1) + out = model.expectation(x) + ground_truth = torch.rand_like(out) + error = ground_truth - out + ax.plot(x.detach().numpy(), error.detach().numpy()) + return descr, fig + + max_iter = 10 + plot_every = 2 + config = TrainConfig( + folder=tmp_path, + max_iter=max_iter, + checkpoint_every=1, + write_every=1, + plot_every=plot_every, + tracking_tool=ExperimentTrackingTool.MLFLOW, + plotting_functions=(plot_model, plot_error), + ) + + train_with_grad(model, data, optimizer, config, loss_fn=loss_fn) + + all_plot_names = [f"model_prediction_epoch_{i}.png" for i in range(0, max_iter, plot_every)] + all_plot_names.extend([f"error_epoch_{i}.png" for i in range(0, max_iter, plot_every)]) + + artifact_path = find_mlflow_artifacts_path(config.mlflow_config.run) + + assert all([os.path.isfile(artifact_path / pn) for pn in all_plot_names]) + + clean_mlflow_experiment(config) + + +def test_plotting_tensorboard(BasicQNN: QNN, tmp_path: Path) -> None: + data = dataloader() + model = BasicQNN + + loss_fn, optimizer = setup(model) + + def plot_model(model: QuantumModel, iteration: int) -> tuple[str, Figure]: + descr = f"model_prediction_epoch_{iteration}.png" + fig, ax = 
plt.subplots() + x = torch.linspace(0, 1, 100).reshape(-1, 1) + out = model.expectation(x) + ax.plot(x.detach().numpy(), out.detach().numpy()) + return descr, fig + + def plot_error(model: QuantumModel, iteration: int) -> tuple[str, Figure]: + descr = f"error_epoch_{iteration}.png" + fig, ax = plt.subplots() + x = torch.linspace(0, 1, 100).reshape(-1, 1) + out = model.expectation(x) + ground_truth = torch.rand_like(out) + error = ground_truth - out + ax.plot(x.detach().numpy(), error.detach().numpy()) + return descr, fig + + config = TrainConfig( + folder=tmp_path, + max_iter=10, + checkpoint_every=1, + write_every=1, + tracking_tool=ExperimentTrackingTool.TENSORBOARD, + plotting_functions=(plot_model, plot_error), + ) + + train_with_grad(model, data, optimizer, config, loss_fn=loss_fn)
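+
+
+# A possible additional check, sketched here with the same fixtures and helpers used
+# above (BasicQuantumModel, setup, clean_mlflow_experiment): it verifies that the loss
+# values written every `write_every` iterations actually reach the mlflow backend.
+def test_metrics_logging_mlflow(BasicQuantumModel: QuantumModel, tmp_path: Path) -> None:
+    model = BasicQuantumModel
+
+    loss_fn, optimizer = setup(model)
+
+    config = TrainConfig(
+        folder=tmp_path,
+        max_iter=10,
+        checkpoint_every=1,
+        write_every=1,
+        tracking_tool=ExperimentTrackingTool.MLFLOW,
+    )
+
+    train_with_grad(model, None, optimizer, config, loss_fn=loss_fn)
+
+    run_id = config.mlflow_config.run.info.run_id
+    client = MlflowClient()
+
+    # the trainer logs the loss under the "loss" key via the mlflow tracker
+    assert "loss" in client.get_run(run_id).data.metrics
+
+    clean_mlflow_experiment(config)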