Skip to content

Commit

Permalink
add script
Browse files Browse the repository at this point in the history
  • Loading branch information
mattiasakesson committed Aug 13, 2024
1 parent efcea62 commit 17401ba
Show file tree
Hide file tree
Showing 4 changed files with 321 additions and 0 deletions.
50 changes: 50 additions & 0 deletions examples/mnist-pytorch-DPSGD/README.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
FEDn Project: Federated Differential Privacy MNIST (Opacus + PyTorch)
---------------------------------------------------------------------

This example FEDn Project demonstrates how Differential Privacy can be integrated to enhance the confidentiality of client data.
We have expanded our baseline MNIST-PyTorch example by incorporating the Opacus framework, which is specifically designed for PyTorch models.



Prerequisites
-------------

- `Python >=3.8, <=3.12 <https://www.python.org/downloads>`__
- `A project in FEDn Studio <https://fedn.scaleoutsystems.com/signup>`__

Creating the compute package and seed model
-------------------------------------------

Install fedn:

.. code-block::

   pip install fedn

Clone this repository, then navigate to this directory:

.. code-block::

   git clone https://github.com/scaleoutsystems/fedn.git
   cd fedn/examples/mnist-pytorch-DPSGD

Create the compute package:

.. code-block::

   fedn package create --path client

This creates a file 'package.tgz' in the project folder.

Next, generate the seed model:

.. code-block::

   fedn run build --path client

This will create a model file 'seed.npz' in the root of the project. This step will take a few minutes, depending on hardware and internet connection (builds a virtualenv).

Running the project on FEDn
----------------------------

To learn how to set up your FEDn Studio project and connect clients, take the quickstart tutorial: https://fedn.readthedocs.io/en/stable/quickstart.html.
76 changes: 76 additions & 0 deletions examples/mnist-pytorch-DPSGD/client/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import collections

import torch

from fedn.utils.helpers.helpers import get_helper

# Name of the FEDn helper used to (de)serialize model parameters; the
# resulting `helper` object is used by save_parameters / load_parameters.
HELPER_MODULE = "numpyhelper"
helper = get_helper(HELPER_MODULE)


def compile_model():
    """Build the PyTorch classifier used by this example.

    The network is fully connected (784 -> 64 -> 32 -> 10), applies dropout
    after the first layer and returns log-probabilities.

    :return: A freshly initialised model.
    :rtype: torch.nn.Module
    """
    functional = torch.nn.functional

    class Net(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.fc1 = torch.nn.Linear(784, 64)
            self.fc2 = torch.nn.Linear(64, 32)
            self.fc3 = torch.nn.Linear(32, 10)

        def forward(self, x):
            # Flatten every sample to a 784-element vector.
            flat = x.reshape(x.size(0), 784)
            hidden = functional.relu(self.fc1(flat))
            hidden = functional.dropout(hidden, p=0.5, training=self.training)
            hidden = functional.relu(self.fc2(hidden))
            return functional.log_softmax(self.fc3(hidden), dim=1)

    return Net()


def save_parameters(model, out_path):
    """Write the model's parameters to a file.

    Every entry of the model's state dict is moved to the CPU, converted to
    a NumPy array and handed to the module-level ``helper`` for saving.

    :param model: The model to serialize.
    :type model: torch.nn.Module
    :param out_path: The path to save to.
    :type out_path: str
    """
    arrays = []
    for tensor in model.state_dict().values():
        arrays.append(tensor.cpu().numpy())
    helper.save(arrays, out_path)


def load_parameters(model_path):
    """Read model parameters from a file into a newly compiled model.

    The loaded arrays are paired, in order, with the keys of the model's
    state dict and loaded with ``strict=True``.

    :param model_path: The path to load from.
    :type model_path: str
    :return: The loaded model.
    :rtype: torch.nn.Module
    """
    model = compile_model()
    arrays = helper.load(model_path)

    state_dict = collections.OrderedDict()
    for key, array in zip(model.state_dict().keys(), arrays):
        state_dict[key] = torch.tensor(array)

    model.load_state_dict(state_dict, strict=True)
    return model


def init_seed(out_path="seed.npz"):
    """Create a fresh model and store its parameters as the seed model.

    :param out_path: The path to save the seed model to.
    :type out_path: str
    """
    save_parameters(compile_model(), out_path)


# When run as a script, write the seed model one directory above the
# current working directory.
if __name__ == "__main__":
    init_seed("../seed.npz")
10 changes: 10 additions & 0 deletions examples/mnist-pytorch-DPSGD/client/python_env.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Python environment for the client code of this example.
name: mnist-pytorch
# Packaging tools listed as build dependencies.
build_dependencies:
- pip
- setuptools
- wheel
# Runtime dependencies; opacus provides the differential-privacy engine
# imported by train.py.
dependencies:
- torch==2.3.1
- torchvision==0.18.1
- fedn
- opacus
185 changes: 185 additions & 0 deletions examples/mnist-pytorch-DPSGD/client/train.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
import math
import os
import sys

import torch
from model import load_parameters, save_parameters

from data import load_data
from fedn.utils.helpers.helpers import save_metadata

from opacus import PrivacyEngine
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import tqdm

import numpy as np
from opacus.utils.batch_memory_manager import BatchMemoryManager
# Define a custom Dataset class
class CustomDataset(Dataset):
    """Map-style dataset that pairs each sample with its label by index."""

    def __init__(self, x_data, y_data):
        # Keep references to the samples and labels as given.
        self.x_data, self.y_data = x_data, y_data

    def __len__(self):
        # The dataset size is the number of samples.
        return len(self.x_data)

    def __getitem__(self, idx):
        # Return the (sample, label) pair at position idx.
        return self.x_data[idx], self.y_data[idx]


# Add the directory containing this script to the module search path.
dir_path = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.abspath(dir_path))

# Differential-privacy settings.
# MAX_GRAD_NORM is passed to Opacus as max_grad_norm.
MAX_GRAD_NORM = 1.2
# Total epsilon, split evenly across ROUNDS to get the per-call target.
FINAL_EPSILON = 8.0
ROUNDS = 4
# Number of local epochs run by each call to train().
EPOCHS = 5
# Target epsilon handed to Opacus for a single call to train().
EPSILON = FINAL_EPSILON/ROUNDS
# Delta used both as the Opacus target and when querying spent epsilon.
DELTA = 1e-5

# Upper bound on the physical batch size given to BatchMemoryManager.
MAX_PHYSICAL_BATCH_SIZE = 32

def train(in_model_path, out_model_path, data_path=None, batch_size=32, epochs=1, lr=0.01):
    """Complete a differentially private model update.

    Load model parameters from in_model_path (managed by the FEDn client),
    train with Opacus DP-SGD for ``EPOCHS`` epochs, and write the updated
    parameters to out_model_path (picked up by the FEDn client). The epsilon
    spent by this call is added to the running total kept in 'epsilon.npy'
    in the current working directory.

    :param in_model_path: The path to the input model.
    :type in_model_path: str
    :param out_model_path: The path to save the output model to.
    :type out_model_path: str
    :param data_path: The path to the data file.
    :type data_path: str
    :param batch_size: The (logical) batch size to use.
    :type batch_size: int
    :param epochs: Accepted for interface compatibility but not used: the
        number of local epochs is fixed by the module-level ``EPOCHS``,
        which is also the value given to Opacus when it calibrates the noise.
    :type epochs: int
    :param lr: The learning rate to use.
    :type lr: float
    """
    # tqdm.notebook renders through ipywidgets; this function is run as a
    # plain script, so use the console progress bar instead.
    from tqdm import tqdm

    # Load data
    print("data_path: ", data_path)
    x_train, y_train = load_data(data_path)
    trainset = CustomDataset(x_train, y_train)
    # Honour the caller's batch_size (previously overwritten with 32).
    train_loader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=True, num_workers=2
    )

    # Load parameters and initialize model
    model = load_parameters(in_model_path)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    # Load the epsilon consumed by earlier calls, if any
    if os.path.isfile('epsilon.npy'):
        tot_epsilon = np.load('epsilon.npy')
        print("load consumed epsilon: ", tot_epsilon)
    else:
        print("initiate tot_epsilon")
        tot_epsilon = 0.

    # Wrap model, optimizer and data loader so training meets the
    # per-call (EPSILON, DELTA) target over EPOCHS epochs.
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    privacy_engine = PrivacyEngine()
    model, optimizer, train_loader = privacy_engine.make_private_with_epsilon(
        module=model,
        optimizer=optimizer,
        data_loader=train_loader,
        epochs=EPOCHS,
        target_epsilon=EPSILON,
        target_delta=DELTA,
        max_grad_norm=MAX_GRAD_NORM,
    )

    print(f"Using sigma={optimizer.noise_multiplier} and C={MAX_GRAD_NORM}")

    # Train
    for epoch in tqdm(range(EPOCHS), desc="Epoch", unit="epoch"):
        train_dp(model, train_loader, optimizer, epoch + 1, device, privacy_engine)

    # Accumulate and persist the privacy budget spent so far
    d_epsilon = privacy_engine.get_epsilon(DELTA)
    print("epsilon spent: ", d_epsilon)
    tot_epsilon += d_epsilon
    print("saving tot_epsilon: ", tot_epsilon)
    np.save('epsilon.npy', tot_epsilon)

    # Metadata needed for aggregation server side
    metadata = {
        # num_examples are mandatory
        "num_examples": len(x_train),
        "batch_size": batch_size,
        # Report the number of epochs that were actually run.
        "epochs": EPOCHS,
        "lr": lr,
    }

    # Save JSON metadata file (mandatory)
    save_metadata(metadata, out_model_path)

    # Save model update (mandatory)
    save_parameters(model, out_model_path)

def accuracy(preds, labels):
    """Return the fraction of predictions that equal their labels.

    :param preds: Predicted class indices.
    :param labels: True class indices.
    :return: Mean of the element-wise equality of preds and labels.
    """
    matches = preds == labels
    return matches.mean()





def train_dp(model, train_loader, optimizer, epoch, device, privacy_engine):
    """Run one training epoch through Opacus' BatchMemoryManager.

    Loss and top-1 accuracy are recorded for every batch; their running
    means, together with the epsilon spent so far, are printed every
    200 batches.

    :param model: The model to train.
    :param train_loader: The data loader to iterate over.
    :param optimizer: The optimizer to step.
    :param epoch: Epoch number, used only in the progress message.
    :param device: Device the batches are moved to.
    :param privacy_engine: Engine queried for the spent epsilon.
    """
    model.train()
    loss_fn = torch.nn.NLLLoss()  # nn.CrossEntropyLoss()

    losses, top1_acc = [], []

    with BatchMemoryManager(
        data_loader=train_loader,
        max_physical_batch_size=MAX_PHYSICAL_BATCH_SIZE,
        optimizer=optimizer,
    ) as memory_safe_data_loader:
        for step, (inputs, targets) in enumerate(memory_safe_data_loader, start=1):
            optimizer.zero_grad()
            inputs, targets = inputs.to(device), targets.to(device)

            # Forward pass and loss
            log_probs = model(inputs)
            loss = loss_fn(log_probs, targets)

            # Record the batch loss and top-1 accuracy
            predicted = np.argmax(log_probs.detach().cpu().numpy(), axis=1)
            expected = targets.detach().cpu().numpy()
            batch_acc = accuracy(predicted, expected)
            losses.append(loss.item())
            top1_acc.append(batch_acc)

            # Backward pass and parameter update
            loss.backward()
            optimizer.step()

            if step % 200 != 0:
                continue

            epsilon = privacy_engine.get_epsilon(DELTA)
            print(
                f"\tTrain Epoch: {epoch} \t"
                f"Loss: {np.mean(losses):.6f} "
                f"Acc@1: {np.mean(top1_acc) * 100:.6f} "
                f"(ε = {epsilon:.2f}, δ = {DELTA})"
            )

# Script entry point: the first argument is the input model path and the
# second is the output model path.
if __name__ == "__main__":
    train(sys.argv[1], sys.argv[2])

0 comments on commit 17401ba

Please sign in to comment.