Commit 17401ba (parent: efcea62): 4 changed files with 321 additions and 0 deletions.
@@ -0,0 +1,50 @@
FEDn Project: Federated Differential Privacy MNIST (Opacus + PyTorch)
----------------------------------------------------------------------

This example FEDn project demonstrates how differential privacy can be integrated to enhance the confidentiality of client data. It extends the baseline MNIST-PyTorch example with the Opacus framework, which is designed specifically for PyTorch models.
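
The core of the integration happens in the training script added by this commit: the model, optimizer, and data loader are wrapped by Opacus' ``PrivacyEngine`` so that per-sample gradients are clipped and noised to meet a per-round privacy budget. A condensed sketch, using the constants as defined in the training script below:

.. code-block:: python

   from opacus import PrivacyEngine

   privacy_engine = PrivacyEngine()
   model, optimizer, train_loader = privacy_engine.make_private_with_epsilon(
       module=model,
       optimizer=optimizer,
       data_loader=train_loader,
       epochs=EPOCHS,
       target_epsilon=EPSILON,  # per-round budget: FINAL_EPSILON / ROUNDS
       target_delta=DELTA,
       max_grad_norm=MAX_GRAD_NORM,
   )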

Prerequisites
-------------

- `Python >=3.8, <=3.12 <https://www.python.org/downloads>`__
- `A project in FEDn Studio <https://fedn.scaleoutsystems.com/signup>`__

Creating the compute package and seed model
-------------------------------------------

Install fedn:

.. code-block::

   pip install fedn

Clone this repository, then navigate to this directory:

.. code-block::

   git clone https://github.com/scaleoutsystems/fedn.git
   cd fedn/examples/mnist-pytorch

Create the compute package:

.. code-block::

   fedn package create --path client

This creates a file 'package.tgz' in the project folder.

Next, generate the seed model:

.. code-block::

   fedn run build --path client

This creates a model file 'seed.npz' in the root of the project. The step takes a few minutes, depending on hardware and internet connection, since it builds a virtualenv.

Running the project on FEDn
----------------------------

To learn how to set up your FEDn Studio project and connect clients, take the quickstart tutorial: https://fedn.readthedocs.io/en/stable/quickstart.html.
@@ -0,0 +1,76 @@
import collections

import torch

from fedn.utils.helpers.helpers import get_helper

HELPER_MODULE = "numpyhelper"
helper = get_helper(HELPER_MODULE)


def compile_model():
    """Compile the pytorch model.

    :return: The compiled model.
    :rtype: torch.nn.Module
    """

    class Net(torch.nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.fc1 = torch.nn.Linear(784, 64)
            self.fc2 = torch.nn.Linear(64, 32)
            self.fc3 = torch.nn.Linear(32, 10)

        def forward(self, x):
            x = torch.nn.functional.relu(self.fc1(x.reshape(x.size(0), 784)))
            x = torch.nn.functional.dropout(x, p=0.5, training=self.training)
            x = torch.nn.functional.relu(self.fc2(x))
            x = torch.nn.functional.log_softmax(self.fc3(x), dim=1)
            return x

    return Net()


def save_parameters(model, out_path):
    """Save model parameters to file.

    :param model: The model to serialize.
    :type model: torch.nn.Module
    :param out_path: The path to save to.
    :type out_path: str
    """
    parameters_np = [val.cpu().numpy() for _, val in model.state_dict().items()]
    helper.save(parameters_np, out_path)


def load_parameters(model_path):
    """Load model parameters from file and populate model.

    :param model_path: The path to load from.
    :type model_path: str
    :return: The loaded model.
    :rtype: torch.nn.Module
    """
    model = compile_model()
    parameters_np = helper.load(model_path)

    params_dict = zip(model.state_dict().keys(), parameters_np)
    state_dict = collections.OrderedDict({key: torch.tensor(x) for key, x in params_dict})
    model.load_state_dict(state_dict, strict=True)
    return model


def init_seed(out_path="seed.npz"):
    """Initialize seed model and save it to file.

    :param out_path: The path to save the seed model to.
    :type out_path: str
    """
    # Init and save
    model = compile_model()
    save_parameters(model, out_path)


if __name__ == "__main__":
    init_seed("../seed.npz")
@@ -0,0 +1,10 @@
name: mnist-pytorch
build_dependencies:
  - pip
  - setuptools
  - wheel
dependencies:
  - torch==2.3.1
  - torchvision==0.18.1
  - fedn
  - opacus
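
This environment file is what `fedn run build` consumes when it builds the client virtualenv (the step that produces seed.npz above); relative to the baseline MNIST-PyTorch example, the key addition here is the opacus dependency.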
@@ -0,0 +1,185 @@
import os
import sys

import numpy as np
import torch
from opacus import PrivacyEngine
from opacus.utils.batch_memory_manager import BatchMemoryManager
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

from data import load_data
from fedn.utils.helpers.helpers import save_metadata
from model import load_parameters, save_parameters

# Custom Dataset wrapping the in-memory MNIST tensors returned by load_data
class CustomDataset(Dataset):
    def __init__(self, x_data, y_data):
        self.x_data = x_data
        self.y_data = y_data

    def __len__(self):
        return len(self.x_data)

    def __getitem__(self, idx):
        x_data = self.x_data[idx]
        y_data = self.y_data[idx]
        return x_data, y_data


dir_path = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.abspath(dir_path))

# Differential-privacy hyperparameters. The total budget FINAL_EPSILON is
# split evenly over the federated rounds, so each local update may spend
# at most EPSILON = FINAL_EPSILON / ROUNDS (at fixed DELTA).
MAX_GRAD_NORM = 1.2
FINAL_EPSILON = 8.0
ROUNDS = 4
EPOCHS = 5
EPSILON = FINAL_EPSILON / ROUNDS
DELTA = 1e-5

MAX_PHYSICAL_BATCH_SIZE = 32


def train(in_model_path, out_model_path, data_path=None, batch_size=32, epochs=1, lr=0.01):
    """Complete a model update.

    Load model parameters from in_model_path (managed by the FEDn client),
    perform a model update, and write updated parameters
    to out_model_path (picked up by the FEDn client).

    :param in_model_path: The path to the input model.
    :type in_model_path: str
    :param out_model_path: The path to save the output model to.
    :type out_model_path: str
    :param data_path: The path to the data file.
    :type data_path: str
    :param batch_size: The batch size to use.
    :type batch_size: int
    :param epochs: The number of epochs to train.
    :type epochs: int
    :param lr: The learning rate to use.
    :type lr: float
    """
    # Load data
    print("data_path: ", data_path)
    x_train, y_train = load_data(data_path)
    trainset = CustomDataset(x_train, y_train)
    train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

    # Load parameters and initialize model
    model = load_parameters(in_model_path)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    # Load the privacy budget consumed in previous rounds, if any
    if os.path.isfile("epsilon.npy"):
        tot_epsilon = np.load("epsilon.npy")
        print("load consumed epsilon: ", tot_epsilon)
    else:
        print("initiate tot_epsilon")
        tot_epsilon = 0.0

    # Train
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    privacy_engine = PrivacyEngine()

    # Wrap model, optimizer and data loader: Opacus clips per-sample
    # gradients to MAX_GRAD_NORM and calibrates the noise multiplier so
    # that EPOCHS epochs stay within the (EPSILON, DELTA) budget
    model, optimizer, train_loader = privacy_engine.make_private_with_epsilon(
        module=model,
        optimizer=optimizer,
        data_loader=train_loader,
        epochs=EPOCHS,
        target_epsilon=EPSILON,
        target_delta=DELTA,
        max_grad_norm=MAX_GRAD_NORM,
    )

    print(f"Using sigma={optimizer.noise_multiplier} and C={MAX_GRAD_NORM}")

    for epoch in tqdm(range(EPOCHS), desc="Epoch", unit="epoch"):
        train_dp(model, train_loader, optimizer, epoch + 1, device, privacy_engine)

    # Accumulate the epsilon spent this round into the running total
    d_epsilon = privacy_engine.get_epsilon(DELTA)
    print("epsilon spent: ", d_epsilon)
    tot_epsilon += d_epsilon
    print("saving tot_epsilon: ", tot_epsilon)
    np.save("epsilon.npy", tot_epsilon)

    # Metadata needed for aggregation server side
    metadata = {
        # num_examples are mandatory
        "num_examples": len(x_train),
        "batch_size": batch_size,
        "epochs": epochs,
        "lr": lr,
    }

    # Save JSON metadata file (mandatory)
    save_metadata(metadata, out_model_path)

    # Save model update (mandatory)
    save_parameters(model, out_model_path)


def accuracy(preds, labels):
    return (preds == labels).mean()


def train_dp(model, train_loader, optimizer, epoch, device, privacy_engine):
    model.train()
    criterion = torch.nn.NLLLoss()

    losses = []
    top1_acc = []

    # BatchMemoryManager splits the logical batches into smaller physical
    # batches so that Opacus' per-sample gradients fit in memory, while
    # keeping the noise calibration of the logical batch size
    with BatchMemoryManager(
        data_loader=train_loader,
        max_physical_batch_size=MAX_PHYSICAL_BATCH_SIZE,
        optimizer=optimizer,
    ) as memory_safe_data_loader:
        for i, (images, target) in enumerate(memory_safe_data_loader):
            optimizer.zero_grad()
            images = images.to(device)
            target = target.to(device)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            preds = np.argmax(output.detach().cpu().numpy(), axis=1)
            labels = target.detach().cpu().numpy()

            # measure accuracy and record loss
            acc = accuracy(preds, labels)

            losses.append(loss.item())
            top1_acc.append(acc)

            loss.backward()
            optimizer.step()

            if (i + 1) % 200 == 0:
                epsilon = privacy_engine.get_epsilon(DELTA)
                print(
                    f"\tTrain Epoch: {epoch} \t"
                    f"Loss: {np.mean(losses):.6f} "
                    f"Acc@1: {np.mean(top1_acc) * 100:.6f} "
                    f"(ε = {epsilon:.2f}, δ = {DELTA})"
                )


if __name__ == "__main__":
    train(sys.argv[1], sys.argv[2])
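
For reference, the FEDn client invokes this entry point with an input and an output model path. A hedged local smoke test (assumes load_data in data.py resolves a default dataset when data_path is None):

.. code-block:: python

   from model import init_seed
   from train import train

   init_seed("seed.npz")                  # create an initial model
   train("seed.npz", "model_update.npz")  # one DP-SGD update round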