diff --git a/README.md b/README.md index 9dd5ade8..bd6621c7 100644 --- a/README.md +++ b/README.md @@ -186,6 +186,7 @@ You can also find more examples in our tutorials and documentation. | Defense | Homomorphic Encryption | [Paiilier](https://link.springer.com/chapter/10.1007/3-540-48910-X_16) | | Defense | Differential Privacy | [DPSGD](https://arxiv.org/abs/1607.00133), [AdaDPS](https://arxiv.org/pdf/2202.05963.pdf) | | Defense | Anonymization | [Mondrian](https://ieeexplore.ieee.org/document/1617393) | +| Defense | Certified Robustness | [PixelDP](https://arxiv.org/abs/1802.03471v4) | | Defense | Debugging | [Model Assertions](https://cs.stanford.edu/~matei/papers/2019/debugml_model_assertions.pdf), [Rain](https://arxiv.org/abs/2004.05722), [Neuron Coverage](https://dl.acm.org/doi/abs/10.1145/3132747.3132785) | | Defense | Others | [Soteria](https://openaccess.thecvf.com/content/CVPR2021/papers/Sun_Soteria_Provable_Defense_Against_Privacy_Leakage_in_Federated_Learning_From_CVPR_2021_paper.pdf), [FoolsGold](https://arxiv.org/abs/1808.04866), [MID](https://arxiv.org/abs/2009.05241), [Sparse Gradient](https://aclanthology.org/D17-1045/) | diff --git a/docs/source/notebooks/aijack_pixeldp.ipynb b/docs/source/notebooks/aijack_pixeldp.ipynb new file mode 100644 index 00000000..adc0b051 --- /dev/null +++ b/docs/source/notebooks/aijack_pixeldp.ipynb @@ -0,0 +1,281 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a8977547-5a4f-480c-ad22-af0276284c5e", + "metadata": {}, + "source": [ + "# PixelDP\n", + "\n", + "`Lecuyer, Mathias, et al. \"Certified robustness to adversarial examples with differential privacy.\" 2019 IEEE symposium on security and privacy (SP). IEEE, 2019.`" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "ad5e6eda-27de-4beb-b9f9-edd60a2f43a0", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "/usr/local/lib/python3.10/dist-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: '/usr/local/lib/python3.10/dist-packages/torchvision/image.so: undefined symbol: _ZN3c104cuda20CUDACachingAllocator9allocatorE'If you don't plan on using image functionality from `torchvision.io`, you can ignore this warning. Otherwise, there might be something wrong with your environment. Did you have `libjpeg` or `libpng` installed before building `torchvision` from source?\n", + " warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import cv2\n", + "import numpy as np\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import torch.optim as optim\n", + "import torchvision.transforms as transforms\n", + "from torch.utils.data import TensorDataset\n", + "from matplotlib import pyplot as plt\n", + "import glob\n", + "from sklearn.metrics import accuracy_score\n", + "\n", + "from aijack.defense.crobustness import PixelDP\n", + "from aijack.utils import NumpyDataset\n", + "\n", + "BASE = \"data/\"\n", + "torch.manual_seed(42)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "6a164f0e-1adb-423d-8eec-69681a6ee768", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "class Net(nn.Module):\n", + " def __init__(self):\n", + " super(Net, self).__init__()\n", + " self.fla = nn.Flatten()\n", + " self.fc = nn.Linear(112 * 92, 40)\n", + "\n", + " def forward(self, x):\n", + " x = self.fla(x)\n", + " x = self.fc(x)\n", + " x = F.softmax(x, dim=1)\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "00059bd8-3178-484d-b65b-dbf94ce362a9", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "pdp = PixelDP(\n", + " Net(),\n", + " 40,\n", + " L=0.01,\n", + " eps=0.3,\n", + " delta=1e-5,\n", + " n_population_mc=100,\n", + " batch_size_mc=32,\n", + " mode=\"gaussian\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "2f89977d-a415-40a9-a9aa-f7e34361dee0", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "imgs = []\n", + "labels = []\n", + "for i in range(1, 41):\n", + " for j in range(1, 11):\n", + " img = cv2.imread(BASE + f\"s{i}/{j}.pgm\", 0)\n", + " imgs.append(img)\n", + " labels.append(i - 1)\n", + "\n", + "X = np.stack(imgs)\n", + "y = np.array(labels)\n", + "\n", + "# ToTensor:画像のグレースケール化(RGBの0~255を0~1の範囲に正規化)、Normalize:Z値化(RGBの平均と標準偏差を0.5で決め打ちして正規化)\n", + "transform = transforms.Compose(\n", + " [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]\n", + ")\n", + "trainset = NumpyDataset(X, y, transform=transform)\n", + "trainloader = torch.utils.data.DataLoader(\n", + " trainset, batch_size=4, shuffle=True, num_workers=2\n", + ")\n", + "\n", + "criterion = nn.CrossEntropyLoss()\n", + "optimizer = optim.SGD(pdp.parameters(), lr=0.005, momentum=0.9)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "c593f39d-650c-4da5-94f3-6dffe10a1a44", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "epoch 0: loss is 0.9139378321170807\n", + "epoch 1: loss is 0.8554459440708161\n", + "epoch 2: loss is 0.8069418483972549\n", + "epoch 3: loss is 0.7713756114244461\n", + "epoch 4: loss is 0.7584735369682312\n", + "epoch 5: loss is 0.75129163980484\n", + "epoch 6: loss is 0.7420633500814438\n", + "epoch 7: loss is 0.731513864994049\n", + "epoch 8: loss is 0.7287486070394515\n", + "epoch 9: loss is 0.7276095592975617\n", + "Test Accuracy is: 0.845\n" + ] + } + ], + "source": [ + "for epoch in range(10): # loop over the dataset multiple times\n", + " running_loss = 0\n", + " data_size = 0\n", + " for i, data in enumerate(trainloader, 0):\n", + " # get the inputs; data is a list of [inputs, labels]\n", + " inputs, labels = data\n", + "\n", + " # zero the parameter gradients\n", + " optimizer.zero_grad()\n", + "\n", + " # forward + backward + optimize\n", + " outputs = pdp(inputs)\n", + " loss = criterion(outputs, labels.to(torch.int64))\n", + " loss.backward()\n", + " optimizer.step()\n", + "\n", + " running_loss += loss.item()\n", + " data_size += inputs.shape[0]\n", + "\n", + " print(f\"epoch {epoch}: loss is {running_loss/data_size}\")\n", + "\n", + "\n", + "in_preds = []\n", + "in_label = []\n", + "with torch.no_grad():\n", + " for data in trainloader:\n", + " inputs, labels = data\n", + " outputs = pdp(inputs)\n", + " in_preds.append(outputs)\n", + " in_label.append(labels)\n", + " in_preds = torch.cat(in_preds)\n", + " in_label = torch.cat(in_label)\n", + "print(\n", + " \"Test Accuracy is: \",\n", + " accuracy_score(np.array(torch.argmax(in_preds, axis=1)), np.array(in_label)),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "49bd0b26-08f9-4b37-97a9-2f31d6d273db", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(tensor([9.6808e-04, 1.6266e-02, 8.4722e+00, 2.2943e-02, 8.0590e-04, 6.2990e-06,\n", + " 2.2331e-02, 6.8285e-04, 1.1202e-03, 5.9374e-03, 8.0079e-03, 5.8963e-01,\n", + " 1.9559e+01, 1.4635e-03, 1.6586e-02, 2.5163e-03, 2.7637e-04, 4.3312e-02,\n", + " 1.5605e-02, 2.8981e-03, 2.0027e-02, 2.2794e-01, 1.0082e-02, 2.5106e-04,\n", + " 1.3825e-02, 1.5252e+01, 3.3047e-02, 5.0347e+01, 2.1533e-03, 1.3620e-08,\n", + " 2.0145e-03, 2.7776e-04, 2.3923e-05, 4.3114e-04, 2.0491e-01, 2.7598e-03,\n", + " 1.8949e-02, 3.8814e-03, 1.9326e-03, 5.0761e+00]),\n", + " tensor([ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", + " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", + " 0., 0., 0., 100., 0., 0., 0., 0., 0., 0., 0., 0.,\n", + " 0., 0., 0., 0.]))" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pdp.eval()\n", + "expected_pred, counts = pdp(data[0][[0]])\n", + "expected_pred, counts" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "1098abb4-5805-4587-8226-eeab7445f82b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.06263168938608399" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pdp.certify(counts)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pyproject.toml b/pyproject.toml index aebf901a..3e7e8c8d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,8 @@ dependencies = [ 'pybind11', 'pybind11[global]', 'matplotlib', - 'mpi4py' + 'mpi4py', + 'statsmodels' ] [tool.setuptools] diff --git a/src/aijack/defense/crobustness/__init__.py b/src/aijack/defense/crobustness/__init__.py new file mode 100644 index 00000000..6eabdc9d --- /dev/null +++ b/src/aijack/defense/crobustness/__init__.py @@ -0,0 +1 @@ +from .pixeldp import PixelDP # noqa: F401 diff --git a/src/aijack/defense/crobustness/pixeldp.py b/src/aijack/defense/crobustness/pixeldp.py new file mode 100644 index 00000000..a34c6042 --- /dev/null +++ b/src/aijack/defense/crobustness/pixeldp.py @@ -0,0 +1,152 @@ +import math +from math import log, sqrt + +import numpy as np +import torch +from statsmodels.stats.proportion import proportion_confint + +log1c25 = log(1.25) + + +def clopper_pearson_interval(num_success, num_total, alpha): + return proportion_confint(num_success, num_total, alpha=2 * alpha, method="beta") + + +def gaus_delta_term(delta): + return sqrt(2 * (log1c25 - log(delta))) + + +def get_maximum_L_laplace(lower_bound, upper_bound, L, dp_eps): + if lower_bound <= upper_bound: + return 0.0 + return L * log(lower_bound / upper_bound) / (2 * dp_eps) + + +def get_maximum_L_gaussian( + p_max_lb, + p_sec_ub, + attack_size, + dp_epsilon, + dp_delta, + delta_range=list(np.arange(0.001, 0.3, 0.001)), + eps_min=0.0, + eps_max=1.0, + tolerance=0.001, +): + # Based on the original implementation: + # https://github.com/columbia/pixeldp/blob/master/models/utils/robustness.py + if p_max_lb <= p_sec_ub: + return 0.0 + + max_r = 0.0 + for delta in delta_range: + eps = (eps_min + eps_max) / 2 + while eps_min < eps and eps_max >= eps: + L_temp = ( + attack_size + * (eps / dp_epsilon) + * (gaus_delta_term(dp_delta) / gaus_delta_term(delta)) + ) + if p_max_lb >= math.e ** (2 * eps) * p_sec_ub + (1 + math.e**eps) * delta: + if L_temp > max_r: + max_r = L_temp + eps_min = eps + eps = (eps_min + eps_max) / 2.0 + else: + # eps is too big for delta + eps_max = eps + eps = (eps_min + eps_max) / 2.0 + + if eps_max - eps_min < tolerance: + break + + return max_r + + +def get_certified_robustness_size_argmax(counts, eta, L, eps, delta, mode="gaussian"): + total_counts = torch.sum(counts) + sorted_counts, _ = torch.sort(counts) + lb = clopper_pearson_interval(sorted_counts[-1], total_counts, eta)[0] + ub = clopper_pearson_interval(sorted_counts[-2], total_counts, eta)[1] + + if mode == "laplace": + return get_maximum_L_laplace(lb, ub, L, eps) + elif mode == "gaussian": + return get_maximum_L_gaussian(lb, ub, L, eps, delta) + else: + raise ValueError(f"{mode} is not supported") + + +class PixelDP(torch.nn.Module): + def __init__( + self, + model, + num_classes, + L, + eps, + delta, + n_population_mc=1000, + batch_size_mc=32, + eta=0.05, + mode="laplace", + sensitivity=1, + ): + super(PixelDP, self).__init__() + + self.model = model + self.num_classes = num_classes + self.L = L + self.eps = eps + self.delta = delta + self.n_population_mc = n_population_mc + self.batch_size_mc = batch_size_mc + self.eta = eta + self.mode = mode + + if self.mode == "laplace": + self.sigma = sensitivity * L / eps + self.dist = torch.distributions.laplace.Laplace(0, self.sigma) + elif self.mode == "gaussian": + self.sigma = gaus_delta_term(delta) * sensitivity * L / eps + else: + raise ValueError(f"{mode} is not supported") + + def sample_noise(self, x): + if self.mode == "laplace": + return self.dist.sample(x.shape).to(x.device) + else: + return torch.randn_like(x, device=x.device) * self.sigma + + def forward(self, x): + if self.training: + return self.forward_train(x) + else: + return self.forward_eval(x) + + def forward_train(self, x): + return self.model(x + self.sample_noise(x)) + + def forward_eval(self, x): + remaining_num = self.n_population_mc + input_dim = len(x.shape) - 1 + with torch.no_grad(): + preds = torch.zeros(self.num_classes, device=x.device) + counts = torch.zeros(self.num_classes, device=x.device) + while remaining_num > 0: + batch_size = min(self.batch_size_mc, remaining_num) + remaining_num -= batch_size + inputs = x.repeat(tuple([batch_size] + [1] * input_dim)) + noise = self.sample_noise(inputs) + tmp_pred = self.model(inputs + noise) + tmp_pred_argmax = tmp_pred.argmax(1) + + preds += tmp_pred.sum(0) + for i in range(len(tmp_pred_argmax)): + counts[tmp_pred_argmax[i]] += 1 + + return preds, counts + + def certify(self, counts): + return get_certified_robustness_size_argmax( + counts, self.eta, self.L, self.eps, self.delta, self.mode + )