diff --git a/fault-localization/artifact.ipynb b/fault-localization/artifact.ipynb index 40dd33d..ac3110b 100644 --- a/fault-localization/artifact.ipynb +++ b/fault-localization/artifact.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -34,30 +34,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/gulzar/anaconda3/envs/torch/lib/python3.10/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n", - "Global seed set to 786\n", - "Global seed set to 786\n" - ] - }, - { - "data": { - "text/plain": [ - "786" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import logging\n", "import time\n", @@ -92,7 +71,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -130,7 +109,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -151,212 +130,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - " ***Simulating FL setup iid_resnet50_cifar10_clients_5_faulty_[0]_bsize_512_epochs_5_lr_0.001 ***\n", - "Files already downloaded and verified\n", - "Files already downloaded and verified\n", - "Spliting Datasets 50000 into parts:[10000, 10000, 10000, 10000, 10000]\n", - "input shape, torch.Size([1, 3, 32, 32])\n", - "Training : .storage/checkpoints/iid_resnet50_cifar10_clients_5_faulty_[0]_bsize_512_epochs_5_lr_0.001/faulty_client_0_noise_rate_1_classes.ckpt\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using 16bit native Automatic Mixed Precision (AMP)\n", - "GPU available: True (cuda), used: True\n", - "TPU available: False, using: 0 TPU cores\n", - "IPU available: False, using: 0 IPUs\n", - "HPU available: False, using: 0 HPUs\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Train mod batch = 272, and drop_last = False\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 4: 100%|██████████| 20/20 [00:05<00:00, 3.47it/s, loss=2.35, train_acc=0.130, train_loss=2.360, val_acc=0.0961, val_loss=2.460] " - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "`Trainer.fit` stopped: `max_epochs=5` reached.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 4: 100%|██████████| 20/20 [00:05<00:00, 3.47it/s, loss=2.35, train_acc=0.130, train_loss=2.360, val_acc=0.0961, val_loss=2.460]\n", - "Training : .storage/checkpoints/iid_resnet50_cifar10_clients_5_faulty_[0]_bsize_512_epochs_5_lr_0.001/client_1.ckpt\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using 16bit native Automatic Mixed Precision (AMP)\n", - "GPU available: True (cuda), used: True\n", - "TPU available: False, using: 0 TPU cores\n", - "IPU available: False, using: 0 IPUs\n", - "HPU available: False, using: 0 HPUs\n", - "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Train mod batch = 272, and drop_last = False\n", - "Epoch 4: 100%|██████████| 20/20 [00:06<00:00, 3.17it/s, loss=0.585, train_acc=0.812, train_loss=0.597, val_acc=0.729, val_loss=0.810]" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "`Trainer.fit` stopped: `max_epochs=5` reached.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 4: 100%|██████████| 20/20 [00:06<00:00, 3.17it/s, loss=0.585, train_acc=0.812, train_loss=0.597, val_acc=0.729, val_loss=0.810]\n", - "Training : .storage/checkpoints/iid_resnet50_cifar10_clients_5_faulty_[0]_bsize_512_epochs_5_lr_0.001/client_2.ckpt\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using 16bit native Automatic Mixed Precision (AMP)\n", - "GPU available: True (cuda), used: True\n", - "TPU available: False, using: 0 TPU cores\n", - "IPU available: False, using: 0 IPUs\n", - "HPU available: False, using: 0 HPUs\n", - "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Train mod batch = 272, and drop_last = False\n", - "Epoch 4: 100%|██████████| 20/20 [00:06<00:00, 3.21it/s, loss=0.544, train_acc=0.817, train_loss=0.516, val_acc=0.745, val_loss=0.840]" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "`Trainer.fit` stopped: `max_epochs=5` reached.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 4: 100%|██████████| 20/20 [00:06<00:00, 3.21it/s, loss=0.544, train_acc=0.817, train_loss=0.516, val_acc=0.745, val_loss=0.840]\n", - "Training : .storage/checkpoints/iid_resnet50_cifar10_clients_5_faulty_[0]_bsize_512_epochs_5_lr_0.001/client_3.ckpt\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using 16bit native Automatic Mixed Precision (AMP)\n", - "GPU available: True (cuda), used: True\n", - "TPU available: False, using: 0 TPU cores\n", - "IPU available: False, using: 0 IPUs\n", - "HPU available: False, using: 0 HPUs\n", - "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Train mod batch = 272, and drop_last = False\n", - "Epoch 4: 100%|██████████| 20/20 [00:07<00:00, 2.85it/s, loss=0.54, train_acc=0.817, train_loss=0.539, val_acc=0.743, val_loss=0.842] " - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "`Trainer.fit` stopped: `max_epochs=5` reached.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 4: 100%|██████████| 20/20 [00:07<00:00, 2.84it/s, loss=0.54, train_acc=0.817, train_loss=0.539, val_acc=0.743, val_loss=0.842]\n", - "Training : .storage/checkpoints/iid_resnet50_cifar10_clients_5_faulty_[0]_bsize_512_epochs_5_lr_0.001/client_4.ckpt\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using 16bit native Automatic Mixed Precision (AMP)\n", - "GPU available: True (cuda), used: True\n", - "TPU available: False, using: 0 TPU cores\n", - "IPU available: False, using: 0 IPUs\n", - "HPU available: False, using: 0 HPUs\n", - "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Train mod batch = 272, and drop_last = False\n", - "Epoch 4: 100%|██████████| 20/20 [00:05<00:00, 3.51it/s, loss=0.558, train_acc=0.803, train_loss=0.622, val_acc=0.744, val_loss=0.849]" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "`Trainer.fit` stopped: `max_epochs=5` reached.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 4: 100%|██████████| 20/20 [00:05<00:00, 3.50it/s, loss=0.558, train_acc=0.803, train_loss=0.622, val_acc=0.744, val_loss=0.849]\n", - "Total clients: 5\n", - "++Training is done: iid_resnet50_cifar10_clients_5_faulty_[0]_bsize_512_epochs_5_lr_0.001\n" - ] - } - ], + "outputs": [], "source": [ "# FL training\n", "c2ms, exp2info = trainFLMain(args)\n", @@ -366,18 +142,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "> Running FaultyClientLocalization ..\n", - "Same prediction threshold 5\n" - ] - } - ], + "outputs": [], "source": [ "# Fault localazation to find potetial faulty clients\n", "potential_faulty_clients, _, _ = runFaultyClientLocalization(\n", @@ -386,27 +153,9 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+++ Faulty Clients {0}\n", - "+++ Faulty Clients {0}\n", - "+++ Faulty Clients {0}\n", - "+++ Faulty Clients {0}\n", - "+++ Faulty Clients {0}\n", - "+++ Faulty Clients {0}\n", - "+++ Faulty Clients {0}\n", - "+++ Faulty Clients {0}\n", - "+++ Faulty Clients {0}\n", - "+++ Faulty Clients {0}\n", - "Fault Localization Accuracy: 100.0\n" - ] - } - ], + "outputs": [], "source": [ "# Fault localization accuracy \n", "acc = evaluateFaultLocalization(\n", diff --git a/fault-localization/utils/FLSimulation.py b/fault-localization/utils/FLSimulation.py index 3ca9316..78d9cc4 100644 --- a/fault-localization/utils/FLSimulation.py +++ b/fault-localization/utils/FLSimulation.py @@ -10,8 +10,10 @@ from .dl_models import ImageClassifer, initialize_model from .fl_datasets import * -logging.basicConfig(filename='example.log', level=logging.ERROR) -logger = logging.getLogger("pytorch_lightning") +# logging.basicConfig(filename='example.log', level=logging.ERROR) +# logger = logging.getLogger("pytorch_lightning") +logging.getLogger("pytorch_lightning").setLevel(logging.WARNING) + seed_everything(786) @@ -126,7 +128,7 @@ def getFLClientsDatasets(): stringID2intID[k] = normal_id data_config["single_input_shape"] = valid[0][0].unsqueeze(0).shape - print(f'input shape, {data_config["single_input_shape"]}') + # print(f'input shape, {data_config["single_input_shape"]}') # return model_config["classes"] = num_classes diff --git a/fault-localization/utils/faulty_client_localization/InferenceGuidedInputs.py b/fault-localization/utils/faulty_client_localization/InferenceGuidedInputs.py index 0faba0d..b6c6406 100644 --- a/fault-localization/utils/faulty_client_localization/InferenceGuidedInputs.py +++ b/fault-localization/utils/faulty_client_localization/InferenceGuidedInputs.py @@ -11,7 +11,7 @@ class InferenceGuidedInputs: def __init__(self, clients2models, shape, randomGenerator, apply_transform, dname=None, k_gen_inputs=10, min_nclients_same_pred=5, time_delta=60): self.clients2models = clients2models self.min_nclients_same_pred = 3 #min_nclients_same_pred - print(f"Same prediction threshold {self.min_nclients_same_pred}") + # print(f"Same prediction threshold {self.min_nclients_same_pred}") self.same_seqs_set = set() self.k_gen_inputs = k_gen_inputs self.size = 1024 diff --git a/fault-localization/utils/fl_datasets.py b/fault-localization/utils/fl_datasets.py index c736a8b..59b925e 100644 --- a/fault-localization/utils/fl_datasets.py +++ b/fault-localization/utils/fl_datasets.py @@ -273,8 +273,8 @@ def __init__(self, train_dataset, val_dataset, batch_size, num_workers=4) -> Non print( f"Dropping last batch because of uneven data size: {len(self.train_dataset)} % {self.batch_size} == 1") - print( - f"Train mod batch = {len(train_dataset) % batch_size}, and drop_last = {self.drop_last}") + # print( + # f"Train mod batch = {len(train_dataset) % batch_size}, and drop_last = {self.drop_last}") def train_dataloader(self): return DataLoader(self.train_dataset, batch_size=self.batch_size, num_workers=self.num_workers, pin_memory=True, shuffle=True, drop_last=self.drop_last)