Skip to content

Commit

Permalink
fl support code
Browse files Browse the repository at this point in the history
  • Loading branch information
hasan7n committed Feb 28, 2024
1 parent 905e9fd commit c837479
Show file tree
Hide file tree
Showing 108 changed files with 5,902 additions and 45 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -147,3 +147,4 @@ cython_debug/
# Dev Environment Specific
.vscode
.venv
server/keys
47 changes: 47 additions & 0 deletions TODO
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# TODO: remove me from the repo

FOR TUTORIAL

- stream logs
- check benchmark execution mlcube training exp ID
- if association request failed for some reason, delete private key (or at least check if rerunning the request will simply overwrite the key)
- define output folders in medperf storage (logs for both, weights for agg)
- adding email to CN currently could be challenging. THINK
- ASSUMPTION: emails are not changed after signup

- We now have demo data url and hash in training exp (dummy) that we don't use.
- what to say about this in miccai (I think no worries; it's hidden now)
- rethink/review about the following serializers and if necessary use atomic transactions
- association creation (dataset-training, agg-training)
- association approval (dataset-training, agg-training)
- training experiment creation (creating keypair); this could move to approval
- public/private keys uniqueness constraint while blank; check django docs on how
- fix bug about association list; /home/hasan/work/openfl_ws/medperf-private/server/utils/views.py
- pull latest medperf main
- test agg and training exp owner being same user
- basically, test the tutorial steps EXACTLY

AFTER TUTORIAL

- FOLLOWUP: collaborators doesn't use tensorboard logs.
- FOLLOWUP: show csr hash on approval is not necessary since now CSRs are transported securely
- test remote aggregator
- make network config better structured (URL to file? no, could be annoying.)
- move key generation after admin approval of training experiments.
- when the training experiment owner wants to "lock" the experiment
- ask for confirmation? it's an easy command and after execution there is no going back; a mess if unintended.
- secretstorage gcloud

NOT SURE

- consider if we want to enable restarts and epochs/"fresh restarts" for training exps (it's hard)
- mlcube for agg alone

LATER / FUTURE INVESTIGATIONS

- root key thing.
- limit network access (for now we can rely on the review of the experiment owner)
- compatibility tests
- rethink if keys are always needed (just for exps where they on't need a custom cert)
- server side verification of CSRs (check common names)
- later: the whole design might be changed
5 changes: 4 additions & 1 deletion cli/medperf/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@
)
import medperf.commands.association.association as association
import medperf.commands.compatibility_test.compatibility_test as compatibility_test

import medperf.commands.training.training as training
import medperf.commands.aggregator.aggregator as aggregator

app = typer.Typer()
app.add_typer(mlcube.app, name="mlcube", help="Manage mlcubes")
Expand All @@ -38,6 +39,8 @@
app.add_typer(profile.app, name="profile", help="Manage profiles")
app.add_typer(compatibility_test.app, name="test", help="Manage compatibility tests")
app.add_typer(auth.app, name="auth", help="Authentication")
app.add_typer(training.app, name="training", help="Training")
app.add_typer(aggregator.app, name="aggregator", help="Aggregator")


@app.command("run")
Expand Down
93 changes: 65 additions & 28 deletions cli/medperf/account_management.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,82 @@
import keyring
from medperf.utils import read_config, write_config
import os
import base64
from medperf.utils import read_config, write_config, base_storage_path, remove_path
from medperf import config
from medperf.exceptions import MedperfException


# class TokenStore:
# def __init__(self):
# pass

# def set_tokens(self, account_id, access_token, refresh_token):
# keyring.set_password(
# config.keyring_access_token_service_name,
# account_id,
# access_token,
# )
# keyring.set_password(
# config.keyring_refresh_token_service_name,
# account_id,
# refresh_token,
# )

# def read_tokens(self, account_id):
# access_token = keyring.get_password(
# config.keyring_access_token_service_name,
# account_id,
# )
# refresh_token = keyring.get_password(
# config.keyring_refresh_token_service_name,
# account_id,
# )
# return access_token, refresh_token

# def delete_tokens(self, account_id):
# keyring.delete_password(
# config.keyring_access_token_service_name,
# account_id,
# )
# keyring.delete_password(
# config.keyring_refresh_token_service_name,
# account_id,
# )

class TokenStore:
def __init__(self):
pass
self.creds_folder = base_storage_path(config.creds_folder)
os.makedirs(self.creds_folder, exist_ok=True)

def __get_paths(self, account_id):
account_id_encoded = base64.b64encode(account_id.encode("utf-8")).decode("utf-8")
account_folder = os.path.join(self.creds_folder, account_id_encoded)
os.makedirs(account_folder, exist_ok=True)

access_token_file = os.path.join(account_folder, config.keyring_access_token_service_name)
refresh_token_file = os.path.join(account_folder, config.keyring_refresh_token_service_name)

return access_token_file, refresh_token_file

def set_tokens(self, account_id, access_token, refresh_token):
keyring.set_password(
config.keyring_access_token_service_name,
account_id,
access_token,
)
keyring.set_password(
config.keyring_refresh_token_service_name,
account_id,
refresh_token,
)
access_token_file, refresh_token_file = self.__get_paths(account_id)
with open(access_token_file, "w") as f:
f.write(access_token)
with open(refresh_token_file, "w") as f:
f.write(refresh_token)

def read_tokens(self, account_id):
access_token = keyring.get_password(
config.keyring_access_token_service_name,
account_id,
)
refresh_token = keyring.get_password(
config.keyring_refresh_token_service_name,
account_id,
)
access_token_file, refresh_token_file = self.__get_paths(account_id)
with open(access_token_file) as f:
access_token = f.read()
with open(refresh_token_file) as f:
refresh_token = f.read()
return access_token, refresh_token

def delete_tokens(self, account_id):
keyring.delete_password(
config.keyring_access_token_service_name,
account_id,
)
keyring.delete_password(
config.keyring_refresh_token_service_name,
account_id,
)
access_token_file, refresh_token_file = self.__get_paths(account_id)
remove_path(access_token_file)
remove_path(refresh_token_file)


def read_user_account():
Expand Down
107 changes: 107 additions & 0 deletions cli/medperf/commands/aggregator/aggregator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
from typing import Optional
from medperf.entities.aggregator import Aggregator
import typer

import medperf.config as config
from medperf.decorators import clean_except
from medperf.commands.aggregator.submit import SubmitAggregator
from medperf.commands.aggregator.associate import AssociateAggregator
from medperf.commands.aggregator.run import StartAggregator

from medperf.commands.list import EntityList
from medperf.commands.view import EntityView

app = typer.Typer()


@app.command("submit")
@clean_except
def submit(
name: str = typer.Option(..., "--name", "-n", help="Name of the agg"),
address: str = typer.Option(
..., "--address", "-a", help="UID of benchmark to associate with"
),
port: int = typer.Option(
..., "--port", "-p", help="UID of benchmark to associate with"
),
):
"""Associates a benchmark with a given mlcube or dataset. Only one option at a time."""
SubmitAggregator.run(name, address, port)
config.ui.print("✅ Done!")


@app.command("associate")
@clean_except
def associate(
aggregator_id: int = typer.Option(
..., "--aggregator_id", "-a", help="UID of benchmark to associate with"
),
training_exp_id: int = typer.Option(
..., "--training_exp_id", "-t", help="UID of benchmark to associate with"
),
approval: bool = typer.Option(False, "-y", help="Skip approval step"),
):
"""Associates a benchmark with a given mlcube or dataset. Only one option at a time."""
AssociateAggregator.run(aggregator_id, training_exp_id, approved=approval)
config.ui.print("✅ Done!")


@app.command("start")
@clean_except
def run(
aggregator_id: int = typer.Option(
..., "--aggregator_id", "-a", help="UID of benchmark to associate with"
),
training_exp_id: int = typer.Option(
..., "--training_exp_id", "-t", help="UID of benchmark to associate with"
),
):
"""Associates a benchmark with a given mlcube or dataset. Only one option at a time."""
StartAggregator.run(training_exp_id, aggregator_id)
config.ui.print("✅ Done!")


@app.command("ls")
@clean_except
def list(
local: bool = typer.Option(False, "--local", help="Get local aggregators"),
mine: bool = typer.Option(False, "--mine", help="Get current-user aggregators"),
):
"""List aggregators stored locally and remotely from the user"""
EntityList.run(
Aggregator,
fields=["UID", "Name", "Address", "Port"],
local_only=local,
mine_only=mine,
)


@app.command("view")
@clean_except
def view(
entity_id: Optional[int] = typer.Argument(None, help="Benchmark ID"),
format: str = typer.Option(
"yaml",
"-f",
"--format",
help="Format to display contents. Available formats: [yaml, json]",
),
local: bool = typer.Option(
False,
"--local",
help="Display local benchmarks if benchmark ID is not provided",
),
mine: bool = typer.Option(
False,
"--mine",
help="Display current-user benchmarks if benchmark ID is not provided",
),
output: str = typer.Option(
None,
"--output",
"-o",
help="Output file to store contents. If not provided, the output will be displayed",
),
):
"""Displays the information of one or more aggregators"""
EntityView.run(entity_id, Aggregator, format, local, mine, output)
38 changes: 38 additions & 0 deletions cli/medperf/commands/aggregator/associate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from medperf import config
from medperf.entities.aggregator import Aggregator
from medperf.entities.training_exp import TrainingExp
from medperf.utils import approval_prompt, generate_agg_csr
from medperf.exceptions import InvalidArgumentError


class AssociateAggregator:
@staticmethod
def run(training_exp_id: int, agg_uid: int, approved=False):
"""Associates a registered aggregator with a benchmark
Args:
agg_uid (int): UID of the registered aggregator to associate
benchmark_uid (int): UID of the benchmark to associate with
"""
comms = config.comms
ui = config.ui
agg = Aggregator.get(agg_uid)
if agg.id is None:
msg = "The provided aggregator is not registered."
raise InvalidArgumentError(msg)

training_exp = TrainingExp.get(training_exp_id)
csr, csr_hash = generate_agg_csr(training_exp_id, agg.address, agg.id)
msg = "Please confirm that you would like to associate"
msg += f" the aggregator {agg.name} with the training exp {training_exp.name}."
msg += f" The certificate signing request hash is: {csr_hash}"
msg += " [Y/n]"

approved = approved or approval_prompt(msg)
if approved:
ui.print("Generating aggregator training association")
# TODO: delete keys if upload fails
# check if on failure, other (possible) request will overwrite key
comms.associate_aggregator(agg.id, training_exp_id, csr)
else:
ui.print("Aggregator association operation cancelled.")
Loading

0 comments on commit c837479

Please sign in to comment.