Skip to content

Commit

Permalink
Merge branch 'support_multi_host_names' of github.com:yanchengnv/NVFlare into support_multi_host_names
Browse files Browse the repository at this point in the history
  • Loading branch information
yanchengnv committed Oct 11, 2024
2 parents 30620e4 + c368791 commit c6398fe
Show file tree
Hide file tree
Showing 10 changed files with 8 additions and 28 deletions.
2 changes: 1 addition & 1 deletion docs/real_world_fl/overview.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ For advanced users, you can customize your provision with additional behavior th
- **Zip**: To create password protected zip archives for the startup kits, see :ref:`distribution_builder`
- **Docker-compose**: Provision to launch NVIDIA FLARE system via docker containers. You can customize the provisioning process and ask the provisioner to generate a docker-compose file. This can be found in :ref:`docker_compose`.
- **Docker**: Provision to launch NVIDIA FLARE system via docker containers. If you just want to use docker files, see :ref:`containerized_deployment`.
- **Helm**: To change the provisioning tool to generate an NVIDIA FLARE Helm chart for Kubernetes deployment, see :ref:` helm_chart`.
- **Helm**: To change the provisioning tool to generate an NVIDIA FLARE Helm chart for Kubernetes deployment, see :ref:`helm_chart`.
- **CUSTOM**: you can build custom builders specific to your needs like in :ref:`distribution_builder`.

Package distribution
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
nvflare~=2.5.0rc
nvflare~=2.5.0
torch
torchvision
tensorboard
openmined.psi==1.1.1
openmined.psi==2.0.5
pandas
2 changes: 1 addition & 1 deletion nvflare/app_opt/pt/job_config/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@


class PTModel:
def __init__(self, model, persistor: Optional[ModelPersistor], locator: Optional[ModelLocator]):
def __init__(self, model, persistor: Optional[ModelPersistor] = None, locator: Optional[ModelLocator] = None):
"""PyTorch model wrapper.
If model is an nn.Module, add a PTFileModelPersistor with the model and a TFModelPersistor.
Expand Down
2 changes: 0 additions & 2 deletions nvflare/private/fed/app/simulator/simulator_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,8 +450,6 @@ def simulator_run_main(self):
try:
self.create_clients()
self.server.engine.run_processes[SimulatorConstants.JOB_NAME] = {
RunProcessKey.LISTEN_PORT: None,
RunProcessKey.CONNECTION: None,
RunProcessKey.CHILD_PROCESS: None,
RunProcessKey.JOB_ID: SimulatorConstants.JOB_NAME,
RunProcessKey.PARTICIPANTS: self.server.engine.client_manager.clients,
Expand Down
4 changes: 0 additions & 4 deletions nvflare/private/fed/client/client_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
from nvflare.apis.fl_constant import FLContextKey, MachineStatus, SystemComponents, WorkspaceConstants
from nvflare.apis.fl_context import FLContext, FLContextManager
from nvflare.apis.workspace import Workspace
from nvflare.fuel.utils.network_utils import get_open_ports
from nvflare.private.defs import ERROR_MSG_PREFIX, ClientStatusKey, EngineConstant
from nvflare.private.event import fire_event
from nvflare.private.fed.server.job_meta_validator import JobMetaValidator
Expand Down Expand Up @@ -161,15 +160,12 @@ def start_app(

self.logger.info("Starting client app. rank: {}".format(self.rank))

open_port = get_open_ports(1)[0]

server_config = list(self.client.servers.values())[0]
self.client_executor.start_app(
self.client,
job_id,
self.args,
app_custom_folder,
open_port,
allocated_resource,
token,
resource_manager,
Expand Down
6 changes: 0 additions & 6 deletions nvflare/private/fed/client/client_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ def start_app(
job_id,
args,
app_custom_folder,
listen_port,
allocated_resource,
token,
resource_manager,
Expand All @@ -56,7 +55,6 @@ def start_app(
job_id: the job_id
args: admin command arguments for starting the FL client training
app_custom_folder: FL application custom folder
listen_port: port to listen the command.
allocated_resource: allocated resources
token: token from resource manager
resource_manager: resource manager
Expand Down Expand Up @@ -149,7 +147,6 @@ def start_app(
job_id,
args,
app_custom_folder,
listen_port,
allocated_resource,
token,
resource_manager: ResourceManagerSpec,
Expand All @@ -163,7 +160,6 @@ def start_app(
job_id: the job_id
args: admin command arguments for starting the worker process
app_custom_folder: FL application custom folder
listen_port: port to listen the command.
allocated_resource: allocated resources
token: token from resource manager
resource_manager: resource manager
Expand Down Expand Up @@ -208,8 +204,6 @@ def start_app(

with self.lock:
self.run_processes[job_id] = {
RunProcessKey.LISTEN_PORT: listen_port,
RunProcessKey.CONNECTION: None,
RunProcessKey.CHILD_PROCESS: process,
RunProcessKey.STATUS: ClientStatus.STARTING,
}
Expand Down
7 changes: 0 additions & 7 deletions nvflare/private/fed/server/server_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@
from nvflare.fuel.f3.cellnet.defs import MessageHeaderKey
from nvflare.fuel.f3.cellnet.defs import ReturnCode as CellMsgReturnCode
from nvflare.fuel.utils.argument_utils import parse_vars
from nvflare.fuel.utils.network_utils import get_open_ports
from nvflare.fuel.utils.zip_utils import zip_directory_to_bytes
from nvflare.private.admin_defs import Message, MsgHeader
from nvflare.private.defs import CellChannel, CellMessageHeaderKeys, RequestHeader, TrainingTopic, new_cell_message
Expand Down Expand Up @@ -179,13 +178,11 @@ def start_app_on_server(self, run_number: str, job: Job = None, job_clients=None
if not isinstance(job, Job):
return "Must provide a job object to start the server app."

open_ports = get_open_ports(2)
self._start_runner_process(
self.args,
app_root,
run_number,
app_custom_folder,
open_ports,
job.job_id,
job_clients,
snapshot,
Expand Down Expand Up @@ -233,7 +230,6 @@ def _start_runner_process(
app_root,
run_number,
app_custom_folder,
open_ports,
job_id,
job_clients,
snapshot,
Expand All @@ -244,7 +240,6 @@ def _start_runner_process(
if app_custom_folder != "":
add_custom_dir_to_path(app_custom_folder, new_env)

listen_port = open_ports[1]
if snapshot:
restore_snapshot = True
else:
Expand Down Expand Up @@ -289,8 +284,6 @@ def _start_runner_process(

with self.lock:
self.run_processes[run_number] = {
RunProcessKey.LISTEN_PORT: listen_port,
RunProcessKey.CONNECTION: None,
RunProcessKey.CHILD_PROCESS: process,
RunProcessKey.JOB_ID: job_id,
RunProcessKey.PARTICIPANTS: job_clients,
Expand Down
2 changes: 0 additions & 2 deletions nvflare/private/fed/simulator/simulator_client_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ def __init__(self, client, args, rank=0):
fl_ctx.set_prop(FLContextKey.SIMULATE_MODE, True, private=True, sticky=True)

self.client_executor.run_processes[SimulatorConstants.JOB_NAME] = {
RunProcessKey.LISTEN_PORT: None,
RunProcessKey.CONNECTION: None,
RunProcessKey.CHILD_PROCESS: None,
RunProcessKey.STATUS: ClientStatus.STARTED,
}
Expand Down
2 changes: 1 addition & 1 deletion research/fed-bpt/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ The models code is copied from the [transformers](https://github.com/huggingface
We recommend creating a [conda environment](https://www.anaconda.com) following [BBT](https://github.com/txsun1997/Black-Box-Tuning#prepare-your-environment)
with the addition of installing NVFlare for running federated learning and some other updates:
```commandline
conda create --name fedbpt python=3.8
conda create --name fedbpt python=3.12
conda activate fedbpt
pip install -r requirements.txt
```
Expand Down
5 changes: 3 additions & 2 deletions research/fed-bpt/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
transformers==4.38.2
fastNLP==0.6.0
datasets
cma
cma==3.4.0
scikit-learn
tensorboard
cvxopt
nvflare~=2.4.1rc
nvflare~=2.5.1rc

0 comments on commit c6398fe

Please sign in to comment.