-
Notifications
You must be signed in to change notification settings - Fork 3.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* remove source-lit * docs * docs * docs * docs * ic * deploy * deploy * deploy * deploy * deploy * deploy * Apply suggestions from code review * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * make build run Co-authored-by: Jirka Borovec <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Rick Izzo <[email protected]>
- Loading branch information
1 parent
136a090
commit d5c0eff
Showing
25 changed files
with
224 additions
and
128 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
# install lightning | ||
pip install lightning | ||
|
||
# run the app | ||
lightning run app app.py --cloud | ||
# run the app on the --cloud (--setup installs deps automatically) | ||
lightning run app app.py --setup --cloud |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1 change: 1 addition & 0 deletions
1
docs/source-app/levels/basic/hello_components/code_run_cloud_setup.bash
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
lightning run app app.py --setup --cloud |
5 changes: 5 additions & 0 deletions
5
docs/source-app/levels/basic/hello_components/code_run_cloud_yours_setup.bash
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# first create a cluster (creation could take ~30 minutes) | ||
lightning create cluster pikachu --provider aws --role-arn arn:aws:iam::1234567890:role/lai-byoc | ||
|
||
# run on that cluster | ||
lightning run app app.py --setup --cloud pikachu |
1 change: 1 addition & 0 deletions
1
docs/source-app/levels/basic/hello_components/code_run_local_setup.bash
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
lightning run app app.py --setup |
34 changes: 25 additions & 9 deletions
34
docs/source-app/levels/basic/hello_components/deploy_model.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,31 @@ | ||
# A hello world component | ||
# app.py | ||
# !pip install torchvision | ||
import lightning as L | ||
from lightning.app.components.serve import PythonServer, Image, Number | ||
import base64, io, torchvision, torch | ||
from PIL import Image as PILImage | ||
|
||
|
||
class YourComponent(L.LightningWork): | ||
def run(self): | ||
print('RUN ANY PYTHON CODE HERE') | ||
class PyTorchServer(PythonServer): | ||
def setup(self): | ||
self._model = torchvision.models.resnet18(pretrained=True) | ||
self._device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") | ||
self._model.to(self._device) | ||
|
||
def predict(self, request): | ||
image = base64.b64decode(request.image.encode("utf-8")) | ||
image = PILImage.open(io.BytesIO(image)) | ||
transforms = torchvision.transforms.Compose([ | ||
torchvision.transforms.Resize(224), | ||
torchvision.transforms.ToTensor(), | ||
torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) | ||
]) | ||
image = transforms(image) | ||
image = image.to(self._device) | ||
prediction = self._model(image.unsqueeze(0)) | ||
return {"prediction": prediction.argmax().item()} | ||
|
||
|
||
# run on a cloud machine | ||
compute = L.CloudCompute("cpu") | ||
worker = YourComponent(cloud_compute=compute) | ||
app = L.LightningApp(worker) | ||
component = PyTorchServer( | ||
input_type=Image, output_type=Number, cloud_compute=L.CloudCompute('gpu') | ||
) | ||
app = L.LightningApp(component) |
20 changes: 20 additions & 0 deletions
20
docs/source-app/levels/basic/hello_components/pl_multinode.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# app.py | ||
import lightning as L | ||
from lightning.app.components import PyTorchLightningMultiNode | ||
from lightning.pytorch.demos.boring_classes import BoringModel | ||
|
||
|
||
class LightningTrainerDistributed(L.LightningWork): | ||
@staticmethod | ||
def run(): | ||
model = BoringModel() | ||
trainer = L.Trainer(max_epochs=10, strategy="ddp") | ||
trainer.fit(model) | ||
|
||
# 8 GPU: (2 nodes of 4 x v100) | ||
component = PyTorchLightningMultiNode( | ||
LightningTrainerDistributed, | ||
num_nodes=2, | ||
cloud_compute=L.CloudCompute("gpu-fast-multi"), # 4 x v100 | ||
) | ||
app = L.LightningApp(component) |
75 changes: 53 additions & 22 deletions
75
docs/source-app/levels/basic/hello_components/pt_multinode.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,30 +1,61 @@ | ||
# !pip install torch | ||
# app.py | ||
# ! pip install torch | ||
import lightning as L | ||
from lightning.app.components import MultiNode | ||
import torch | ||
from torch.nn.parallel.distributed import DistributedDataParallel | ||
|
||
class MultiNodePytorchComponent(L.LightningWork): | ||
def run( | ||
self, | ||
main_address: str, | ||
main_port: int, | ||
node_rank: int, | ||
world_size: int, | ||
): | ||
# this machine creates a group of processes and registers to the main node | ||
print(f"Init process group: {main_address=}, {main_port=}, {world_size=}, {node_rank=}") | ||
|
||
def distributed_train(local_rank: int, main_address: str, main_port: int, num_nodes: int, node_rank: int, nprocs: int): | ||
# 1. SET UP DISTRIBUTED ENVIRONMENT | ||
global_rank = local_rank + node_rank * nprocs | ||
world_size = num_nodes * nprocs | ||
|
||
if torch.distributed.is_available() and not torch.distributed.is_initialized(): | ||
torch.distributed.init_process_group( | ||
backend="gloo", | ||
init_method=f"tcp://{main_address}:{main_port}", | ||
"nccl" if torch.cuda.is_available() else "gloo", | ||
rank=global_rank, | ||
world_size=world_size, | ||
rank=node_rank | ||
init_method=f"tcp://{main_address}:{main_port}", | ||
) | ||
|
||
# 2. PREPARE DISTRIBUTED MODEL | ||
model = torch.nn.Linear(32, 2) | ||
device = torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else torch.device("cpu") | ||
device_ids = device if torch.cuda.is_available() else None | ||
model = DistributedDataParallel(model, device_ids=device_ids).to(device) | ||
|
||
# 3. SETUP LOSS AND OPTIMIZER | ||
criterion = torch.nn.MSELoss() | ||
optimizer = torch.optim.SGD(model.parameters(), lr=0.01) | ||
|
||
# 4. TRAIN THE MODEL FOR 50 STEPS | ||
for step in range(50): | ||
model.zero_grad() | ||
x = torch.randn(64, 32).to(device) | ||
output = model(x) | ||
loss = criterion(output, torch.ones_like(output)) | ||
print(f"global_rank: {global_rank} step: {step} loss: {loss}") | ||
loss.backward() | ||
optimizer.step() | ||
|
||
# 5. VERIFY ALL COPIES OF THE MODEL HAVE THE SAME WEIGHTS AT END OF TRAINING | ||
weight = model.module.weight.clone() | ||
torch.distributed.all_reduce(weight) | ||
assert torch.equal(model.module.weight, weight / world_size) | ||
|
||
print("Multi Node Distributed Training Done!") | ||
|
||
class PyTorchDistributed(L.LightningWork): | ||
def run(self, main_address: str, main_port: int, num_nodes: int, node_rank: int): | ||
nprocs = torch.cuda.device_count() if torch.cuda.is_available() else 1 | ||
torch.multiprocessing.spawn( | ||
distributed_train, | ||
args=(main_address, main_port, num_nodes, node_rank, nprocs), | ||
nprocs=nprocs | ||
) | ||
for step in range(10000): | ||
gathered = [torch.zeros(1) for _ in range(world_size)] | ||
torch.distributed.all_gather(gathered, torch.tensor([node_rank]).float()) | ||
print(f'step: {step}, tensor: {gathered}') | ||
|
||
# gpu-multi-fast has 4 GPUs x 8 nodes = 32 GPUs | ||
component = MultiNodePytorchComponent(cloud_compute=L.CloudCompute("gpu-multi-fast")) | ||
component = MultiNode(component, nodes=8) | ||
|
||
# 32 GPUs: (8 nodes x 4 V100) | ||
compute = L.CloudCompute("gpu-fast-multi") # 4xV100 | ||
component = MultiNode(PyTorchDistributed, num_nodes=8, cloud_compute=compute) | ||
app = L.LightningApp(component) |
13 changes: 13 additions & 0 deletions
13
docs/source-app/levels/basic/hello_components/run_ptl_script.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# app.py | ||
# !curl https://bit.ly/demoLightningScriptpy -o pl_boring_script.py | ||
import lightning as L | ||
from lightning.app.components.training import LightningTrainerScript | ||
|
||
# run script that trains PyTorch with the Lightning Trainer | ||
model_script = 'pl_boring_script.py' | ||
component = LightningTrainerScript( | ||
model_script, | ||
num_nodes=1, | ||
cloud_compute=L.CloudCompute("gpu") | ||
) | ||
app = L.LightningApp(component) |
15 changes: 0 additions & 15 deletions
15
docs/source-app/levels/basic/hello_components/run_script.py
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.