diff --git a/docs/source-app/api_reference/components.rst b/docs/source-app/api_reference/components.rst index 828a78ea89db1..0bcefeb1c8594 100644 --- a/docs/source-app/api_reference/components.rst +++ b/docs/source-app/api_reference/components.rst @@ -20,6 +20,6 @@ ___________________ ~python.popen.PopenPythonScript ~python.tracer.TracerPythonScript - ~training.LightningTrainingComponent + ~training.LightningTrainerScript ~serve.gradio.ServeGradio ~serve.serve.ModelInferenceAPI diff --git a/docs/source-app/api_references.rst b/docs/source-app/api_references.rst index 340d500ef8ef5..30e0ade3a25ad 100644 --- a/docs/source-app/api_references.rst +++ b/docs/source-app/api_references.rst @@ -34,7 +34,7 @@ ___________________ ~python.popen.PopenPythonScript ~python.tracer.TracerPythonScript - ~training.LightningTrainingComponent + ~training.LightningTrainerScript ~serve.gradio.ServeGradio ~serve.serve.ModelInferenceAPI diff --git a/docs/source-app/index.rst b/docs/source-app/index.rst index 8f038b78f5a82..ddd39cdc63e73 100644 --- a/docs/source-app/index.rst +++ b/docs/source-app/index.rst @@ -6,7 +6,7 @@ ####################### Welcome to ⚡ Lightning ####################### -Build models and full stack AI apps ⚡ *Lightning fast*. +Build models, ML components and full stack AI apps ⚡ *Lightning fast*. .. join_slack:: :align: left @@ -22,23 +22,22 @@ Build models and full stack AI apps ⚡ *Lightning fast*. .. app_card:: :title: Develop and Train - :description: Train an LLM (64 GPUs) + :description: Train a model (32 GPUs) :width: 280 :image: https://lightning-ai-docs.s3.amazonaws.com/develop_n_train_v1.jpg - :preview: https://lightning.ai - :deploy: https://lightning.ai - :target: https://apple.com - :tags: Model + :target: levels/basic/real_lightning_component_implementations.html#ex-pytorch-lightning-trainer + :preview: levels/basic/real_lightning_component_implementations.html#ex-pytorch-lightning-trainer + :tags: Training .. 
app_card:: :title: Serve and deploy - :description: Production-ready stable diffusion server (<2s latency) + :description: Production diffusion server (<2s latency) :width: 280 :app_id: HvUwbEG90E :image: https://lightning-ai-docs.s3.amazonaws.com/serve_n_deploy_v1.jpg :deploy: https://lightning.ai :target: https://01gbx4m78rbkpczdf5cpz2hpbh.litng-ai-03.litng.ai/root.api_component/ - :tags: App + :tags: Serving .. app_card:: :title: Scale and build a product @@ -47,7 +46,7 @@ Build models and full stack AI apps ⚡ *Lightning fast*. :app_id: HvUwbEG90E :image: https://lightning-ai-docs.s3.amazonaws.com/scale_n_build_v1.jpg :target: https://lightning.ai/muse - :tags: App + :tags: AI App .. raw:: html @@ -56,13 +55,14 @@ Build models and full stack AI apps ⚡ *Lightning fast*. ---- -**************************************** -Build self-contained, modular components -**************************************** -Lightning is a hyper-minimalistic framework designed to maximize expressivity that -enables you to build modular, self-contained components and plug them into your existing workflows. -A Lightning component organizes arbitrary code so it can run on the cloud. A component can train a model, deploy, or even host a web UI. -The component manages its own infrastructure, cloud costs, networking and more, so you can focus on application logic and not engineering. +******************************** +Build self-contained components +******************************** +Use Lightning, the hyper-minimalistic framework, to build machine learning components that can plug into existing ML workflows. +A Lightning component organizes arbitrary code to run on the cloud, manage its own infrastructure, cloud costs, networking, and more. +Focus on component logic and not engineering. + +Use components on their own, or compose them into full-stack AI apps with our next-generation Lightning orchestrator. .. 
raw:: html @@ -78,6 +78,10 @@ The component manages its own infrastructure, cloud costs, networking and more, .. include:: ./levels/basic/hero_components.rst +| + +Components run the same on the cloud and locally on your choice of hardware. + .. lit_tabs:: :code_files: landing_app_run.bash :highlights: 5 diff --git a/docs/source-app/landing_app_run.bash b/docs/source-app/landing_app_run.bash index ce52902d1c5fd..fd306212e1e1c 100644 --- a/docs/source-app/landing_app_run.bash +++ b/docs/source-app/landing_app_run.bash @@ -1,5 +1,5 @@ # install lightning pip install lightning -# run the app -lightning run app app.py --cloud +# run the app on the --cloud (--setup installs deps automatically) +lightning run app app.py --setup --cloud diff --git a/docs/source-app/levels/basic/build_a_lightning_component.rst b/docs/source-app/levels/basic/build_a_lightning_component.rst index da9107e86c649..39522614fe03b 100644 --- a/docs/source-app/levels/basic/build_a_lightning_component.rst +++ b/docs/source-app/levels/basic/build_a_lightning_component.rst @@ -14,13 +14,12 @@ Level 1: Package code in a lightning component ********************************* Why you need Lightning components ********************************* -A Lightning component organizes a piece of code into a self-contained, modular component that -can be integrated into your existing workflows or assembled to form a Lightning app. -A Lightning component manages its own infrastructure, auto-scaling, cost management, and more, so you -can focus on the program logic and not the cloud engineering. +A Lightning component is a self-contained, modular machine-learning component +that you can plug into your existing ML workflows. A Lightning component organizes arbitrary code so it can run on the cloud, manages +its own infrastructure, cloud costs, networking and more. Connect components using your current workflow management tools or +our `next-generation reactive orchestrator <../intermediate/index.html>`_. 
-Components run on the cloud or your laptop without code changes 🤯🤯. Connect components using your current workflow management tools or use -Lightning apps to build powerful sequential AND reactive workflows. +Components run on the cloud or your laptop without code changes 🤯🤯. .. raw:: html @@ -109,7 +108,7 @@ First, install Lightning. ************************** Build your first component ************************** -A Lightning component organizes Python code into a self-contained module so it can run on the cloud. +A Lightning component organizes arbitrary code so it can run on the cloud, manages its own infrastructure, cloud costs, networking and more. **Run one of these components!** @@ -119,12 +118,7 @@ A Lightning component organizes Python code into a self-contained module so it c Components run the same on the cloud and locally on your choice of hardware. -.. lit_tabs:: - :titles: Lightning Cloud (fully-managed); Your AWS account; Your own hardware - :code_files: ./hello_components/code_run_cloud.bash; ./hello_components/code_run_cloud_yours.bash; ./hello_components/code_run_local.bash - :tab_rows: 4 - :highlights: ; 5; 0 - :height: 195px +.. 
include:: /levels/basic/hero_run_setup.rst ---- diff --git a/docs/source-app/levels/basic/hello_components/code_run_cloud_setup.bash b/docs/source-app/levels/basic/hello_components/code_run_cloud_setup.bash new file mode 100644 index 0000000000000..ed69b303c279a --- /dev/null +++ b/docs/source-app/levels/basic/hello_components/code_run_cloud_setup.bash @@ -0,0 +1 @@ +lightning run app app.py --setup --cloud diff --git a/docs/source-app/levels/basic/hello_components/code_run_cloud_yours_setup.bash b/docs/source-app/levels/basic/hello_components/code_run_cloud_yours_setup.bash new file mode 100644 index 0000000000000..f2ff22d5551d4 --- /dev/null +++ b/docs/source-app/levels/basic/hello_components/code_run_cloud_yours_setup.bash @@ -0,0 +1,5 @@ +# first create a cluster (creation could take ~30 minutes) +lightning create cluster pikachu --provider aws --role-arn arn:aws:iam::1234567890:role/lai-byoc + +# run on that cluster +lightning run app app.py --setup --cloud pikachu diff --git a/docs/source-app/levels/basic/hello_components/code_run_local_setup.bash b/docs/source-app/levels/basic/hello_components/code_run_local_setup.bash new file mode 100644 index 0000000000000..11cc8a4c5a284 --- /dev/null +++ b/docs/source-app/levels/basic/hello_components/code_run_local_setup.bash @@ -0,0 +1 @@ +lightning run app app.py --setup diff --git a/docs/source-app/levels/basic/hello_components/deploy_model.py b/docs/source-app/levels/basic/hello_components/deploy_model.py index 75c65bad900b3..9847db33bdfdc 100644 --- a/docs/source-app/levels/basic/hello_components/deploy_model.py +++ b/docs/source-app/levels/basic/hello_components/deploy_model.py @@ -1,15 +1,31 @@ -# A hello world component -# app.py +# !pip install torchvision import lightning as L +from lightning.app.components.serve import PythonServer, Image, Number +import base64, io, torchvision, torch +from PIL import Image as PILImage -class YourComponent(L.LightningWork): - def run(self): - print('RUN ANY PYTHON CODE HERE') 
+class PyTorchServer(PythonServer): + def setup(self): + self._model = torchvision.models.resnet18(pretrained=True) + self._device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + self._model.to(self._device) + def predict(self, request): + image = base64.b64decode(request.image.encode("utf-8")) + image = PILImage.open(io.BytesIO(image)) + transforms = torchvision.transforms.Compose([ + torchvision.transforms.Resize(224), + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]) + image = transforms(image) + image = image.to(self._device) + prediction = self._model(image.unsqueeze(0)) + return {"prediction": prediction.argmax().item()} -# run on a cloud machine -compute = L.CloudCompute("cpu") -worker = YourComponent(cloud_compute=compute) -app = L.LightningApp(worker) +component = PyTorchServer( + input_type=Image, output_type=Number, cloud_compute=L.CloudCompute('gpu') +) +app = L.LightningApp(component) diff --git a/docs/source-app/levels/basic/hello_components/pl_multinode.py b/docs/source-app/levels/basic/hello_components/pl_multinode.py new file mode 100644 index 0000000000000..0ba033e0d86c0 --- /dev/null +++ b/docs/source-app/levels/basic/hello_components/pl_multinode.py @@ -0,0 +1,20 @@ +# app.py +import lightning as L +from lightning.app.components import PyTorchLightningMultiNode +from lightning.pytorch.demos.boring_classes import BoringModel + + +class LightningTrainerDistributed(L.LightningWork): + @staticmethod + def run(): + model = BoringModel() + trainer = L.Trainer(max_epochs=10, strategy="ddp") + trainer.fit(model) + +# 8 GPU: (2 nodes of 4 x v100) +component = PyTorchLightningMultiNode( + LightningTrainerDistributed, + num_nodes=2, + cloud_compute=L.CloudCompute("gpu-fast-multi"), # 4 x v100 +) +app = L.LightningApp(component) diff --git a/docs/source-app/levels/basic/hello_components/pt_multinode.py b/docs/source-app/levels/basic/hello_components/pt_multinode.py 
index b8ebc0bc04126..585b85540bf61 100644 --- a/docs/source-app/levels/basic/hello_components/pt_multinode.py +++ b/docs/source-app/levels/basic/hello_components/pt_multinode.py @@ -1,30 +1,61 @@ -# !pip install torch +# app.py +# ! pip install torch import lightning as L from lightning.app.components import MultiNode import torch +from torch.nn.parallel.distributed import DistributedDataParallel -class MultiNodePytorchComponent(L.LightningWork): - def run( - self, - main_address: str, - main_port: int, - node_rank: int, - world_size: int, - ): - # this machine creates a group of processes and registers to the main node - print(f"Init process group: {main_address=}, {main_port=}, {world_size=}, {node_rank=}") + +def distributed_train(local_rank: int, main_address: str, main_port: int, num_nodes: int, node_rank: int, nprocs: int): + # 1. SET UP DISTRIBUTED ENVIRONMENT + global_rank = local_rank + node_rank * nprocs + world_size = num_nodes * nprocs + + if torch.distributed.is_available() and not torch.distributed.is_initialized(): torch.distributed.init_process_group( - backend="gloo", - init_method=f"tcp://{main_address}:{main_port}", + "nccl" if torch.cuda.is_available() else "gloo", + rank=global_rank, world_size=world_size, - rank=node_rank + init_method=f"tcp://{main_address}:{main_port}", + ) + + # 2. PREPARE DISTRIBUTED MODEL + model = torch.nn.Linear(32, 2) + device = torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else torch.device("cpu") + device_ids = device if torch.cuda.is_available() else None + model = DistributedDataParallel(model, device_ids=device_ids).to(device) + + # 3. 
SETUP LOSS AND OPTIMIZER + criterion = torch.nn.MSELoss() + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + + # 4. TRAIN THE MODEL FOR 50 STEPS + for step in range(50): + model.zero_grad() + x = torch.randn(64, 32).to(device) + output = model(x) + loss = criterion(output, torch.ones_like(output)) + print(f"global_rank: {global_rank} step: {step} loss: {loss}") + loss.backward() + optimizer.step() + + # 5. VERIFY ALL COPIES OF THE MODEL HAVE THE SAME WEIGHTS AT END OF TRAINING + weight = model.module.weight.clone() + torch.distributed.all_reduce(weight) + assert torch.equal(model.module.weight, weight / world_size) + + print("Multi Node Distributed Training Done!") + +class PyTorchDistributed(L.LightningWork): + def run(self, main_address: str, main_port: int, num_nodes: int, node_rank: int): + nprocs = torch.cuda.device_count() if torch.cuda.is_available() else 1 + torch.multiprocessing.spawn( + distributed_train, + args=(main_address, main_port, num_nodes, node_rank, nprocs), + nprocs=nprocs ) - for step in range(10000): - gathered = [torch.zeros(1) for _ in range(world_size)] - torch.distributed.all_gather(gathered, torch.tensor([node_rank]).float()) - print(f'step: {step}, tensor: {gathered}') - -# gpu-multi-fast has 4 GPUs x 8 nodes = 32 GPUs -component = MultiNodePytorchComponent(cloud_compute=L.CloudCompute("gpu-multi-fast")) -component = MultiNode(component, nodes=8) + +# 32 GPUs: (8 nodes x 4 V100) +compute = L.CloudCompute("gpu-fast-multi") # 4xV100 +component = MultiNode(PyTorchDistributed, num_nodes=8, cloud_compute=compute) app = L.LightningApp(component) diff --git a/docs/source-app/levels/basic/hello_components/run_ptl_script.py b/docs/source-app/levels/basic/hello_components/run_ptl_script.py new file mode 100644 index 0000000000000..84a86ec00d470 --- /dev/null +++ b/docs/source-app/levels/basic/hello_components/run_ptl_script.py @@ -0,0 +1,13 @@ +# app.py +# !curl https://bit.ly/demoLightningScriptpy -o pl_boring_script.py +import 
lightning as L +from lightning.app.components.training import LightningTrainerScript + +# run script that trains PyTorch with the Lightning Trainer +model_script = 'pl_boring_script.py' +component = LightningTrainerScript( + model_script, + num_nodes=1, + cloud_compute=L.CloudCompute("gpu") +) +app = L.LightningApp(component) diff --git a/docs/source-app/levels/basic/hello_components/run_script.py b/docs/source-app/levels/basic/hello_components/run_script.py deleted file mode 100644 index ac3ba777b5b19..0000000000000 --- a/docs/source-app/levels/basic/hello_components/run_script.py +++ /dev/null @@ -1,15 +0,0 @@ -# EXAMPLE COMPONENT: RUN A SCRIPT -# app.py -import lightning as L - - -class YourComponent(L.LightningWork): - def run(self): - print('RUN ANY PYTHON CODE HERE') - - - -# run on a cloud machine -compute = L.CloudCompute("cpu") -worker = YourComponent(cloud_compute=compute) -app = L.LightningApp(worker) diff --git a/docs/source-app/levels/basic/hello_components/build_demo.py b/docs/source-app/levels/basic/hello_components/streamlit_demo.py similarity index 62% rename from docs/source-app/levels/basic/hello_components/build_demo.py rename to docs/source-app/levels/basic/hello_components/streamlit_demo.py index 6f6b5951b772e..d5fb3b0b4941b 100644 --- a/docs/source-app/levels/basic/hello_components/build_demo.py +++ b/docs/source-app/levels/basic/hello_components/streamlit_demo.py @@ -1,7 +1,6 @@ # app.py # !pip install streamlit omegaconf scipy # !pip install torch - import lightning as L import torch from io import BytesIO @@ -10,21 +9,11 @@ import streamlit as st -class LitStreamlit(L.app.components.ServeStreamlit): +class StreamlitApp(L.app.components.ServeStreamlit): def build_model(self): sample_rate = 48000 - - model, _ = torch.hub.load( - repo_or_dir='snakers4/silero-models', - model='silero_tts', - speaker="v3_en", - ) - - return partial( - model.apply_tts, - sample_rate=sample_rate, - speaker="en_0", - ), sample_rate + model, _ = 
torch.hub.load('snakers4/silero-models', model='silero_tts',speaker="v3_en") + return partial(model.apply_tts, sample_rate=sample_rate, speaker="en_0"), sample_rate def render(self): st.title("Text To Speech") @@ -38,4 +27,4 @@ def render(self): audio.seek(0) st.audio(audio) -app = L.LightningApp(LitStreamlit()) +app = L.LightningApp(StreamlitApp()) diff --git a/docs/source-app/levels/basic/hello_components/train_pytorch.py b/docs/source-app/levels/basic/hello_components/train_pytorch.py index fe33bd77d75a7..7bfc3b54b1352 100644 --- a/docs/source-app/levels/basic/hello_components/train_pytorch.py +++ b/docs/source-app/levels/basic/hello_components/train_pytorch.py @@ -1,4 +1,5 @@ # app.py +# ! pip install torch import lightning as L import torch @@ -23,4 +24,5 @@ def run(self): optimizer.step() compute = L.CloudCompute('gpu') -app = L.LightningApp(PyTorchComponent(cloud_compute=compute)) +componet = PyTorchComponent(cloud_compute=compute) +app = L.LightningApp(componet) diff --git a/docs/source-app/levels/basic/hello_components/xgboost.py b/docs/source-app/levels/basic/hello_components/xgboost.py index a180dfd9296f2..0cedda2aa45b9 100644 --- a/docs/source-app/levels/basic/hello_components/xgboost.py +++ b/docs/source-app/levels/basic/hello_components/xgboost.py @@ -1,6 +1,5 @@ # app.py # !pip install sklearn xgboost - import lightning as L from sklearn import datasets from sklearn.model_selection import train_test_split diff --git a/docs/source-app/levels/basic/hero_components.rst b/docs/source-app/levels/basic/hero_components.rst index cb50bca86d9a5..6bb8947a1a9cd 100644 --- a/docs/source-app/levels/basic/hero_components.rst +++ b/docs/source-app/levels/basic/hero_components.rst @@ -1,7 +1,7 @@ .. 
lit_tabs:: - :titles: Hello world; Hello GPU world; Train PyTorch (cloud GPU); Train PyTorch (32 cloud GPUs); Train PyTorch + Lightning Trainer (cloud GPU); Deploy a model on cloud GPUs; Run a model script; XGBoost; XGBoost (GPU accelerated); Build a streamlit demo - :code_files: /levels/basic/hello_components/hello_world.py; /levels/basic/hello_components/hello_world_gpu.py; /levels/basic/hello_components/train_pytorch.py; /levels/basic/hello_components/pt_multinode.py; /levels/basic/hello_components/train_ptl.py; /levels/basic/hello_components/deploy_model.py; /levels/basic/hello_components/run_script.py; /levels/basic/hello_components/xgboost.py; /levels/basic/hello_components/xgboost_gpu.py; /levels/basic/hello_components/build_demo.py - :highlights: 7; 10, 11; 3, 6;3; 3;4;5; 6, 9; 15, 20, 21; 10, 14, 29 + :titles: Hello world; Hello GPU world; PyTorch & ⚡⚡⚡ Trainer (1+ cloud GPUs); Train PyTorch (cloud GPU); Train PyTorch (32 cloud GPUs); Deploy a model on cloud GPUs; Run a model script; XGBoost; Streamlit demo + :code_files: /levels/basic/hello_components/hello_world.py; /levels/basic/hello_components/hello_world_gpu.py; /levels/basic/hello_components/pl_multinode.py; /levels/basic/hello_components/train_pytorch.py; /levels/basic/hello_components/pt_multinode.py; /levels/basic/hello_components/deploy_model.py; /levels/basic/hello_components/run_ptl_script.py; /levels/basic/hello_components/xgboost.py; /levels/basic/hello_components/streamlit_demo.py + :highlights: 7; 10, 11; 10-12, 17, 18; 4, 8, 12, 18-19, 26; 5, 10, 22, 28, 32, 42, 58-60; 3, 11-13, 25, 30; 7, 10; 15, 21; 9, 15, 24 :app_id: abc123 - :tab_rows: 4 - :height: 550px + :tab_rows: 3 + :height: 620px diff --git a/docs/source-app/levels/basic/hero_run.rst b/docs/source-app/levels/basic/hero_run.rst new file mode 100644 index 0000000000000..4fdabf1f1c08c --- /dev/null +++ b/docs/source-app/levels/basic/hero_run.rst @@ -0,0 +1,6 @@ +.. 
lit_tabs:: + :titles: Lightning Cloud (fully-managed); Your AWS account; Your own hardware + :code_files: /levels/basic/hello_components/code_run_cloud.bash; /levels/basic/hello_components/code_run_cloud_yours.bash; /levels/basic/hello_components/code_run_local.bash + :tab_rows: 4 + :highlights: ; 5; 0 + :height: 195px diff --git a/docs/source-app/levels/basic/hero_run_setup.rst b/docs/source-app/levels/basic/hero_run_setup.rst new file mode 100644 index 0000000000000..2200f177b6fee --- /dev/null +++ b/docs/source-app/levels/basic/hero_run_setup.rst @@ -0,0 +1,6 @@ +.. lit_tabs:: + :titles: Lightning Cloud (fully-managed); Your AWS account; Your own hardware + :code_files: /levels/basic/hello_components/code_run_cloud_setup.bash; /levels/basic/hello_components/code_run_cloud_yours_setup.bash; /levels/basic/hello_components/code_run_local_setup.bash + :tab_rows: 4 + :highlights: ; 5; 0 + :height: 195px diff --git a/docs/source-app/levels/basic/index.rst b/docs/source-app/levels/basic/index.rst index d280a57feaa8b..05075bfab785b 100644 --- a/docs/source-app/levels/basic/index.rst +++ b/docs/source-app/levels/basic/index.rst @@ -11,7 +11,10 @@ ############ Basic skills ############ -Learn to package your code into Lightning components. Every new user should start here. +Learn to package your code into Lightning components which can plug into your existing ML workflows. + +A Lightning component organizes arbitrary code so it can run on the cloud, manages +its own infrastructure, cloud costs, networking and more. .. join_slack:: :align: left @@ -27,7 +30,7 @@ Learn to package your code into Lightning components. Every new user should star .. displayitem:: :header: Level 1: Package code in a Lightning component - :description: Learn to build a Lightning component which you can use in existing workflows or turn into a Lightning app. + :description: Learn to package your code into Lightning components which can plug into your existing ML workflows. 
:button_link: build_a_lightning_component.html :col_css: col-md-6 :height: 170 diff --git a/docs/source-app/levels/basic/real_lightning_component_implementations.rst b/docs/source-app/levels/basic/real_lightning_component_implementations.rst index b0c9f16273915..c660c1679ac72 100644 --- a/docs/source-app/levels/basic/real_lightning_component_implementations.rst +++ b/docs/source-app/levels/basic/real_lightning_component_implementations.rst @@ -7,35 +7,56 @@ Level 2: Explore real component implementations ---- -*************************** -Debug a lightning component -*************************** -Before we dive into real component implementations, we'll learn to debug a Lightning component. +*********************** +Real component examples +*********************** +Use this guide to understand what is happening in each type of component. +These are a few prototypical components. Since each component organizes +Python, you can build virtually infinite components for any use-case +you can think of. -To stop the code execution at a particular line, enable a breakpoint -with **L.pdb.set_trace()**: +---- + +******************************* +Ex: PyTorch + Lightning Trainer +******************************* +This example shows how to train PyTorch with the Lightning trainer on your machine +or cloud GPUs without code changes. .. lit_tabs:: - :descriptions: Toy app; Add a breakpoint. When the program runs, it will stop at this line. - :code_files: ./scripts/toy_app_1_component.py; ./scripts/toy_app_1_component_pdb.py - :highlights: ; 7 + :descriptions: import Lightning; We're using a demo LightningModule; Move your training code here (usually your main.py); Pass your component to the multi-node executor (it works on CPU or single GPUs also); Select the number of machines (nodes). Here we choose 2.; Choose from over 15+ machine types. This one has 4 v100 GPUs.; Initialize the App object that executes the component logic. 
+ :code_files: /levels/basic/hello_components/pl_multinode.py; /levels/basic/hello_components/pl_multinode.py; /levels/basic/hello_components/pl_multinode.py; /levels/basic/hello_components/pl_multinode.py; /levels/basic/hello_components/pl_multinode.py; /levels/basic/hello_components/pl_multinode.py; /levels/basic/hello_components/pl_multinode.py; + :highlights: 2; 4; 10-12; 15-18; 17; 18; 20 :app_id: abc123 - :tab_rows: 3 - :height: 350px + :tab_rows: 5 + :height: 420px | -.. note:: +Run the component on the cloud: - Direct python pdb support is work in progress and open to contributions +.. include:: /levels/basic/hero_run.rst ---- -***************************** -Ex 1: Train PyTorch component -***************************** -TODO: +********************************* +Ex: Deploy a PyTorch API endpoint +********************************* +This example shows how to deploy PyTorch and create an API. + +.. lit_tabs:: + :descriptions: Shortcut to list dependencies without a requirements.txt file.; Import one of our serving components (high-performance ones are available on the enterprise tiers); Define the setup function to load your favorite pretrained models and do any kind of pre-processing.; Define the predict function which is called when the endpoint is hit.; Initialize the server and define the type of cloud machine to use. + :code_files: /levels/basic/hello_components/deploy_model.py; /levels/basic/hello_components/deploy_model.py; /levels/basic/hello_components/deploy_model.py; /levels/basic/hello_components/deploy_model.py; /levels/basic/hello_components/deploy_model.py; + :highlights: 1; 3; 10-12; 15-25; 28-30 + :app_id: abc123 + :tab_rows: 4 + :height: 620px + +| + +Run the component on the cloud: +.. 
include:: /levels/basic/hero_run.rst ---- diff --git a/examples/app_multi_node/train_lt_script.py b/examples/app_multi_node/train_lt_script.py index b4073846b4eb4..d2254e19daac0 100644 --- a/examples/app_multi_node/train_lt_script.py +++ b/examples/app_multi_node/train_lt_script.py @@ -1,10 +1,10 @@ import lightning as L -from lightning.app.components import LightningTrainingComponent +from lightning.app.components import LightningTrainerScript from lightning.app.utilities.packaging.cloud_compute import CloudCompute # Run over 2 nodes of 4 x V100 app = L.LightningApp( - LightningTrainingComponent( + LightningTrainerScript( "pl_boring_script.py", num_nodes=2, cloud_compute=CloudCompute("gpu-fast-multi"), diff --git a/src/lightning_app/CHANGELOG.md b/src/lightning_app/CHANGELOG.md index e2c03313586ba..7e97e8659c1ba 100644 --- a/src/lightning_app/CHANGELOG.md +++ b/src/lightning_app/CHANGELOG.md @@ -152,7 +152,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Add support to see Lightning AI BYOC cluster logs ([#14334](https://github.com/Lightning-AI/lightning/pull/14334)) - Add support to run Lightning apps on Lightning AI BYOC clusters ([#13894](https://github.com/Lightning-AI/lightning/pull/13894)) - Add support for listing Lightning AI apps ([#13987](https://github.com/Lightning-AI/lightning/pull/13987)) -- Adds `LightningTrainingComponent`. `LightningTrainingComponent` orchestrates multi-node training in the cloud ([#13830](https://github.com/Lightning-AI/lightning/pull/13830)) +- Adds `LightningTrainerScript`. 
`LightningTrainerScript` orchestrates multi-node training in the cloud ([#13830](https://github.com/Lightning-AI/lightning/pull/13830)) - Add support for printing application logs using CLI `lightning show logs [components]` ([#13634](https://github.com/Lightning-AI/lightning/pull/13634)) - Add support for `Lightning API` through the `configure_api` hook on the Lightning Flow and the `Post`, `Get`, `Delete`, `Put` HttpMethods ([#13945](https://github.com/Lightning-AI/lightning/pull/13945)) - Added a warning when `configure_layout` returns URLs configured with http instead of https ([#14233](https://github.com/Lightning-AI/lightning/pull/14233)) diff --git a/src/lightning_app/components/__init__.py b/src/lightning_app/components/__init__.py index 918d4ba911875..ee52fb55670f2 100644 --- a/src/lightning_app/components/__init__.py +++ b/src/lightning_app/components/__init__.py @@ -12,7 +12,7 @@ from lightning_app.components.serve.python_server import Image, Number, PythonServer from lightning_app.components.serve.serve import ModelInferenceAPI from lightning_app.components.serve.streamlit import ServeStreamlit -from lightning_app.components.training import LightningTrainingComponent, PyTorchLightningScriptRunner +from lightning_app.components.training import LightningTrainerScript, PyTorchLightningScriptRunner __all__ = [ "DatabaseClient", @@ -28,7 +28,7 @@ "Number", "MultiNode", "LiteMultiNode", - "LightningTrainingComponent", + "LightningTrainerScript", "PyTorchLightningScriptRunner", "PyTorchSpawnMultiNode", "LightningTrainerMultiNode", diff --git a/src/lightning_app/components/training.py b/src/lightning_app/components/training.py index b93d04b52ec61..4618b5aa9e9cb 100644 --- a/src/lightning_app/components/training.py +++ b/src/lightning_app/components/training.py @@ -111,7 +111,7 @@ def is_running_in_cloud(self) -> bool: return "LIGHTNING_APP_STATE_URL" in os.environ -class LightningTrainingComponent(LightningFlow): +class LightningTrainerScript(LightningFlow): 
def __init__( self, script_path: str, @@ -127,11 +127,11 @@ def __init__( Example:: from lightning_app import LightningApp - from lightning_app.components.training import LightningTrainingComponent + from lightning_app.components.training import LightningTrainerScript from lightning_app.utilities.packaging.cloud_compute import CloudCompute app = LightningApp( - LightningTrainingComponent( + LightningTrainerScript( "train.py", num_nodes=2, cloud_compute=CloudCompute("gpu"),