Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[App] Multiprocessing-safe work pickling #15836

Merged
merged 32 commits into from
Dec 8, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
46871ed
safe work pickling
Nov 27, 2022
e40df25
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 27, 2022
fef035d
mypy
Nov 28, 2022
9fe8bfd
Update tests/tests_app/utilities/test_safe_pickle.py
Nov 28, 2022
b041d4e
changelog
Nov 28, 2022
2c77e46
Merge branch 'safe_pickle' of github.com:Lightning-AI/lightning into …
Nov 28, 2022
8732ef4
trying to fix ubuntu test
Nov 28, 2022
4a7dd7d
more typing fixes
Nov 28, 2022
5723995
trying to fix ubuntu test
Nov 28, 2022
22c1625
Update src/lightning_app/CHANGELOG.md
Nov 28, 2022
6a3fce3
Update src/lightning_app/utilities/safe_pickle.py
Nov 28, 2022
8ebc36d
Merge branch 'master' into safe_pickle
Nov 28, 2022
26003eb
Merge branch 'master' into safe_pickle
Nov 28, 2022
0443f4e
Merge branch 'master' into safe_pickle
Nov 28, 2022
8ac4974
Merge branch 'master' into safe_pickle
Dec 5, 2022
1333e9b
trim work args that has mp queue reference
Dec 5, 2022
1025aef
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 5, 2022
63f46ba
mypy
Dec 5, 2022
d06e564
Merge branch 'safe_pickle' of github.com:Lightning-AI/lightning into …
Dec 5, 2022
8c955a2
review
Dec 5, 2022
f16735f
Update src/lightning_app/utilities/safe_pickle.py
Dec 5, 2022
7e0aff7
Merge branch 'master' into safe_pickle
Dec 5, 2022
76b3ca6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 5, 2022
16a8742
Merge branch 'master' into safe_pickle
tchaton Dec 6, 2022
76e9992
Merge branch 'master' into safe_pickle
Borda Dec 6, 2022
63bafb6
Merge branch 'master' into safe_pickle
Dec 6, 2022
d6c553f
Merge branch 'master' into safe_pickle
Dec 6, 2022
6f20522
Merge branch 'master' into safe_pickle
akihironitta Dec 6, 2022
4ac4eaa
Merge branch 'master' into safe_pickle
Dec 7, 2022
52520b5
Merge branch 'master' into safe_pickle
Borda Dec 7, 2022
8f1b2b1
Merge branch 'master' into safe_pickle
Dec 7, 2022
1d41879
Merge branch 'master' into safe_pickle
Borda Dec 8, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions src/lightning_app/utilities/safe_pickle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import pickle
import sys
import types
from copy import deepcopy
from pathlib import Path

from lightning_app.core.work import LightningWork
from lightning_app.utilities.app_helpers import _LightningAppRef


def get_picklable_work(work: LightningWork):
    """Return a picklable copy of ``work``.

    Pickling a LightningWork instance fails if done from the work process
    itself. This function is safe to call from the work process within both MultiprocessRuntime
    and Cloud.

    Note: This function modifies the module information of the work object. Specifically, it injects
    the relative module path into the __module__ attribute of the work object. If the object is not
    importable from the CWD, then the pickle load will fail.

    Example:
        for a directory structure like below and the work class is defined in the app.py where
        the app.py is the entrypoint for the app, it will inject `foo.bar.app` into the
        __module__ attribute

        └── foo
            ├── __init__.py
            └── bar
                └── app.py

    Args:
        work: The work to pickle. Only its ``name`` is used, to look up the matching work
            in the current app reference.

    Returns:
        A deep copy of the matching work from the app reference, with ``_backend`` set to ``None``.

    Raises:
        ValueError: If no work with the same name is found in the app reference, or if the
            entrypoint file is not located under the current working directory
            (raised by ``Path.relative_to``).
    """

    # pickling the user work class - pickling `self` will cause issue because the
    # work is running under a process, in local
    for w in _LightningAppRef.get_current().works:
        if work.name == w.name:
            # copying the work object to avoid modifying the original work object
            copied_work = deepcopy(w)
            break
    else:
        raise ValueError(f"Work with name {work.name} not found in the app references")

    # if work is defined in the __main__ or __mp__main__ (the entrypoint file for `lightning run app` command),
    # pickling/unpickling will fail, hence we need patch the module information
    if "_main__" in copied_work.__class__.__module__:
        work_class_module = sys.modules[copied_work.__class__.__module__]
        relative_path = Path(work_class_module.__file__).relative_to(Path.cwd())
        # Drop only the file suffix and join the path components with dots. A plain
        # `str.replace(".py", "")` would also strip ".py" from the middle of a
        # directory or file name and yield a wrong module name.
        expected_module_name = ".".join(relative_path.with_suffix("").parts)
        # TODO @sherin: also check if the module is importable from the CWD
        fake_module = types.ModuleType(expected_module_name)
        fake_module.__dict__.update(work_class_module.__dict__)
        fake_module.__dict__["__name__"] = expected_module_name
        sys.modules[expected_module_name] = fake_module
        for k, v in fake_module.__dict__.items():
            if k.startswith("__"):
                continue
            # `__module__` can be missing or a non-string (e.g. None); the substring
            # check below is only meaningful for strings.
            if not isinstance(getattr(v, "__module__", None), str):
                continue
            if "_main__" in v.__module__:
                try:
                    v.__module__ = expected_module_name
                except (AttributeError, TypeError):
                    # Some objects expose a read-only `__module__`; one unpatchable
                    # module global should not abort pickling of the work.
                    continue

    # removing reference to backend; backend is not picklable because of openapi client reference in it
    copied_work._backend = None
    return copied_work


def dump(work: LightningWork, f):
    """Pickle a picklable copy of ``work`` into the binary file object ``f``.

    The copy is produced by ``get_picklable_work``; ``work`` itself is not
    passed to ``pickle.dump``.
    """
    pickle.dump(get_picklable_work(work), f)


def load(f):
    """Unpickle and return the object stored in the binary file object ``f``.

    The current working directory is put on ``sys.path`` for the duration of
    the load and removed again afterwards, including when unpickling fails.

    Note: ``pickle.load`` can execute arbitrary code while loading; only use
    this on files from a trusted source.

    Args:
        f: A binary file object positioned at the start of a pickle stream.

    Returns:
        The unpickled object.
    """
    cwd = str(Path.cwd())
    # inject current working directory to sys.path
    sys.path.insert(1, cwd)
    try:
        return pickle.load(f)
    finally:
        # Always undo the injection, even if unpickling raised. Remove by value
        # rather than by index: imports executed during unpickling may have
        # shifted the entries, and `pop(1)` would then drop an unrelated path.
        try:
            sys.path.remove(cwd)
        except ValueError:
            # The entry was already removed while unpickling; nothing to undo.
            pass
10 changes: 10 additions & 0 deletions tests/tests_app/utilities/test_safe_pickle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import subprocess
from pathlib import Path


def test_safe_pickle_app():
    """Run the safe-pickle sample app through the CLI and check its exit message."""
    test_dir = Path(__file__).parent / "testdata"
    # `subprocess.run` waits for the CLI process to terminate and closes the
    # stdout pipe, so no child process or file descriptor is left dangling.
    completed = subprocess.run(
        ["lightning", "run", "app", "safe_pickle_app.py", "--open-ui", "false"],
        stdout=subprocess.PIPE,
        cwd=test_dir,
    )
    assert "Exiting the pickling app successfully!!" in completed.stdout.decode("UTF-8")
hhsecond marked this conversation as resolved.
Show resolved Hide resolved
63 changes: 63 additions & 0 deletions tests/tests_app/utilities/testdata/safe_pickle_app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""
This app tests three things
1. Can a work pickle `self`
2. Can the pickled work be unpickled in another work
3. Can the pickled work be unpickled from a script
"""

import subprocess
from pathlib import Path

from lightning_app import LightningApp, LightningFlow, LightningWork
from lightning_app.utilities import safe_pickle


class SelfPicklingWork(LightningWork):
    """Work that pickles itself into ``work.pkl`` through ``safe_pickle``."""

    def run(self):
        """Dump this work instance into ``work.pkl`` in the current directory."""
        with open("work.pkl", "wb") as pickle_file:
            safe_pickle.dump(self, pickle_file)

    def get_test_string(self):
        """Return a greeting that embeds the name of this instance's class."""
        return f"Hello from {self.__class__.__name__}!"


class WorkThatLoadsPickledWork(LightningWork):
    """Work that restores the object stored in ``work.pkl`` and checks it is usable."""

    def run(self):
        """Load ``work.pkl`` through ``safe_pickle`` and verify the restored greeting."""
        with open("work.pkl", "rb") as pickle_file:
            restored_work = safe_pickle.load(pickle_file)
        greeting = restored_work.get_test_string()
        assert greeting == "Hello from SelfPicklingWork!"


# Source code of a standalone script. `RootFlow.run` writes it to
# `script_that_loads_pickled_work.py` and runs it with `python`; the script
# unpickles `work.pkl` with plain `pickle` and prints the work's test string.
script_load_pickled_work = """
import pickle
work = pickle.load(open("work.pkl", "rb"))
print(work.get_test_string())
"""


class RootFlow(LightningFlow):
    """Root flow that pickles a work, then reloads it from another work and from a script."""

    def __init__(self):
        super().__init__()
        self.self_pickling_work = SelfPicklingWork()
        self.work_that_loads_pickled_work = WorkThatLoadsPickledWork()

    def run(self):
        """Run both works, load the pickle from a standalone script, clean up, and exit."""
        self.self_pickling_work.run()
        self.work_that_loads_pickled_work.run()

        script_path = Path("script_that_loads_pickled_work.py")
        with open(script_path, "w") as f:
            f.write(script_load_pickled_work)

        try:
            # read the output from subprocess; `subprocess.run` waits for the
            # script to finish and closes the pipe instead of leaving the child
            # process unreaped
            completed = subprocess.run(["python", str(script_path)], stdout=subprocess.PIPE)
            assert "Hello from SelfPicklingWork" in completed.stdout.decode("UTF-8")
        finally:
            # remove the temporary files even when the check above fails, so a
            # failed run does not leave artifacts behind
            # deleting the script
            script_path.unlink()
            # deleting the pkl file
            Path("work.pkl").unlink()

        self._exit("Exiting the pickling app successfully!!")


# Module-level app object built from the root flow.
# NOTE(review): presumably this is the object `lightning run app safe_pickle_app.py` picks up — confirm.
app = LightningApp(RootFlow())
hhsecond marked this conversation as resolved.
Show resolved Hide resolved