Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(sdk): using component's pip_index_urls for Dockerfile generation. Fixes #8816 #8871

Merged
merged 7 commits into from
Mar 1, 2023
15 changes: 13 additions & 2 deletions sdk/python/kfp/cli/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@

WORKDIR {component_root_dir}
COPY {requirements_file} {requirements_file}
RUN pip install --no-cache-dir -r {requirements_file}
RUN pip install {index_urls}--no-cache-dir -r {requirements_file}
{maybe_copy_kfp_package}
RUN pip install --no-cache-dir {kfp_package_path}
RUN pip install {index_urls}--no-cache-dir {kfp_package_path}
COPY . .
'''

Expand Down Expand Up @@ -156,6 +156,7 @@ def __init__(

self._base_image = None
self._target_image = None
self._pip_index_urls = None
self._load_components()

def _load_components(self):
Expand Down Expand Up @@ -214,6 +215,13 @@ def _load_components(self):
raise sys.exit(1)
logging.info(f'Using target image: {self._target_image}')

pip_index_urls = []
for comp in self._components:
if comp.pip_index_urls is not None:
pip_index_urls.extend(comp.pip_index_urls)
if len(pip_index_urls) > 0:
b4sus marked this conversation as resolved.
Show resolved Hide resolved
self._pip_index_urls = list(dict.fromkeys(pip_index_urls))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explain what's happening here? It seems like this converts a list to dict then back to list, but it's possible I'm missing something.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is a little trick to get rid of possible duplicates preserving order (dict guarantees insertion order since 3.7, see here.
I will remove this as it is a bit too defensive and it is not done for any other list arguments I've seen - to be consistent.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Neat! Thank you for explaining and good thinking.

I actually think it makes sense to keep this deduping. I can imagine containerizing many components that each use the same single URL. We don't need to list that package registry multiple times. This is probably the most realistic use case, since combining the pip_index_urls of components that each use a different registry wouldn't really make sense.

And the order preserving is good, too. There's no logical total ordering across multiple components, but it's easy enough to preserve a partial ordering, so let's do it.

The entries in the requirements file (runtime-requirements.txt) are deduplicated (via set()) and have some of the same concerns/considerations that arise from combining the component parameters for multiple components, so there's one precedent for deduping.

WDYT?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right, the deduplication is actually necessary when more components in a file share the same pypi url, dockerfile needs it only once - what is I guess expected use-case - company having own pypi index.
I've put it back in :)
Preserving order is nice to have for unit tests, otherwise set would be fine (like in runtime-requirements case).


def _maybe_write_file(self,
filename: str,
contents: str,
Expand Down Expand Up @@ -270,12 +278,15 @@ def generate_kfp_config(self):
config.save()

def maybe_generate_dockerfile(self, overwrite_dockerfile: bool = False):
index_urls_options = component_factory.make_index_url_options(
self._pip_index_urls)
dockerfile_contents = _DOCKERFILE_TEMPLATE.format(
base_image=self._base_image,
maybe_copy_kfp_package=self._maybe_copy_kfp_package,
component_root_dir=_COMPONENT_ROOT_DIR,
kfp_package_path=self._kfp_package_path,
requirements_file=_REQUIREMENTS_TXT,
index_urls=index_urls_options,
)

self._maybe_write_file(_DOCKERFILE, dockerfile_contents,
Expand Down
77 changes: 71 additions & 6 deletions sdk/python/kfp/cli/component_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,26 +30,31 @@
from kfp.cli import component


def _make_component(func_name: str,
base_image: Optional[str] = None,
target_image: Optional[str] = None,
packages_to_install: Optional[List[str]] = None,
output_component_file: Optional[str] = None) -> str:
def _make_component(
func_name: str,
base_image: Optional[str] = None,
target_image: Optional[str] = None,
packages_to_install: Optional[List[str]] = None,
output_component_file: Optional[str] = None,
pip_index_urls: Optional[List[str]] = None,
) -> str:
return textwrap.dedent('''
from kfp.dsl import *

@component(
base_image={base_image},
target_image={target_image},
packages_to_install={packages_to_install},
output_component_file={output_component_file})
output_component_file={output_component_file},
pip_index_urls={pip_index_urls})
def {func_name}():
pass
''').format(
base_image=repr(base_image),
target_image=repr(target_image),
packages_to_install=repr(packages_to_install),
output_component_file=repr(output_component_file),
pip_index_urls=repr(pip_index_urls),
func_name=func_name)


Expand Down Expand Up @@ -443,6 +448,66 @@ def test_docker_file_is_created_correctly(self):
COPY . .
'''))

@mock.patch('kfp.__version__', '1.2.3')
def test_docker_file_is_created_correctly_with_one_url(self):
component = _make_component(
func_name='train',
target_image='custom-image',
pip_index_urls=['https://pypi.org/simple'])
_write_components('components.py', component)

result = self.runner.invoke(
self.cli,
['build', str(self._working_dir)],
)
self.assertEqual(result.exit_code, 0)
self._docker_client.api.build.assert_called_once()
self.assert_file_exists_and_contains(
'Dockerfile',
textwrap.dedent('''\
# Generated by KFP.

FROM python:3.7

WORKDIR /usr/local/src/kfp/components
COPY runtime-requirements.txt runtime-requirements.txt
RUN pip install --index-url https://pypi.org/simple --trusted-host https://pypi.org/simple --no-cache-dir -r runtime-requirements.txt

RUN pip install --index-url https://pypi.org/simple --trusted-host https://pypi.org/simple --no-cache-dir kfp==1.2.3
COPY . .
'''))

@mock.patch('kfp.__version__', '1.2.3')
def test_docker_file_is_created_correctly_with_two_urls(self):
component = _make_component(
func_name='train',
target_image='custom-image',
pip_index_urls=[
'https://pypi.org/simple', 'https://example.com/pypi/simple'
])
_write_components('components.py', component)

result = self.runner.invoke(
self.cli,
['build', str(self._working_dir)],
)
self.assertEqual(result.exit_code, 0)
self._docker_client.api.build.assert_called_once()
self.assert_file_exists_and_contains(
'Dockerfile',
textwrap.dedent('''\
# Generated by KFP.

FROM python:3.7

WORKDIR /usr/local/src/kfp/components
COPY runtime-requirements.txt runtime-requirements.txt
RUN pip install --index-url https://pypi.org/simple --trusted-host https://pypi.org/simple --extra-index-url https://example.com/pypi/simple --trusted-host https://example.com/pypi/simple --no-cache-dir -r runtime-requirements.txt
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(not your code; feel free to ignore) Looks like string joiner implementation results in two spaces between the first entry and the second (before the first --extra-index-url).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed


RUN pip install --index-url https://pypi.org/simple --trusted-host https://pypi.org/simple --extra-index-url https://example.com/pypi/simple --trusted-host https://example.com/pypi/simple --no-cache-dir kfp==1.2.3
COPY . .
'''))

def test_existing_dockerfile_is_unchanged_by_default(self):
component = _make_component(
func_name='train', target_image='custom-image')
Expand Down
26 changes: 23 additions & 3 deletions sdk/python/kfp/components/component_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ class ComponentInfo():
output_component_file: Optional[str] = None
base_image: str = _DEFAULT_BASE_IMAGE
packages_to_install: Optional[List[str]] = None
pip_index_urls: Optional[List[str]] = None


# A map from function_name to components. This is always populated when a
Expand All @@ -63,7 +64,25 @@ def _python_function_name_to_component_name(name):
return name_with_spaces[0].upper() + name_with_spaces[1:]


def _make_index_url_options(pip_index_urls: Optional[List[str]]) -> str:
def make_index_url_options(pip_index_urls: Optional[List[str]]) -> str:
"""Generates index url options for pip install command based on provided
pip_index_urls.

Args:
pip_index_urls: Optional list of pip index urls

Returns:
- Empty string if pip_index_urls is empty/None.
- '--index-url url --trusted-host url ' if pip_index_urls contains 1
url
- the above followed by '--extra-index-url url --trusted-host url '
for
each next url in pip_index_urls if pip_index_urls contains more than 1
url

Note: In case pip_index_urls is not empty, the returned string will
contain space at the end.
"""
if not pip_index_urls:
return ''

Expand Down Expand Up @@ -97,7 +116,7 @@ def _get_packages_to_install_command(

concat_package_list = ' '.join(
[repr(str(package)) for package in package_list])
index_url_options = _make_index_url_options(pip_index_urls)
index_url_options = make_index_url_options(pip_index_urls)
install_python_packages_script = _install_python_packages_script_template.format(
index_url_options=index_url_options,
concat_package_list=concat_package_list)
Expand Down Expand Up @@ -465,7 +484,8 @@ def create_component_from_func(
component_spec=component_spec,
output_component_file=output_component_file,
base_image=base_image,
packages_to_install=packages_to_install)
packages_to_install=packages_to_install,
pip_index_urls=pip_index_urls)

if REGISTERED_MODULES is not None:
REGISTERED_MODULES[component_name] = component_info
Expand Down