Skip to content

Commit

Permalink
fix(job-attachment): Change group ownership and permission on job att…
Browse files Browse the repository at this point in the history
…achment-owned files (#22)

Signed-off-by: Gahyun Suh <[email protected]>
  • Loading branch information
gahyusuh authored Sep 11, 2023
1 parent 7819ec7 commit 4ad2d40
Show file tree
Hide file tree
Showing 14 changed files with 444 additions and 11 deletions.
5 changes: 5 additions & 0 deletions DEVELOPMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,8 @@ class MyCustomWidget(QWidget):
```

**We recommend you set up your runtimes via `asdf`.**

## Running Docker-based Unit Tests

- Some of the unit tests in this package require a docker environment to run. These tests are marked with `@pytest.mark.docker`. In order to execute these tests, please run the `run_sudo_tests.sh` script located in the `scripts` directory. For detailed instructions, please refer to [scripts/README.md](./scripts/README.md).
- If you make changes to the `download` or `asset_sync` modules, it's hightly recommended to run an ensure these tests pass.
1 change: 1 addition & 0 deletions hatch.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ pre-install-commands = [
[envs.default.scripts]
sync = "pip install -r requirements-testing.txt"
test = "pytest --cov-config pyproject.toml {args:test/unit}"
test_docker = "./scripts/run_sudo_tests.sh --build"
typing = "mypy {args:src test}"
style = [
"ruff {args:.}",
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ looponfailroots = [
markers = [
"no_setup: mark that test shouldn't use default setups",
"integ: tests that run against AWS resources",
"docker: marks tests to be run only in a Docker environment",
]
# looponfailroots is deprecated, this removes the deprecation from the test output
filterwarnings = [
Expand Down
18 changes: 18 additions & 0 deletions scripts/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
## Scripts

### Running Tests with Docker for File and Directory Permissions

We have some unit tests that require being run in a specific docker container that is set up for testing with different users. Those unit tests are marked with `docker`, and the `run_sudo_tests.sh` script is provided to facilitate this testing. The script builds the Docker iamge using the Dockerfile located in `testing_containers/localuser_sudo_environment/`, and then runs the container.

#### Usage
Execute the script from the root of the repository.
```
./scripts/run_sudo_tests.sh --build
```

Or, you can run the hatch script:
```
hatch run test_docker
```

Please make sure that you have the necessary permissions to execute the script.
59 changes: 59 additions & 0 deletions scripts/run_sudo_tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/bin/bash
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.

set -eu

# Run this from the root of the repository
if ! test -d scripts
then
echo "Must run from the root of the repository"
exit 1
fi

DO_BUILD="False"
BUILD_ONLY="False"
while [[ "${1:-}" != "" ]]; do
case $1 in
-h|--help)
echo "Usage: run_sudo_tests.sh [--build]"
exit 1
;;
--build)
DO_BUILD="True"
;;
--build-only)
BUILD_ONLY="True"
;;
*)
echo "Unrecognized parameter: $1"
exit 1
;;
esac
shift
done

# Copying the dist/ dir can cause permission issues, so just nuke it.
hatch clean 2> /dev/null || true

ARGS=""

if test "${PIP_INDEX_URL:-}" != ""; then
# If PIP_INDEX_URL is set, then export that in to the container
# so that `pip install` run in the container will fetch packages
# from the correct repository.
ARGS="${ARGS} -e PIP_INDEX_URL=${PIP_INDEX_URL}"
fi

ARGS="${ARGS} -h localuser.environment.internal"
CONTAINER_IMAGE_TAG="job_attachment_localuser_test"
CONTAINER_IMAGE_DIR="localuser_sudo_environment"

if test "${DO_BUILD}" == "True"; then
docker build testing_containers/"${CONTAINER_IMAGE_DIR}" -t "${CONTAINER_IMAGE_TAG}"
fi

if test "${BUILD_ONLY}" == "True"; then
exit 0
fi

docker run --name test_sudo --rm -v $(pwd):/code:ro ${ARGS} "${CONTAINER_IMAGE_TAG}":latest
19 changes: 19 additions & 0 deletions src/deadline/job_attachments/_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.

from dataclasses import dataclass
import datetime
import io
import os
Expand Down Expand Up @@ -201,3 +202,21 @@ class FileConflictResolution(Enum):
@classmethod
def from_index(cls, index: int):
return cls(index)


@dataclass
class FileSystemPermissionSettings:
"""
A data class representing file system permission-related information.
The specified permission modes will be bitwise-OR'ed with the directory
or file's existing permissions.
Attributes:
os_group (str): The target operating system group for ownership.
dir_mode (int): The permission mode to be applied to directories.
file_mode (int): The permission mode to be applied to files.
"""

os_group: str
dir_mode: int
file_mode: int
9 changes: 7 additions & 2 deletions src/deadline/job_attachments/asset_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
from .upload import S3AssetUploader
from ._utils import (
AssetLoadingMethod,
FileSystemPermissionSettings,
_float_to_iso_datetime_string,
_get_unique_dest_dir_name,
_hash_data,
Expand Down Expand Up @@ -256,6 +257,7 @@ def sync_inputs(
queue_id: str,
job_id: str,
session_dir: Path,
fs_permission_settings: Optional[FileSystemPermissionSettings] = None,
storage_profiles_path_mapping_rules: dict[str, str] = {},
step_dependencies: Optional[list[str]] = None,
on_downloading_files: Optional[Callable[[ProgressReportMetadata], bool]] = None,
Expand All @@ -273,6 +275,8 @@ def sync_inputs(
queue_id: the ID of the queue.
job_id: the ID of the job.
session_dir: the directory that the session is going to use.
fs_permission_settings: An instance defining group ownership and permission modes
to be set on the downloaded (synchronized) input files and directories.
storage_profiles_path_mapping_rules: A dict of source path -> destination path mappings.
If this dict is not empty, it means that the Storage Profile set in the job is
different from the one configured in the Fleet performing the input-syncing.
Expand Down Expand Up @@ -328,11 +332,11 @@ def sync_inputs(
}

if manifest_properties.inputManifestPath:
key = s3_settings.add_root_and_manifest_folder_prefix(
manifest_s3_key = s3_settings.add_root_and_manifest_folder_prefix(
manifest_properties.inputManifestPath
)
manifest_path = get_manifest_from_s3(
manifest_key=key,
manifest_key=manifest_s3_key,
s3_bucket=s3_settings.s3BucketName,
session=self.session,
)
Expand Down Expand Up @@ -382,6 +386,7 @@ def sync_inputs(
s3_bucket=s3_settings.s3BucketName,
manifests_by_root=merged_manifests_by_root,
cas_prefix=s3_settings.full_cas_prefix(),
fs_permission_settings=fs_permission_settings,
session=self.session,
on_downloading_files=on_downloading_files,
)
Expand Down
58 changes: 57 additions & 1 deletion src/deadline/job_attachments/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import concurrent.futures
import os
import re
import shutil
import time
from collections import defaultdict
from datetime import datetime
Expand Down Expand Up @@ -39,7 +40,12 @@
)
from .fus3 import Fus3ProcessManager
from .models import JobAttachmentS3Settings, Attachments
from ._utils import FileConflictResolution, _is_relative_to, _join_s3_paths
from ._utils import (
FileConflictResolution,
FileSystemPermissionSettings,
_is_relative_to,
_join_s3_paths,
)

logger = getLogger("deadline.job_attachments.download")

Expand Down Expand Up @@ -335,6 +341,7 @@ def download_file(

s3_key = f"{cas_prefix}/{file.hash}" if cas_prefix else file.hash

# If the file name already exists, resolve the conflict based on the file_conflict_resolution
if local_file_name.is_file():
if file_conflict_resolution == FileConflictResolution.SKIP:
return (file_bytes, None)
Expand Down Expand Up @@ -606,6 +613,7 @@ def download_files_from_manifests(
s3_bucket: str,
manifests_by_root: dict[str, BaseAssetManifest],
cas_prefix: Optional[str] = None,
fs_permission_settings: Optional[FileSystemPermissionSettings] = None,
session: Optional[boto3.Session] = None,
on_downloading_files: Optional[Callable[[ProgressReportMetadata], bool]] = None,
) -> DownloadSummaryStatistics:
Expand Down Expand Up @@ -650,12 +658,60 @@ def download_files_from_manifests(
file_mod_time,
progress_tracker=progress_tracker,
)

if fs_permission_settings is not None:
_set_fs_group(
file_paths=downloaded_files_paths,
local_root=local_download_dir,
fs_permission_settings=fs_permission_settings,
)

downloaded_files_paths_by_root[local_download_dir].extend(downloaded_files_paths)

progress_tracker.total_time = time.perf_counter() - start_time
return progress_tracker.get_download_summary_statistics(downloaded_files_paths_by_root)


def _set_fs_group(
file_paths: list[str],
local_root: str,
fs_permission_settings: FileSystemPermissionSettings,
) -> None:
"""
Sets file system group ownership and permissions for all files and directories
in the given paths, starting from root. It is expected that all `file_paths`
point to files, not directories.
"""
os_group = fs_permission_settings.os_group
dir_mode = fs_permission_settings.dir_mode
file_mode = fs_permission_settings.file_mode

# Initialize set to track changed path
dir_paths_to_change_fs_group = set()

# 1. Set group ownership and permissions for each file.
for file_path in file_paths:
# The file path must be relative to the root path (ie. local_root).
if not _is_relative_to(file_path, local_root):
raise PathOutsideDirectoryError(
f"The provided path '{file_path}' is not under the root directory: {local_root}"
)

shutil.chown(Path(file_path), group=os_group)
os.chmod(Path(file_path), Path(file_path).stat().st_mode | file_mode)

# Accumulate unique parent directories for each file
path_components = Path(file_path).relative_to(local_root).parents
for path_component in path_components:
path_to_change = Path(local_root).joinpath(path_component)
dir_paths_to_change_fs_group.add(path_to_change)

# 2. Set group ownership and permissions for the directories in the path starting from root.
for path_to_change in dir_paths_to_change_fs_group:
shutil.chown(path_to_change, group=os_group)
os.chmod(path_to_change, path_to_change.stat().st_mode | dir_mode)


def merge_asset_manifests(manifests: list[BaseAssetManifest]) -> BaseAssetManifest | None:
"""Merge files from multiple manifests into a single list, ensuring that each filename
is unique by keeping the one from the last encountered manifest. (Thus, the steps'
Expand Down
32 changes: 32 additions & 0 deletions test/unit/deadline_job_attachments/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,3 +455,35 @@ def fixture_merged_manifest():
],
"totalSize": 70,
}


def has_posix_target_user() -> bool:
"""Returns if the testing environment exported the env variables for doing
cross-account posix target-user tests.
"""
return (
os.environ.get("DEADLINE_JOB_ATTACHMENT_TEST_SUDO_TARGET_USER") is not None
and os.environ.get("DEADLINE_JOB_ATTACHMENT_TEST_SUDO_TARGET_GROUP") is not None
)


def has_posix_disjoint_user() -> bool:
"""Returns if the testing environment exported the env variables for doing
cross-account posix disjoint-user tests.
"""
return (
os.environ.get("DEADLINE_JOB_ATTACHMENT_TEST_SUDO_DISJOINT_USER") is not None
and os.environ.get("DEADLINE_JOB_ATTACHMENT_TEST_SUDO_DISJOINT_GROUP") is not None
)


@pytest.fixture(scope="function")
def posix_target_group() -> str:
# Intentionally fail if the var is not defined.
return os.environ["DEADLINE_JOB_ATTACHMENT_TEST_SUDO_TARGET_GROUP"]


@pytest.fixture(scope="function")
def posix_disjoint_group() -> str:
# Intentionally fail if the var is not defined.
return os.environ["DEADLINE_JOB_ATTACHMENT_TEST_SUDO_DISJOINT_GROUP"]
13 changes: 7 additions & 6 deletions test/unit/deadline_job_attachments/test_asset_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -687,12 +687,12 @@ def test_sync_inputs_with_storage_profiles_path_mapping_rules(
mock_on_downloading_files = MagicMock(return_value=True)

(summary_statistics, result_pathmap_rules) = self.default_asset_sync.sync_inputs(
default_queue.jobAttachmentSettings,
default_job.attachments,
default_queue.queueId,
default_job.jobId,
tmp_path,
storage_profiles_path_mapping_rules,
s3_settings=default_queue.jobAttachmentSettings,
attachments=default_job.attachments,
queue_id=default_queue.queueId,
job_id=default_job.jobId,
session_dir=tmp_path,
storage_profiles_path_mapping_rules=storage_profiles_path_mapping_rules,
on_downloading_files=mock_on_downloading_files,
)

Expand All @@ -712,6 +712,7 @@ def test_sync_inputs_with_storage_profiles_path_mapping_rules(
"/tmp/movie1": "test_manifest_data",
},
cas_prefix="assetRoot/Data",
fs_permission_settings=None,
session=ANY,
on_downloading_files=mock_on_downloading_files,
)
Loading

0 comments on commit 4ad2d40

Please sign in to comment.