Skip to content

Commit

Permalink
Merge pull request #215 from jameshcorbett/instance-owner-persistent
Browse files Browse the repository at this point in the history
dws: only allow owner to create persistent
  • Loading branch information
mergify[bot] authored Sep 20, 2024
2 parents 388debf + 95b2143 commit 0263e77
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 8 deletions.
34 changes: 30 additions & 4 deletions src/modules/coral2_dws.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,12 +266,21 @@ def move_workflow_desiredstate(workflow_name, desiredstate, k8s_api):
)


def owner_uid(handle):
"""Get instance owner UID"""
try:
return int(handle.attr_get("security.owner"))
except Exception:
return os.getuid()


@message_callback_wrapper
def create_cb(handle, _t, msg, api_instance):
def create_cb(handle, _t, msg, arg):
"""dws.create RPC callback. Creates a k8s Workflow object for a job.
Triggered when a new job with a jobdw directive is submitted.
"""
api_instance, unrestricted_persistent = arg
dw_directives = msg.payload["dw_directives"]
jobid = msg.payload["jobid"]
userid = msg.payload["userid"]
Expand All @@ -284,6 +293,12 @@ def create_cb(handle, _t, msg, api_instance):
raise TypeError(
f"Malformed dw_directives, not list or string: {dw_directives!r}"
)
for directive in dw_directives:
if not unrestricted_persistent and "create_persistent" in directive:
if userid != owner_uid(handle):
raise ValueError(
"only the instance owner can create persistent file systems"
)
workflow_name = WORKFLOW_NAME_FORMAT.format(jobid=jobid)
spec = {
"desiredState": "Proposal",
Expand Down Expand Up @@ -870,6 +885,14 @@ def setup_parsing():
type=int,
help="Number of nnfdatamovements to save to job KVS, defaults to 5",
)
parser.add_argument(
"--unrestricted-persistent",
action="store_true",
help=(
"Allow any user to create persistent file systems, not just the instance "
"owner"
),
)
for fs_option, fs_help in (
("xfs", "XFS"),
("gfs2", "GFS2"),
Expand Down Expand Up @@ -922,11 +945,14 @@ def populate_rabbits_dict(k8s_api):
_RABBITS_TO_HOSTLISTS[nnf["metadata"]["name"]] = hlist.encode()


def register_services(handle, k8s_api):
def register_services(handle, k8s_api, unrestricted_persistent):
"""register dws.create, dws.setup, and dws.post_run services."""
serv_reg_fut = handle.service_register("dws")
create_watcher = handle.msg_watcher_create(
create_cb, FLUX_MSGTYPE_REQUEST, "dws.create", args=k8s_api
create_cb,
FLUX_MSGTYPE_REQUEST,
"dws.create",
args=(k8s_api, unrestricted_persistent),
)
create_watcher.start()
setup_watcher = handle.msg_watcher_create(
Expand Down Expand Up @@ -1031,7 +1057,7 @@ def main():
watchers,
args,
)
services = register_services(handle, k8s_api)
services = register_services(handle, k8s_api, args.unrestricted_persistent)
watchers.add_watch(
Watch(
k8s_api,
Expand Down
14 changes: 14 additions & 0 deletions t/scripts/sign-as.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import sys

from flux.security import SecurityContext

if len(sys.argv) < 2:
print("Usage: {0} USERID".format(sys.argv[0]))
sys.exit(1)

userid = int(sys.argv[1])
ctx = SecurityContext()
payload = sys.stdin.read()

print(ctx.sign_wrap_as(userid, payload, mech_type="none").decode("utf-8"))
# vi: ts=4 sw=4 expandtab
25 changes: 21 additions & 4 deletions t/t1002-dws-workflow-obj.t
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ if test_have_prereq NO_DWS_K8S; then
test_done
fi

flux version | grep -q libflux-security && test_set_prereq FLUX_SECURITY

FLUX_SIZE=2

test_under_flux ${FLUX_SIZE} job
Expand All @@ -23,10 +25,15 @@ PROLOG_NAME="dws-setup"
EPILOG_NAME="dws-epilog"
DATADIR=${SHARNESS_TEST_SRCDIR}/data/workflow-obj

# TODO: load alloc-bypass plugin once it is working again (flux-core #4900)
# test_expect_success 'job-manager: load alloc-bypass plugin' '
# flux jobtap load alloc-bypass.so
# '
submit_as_alternate_user()
{
FAKE_USERID=42
flux run --dry-run "$@" | \
flux python ${SHARNESS_TEST_SRCDIR}/scripts/sign-as.py $FAKE_USERID \
>job.signed
FLUX_HANDLE_USERID=$FAKE_USERID \
flux job submit --flags=signed job.signed
}

test_expect_success 'job-manager: load dws-jobtap and alloc-bypass plugin' '
flux jobtap load ${PLUGINPATH}/dws-jobtap.so &&
Expand Down Expand Up @@ -373,6 +380,16 @@ test_expect_success 'job submission with persistent DW string works' '
flux job wait-event -vt 30 ${jobid} clean
'

test_expect_success FLUX_SECURITY 'job submission with persistent DW string and non-owner UID fails' '
jobid=$(submit_as_alternate_user \
--setattr=system.dw="#DW create_persistent capacity=10GiB type=lustre name=project1" \
-N1 -n1 -c1 --setattr=exec.test.run_duration=1s \
hostname) &&
flux job wait-event -vt 10 ${jobid} exception &&
flux job wait-event -vt 15 ${jobid} clean &&
flux job wait-event -t 1 ${jobid} exception | grep "only the instance owner"
'

test_expect_success 'job submission with standalone MGT persistent DW string works' '
(kubectl delete nnfstorageprofiles -nnnf-system mypoolprofile || true) &&
kubectl get nnfstorageprofiles -nnnf-system default -ojson | \
Expand Down

0 comments on commit 0263e77

Please sign in to comment.