Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ROX-15523: Copy expiration to cloud resources (GKE) #895

Draft
wants to merge 39 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
cf332b7
dev img and lifespan in tmplt
davdhacs Jul 12, 2023
946058f
generated srcs
davdhacs Jul 12, 2023
2db4b5b
error name conflict existing cluster
davdhacs Jul 12, 2023
b0af782
lint
davdhacs Jul 12, 2023
92c399a
Merge branch 'master' into tag-gcp-expiration
davdhacs Jul 12, 2023
f67f340
remove quotes
davdhacs Jul 12, 2023
e47ce0c
update image
davdhacs Jul 12, 2023
ead2167
test sidecar during suspend
davdhacs Jul 12, 2023
770c84b
default annot lifespan zero
davdhacs Jul 12, 2023
cfede6b
try stop with onExit
davdhacs Jul 12, 2023
8163011
gke destroy as onExit
davdhacs Jul 13, 2023
17c572a
needsExit flag
davdhacs Jul 13, 2023
7650297
fix name
davdhacs Jul 13, 2023
c5b056b
recreate dev server
davdhacs Jul 14, 2023
ae9bd9f
stop, or resume if suspended
davdhacs Jul 17, 2023
f1debcd
use argo util to check suspend
davdhacs Jul 17, 2023
e63a36c
fix wf lookup
davdhacs Jul 17, 2023
d9cc168
correct import
davdhacs Jul 17, 2023
ee1ecc0
latest test
davdhacs Jul 17, 2023
4d5ca92
Revert "use argo util to check suspend"
davdhacs Jul 17, 2023
f37f7e2
latest test
davdhacs Jul 18, 2023
48f1e6c
no fail fast
davdhacs Jul 18, 2023
d1d7a15
default annotations
davdhacs Jul 18, 2023
a51577e
not suspended != destroying
davdhacs Jul 18, 2023
b42e4a7
remove wrong dupe check
davdhacs Jul 18, 2023
a17e4f1
?needsExit in expire check also
davdhacs Jul 18, 2023
aae3d10
lint
davdhacs Jul 18, 2023
5ec5d9f
latest workflow
davdhacs Jul 18, 2023
bafebbc
check for create start
davdhacs Jul 19, 2023
66ba509
only create CREATING
davdhacs Jul 19, 2023
742a8df
invert loop
davdhacs Jul 19, 2023
95248f7
log hasExitHook
davdhacs Jul 19, 2023
ffb59ab
log hasexithook in delete
davdhacs Jul 20, 2023
64cc1c4
fast loop - stack limit?
davdhacs Jul 21, 2023
bb104a8
if create, creating
davdhacs Jul 21, 2023
e7f3431
log labels
davdhacs Jul 21, 2023
610d5d2
fix label lookup
davdhacs Jul 21, 2023
3287327
exit if exit hook
davdhacs Jul 21, 2023
f4fdcf8
test indent for yaml lint
davdhacs Aug 3, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 124 additions & 8 deletions chart/infra-server/static/workflow-gke-default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,15 @@ apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
generateName: gke-default-
labels:
needsExit: "true"
annotations:
infra.stackrox.com/lifespan: 0s
infra.stackrox.com/flavor: unknown
infra.stackrox.com/owner: unknown
spec:
entrypoint: start
onExit: stop
arguments:
parameters:
- name: name
Expand All @@ -27,15 +34,127 @@ spec:
- name: start
steps:
- - name: create
template: create
template: echo
arguments:
parameters:
- name: message
value: "create"
- - name: wait
template: wait
template: running
arguments:
parameters:
- name: lifespan
value: "{{workflow.annotations.infra.stackrox.com/lifespan}}"

- name: stop
steps:
- - name: destroy
template: destroy
template: echo
arguments:
parameters:
- name: message
value: "{{workflow.parameters.name}}"

- name: running
inputs:
parameters:
- name: lifespan
failFast: false
steps:
- - name: label
template: label
arguments:
parameters:
- name: lifespan
value: "{{inputs.parameters.lifespan}}"
- - name: wait
template: wait
arguments:
parameters:
- name: lifespan
value: "{{inputs.parameters.lifespan}}"

- name: wait
inputs:
parameters:
- name: lifespan
steps:
- - name: delay
inline:
suspend:
duration: "1"
- - name: echo
template: echo
arguments:
parameters:
- name: message
value: "'{{inputs.parameters.lifespan}}' =~ '{{workflow.annotations.infra.stackrox.com/lifespan}}'"
- - name: loop
template: wait
when: "'{{inputs.parameters.lifespan}}' =~ '{{workflow.annotations.infra.stackrox.com/lifespan}}'"
arguments:
parameters:
- name: lifespan
value: "{{inputs.parameters.lifespan}}" # Pass the input value rather than the annotation: the annotation could change between the 'when' check and the moment the argument is resolved.
- - name: break
template: running
arguments:
parameters:
- name: lifespan
value: "{{workflow.annotations.infra.stackrox.com/lifespan}}"

- name: label
inputs:
parameters:
- name: lifespan
steps:
- - name: label
template: label-gke
arguments:
parameters:
- name: name
value: "{{steps.create.outputs.parameters.cluster_name}}"
- name: labels
value: "\
flavor={{workflow.annotations.infra.stackrox.com/flavor}},\
owner={{workflow.annotations.infra.stackrox.com/owner}},\
lifespan={{inputs.parameters.lifespan}},\
expiration={{= sprig.date('2006-01-02T15:04:05Z07:00',\
sprig.dateModify(inputs.parameters.lifespan,\
sprig.toDate('2006-01-02T15:04:05Z07:00',\
workflow.creationTimestamp.RFC3339)))\
}}"

- name: label-gke
inputs:
parameters:
- name: labels
script:
image: quay.io/stackrox-io/ci:automation-flavors-gke-default-0.7.6-3-g1ce65fe441-snapshot
imagePullPolicy: Always
env:
- name: GOOGLE_APPLICATION_CREDENTIALS
value: /tmp/google-credentials.json
command: [bash, -x]
source: |
labels="{{inputs.parameters.labels}}"
gcloud auth activate-service-account --key-file /tmp/google-credentials.json;
gcloud auth list;
gcloud config set compute/zone "{{workflow.parameters.gcp-zone}}";
gcloud config set core/disable_prompts True;
gcloud container clusters update \
"{{workflow.parameters.name}}"\
--project=srox-temp-dev-test\
"--update-labels=${labels}" || true
volumeMounts:
- name: credentials
mountPath: /tmp

- name: echo
inputs:
parameters:
- name: message
container:
image: alpine:3.7
command: [echo, "{{inputs.parameters.message}}"]

- name: create
activeDeadlineSeconds: 3600
Expand Down Expand Up @@ -79,9 +198,6 @@ spec:
- name: credentials
mountPath: /tmp

- name: wait
suspend: {}

- name: destroy
activeDeadlineSeconds: 3600
container:
Expand Down
46 changes: 39 additions & 7 deletions service/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,21 @@ func (s *clusterImpl) Delete(ctx context.Context, req *v1.ResourceByID) (*empty.
"error", err,
)
}
if workflow.Spec.HasExitHook() {
log.Infow("stopping argo workflow", "workflow-name", workflow.GetName())
_, err = s.argoWorkflowsClient.StopWorkflow(s.argoClientCtx, &workflowpkg.WorkflowStopRequest{
Name: workflow.GetName(),
Namespace: s.workflowNamespace,
NodeFieldSelector: "",
Message: "Destroying cluster. End workflow loop.",
})
if err != nil {
log.Warnw("failed to stop workflow, this is OK if the workflow is not running",
"workflow-name", req.GetId(),
"error", err,
)
}
}

return &empty.Empty{}, nil
}
Expand Down Expand Up @@ -638,13 +653,30 @@ func (s *clusterImpl) cleanupExpiredClusters() {
continue
}

log.Infow("resuming an argo workflow that has expired", "workflow-name", workflow.GetName())
_, err = s.argoWorkflowsClient.ResumeWorkflow(s.argoClientCtx, &workflowpkg.WorkflowResumeRequest{
Name: workflow.GetName(),
Namespace: s.workflowNamespace,
})
if err != nil {
log.Warnw("failed to resume argo workflow", "workflow-name", workflow.GetName(), "error", err)
if workflow.Spec.Suspend != nil && *workflow.Spec.Suspend {
log.Infow("resuming an argo workflow that has expired", "workflow-name", workflow.GetName())
_, err = s.argoWorkflowsClient.ResumeWorkflow(s.argoClientCtx, &workflowpkg.WorkflowResumeRequest{
Name: workflow.GetName(),
Namespace: s.workflowNamespace,
})
if err != nil {
log.Warnw("failed to resume argo workflow", "workflow-name", workflow.GetName(), "error", err)
}
}
if workflow.Spec.HasExitHook() {
log.Infow("stopping argo workflow with exit hook", "workflow-name", workflow.GetName())
_, err = s.argoWorkflowsClient.StopWorkflow(s.argoClientCtx, &workflowpkg.WorkflowStopRequest{
Name: workflow.GetName(),
Namespace: s.workflowNamespace,
NodeFieldSelector: "",
Message: "Destroying cluster. End workflow loop.",
})
if err != nil {
log.Warnw("failed to stop workflow, this is OK if the workflow is not running",
"workflow-name", workflow.GetName(),
"error", err,
)
}
}
}

Expand Down
17 changes: 13 additions & 4 deletions service/cluster/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,18 +204,27 @@ func workflowStatus(workflowStatus v1alpha1.WorkflowStatus) v1.Status {
}
} else if node.Type == v1alpha1.NodeTypeSuspend {
switch node.Phase {
case v1alpha1.NodeSucceeded:
return v1.Status_DESTROYING
case v1alpha1.NodeError, v1alpha1.NodeFailed, v1alpha1.NodeSkipped:
panic("a suspend should not be able to fail?")
case v1alpha1.NodeRunning, v1alpha1.NodePending:
return v1.Status_READY
}
}
if node.GetName() == "destroy" || node.IsExitNode() {
return v1.Status_DESTROYING
}
if node.GetName() == "create" {
switch node.Phase {
case v1alpha1.NodeError, v1alpha1.NodeFailed, v1alpha1.NodeSkipped:
return v1.Status_FAILED
case v1alpha1.NodeRunning, v1alpha1.NodePending:
return v1.Status_CREATING
}
}
}

// No suspend node was found, which means one hasn't been run yet, which means that this cluster is still creating.
return v1.Status_CREATING
// If no "create" or "destroy"/onExit node is active, then we're ready.
return v1.Status_READY

case "":
return v1.Status_CREATING
Expand Down