Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GCP endpoint ready test needs to set the name of the cluster #704

Merged
merged 3 commits into from
Jun 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,10 @@ spec:
- args:
- -m
- kubeflow.testing.get_kf_testing_cluster
- get-credentials
- --base=$(inputs.params.testing-cluster-pattern)
- --location=$(inputs.params.testing-cluster-location)
- get-credentials
- --output=/workspace/cluster.info.yaml
command:
- python
env:
Expand All @@ -52,13 +53,16 @@ spec:
script: |
#!/usr/bin/env bash
set -x
# Get the name of the cluster
export KFNAME=$(yq r /workspace/cluster.info.yaml cluster.name)
# Test suite name needs to be unique based on parameters
pytest endpoint_ready_test.py \
-s \
--log-cli-level=info \
--log-cli-format='%(levelname)s|%(asctime)s|%(pathname)s|%(lineno)d| %(message)s' \
--junitxml=/workspace/artifacts/junit_endpoint-is-ready.xml \
--timeout=180 \
--app_name=${KFNAME} \
-o junit_suite_name=test_endpoint_is_ready_blueprint
echo test finished.
workingDir: /workspace/$(inputs.resources.kfctl-repo.name)/py/kubeflow/kfctl/testing/pytests
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,10 @@ spec:
- args:
- -m
- kubeflow.testing.get_kf_testing_cluster
- get-credentials
- --base=$(inputs.params.testing-cluster-pattern)
- --location=$(inputs.params.testing-cluster-location)
- get-credentials
- --output=/workspace/cluster.info.yaml
command:
- python
env:
Expand All @@ -52,13 +53,16 @@ spec:
script: |
#!/usr/bin/env bash
set -x
# Get the name of the cluster
export KFNAME=$(yq r /workspace/cluster.info.yaml cluster.name)
# Test suite name needs to be unique based on parameters
pytest endpoint_ready_test.py \
-s \
--log-cli-level=info \
--log-cli-format='%(levelname)s|%(asctime)s|%(pathname)s|%(lineno)d| %(message)s' \
--junitxml=/workspace/artifacts/junit_endpoint-is-ready.xml \
--timeout=180 \
--app_name=${KFNAME} \
-o junit_suite_name=test_endpoint_is_ready_blueprint
echo test finished.
workingDir: /workspace/$(inputs.resources.kfctl-repo.name)/py/kubeflow/kfctl/testing/pytests
Expand Down
146 changes: 96 additions & 50 deletions py/kubeflow/testing/get_kf_testing_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
- python -c "from kubeflow.testing import get_kf_testing_cluster; \
print(get_kf_testing_cluster.get_deployment(\"kubeflow-ci-deployment\", \
\"kf-vmaster\", \"kf-test-cluster\"))"

TODO(jlewi): Now that we are using GCP blueprints and CNRM we should support
fetching clusters using label selectors in addition to or as a replacement
for regexes
"""

import argparse
Expand All @@ -14,8 +18,10 @@
import logging
import pprint
import re
import sys
import yaml

import fire
from googleapiclient import discovery
from kubeflow.testing import util
from oauth2client.client import GoogleCredentials
Expand Down Expand Up @@ -137,8 +143,6 @@ def _iter_cluster(project, location):
"""Iterate over all clusters in the given location"""
credentials = GoogleCredentials.get_application_default()

next_page_token = None

gke = discovery.build("container", "v1", credentials=credentials)

clusters_client = gke.projects().locations().clusters()
Expand Down Expand Up @@ -259,49 +263,6 @@ def _get_latest_cluster(project, location, pattern,
# most recent cluster will be last
return clusters[-1]

def get_latest_credential(project="kubeflow-ci-deployment",
                          base_name=DEFAULT_PATTERN,
                          location=None,
                          testing_label=None):
  """Find the latest matching test cluster and fetch its credentials.

  Builds a `gcloud container clusters get-credentials` command for the
  selected cluster and runs it through util.run.

  Args:
    project: string, Name of deployed GCP project. Optional.
    base_name: pattern passed to the cluster / deployment lookup.
    location: zone or region to search for clusters. When not set, the
      deprecated deployment manager lookup (get_latest) is used instead.
    testing_label: string, annotation used to identify testing clusters.
      Only used on the deprecated deployment manager path. Optional.

  Raises:
    ValueError: if location is set and no matching cluster is found.
  """
  util.maybe_activate_service_account()

  command = ["gcloud", "container", "clusters", "get-credentials",
             "--project=" + project]
  if location:
    c = _get_latest_cluster(project, location, base_name)

    if not c:
      message = ("No clusters found matching: project: {0}, location: {1}, "
                 "pattern: {2}").format(project, location, base_name)
      raise ValueError(message)

    # gcloud takes --zone for zonal clusters and --region for regional ones.
    if ZONE_PATTERN.match(location):
      command.append("--zone=" + location)
    else:
      command.append("--region=" + location)
    command.append(c["name"])
  else:
    # This is the pre blueprint which is using deployment manager
    logging.warning("Invoking deprecated path because location not set")
    dm = get_latest(project=project, testing_label=testing_label,
                    base_name=base_name, field="all")
    # list.append accepts exactly one item; extend adds both the zone flag
    # and the cluster name.
    command.extend(["--zone=" + dm["zone"], dm["name"]])

  # This call may be flaky due to timeout.
  @retry(stop_max_attempt_number=10, wait_fixed=5000)
  def run_get_credentials():
    util.run(command)
  run_get_credentials()

def list_dms(args):
logging.info("Calling list deployments.")
name_prefix = args.base_name
Expand All @@ -317,12 +278,12 @@ def get_dm(args):
field=args.field,
desc_ordered=args.find_latest_deployed)))

# TODO(jlewi): It looks like this is just a wrapper intended to parse args.
# Might be simpler just to switch to using Fire and get rid of this indirection.
# TODO(jlewi): This method is now deprecated. It was a wrapper to allow
# us to use argparse. Latest code should just use the fire module.
def get_credential(args):
  """Deprecated argparse wrapper that fetches cluster credentials.

  Args:
    args: parsed command line arguments; the `project`, `base_name` and
      `location` attributes are read and forwarded.
  """
  logging.info("Calling get_credential - this call needs gcloud client CLI.")
  # NOTE(review): both the older get_latest_credential call and the newer
  # CredentialHelper.get_credentials call appear here. This looks like the
  # replaced and replacing lines of a diff shown together -- confirm that
  # only one of them is intended to run.
  get_latest_credential(project=args.project, base_name=args.base_name,
                        location=args.location)
  CredentialHelper.get_credentials(
      project=args.project, pattern=args.base_name, location=args.location)

def main(): # pylint: disable=too-many-locals,too-many-statements
logging.basicConfig(level=logging.INFO,
Expand Down Expand Up @@ -362,6 +323,11 @@ def main(): # pylint: disable=too-many-locals,too-many-statements
help=("Looking for the oldest deployed testing cluster."))
parser.set_defaults(find_latest_deployed=True)

parser.add_argument(
"--output", default="", type=str,
help=("(Optional) if supplied write the cluster information to this "
"YAML file."))

subparsers = parser.add_subparsers()

_list = subparsers.add_parser(
Expand All @@ -379,5 +345,85 @@ def main(): # pylint: disable=too-many-locals,too-many-statements
args = parser.parse_args()
args.func(args)

class CredentialHelper:
  """Collection of methods to get credentials to kubeflow clusters."""

  @staticmethod
  def get_credentials(project="kubeflow-ci-deployment",
                      pattern=DEFAULT_PATTERN,
                      location=None,
                      output="",
                      testing_label=None):
    """Get the latest deployment information and use it to get credentials.

    Builds a `gcloud container clusters get-credentials` command for the
    selected cluster and runs it through util.run. When `output` is set,
    the project, location and matched cluster / deployment are written to
    that file as YAML before the gcloud command is run.

    Args:
      project: string, Name of deployed GCP project.
      pattern: Regex pattern to look for
      location: zone or region to search for clusters. When not set, the
        deprecated deployment manager lookup (get_latest) is used instead.
      output: (Optional) if supplied write information about matching
        cluster to this YAML file.
      testing_label: string, annotation used to identify testing clusters.
        Only used on the deprecated deployment manager path. Optional.

    Raises:
      ValueError: if location is set and no matching cluster is found.
    """
    logging.info("Calling get_credential - this call needs gcloud client CLI.")
    util.maybe_activate_service_account()

    command = ["gcloud", "container", "clusters", "get-credentials",
               "--project=" + project]

    # Information optionally written to the output YAML file.
    info = {
      "project": project,
      "location": location,
    }

    if location:
      c = _get_latest_cluster(project, location, pattern)

      if not c:
        message = ("No clusters found matching: project: {0}, location: {1}, "
                   "pattern: {2}").format(project, location, pattern)
        raise ValueError(message)

      # gcloud takes --zone for zonal clusters and --region for regional ones.
      if ZONE_PATTERN.match(location):
        command.append("--zone=" + location)
      else:
        command.append("--region=" + location)
      command.append(c["name"])

      info["cluster"] = c

    else:
      # This is the pre blueprint which is using deployment manager
      logging.warning("Invoking deprecated path because location not set")
      dm = get_latest(project=project, testing_label=testing_label,
                      base_name=pattern, field="all")
      # list.append accepts exactly one item; extend adds both the zone flag
      # and the cluster name.
      command.extend(["--zone=" + dm["zone"], dm["name"]])

      info["cluster"] = dm

    if output:
      logging.info(f"Writing cluster information to {output}")
      with open(output, "w") as hf:
        yaml.dump(info, hf)

    # This call may be flaky due to timeout.
    @retry(stop_max_attempt_number=10, wait_fixed=5000)
    def run_get_credentials():
      util.run(command)
    run_get_credentials()

# Script entry point: configure logging, then dispatch either to the legacy
# argparse CLI (main) or to the fire CLI built from CredentialHelper.
if __name__ == "__main__":
  # NOTE(review): this unconditional main() call precedes the mode check
  # below. It looks like a replaced diff line shown next to its
  # replacement -- confirm whether it should still be here.
  main()
  logging.basicConfig(level=logging.INFO,
                      format=('%(levelname)s|%(asctime)s'
                              '|%(pathname)s|%(lineno)d| %(message)s'),
                      datefmt='%Y-%m-%dT%H:%M:%S',
                      )
  logging.getLogger().setLevel(logging.INFO)
  # If the first argument starts with "--" then we are in the legacy
  # non fire mode
  # NOTE(review): sys.argv[1] raises IndexError when the script is invoked
  # without any arguments.
  if sys.argv[1].startswith("--"):
    # TODO(jlewi): This code path is deprecated
    logging.warning("Running in non fire mode; invoking main()")
    main()
  else:
    logging.info("Running in fire mode; invoking CLI")
    fire.Fire(CredentialHelper)
2 changes: 1 addition & 1 deletion py/kubeflow/tests/get_kf_testing_cluster_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import unittest

from googleapiclient.http import HttpMockSequence
from kubeflow.testing import get_kf_testing_cluster
from kubeflow.testing import get_kf_testing_cluster # pylint: disable=no-name-in-module

TEST_PROJECT = "kubeflow-ci-foo"
TEST_LABEL = "kf-foo-label"
Expand Down
6 changes: 5 additions & 1 deletion tekton/templates/tasks/gcp-iap-endpoint-ready-task.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,10 @@ spec:
args:
- -m
- kubeflow.testing.get_kf_testing_cluster
- get-credentials
- --base=$(inputs.params.testing-cluster-pattern)
- --location=$(inputs.params.testing-cluster-location)
- get-credentials
- --output=/workspace/cluster.info.yaml
env:
- name: PYTHONPATH
value: /workspace/$(inputs.resources.kfctl-repo.name)/py:/srcCache/kubeflow/testing/py
Expand All @@ -61,13 +62,16 @@ spec:
script: |
#!/usr/bin/env bash
set -x
# Get the name of the cluster
export KFNAME=$(yq r /workspace/cluster.info.yaml cluster.name)
# Test suite name needs to be unique based on parameters
pytest endpoint_ready_test.py \
-s \
--log-cli-level=info \
--log-cli-format='%(levelname)s|%(asctime)s|%(pathname)s|%(lineno)d| %(message)s' \
--junitxml=/workspace/artifacts/junit_endpoint-is-ready.xml \
--timeout=180 \
--app_name=${KFNAME} \
-o junit_suite_name=test_endpoint_is_ready_blueprint
echo test finished.
workingDir: /workspace/$(inputs.resources.kfctl-repo.name)/py/kubeflow/kfctl/testing/pytests
Expand Down