From c41d9fb1df21e3d96cb2d824cd141d8aaa9aa280 Mon Sep 17 00:00:00 2001 From: cwbeitel Date: Sun, 7 Jan 2018 08:43:42 -0800 Subject: [PATCH 1/3] update initialClusterVersion to 1.7.11-gke.1 - addressing https://github.com/tensorflow/k8s/issues/268 - previously thought GCS read/write was also necessary for tensorflow/agents but does not appear to be the case so oauthScopes left unchanged. --- py/deploy.py | 96 ++++++++++++++++++++++++++++------------------------ 1 file changed, 52 insertions(+), 44 deletions(-) diff --git a/py/deploy.py b/py/deploy.py index b3eb619238..782d182500 100755 --- a/py/deploy.py +++ b/py/deploy.py @@ -11,13 +11,12 @@ import tempfile import time +from google.cloud import storage # pylint: disable=no-name-in-module +from googleapiclient import discovery from kubernetes import client as k8s_client -from googleapiclient import discovery -from google.cloud import storage # pylint: disable=no-name-in-module +from py import test_util, util -from py import test_util -from py import util def setup(args): """Setup a GKE cluster for TensorFlow jobs. @@ -34,18 +33,18 @@ def setup(args): machine_type = "n1-standard-8" cluster_request = { - "cluster": { - "name": cluster_name, + "cluster": { + "name": cluster_name, "description": "A GKE cluster for TF.", "initialNodeCount": 1, "nodeConfig": { - "machineType": machine_type, + "machineType": machine_type, "oauthScopes": [ - "https://www.googleapis.com/auth/cloud-platform", - ], - }, + "https://www.googleapis.com/auth/cloud-platform", + ], + }, # TODO(jlewi): Stop pinning GKE version once 1.8 becomes the default. - "initialClusterVersion": "1.8.1-gke.1", + "initialClusterVersion": "1.7.11-gke.1", } } @@ -57,8 +56,8 @@ def setup(args): for accelerator_spec in args.accelerators: accelerator_type, accelerator_count = accelerator_spec.split("=", 1) cluster_request["cluster"]["nodeConfig"]["accelerators"].append( - {"acceleratorCount": accelerator_count, - "acceleratorType": accelerator_type, }) + {"acceleratorCount": accelerator_count, + "acceleratorType": accelerator_type, }) util.create_cluster(gke, project, zone, cluster_request) @@ -70,6 +69,10 @@ def setup(args): util.setup_cluster(api_client) + # A None gcs_client should be passed to test_util.create_junit_xml_file + # unless chart.startswith("gs://"), e.g. https://storage.googleapis.com/... + gcs_client = None + if chart.startswith("gs://"): remote = chart chart = os.path.join(tempfile.gettempdir(), os.path.basename(chart)) @@ -97,6 +100,7 @@ def setup(args): t.class_name = "GKE" test_util.create_junit_xml_file([t], args.junit_path, gcs_client) + def test(args): """Run the tests.""" gcs_client = storage.Client(project=args.project) @@ -117,6 +121,7 @@ def test(args): t.class_name = "GKE" test_util.create_junit_xml_file([t], args.junit_path, gcs_client) + def teardown(args): """Teardown the resources.""" gke = discovery.build("container", "v1") @@ -126,6 +131,7 @@ def teardown(args): zone = args.zone util.delete_cluster(gke, cluster_name, project, zone) + def add_common_args(parser): """Add common command line arguments to a parser. @@ -133,62 +139,63 @@ def add_common_args(parser): parser: The parser to add command line arguments to. """ parser.add_argument( - "--project", - default=None, - type=str, - help=("The project to use.")) + "--project", + default=None, + type=str, + help=("The project to use.")) parser.add_argument( - "--cluster", - default=None, - type=str, - help=("The name of the cluster.")) + "--cluster", + default=None, + type=str, + help=("The name of the cluster.")) parser.add_argument( - "--zone", - default="us-east1-d", - type=str, - help=("The zone for the cluster.")) + "--zone", + default="us-east1-d", + type=str, + help=("The zone for the cluster.")) parser.add_argument( - "--junit_path", - default="", - type=str, - help="Where to write the junit xml file with the results.") + "--junit_path", + default="", + type=str, + help="Where to write the junit xml file with the results.") + def main(): # pylint: disable=too-many-locals - logging.getLogger().setLevel(logging.INFO) # pylint: disable=too-many-locals + logging.getLogger().setLevel(logging.INFO) # pylint: disable=too-many-locals # create the top-level parser parser = argparse.ArgumentParser( - description="Setup clusters for testing.") + description="Setup clusters for testing.") subparsers = parser.add_subparsers() ############################################################################# # setup # parser_setup = subparsers.add_parser( - "setup", + "setup", help="Setup a cluster for testing.") parser_setup.add_argument( - "--accelerator", - dest="accelerators", - action="append", - help="Accelerator to add to the cluster. Should be of the form type=count.") + "--accelerator", + dest="accelerators", + action="append", + help="Accelerator to add to the cluster. Should be of the form type=count.") parser_setup.set_defaults(func=setup) add_common_args(parser_setup) parser_setup.add_argument( - "--chart", - type=str, - required=True, - help="The path for the helm chart.") + "--chart", + type=str, + required=True, + help="The path for the helm chart.") ############################################################################# # test # parser_test = subparsers.add_parser( - "test", - help="Run the tests.") + "test", + help="Run the tests.") parser_test.set_defaults(func=test) add_common_args(parser_test) @@ -197,8 +204,8 @@ def main(): # pylint: disable=too-many-locals # teardown # parser_teardown = subparsers.add_parser( - "teardown", - help="Teardown the cluster.") + "teardown", + help="Teardown the cluster.") parser_teardown.set_defaults(func=teardown) add_common_args(parser_teardown) @@ -206,5 +213,6 @@ def main(): # pylint: disable=too-many-locals args = parser.parse_args() args.func(args) + if __name__ == "__main__": main() From 184d2a0cf6b30057704d72990893c302f4461715 Mon Sep 17 00:00:00 2001 From: cwbeitel Date: Sun, 7 Jan 2018 08:53:47 -0800 Subject: [PATCH 2/3] revert linter-related changes --- py/deploy.py | 90 +++++++++++++++++++++++++--------------------------- 1 file changed, 43 insertions(+), 47 deletions(-) diff --git a/py/deploy.py b/py/deploy.py index 782d182500..b87bb7d196 100755 --- a/py/deploy.py +++ b/py/deploy.py @@ -11,12 +11,13 @@ import tempfile import time -from google.cloud import storage # pylint: disable=no-name-in-module -from googleapiclient import discovery from kubernetes import client as k8s_client -from py import test_util, util +from googleapiclient import discovery +from google.cloud import storage # pylint: disable=no-name-in-module +from py import test_util +from py import util def setup(args): """Setup a GKE cluster for TensorFlow jobs. @@ -33,16 +34,16 @@ def setup(args): machine_type = "n1-standard-8" cluster_request = { - "cluster": { - "name": cluster_name, + "cluster": { + "name": cluster_name, "description": "A GKE cluster for TF.", "initialNodeCount": 1, "nodeConfig": { - "machineType": machine_type, + "machineType": machine_type, "oauthScopes": [ - "https://www.googleapis.com/auth/cloud-platform", - ], - }, + "https://www.googleapis.com/auth/cloud-platform", + ], + }, # TODO(jlewi): Stop pinning GKE version once 1.8 becomes the default. "initialClusterVersion": "1.7.11-gke.1", } @@ -56,8 +57,8 @@ def setup(args): for accelerator_spec in args.accelerators: accelerator_type, accelerator_count = accelerator_spec.split("=", 1) cluster_request["cluster"]["nodeConfig"]["accelerators"].append( - {"acceleratorCount": accelerator_count, - "acceleratorType": accelerator_type, }) + {"acceleratorCount": accelerator_count, + "acceleratorType": accelerator_type, }) util.create_cluster(gke, project, zone, cluster_request) @@ -100,7 +101,6 @@ def setup(args): t.class_name = "GKE" test_util.create_junit_xml_file([t], args.junit_path, gcs_client) - def test(args): """Run the tests.""" gcs_client = storage.Client(project=args.project) @@ -121,7 +121,6 @@ def test(args): t.class_name = "GKE" test_util.create_junit_xml_file([t], args.junit_path, gcs_client) - def teardown(args): """Teardown the resources.""" gke = discovery.build("container", "v1") @@ -131,7 +130,6 @@ def teardown(args): zone = args.zone util.delete_cluster(gke, cluster_name, project, zone) - def add_common_args(parser): """Add common command line arguments to a parser. @@ -139,63 +137,62 @@ def add_common_args(parser): parser: The parser to add command line arguments to. """ parser.add_argument( - "--project", - default=None, - type=str, - help=("The project to use.")) + "--project", + default=None, + type=str, + help=("The project to use.")) parser.add_argument( - "--cluster", - default=None, - type=str, - help=("The name of the cluster.")) + "--cluster", + default=None, + type=str, + help=("The name of the cluster.")) parser.add_argument( - "--zone", - default="us-east1-d", - type=str, - help=("The zone for the cluster.")) + "--zone", + default="us-east1-d", + type=str, + help=("The zone for the cluster.")) parser.add_argument( - "--junit_path", - default="", - type=str, - help="Where to write the junit xml file with the results.") - + "--junit_path", + default="", + type=str, + help="Where to write the junit xml file with the results.") def main(): # pylint: disable=too-many-locals - logging.getLogger().setLevel(logging.INFO) # pylint: disable=too-many-locals + logging.getLogger().setLevel(logging.INFO) # pylint: disable=too-many-locals # create the top-level parser parser = argparse.ArgumentParser( - description="Setup clusters for testing.") + description="Setup clusters for testing.") subparsers = parser.add_subparsers() ############################################################################# # setup # parser_setup = subparsers.add_parser( - "setup", + "setup", help="Setup a cluster for testing.") parser_setup.add_argument( - "--accelerator", - dest="accelerators", - action="append", - help="Accelerator to add to the cluster. Should be of the form type=count.") + "--accelerator", + dest="accelerators", + action="append", + help="Accelerator to add to the cluster. Should be of the form type=count.") parser_setup.set_defaults(func=setup) add_common_args(parser_setup) parser_setup.add_argument( - "--chart", - type=str, - required=True, - help="The path for the helm chart.") + "--chart", + type=str, + required=True, + help="The path for the helm chart.") ############################################################################# # test # parser_test = subparsers.add_parser( - "test", - help="Run the tests.") + "test", + help="Run the tests.") parser_test.set_defaults(func=test) add_common_args(parser_test) @@ -204,8 +201,8 @@ def main(): # pylint: disable=too-many-locals # teardown # parser_teardown = subparsers.add_parser( - "teardown", - help="Teardown the cluster.") + "teardown", + help="Teardown the cluster.") parser_teardown.set_defaults(func=teardown) add_common_args(parser_teardown) @@ -213,6 +210,5 @@ def main(): # pylint: disable=too-many-locals args = parser.parse_args() args.func(args) - if __name__ == "__main__": main() From c538264d579ab39e1f2737090d2b24c80ffa8ff1 Mon Sep 17 00:00:00 2001 From: cwbeitel Date: Sun, 7 Jan 2018 18:12:25 -0800 Subject: [PATCH 3/3] set initialClusterVersion to 1.8.5-gke.0 --- py/deploy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py/deploy.py b/py/deploy.py index b87bb7d196..3ae21a8ec0 100755 --- a/py/deploy.py +++ b/py/deploy.py @@ -45,7 +45,7 @@ def setup(args): ], }, # TODO(jlewi): Stop pinning GKE version once 1.8 becomes the default. - "initialClusterVersion": "1.7.11-gke.1", + "initialClusterVersion": "1.8.5-gke.0", } }