diff --git a/docs/dbacademy.dbhelper.dataset_manager_class.html b/docs/dbacademy.dbhelper.dataset_manager_class.html
index 5321b5e6..dea0a950 100644
--- a/docs/dbacademy.dbhelper.dataset_manager_class.html
+++ b/docs/dbacademy.dbhelper.dataset_manager_class.html
@@ -47,7 +47,7 @@
 This ensures that data and compute are in the same region which subsequently mitigates performance issues
 when the storage and compute are, for example, on opposite sides of the world.
-validate_datasets(self, fail_fast: bool) -> None
-    Validates the "install" of the datasets by recursively listing all files in the remote data repository as well as the local data repository, validating that each file exists but DOES NOT validate file size or checksum.
+validate_datasets(self, fail_fast: bool) -> None

 Static methods defined here:
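For reference, the behavior described by the docstring removed above amounts to an existence-only comparison of two recursive file listings. The sketch below is a minimal illustration of that contract, not dbacademy's implementation; the listing inputs are assumed to be relative paths.

# Minimal sketch of the existence-only validation the removed docstring
# describes; not dbacademy's implementation. Inputs are assumed to be
# relative paths produced by recursively listing each repository.
from typing import List

def validate_install(remote_files: List[str], local_files: List[str], fail_fast: bool = False) -> int:
    """Report files present remotely but missing locally.

    Per the documented contract, only presence is checked; file size
    and checksum are NOT validated.
    """
    local = set(local_files)
    missing = [path for path in remote_files if path not in local]
    for path in missing:
        print(f"Missing local file: {path}")
    if fail_fast:
        assert not missing, f"{len(missing)} file(s) missing from the local datasets"
    return len(missing)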
diff --git a/docs/dbacademy.dbrest.instance_pools.html b/docs/dbacademy.dbrest.instance_pools.html
index 4403cc38..8f3e17ce 100644
--- a/docs/dbacademy.dbrest.instance_pools.html
+++ b/docs/dbacademy.dbrest.instance_pools.html
@@ -45,7 +45,7 @@
 Methods defined here:
 __init__(self, client: dbacademy.dbrest.client.DBAcademyRestClient)
     Initialize self.  See help(type(self)) for accurate signature.
-create(self, name: str, definition: dict, tags: <function InstancePoolsClient.list at 0x00000218B4B02B80> = None)
+create(self, name: str, definition: dict, tags: <function InstancePoolsClient.list at 0x000001961AFF9EE0> = None)
 create_or_update(self, instance_pool_name: str, idle_instance_autotermination_minutes: int, min_idle_instances: int = 0, max_capacity: int = None, node_type_id: str = None, preloaded_spark_version: str = None, tags: dict = None)
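A hypothetical call against the create_or_update signature shown above; the client construction, the instance_pools attribute, and every argument value here are illustrative assumptions.

# Hypothetical usage of the create_or_update signature documented above.
from dbacademy.dbrest.client import DBAcademyRestClient  # import path per the docs above

client = DBAcademyRestClient()  # constructor arguments omitted; assumed configured elsewhere
pools = client.instance_pools   # attribute name is an assumption
pools.create_or_update(
    instance_pool_name="training-pool",          # hypothetical name
    idle_instance_autotermination_minutes=15,
    min_idle_instances=0,
    max_capacity=10,
    node_type_id="i3.xlarge",                    # hypothetical node type
    preloaded_spark_version="11.3.x-scala2.12",  # hypothetical runtime
    tags={"owner": "dbacademy"},
)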
diff --git a/docs/dbacademy.dbrest.pipelines.html b/docs/dbacademy.dbrest.pipelines.html
index 4c96bf83..8fef0900 100644
--- a/docs/dbacademy.dbrest.pipelines.html
+++ b/docs/dbacademy.dbrest.pipelines.html
@@ -53,11 +53,11 @@
 Methods defined here:
 __init__(self, client: dbacademy.dbrest.client.DBAcademyRestClient)
     Initialize self.  See help(type(self)) for accurate signature.
-create(self, name: str, storage: str, target: str, continuous: bool = False, development: bool = True, configuration: dict = None, notebooks: <function PipelinesClient.list at 0x00000218B4C403A0> = None, libraries: <function PipelinesClient.list at 0x00000218B4C403A0> = None, clusters: <function PipelinesClient.list at 0x00000218B4C403A0> = None, min_workers: int = 0, max_workers: int = 0, photon: bool = True)
+create(self, name: str, storage: str, target: str, continuous: bool = False, development: bool = True, configuration: dict = None, notebooks: <function PipelinesClient.list at 0x000001961B033280> = None, libraries: <function PipelinesClient.list at 0x000001961B033280> = None, clusters: <function PipelinesClient.list at 0x000001961B033280> = None, min_workers: int = 0, max_workers: int = 0, photon: bool = True)
 create_from_dict(self, params: dict)
-create_or_update(self, name: str, storage: str, target: str, continuous: bool = False, development: bool = True, configuration: dict = None, notebooks: <function PipelinesClient.list at 0x00000218B4C403A0> = None, libraries: <function PipelinesClient.list at 0x00000218B4C403A0> = None, clusters: <function PipelinesClient.list at 0x00000218B4C403A0> = None, min_workers: int = 0, max_workers: int = 0, photon: bool = True, pipeline_id: Optional[str] = None)
+create_or_update(self, name: str, storage: str, target: str, continuous: bool = False, development: bool = True, configuration: dict = None, notebooks: <function PipelinesClient.list at 0x000001961B033280> = None, libraries: <function PipelinesClient.list at 0x000001961B033280> = None, clusters: <function PipelinesClient.list at 0x000001961B033280> = None, min_workers: int = 0, max_workers: int = 0, photon: bool = True, pipeline_id: Optional[str] = None)
 delete_by_id(self, pipeline_id)
@@ -75,7 +75,7 @@
 start_by_name(self, name: str)
-update(self, pipeline_id: str, name: str, storage: str, target: str, continuous: bool = False, development: bool = True, configuration: dict = None, notebooks: <function PipelinesClient.list at 0x00000218B4C403A0> = None, libraries: <function PipelinesClient.list at 0x00000218B4C403A0> = None, clusters: <function PipelinesClient.list at 0x00000218B4C403A0> = None, min_workers: int = 0, max_workers: int = 0, photon: bool = True)
+update(self, pipeline_id: str, name: str, storage: str, target: str, continuous: bool = False, development: bool = True, configuration: dict = None, notebooks: <function PipelinesClient.list at 0x000001961B033280> = None, libraries: <function PipelinesClient.list at 0x000001961B033280> = None, clusters: <function PipelinesClient.list at 0x000001961B033280> = None, min_workers: int = 0, max_workers: int = 0, photon: bool = True)
 update_from_dict(self, pipeline_id: str, params: dict)
@@ -83,7 +83,7 @@
 Static methods defined here:
 existing_to_create(pipeline)
-to_dict(name: str, storage: str, target: str, continuous: bool = False, development: bool = True, configuration: dict = None, notebooks: <function PipelinesClient.list at 0x00000218B4C403A0> = None, libraries: <function PipelinesClient.list at 0x00000218B4C403A0> = None, clusters: <function PipelinesClient.list at 0x00000218B4C403A0> = None, min_workers: int = 0, max_workers: int = 0, photon: bool = True)
+to_dict(name: str, storage: str, target: str, continuous: bool = False, development: bool = True, configuration: dict = None, notebooks: <function PipelinesClient.list at 0x000001961B033280> = None, libraries: <function PipelinesClient.list at 0x000001961B033280> = None, clusters: <function PipelinesClient.list at 0x000001961B033280> = None, min_workers: int = 0, max_workers: int = 0, photon: bool = True)

 Methods inherited from dbacademy.rest.common.ApiContainer:
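A hypothetical call matching the create() signature documented above. The "<function PipelinesClient.list at 0x...>" annotations appear to be pydoc rendering a type hint that resolved to the client's own list method rather than the built-in list type, so list values are assumed below; the client wiring and all argument values are illustrative assumptions.

# Hypothetical usage of PipelinesClient.create() per the signature above.
from dbacademy.dbrest.client import DBAcademyRestClient      # import path per the docs above
from dbacademy.dbrest.pipelines import PipelinesClient       # module path per this file's docs

client = DBAcademyRestClient()       # constructor arguments omitted; assumed configured elsewhere
pipelines = PipelinesClient(client)  # per __init__ documented above
pipelines.create(
    name="sample-dlt-pipeline",        # hypothetical
    storage="dbfs:/pipelines/sample",  # hypothetical
    target="sample_db",                # hypothetical
    continuous=False,
    development=True,
    notebooks=["/Repos/user/pipeline-notebook"],  # hypothetical path; list type assumed
    min_workers=1,
    max_workers=2,
    photon=True,
)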
diff --git a/docs/dbacademy.dbrest.sql.endpoints.html b/docs/dbacademy.dbrest.sql.endpoints.html
index ee4f5287..e24bd427 100644
--- a/docs/dbacademy.dbrest.sql.endpoints.html
+++ b/docs/dbacademy.dbrest.sql.endpoints.html
@@ -51,7 +51,7 @@
 create_user_endpoint(self, user, naming_template: str, naming_params: dict, cluster_size: str, enable_serverless_compute: bool, min_num_clusters: int, max_num_clusters: int, auto_stop_mins: int, enable_photon: bool, spot_instance_policy: str, channel: str, tags: dict)
-create_user_endpoints(self, naming_template: str, naming_params: dict, cluster_size: str, enable_serverless_compute: bool, min_num_clusters: int = 1, max_num_clusters: int = 1, auto_stop_mins: int = 120, enable_photon: bool = True, spot_instance_policy: str = 'RELIABILITY_OPTIMIZED', channel: str = 'CHANNEL_NAME_CURRENT', tags: dict = None, users: <function SqlWarehousesClient.list at 0x00000218B4C138B0> = None)
-    Creates one SQL endpoint per user in the current workspace. The list of users can be limited to a subset of users with the "users" parameter.
+create_user_endpoints(self, naming_template: str, naming_params: dict, cluster_size: str, enable_serverless_compute: bool, min_num_clusters: int = 1, max_num_clusters: int = 1, auto_stop_mins: int = 120, enable_photon: bool = True, spot_instance_policy: str = 'RELIABILITY_OPTIMIZED', channel: str = 'CHANNEL_NAME_CURRENT', tags: dict = None, users: <function SqlWarehousesClient.list at 0x000001961B044790> = None)
+    Creates one SQL endpoint per user in the current workspace. The list of users can be limited to a subset of users with the "users" parameter.
     Parameters:
         naming_template (str): The template used to name each user's endpoint.
         naming_params (str): The parameters used in completing the template.
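A sketch of how a per-user endpoint name could be derived from naming_template and naming_params. The substitution mechanism inside dbacademy is not shown in this diff, so str.format-style expansion is an assumption, as is the parameter name used here.

# Hypothetical expansion of a naming template for one user.
def endpoint_name(user: str, naming_template: str, naming_params: dict) -> str:
    params = dict(naming_params)
    params.setdefault("da_name", user.split("@")[0])  # hypothetical parameter name
    return naming_template.format(**params)

# e.g. endpoint_name("jane.doe@example.com", "da-{da_name}", {}) -> "da-jane.doe"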
@@ -72,7 +72,7 @@
 delete_user_endpoint(self, user, naming_template: str, naming_params: dict)
-delete_user_endpoints(self, naming_template: str, naming_params: dict, users: <function SqlWarehousesClient.list at 0x00000218B4C138B0> = None)
+delete_user_endpoints(self, naming_template: str, naming_params: dict, users: <function SqlWarehousesClient.list at 0x000001961B044790> = None)
 edit(self, endpoint_id: str, name: str = None, cluster_size: str = None, enable_serverless_compute: bool = None, min_num_clusters: int = None, max_num_clusters: int = None, auto_stop_mins: int = None, enable_photon: bool = None, spot_instance_policy: str = None, channel: str = None, tags: dict = None)
@@ -86,13 +86,13 @@
 start_user_endpoint(self, user, naming_template: str, naming_params: dict)
-start_user_endpoints(self, naming_template: str, naming_params: dict, users: <function SqlWarehousesClient.list at 0x00000218B4C138B0> = None)
+start_user_endpoints(self, naming_template: str, naming_params: dict, users: <function SqlWarehousesClient.list at 0x000001961B044790> = None)
 stop(self, endpoint_id)
 stop_user_endpoint(self, user, naming_template: str, naming_params: dict)
-stop_user_endpoints(self, naming_template: str, naming_params: dict, users: <function SqlWarehousesClient.list at 0x00000218B4C138B0> = None)
+stop_user_endpoints(self, naming_template: str, naming_params: dict, users: <function SqlWarehousesClient.list at 0x000001961B044790> = None)
 update(self, endpoint_id: str, name: str = None, cluster_size: str = None, enable_serverless_compute: bool = None, min_num_clusters: int = None, max_num_clusters: int = None, auto_stop_mins: int = None, enable_photon: bool = None, spot_instance_policy: str = None, channel: str = None, tags: dict = None)
 # TODO doug.bateman@databricks.com: Potential bugs.
 # noinspection PyUnresolvedReferences
diff --git a/docs/dbacademy.dougrest.runs.html b/docs/dbacademy.dougrest.runs.html
index 3977a00d..4ae0dc29 100644
--- a/docs/dbacademy.dougrest.runs.html
+++ b/docs/dbacademy.dougrest.runs.html
@@ -47,11 +47,11 @@
 cancel(self, run: Union[int, dict], *, if_not_exists: str = 'error') -> dict
-cancel_all(self, job_id: int = None) -> <function Runs.list at 0x00000218B4CA93A0>
+cancel_all(self, job_id: int = None) -> <function Runs.list at 0x000001961B119280>
 delete(self, run: Union[int, dict], *, if_not_exists: str = 'error') -> dict
-delete_all(self, job_id: int = None) -> <function Runs.list at 0x00000218B4CA93A0>
+delete_all(self, job_id: int = None) -> <function Runs.list at 0x000001961B119280>
 get(self, run: Union[int, dict], *, if_not_exists: str = 'error') -> dict
 # TODO Remove unused parameter
 # noinspection PyUnusedLocal
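What the cancel_all/delete_all signatures above imply: operate on every run, optionally filtered to one job, returning the affected runs. The body below is a sketch; only the cancel() signature and the Runs.list reference come from the docs above, and the list() call semantics and run-dict shape are assumptions.

# Sketch of cancel_all semantics implied by the signatures above.
def cancel_all(runs, job_id: int = None) -> list:
    results = []
    for run in runs.list():                                # Runs.list per the return annotation above
        if job_id is None or run.get("job_id") == job_id:  # assumed run-dict shape
            results.append(runs.cancel(run))               # cancel(run) per the signature above
    return results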
diff --git a/docs/dbacademy.dougrest.workspace.html b/docs/dbacademy.dougrest.workspace.html
index 3f628ec6..f63755c6 100644
--- a/docs/dbacademy.dougrest.workspace.html
+++ b/docs/dbacademy.dougrest.workspace.html
@@ -69,13 +69,13 @@
 is_empty(self, workspace_path)
-list(self, workspace_path, sort_key=<function Workspace.<lambda> at 0x00000218B49FAE50>)
+list(self, workspace_path, sort_key=<function Workspace.<lambda> at 0x000001961AE60E50>)
-list_names(self, workspace_path, sort_key=<function Workspace.<lambda> at 0x00000218B49FAF70>)
+list_names(self, workspace_path, sort_key=<function Workspace.<lambda> at 0x000001961AE60F70>)
 mkdirs(self, workspace_path)
-walk(self, workspace_path, sort_key=<function Workspace.<lambda> at 0x00000218B4A040D0>)
-    Recursively list files into an iterator.  Sorting within a directory is done by the provided sort_key.
+walk(self, workspace_path, sort_key=<function Workspace.<lambda> at 0x000001961AE5E0D0>)
+    Recursively list files into an iterator.  Sorting within a directory is done by the provided sort_key.

 Static methods defined here:
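A generic illustration of the walk() contract documented above: a recursive iterator whose per-directory ordering is controlled by sort_key. This mirrors the described behavior against a local filesystem; it is not dbacademy's Workspace implementation.

# Recursive listing into an iterator, sorted per directory by sort_key.
import os
from typing import Callable, Iterator

def walk(path: str, sort_key: Callable[[str], str] = str.lower) -> Iterator[str]:
    for name in sorted(os.listdir(path), key=sort_key):
        full = os.path.join(path, name)
        yield full
        if os.path.isdir(full):
            yield from walk(full, sort_key)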
diff --git a/setup.py b/setup.py
index eb68e25f..10e54f6b 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 from setuptools import find_packages

 setuptools.setup(
-    version="v3.0.83",
+    version="v3.0.84",
     name="dbacademy",
     author="Databricks, Inc",
     maintainer="Databricks Academy",
diff --git a/src/dbacademy/dbhelper/dataset_manager_class.py b/src/dbacademy/dbhelper/dataset_manager_class.py
index fcc4c05f..51bde9af 100644
--- a/src/dbacademy/dbhelper/dataset_manager_class.py
+++ b/src/dbacademy/dbhelper/dataset_manager_class.py
@@ -104,37 +104,38 @@ def install_dataset(self, *, install_min_time: Optional[str], install_max_time:
         self.validate_datasets(fail_fast=False)

     def validate_datasets(self, fail_fast: bool) -> None:
-        """
-        Validates the "install" of the datasets by recursively listing all files in the remote data repository as well as the local data repository, validating that each file exists but DOES NOT validate file size or checksum.
-        """
-        from dbacademy import dbgems
-
-        validation_start = dbgems.clock_start()
-
-        if self.staging_source_uri == self.data_source_uri:
-            # When working with staging data, we need to enumerate what is in there
-            # and use it as a definitive source to the complete enumeration of our files
-            start = dbgems.clock_start()
-            print("\nEnumerating staged files for validation", end="...")
-            self.__remote_files = DatasetManager.list_r(self.staging_source_uri)
-            print(dbgems.clock_stopped(start))
-            print()
-
-        print(f"\nValidating the locally installed datasets:")
-
-        self.__validate_and_repair()
-
-        if self.fixes == 1:
-            print(f"| fixed 1 issue", end="...")
-        elif self.fixes > 0:
-            print(f"| fixed {self.fixes} issues", end="...")
-        else:
-            print(f"| validation completed", end="...")
-
-        print(dbgems.clock_stopped(validation_start, " total"))
-
-        if fail_fast:
-            assert self.fixes == 0, f"Unexpected modifications to source datasets."
+        pass
+        # """
+        # Validates the "install" of the datasets by recursively listing all files in the remote data repository as well as the local data repository, validating that each file exists but DOES NOT validate file size or checksum.
+        # """
+        # from dbacademy import dbgems
+        #
+        # validation_start = dbgems.clock_start()
+        #
+        # if self.staging_source_uri == self.data_source_uri:
+        #     # When working with staging data, we need to enumerate what is in there
+        #     # and use it as a definitive source to the complete enumeration of our files
+        #     start = dbgems.clock_start()
+        #     print("\nEnumerating staged files for validation", end="...")
+        #     self.__remote_files = DatasetManager.list_r(self.staging_source_uri)
+        #     print(dbgems.clock_stopped(start))
+        #     print()
+        #
+        # print(f"\nValidating the locally installed datasets:")
+        #
+        # self.__validate_and_repair()
+        #
+        # if self.fixes == 1:
+        #     print(f"| fixed 1 issue", end="...")
+        # elif self.fixes > 0:
+        #     print(f"| fixed {self.fixes} issues", end="...")
+        # else:
+        #     print(f"| validation completed", end="...")
+        #
+        # print(dbgems.clock_stopped(validation_start, " total"))
+        #
+        # if fail_fast:
+        #     assert self.fixes == 0, f"Unexpected modifications to source datasets."

     def __validate_and_repair(self) -> None:
         from dbacademy import dbgems
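The commented-out body above leans on two small timing helpers from dbgems. A plausible minimal equivalent is sketched below for readers following along without the library; the real implementations may differ.

# Plausible stand-ins for dbgems.clock_start / dbgems.clock_stopped,
# inferred only from how the commented-out code above uses them.
import time

def clock_start() -> float:
    return time.time()

def clock_stopped(start: float, label: str = "") -> str:
    return f"({int(time.time() - start)} seconds{label})"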