Skip to content

Commit

Permalink
Use internal Permissions Migration API by default (#3230)
Browse files Browse the repository at this point in the history
This pull request introduces changes to support both legacy and new
permission migration workflows in the Databricks UCX project. The key
changes include adding a configuration option to toggle between the two
workflows, renaming classes and methods to reflect legacy and new
workflows, and updating tests accordingly.

* Added a configuration option `use_legacy_permission_migration` to
`WorkspaceConfig` to toggle between legacy and new permission migration
workflows.
* Updated methods in `workflows.py` to skip certain steps if
`use_legacy_permission_migration` is not enabled.
[[1]](diffhunk://#diff-adc2edb9e75e6050728aef658ad15e7556eca8f4149a890d87e930bdb2822d5cR168-R170)
[[2]](diffhunk://#diff-adc2edb9e75e6050728aef658ad15e7556eca8f4149a890d87e930bdb2822d5cR180-R182)
* Renamed `GroupMigration` to `LegacyGroupMigration` and updated related
method names to reflect the legacy workflow.
[[1]](diffhunk://#diff-bf31a0d3769d994ca8c56c723f2b9f2fa2418b61f705bd979760af2e6e540ecbL24-R24)
[[2]](diffhunk://#diff-bf31a0d3769d994ca8c56c723f2b9f2fa2418b61f705bd979760af2e6e540ecbL51-R51)
[[3]](diffhunk://#diff-5e6b7befe1ff9f16445d8cc20f282caedf1e2c80ff8bd44ca32b43da38cf3a52L10-R39)
[[4]](diffhunk://#diff-5e6b7befe1ff9f16445d8cc20f282caedf1e2c80ff8bd44ca32b43da38cf3a52R53-R55)
[[5]](diffhunk://#diff-5e6b7befe1ff9f16445d8cc20f282caedf1e2c80ff8bd44ca32b43da38cf3a52R65-R105)
[[6]](diffhunk://#diff-5e6b7befe1ff9f16445d8cc20f282caedf1e2c80ff8bd44ca32b43da38cf3a52R122-R124)
[[7]](diffhunk://#diff-5e6b7befe1ff9f16445d8cc20f282caedf1e2c80ff8bd44ca32b43da38cf3a52R142-R152)
[[8]](diffhunk://#diff-7c7d6f60d52e8ce3b7d47da66d46dd08b55aafba824c0bb9418d392d6a211ecaL14-R14)
[[9]](diffhunk://#diff-7c7d6f60d52e8ce3b7d47da66d46dd08b55aafba824c0bb9418d392d6a211ecaL26-R32)
* Updated integration and unit tests to use the new configuration option
and renamed classes/methods.
[[1]](diffhunk://#diff-defae77adab2fc55d5d13129dbf07a108192e13adedd1daccb969b3c3628c1f1L1-L15)
[[2]](diffhunk://#diff-defae77adab2fc55d5d13129dbf07a108192e13adedd1daccb969b3c3628c1f1L45-R53)
[[3]](diffhunk://#diff-defae77adab2fc55d5d13129dbf07a108192e13adedd1daccb969b3c3628c1f1R91-L127)
[[4]](diffhunk://#diff-72753dfa5bb6d8d548f60244277f9f0e17ae5f207060819d328a26d2ec5a4b1bL19-R19)
[[5]](diffhunk://#diff-72753dfa5bb6d8d548f60244277f9f0e17ae5f207060819d328a26d2ec5a4b1bL29-R29)
[[6]](diffhunk://#diff-7c7d6f60d52e8ce3b7d47da66d46dd08b55aafba824c0bb9418d392d6a211ecaL14-R14)
[[7]](diffhunk://#diff-7c7d6f60d52e8ce3b7d47da66d46dd08b55aafba824c0bb9418d392d6a211ecaL26-R32)
  • Loading branch information
nfx authored Nov 11, 2024
1 parent 64d8f42 commit 2901471
Show file tree
Hide file tree
Showing 7 changed files with 63 additions and 56 deletions.
6 changes: 6 additions & 0 deletions src/databricks/labs/ucx/assessment/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,9 @@ def workspace_listing(self, ctx: RuntimeContext):
It uses multi-threading to parallelize the listing process to speed up execution on big workspaces.
It accepts starting path as the parameter defaulted to the root path '/'."""
if not ctx.config.use_legacy_permission_migration:
logger.info("Skipping workspace listing as legacy permission migration is disabled.")
return
ctx.workspace_listing.snapshot()

@job_task(depends_on=[crawl_grants, workspace_listing])
Expand All @@ -174,6 +177,9 @@ def crawl_permissions(self, ctx: RuntimeContext):
This is the first step for the _group migration_ process, which is continued in the `migrate-groups` workflow.
This step includes preparing Legacy Table ACLs for local group migration."""
if not ctx.config.use_legacy_permission_migration:
logger.info("Skipping permission crawling as legacy permission migration is disabled.")
return
permission_manager = ctx.permission_manager
permission_manager.reset()
permission_manager.snapshot()
Expand Down
2 changes: 2 additions & 0 deletions src/databricks/labs/ucx/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ class WorkspaceConfig: # pylint: disable=too-many-instance-attributes

managed_table_external_storage: str = 'CLONE'

use_legacy_permission_migration: bool = False

# [INTERNAL ONLY] If specified, the large-scale scanners will only list the specified number of objects.
debug_listing_upper_limit: int | None = None

Expand Down
4 changes: 2 additions & 2 deletions src/databricks/labs/ucx/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from databricks.labs.ucx.progress.workflows import MigrationProgress
from databricks.labs.ucx.recon.workflows import MigrationRecon
from databricks.labs.ucx.workspace_access.workflows import (
GroupMigration,
LegacyGroupMigration,
PermissionsMigrationAPI,
RemoveWorkspaceLocalGroups,
ValidateGroupPermissions,
Expand All @@ -48,7 +48,7 @@ def all(cls):
[
Assessment(),
MigrationProgress(),
GroupMigration(),
LegacyGroupMigration(),
TableMigration(),
MigrateHiveSerdeTablesInPlace(),
MigrateExternalTablesCTAS(),
Expand Down
44 changes: 37 additions & 7 deletions src/databricks/labs/ucx/workspace_access/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,36 @@
logger = logging.getLogger(__name__)


class GroupMigration(Workflow):
class LegacyGroupMigration(Workflow):
def __init__(self):
super().__init__('migrate-groups')
super().__init__('migrate-groups-legacy')

@job_task(job_cluster="table_migration")
def verify_metastore_attached(self, ctx: RuntimeContext):
"""Verifies if a metastore is attached to this workspace. If not, the workflow will fail.
Account level groups are only available when a metastore is attached to the workspace.
"""
if not ctx.config.use_legacy_permission_migration:
logger.info("Use `migrate-groups` job, or set `use_legacy_permission_migration: true` in config.yml.")
return
ctx.verify_has_metastore.verify_metastore()

@job_task(depends_on=[Assessment.crawl_groups, verify_metastore_attached])
def rename_workspace_local_groups(self, ctx: RuntimeContext):
"""Renames workspace local groups by adding `db-temp-` prefix."""
if not ctx.config.use_legacy_permission_migration:
logger.info("Use `migrate-groups` job, or set `use_legacy_permission_migration: true` in config.yml.")
return
ctx.group_manager.rename_groups()

@job_task(depends_on=[rename_workspace_local_groups])
def reflect_account_groups_on_workspace(self, ctx: RuntimeContext):
"""Adds matching account groups to this workspace. The matching account level group(s) must preexist(s) for this
step to be successful. This process does not create the account level group(s)."""
if not ctx.config.use_legacy_permission_migration:
logger.info("Use `migrate-groups` job, or set `use_legacy_permission_migration: true` in config.yml.")
return
ctx.group_manager.reflect_account_groups_on_workspace()

@job_task(depends_on=[reflect_account_groups_on_workspace], job_cluster="tacl")
Expand All @@ -41,6 +50,9 @@ def apply_permissions_to_account_groups(self, ctx: RuntimeContext):
permissions, Secret Scopes, Notebooks, Directories, Repos, Files.
See [interactive tutorial here](https://app.getreprise.com/launch/myM3VNn/)."""
if not ctx.config.use_legacy_permission_migration:
logger.info("Use `migrate-groups` job, or set `use_legacy_permission_migration: true` in config.yml.")
return
migration_state = ctx.group_manager.get_migration_state()
if len(migration_state.groups) == 0:
logger.info("Skipping group migration as no groups were found.")
Expand All @@ -50,35 +62,47 @@ def apply_permissions_to_account_groups(self, ctx: RuntimeContext):
@job_task(depends_on=[apply_permissions_to_account_groups], job_cluster="tacl")
def validate_groups_permissions(self, ctx: RuntimeContext):
"""Validate that all the crawled permissions are applied correctly to the destination groups."""
if not ctx.config.use_legacy_permission_migration:
logger.info("Use `migrate-groups` job, or set `use_legacy_permission_migration: true` in config.yml.")
return
ctx.permission_manager.verify_group_permissions()


class PermissionsMigrationAPI(Workflow):
def __init__(self):
super().__init__('migrate-groups-experimental')
super().__init__('migrate-groups')

@job_task(job_cluster="table_migration")
def verify_metastore_attached(self, ctx: RuntimeContext):
"""Verifies if a metastore is attached to this workspace. If not, the workflow will fail.
Account level groups are only available when a metastore is attached to the workspace.
"""
if ctx.config.use_legacy_permission_migration:
logger.info("Remove `use_legacy_permission_migration: true` from config.yml to run this workflow.")
return
ctx.verify_has_metastore.verify_metastore()

@job_task(depends_on=[Assessment.crawl_groups, verify_metastore_attached])
def rename_workspace_local_groups(self, ctx: RuntimeContext):
"""[EXPERIMENTAL] Renames workspace local groups by adding `db-temp-` prefix."""
"""Renames workspace local groups by adding `db-temp-` prefix."""
if ctx.config.use_legacy_permission_migration:
logger.info("Remove `use_legacy_permission_migration: true` from config.yml to run this workflow.")
return
ctx.group_manager.rename_groups()

@job_task(depends_on=[rename_workspace_local_groups])
def reflect_account_groups_on_workspace(self, ctx: RuntimeContext):
"""[EXPERIMENTAL] Adds matching account groups to this workspace. The matching account level group(s) must preexist(s) for this
"""Adds matching account groups to this workspace. The matching account level group(s) must preexist(s) for this
step to be successful. This process does not create the account level group(s)."""
if ctx.config.use_legacy_permission_migration:
logger.info("Remove `use_legacy_permission_migration: true` from config.yml to run this workflow.")
return
ctx.group_manager.reflect_account_groups_on_workspace()

@job_task(depends_on=[reflect_account_groups_on_workspace])
def apply_permissions(self, ctx: RuntimeContext):
"""[EXPERIMENTAL] This task uses the new permission migration API which requires enrolment from Databricks.
"""This task uses the new permission migration API which requires enrolment from Databricks.
Fourth phase of the workspace-local group migration process. It does the following:
- Assigns the full set of permissions of the original group to the account-level one
Expand All @@ -95,6 +119,9 @@ def apply_permissions(self, ctx: RuntimeContext):
The expectation is that account group has no permissions to begin with.
It covers local workspace-local permissions for all entities."""
if ctx.config.use_legacy_permission_migration:
logger.info("Remove `use_legacy_permission_migration: true` from config.yml to run this workflow.")
return
migration_state = ctx.group_manager.get_migration_state()
if len(migration_state.groups) == 0:
logger.info("Skipping group migration as no groups were found.")
Expand All @@ -112,14 +139,17 @@ def __init__(self):
@job_task(job_cluster="tacl")
def validate_groups_permissions(self, ctx: RuntimeContext):
"""Validate that all the crawled permissions are applied correctly to the destination groups."""
if not ctx.config.use_legacy_permission_migration:
logger.info("Use `migrate-groups` job, or set `use_legacy_permission_migration: true` in config.yml.")
return
ctx.permission_manager.verify_group_permissions()


class RemoveWorkspaceLocalGroups(Workflow):
def __init__(self):
super().__init__('remove-workspace-local-backup-groups')

@job_task(depends_on=[GroupMigration.apply_permissions_to_account_groups])
@job_task(depends_on=[LegacyGroupMigration.apply_permissions_to_account_groups])
def delete_backup_groups(self, ctx: RuntimeContext):
"""Last step of the group migration process. Removes all workspace-level backup groups, along with their
permissions. Execute this workflow only after you've confirmed that workspace-local migration worked
Expand Down
49 changes: 9 additions & 40 deletions tests/integration/workspace_access/test_workflows.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,12 @@
from datetime import timedelta
from dataclasses import replace

import pytest
from databricks.labs.blueprint.parallel import ManyError

from databricks.sdk.errors import NotFound, InvalidParameterValue
from databricks.sdk.retries import retried
from databricks.sdk.service import sql
from databricks.sdk.service.iam import PermissionLevel
from databricks.sdk.service.workspace import AclPermission


@retried(on=[NotFound, InvalidParameterValue], timeout=timedelta(minutes=8))
def test_running_real_migrate_groups_job(
ws,
installation_ctx,
make_cluster_policy,
make_cluster_policy_permissions,
Expand Down Expand Up @@ -42,18 +36,21 @@ def test_running_real_migrate_groups_job(
]

installation_ctx.workspace_installation.run()
installation_ctx.permission_manager.snapshot()

installation_ctx.deployed_workflows.run_workflow("migrate-groups")

# specific permissions api migrations are checked in different and smaller integration tests
found = installation_ctx.generic_permissions_support.load_as_dict("cluster-policies", cluster_policy.policy_id)
assert acc_group_a.display_name in found, "Group not found in cluster policies"
assert found[acc_group_a.display_name] == PermissionLevel.CAN_USE
assert found[f"{installation_ctx.config.renamed_group_prefix}{ws_group_a.display_name}"] == PermissionLevel.CAN_USE

scope_permission = installation_ctx.secret_scope_acl_support.secret_scope_permission(
secret_scope, acc_group_a.display_name
)
assert scope_permission == AclPermission.WRITE


@retried(on=[NotFound, InvalidParameterValue], timeout=timedelta(minutes=5))
def test_running_real_validate_groups_permissions_job(
def test_running_legacy_validate_groups_permissions_job(
installation_ctx,
make_query,
make_query_permissions,
Expand Down Expand Up @@ -91,37 +88,9 @@ def test_running_real_validate_groups_permissions_job(
f"TABLE:{table.full_name}",
f"secrets:{secret_scope}",
]
installation_ctx.__dict__['config_transform'] = lambda c: replace(c, use_legacy_permission_migration=True)
installation_ctx.workspace_installation.run()
installation_ctx.permission_manager.snapshot()

# assert the job does not throw any exception
installation_ctx.deployed_workflows.run_workflow("validate-groups-permissions")


@retried(on=[NotFound], timeout=timedelta(minutes=8))
def test_running_real_validate_groups_permissions_job_fails(
ws, installation_ctx, make_cluster_policy, make_cluster_policy_permissions
):

ws_group_a, _ = installation_ctx.make_ucx_group()

cluster_policy = make_cluster_policy()
make_cluster_policy_permissions(
object_id=cluster_policy.policy_id,
permission_level=PermissionLevel.CAN_USE,
group_name=ws_group_a.display_name,
)

installation_ctx.make_schema() # optimization to skip listing all schemas
installation_ctx.__dict__['include_group_names'] = [ws_group_a.display_name]
installation_ctx.__dict__['include_object_permissions'] = [f'cluster-policies:{cluster_policy.policy_id}']
installation_ctx.workspace_installation.run()
installation_ctx.permission_manager.snapshot()

# remove permission so the validation fails
ws.permissions.set(
request_object_type="cluster-policies", request_object_id=cluster_policy.policy_id, access_control_list=[]
)

with pytest.raises(ManyError):
installation_ctx.deployed_workflows.run_workflow("validate-groups-permissions")
4 changes: 2 additions & 2 deletions tests/unit/assessment/test_workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def test_assess_azure_service_principals(run_workflow):

def test_runtime_workspace_listing(run_workflow):
ctx = run_workflow(Assessment.workspace_listing)
assert "SELECT * FROM `hive_metastore`.`ucx`.`workspace_objects`" in ctx.sql_backend.queries
assert "SELECT * FROM `hive_metastore`.`ucx`.`workspace_objects`" not in ctx.sql_backend.queries


def test_runtime_crawl_grants(run_workflow):
Expand All @@ -26,7 +26,7 @@ def test_runtime_crawl_grants(run_workflow):

def test_runtime_crawl_permissions(run_workflow):
ctx = run_workflow(Assessment.crawl_permissions)
assert "TRUNCATE TABLE `hive_metastore`.`ucx`.`permissions`" in ctx.sql_backend.queries
assert "TRUNCATE TABLE `hive_metastore`.`ucx`.`permissions`" not in ctx.sql_backend.queries


def test_runtime_crawl_groups(run_workflow):
Expand Down
10 changes: 5 additions & 5 deletions tests/unit/workspace_access/test_workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from databricks.labs.ucx.workspace_access.groups import GroupManager
from databricks.labs.ucx.workspace_access.workflows import (
RemoveWorkspaceLocalGroups,
GroupMigration,
LegacyGroupMigration,
PermissionsMigrationAPI,
)
from tests.unit import GROUPS, PERMISSIONS
Expand All @@ -23,13 +23,13 @@ def test_runtime_delete_backup_groups(run_workflow) -> None:


def test_runtime_apply_permissions_to_account_groups(run_workflow) -> None:
ctx = run_workflow(GroupMigration.apply_permissions_to_account_groups)
assert 'SELECT * FROM `hive_metastore`.`ucx`.`groups`' in ctx.sql_backend.queries
ctx = run_workflow(LegacyGroupMigration.apply_permissions_to_account_groups)
assert 'SELECT * FROM `hive_metastore`.`ucx`.`groups`' not in ctx.sql_backend.queries


def test_rename_workspace_local_group(run_workflow) -> None:
ctx = run_workflow(GroupMigration.rename_workspace_local_groups)
assert 'SELECT * FROM `hive_metastore`.`ucx`.`groups`' in ctx.sql_backend.queries
ctx = run_workflow(LegacyGroupMigration.rename_workspace_local_groups)
assert 'SELECT * FROM `hive_metastore`.`ucx`.`groups`' not in ctx.sql_backend.queries


def test_reflect_account_groups_on_workspace(run_workflow) -> None:
Expand Down

0 comments on commit 2901471

Please sign in to comment.