From ce22323f38c5e0e6331c1d11bd1c1c5ddcef1425 Mon Sep 17 00:00:00 2001 From: xloya <982052490@qq.com> Date: Wed, 9 Oct 2024 14:16:16 +0800 Subject: [PATCH] [#3758] Improvement(PyGVFS): Support OAuth2 authentication in Python GVFS (#5030) ### What changes were proposed in this pull request? Support OAuth2 authentication in PyGVFS so that it can interact with the Gravitino server. This is supported in Java GVFS and we should also support it in PyGVFS. This is depended on https://github.com/apache/gravitino/pull/5026 which refactor the Python GVFS. ### Why are the changes needed? Fix: #3758 ### How was this patch tested? Add some UTs. --- .../gravitino/filesystem/gvfs.py | 46 ++++++++++- .../gravitino/filesystem/gvfs_config.py | 8 +- .../tests/unittests/test_gvfs_with_local.py | 81 ++++++++++++++++++- docs/how-to-use-gvfs.md | 43 +++++++--- 4 files changed, 164 insertions(+), 14 deletions(-) diff --git a/clients/client-python/gravitino/filesystem/gvfs.py b/clients/client-python/gravitino/filesystem/gvfs.py index 8d98d0a0412..e5a565ce0d6 100644 --- a/clients/client-python/gravitino/filesystem/gvfs.py +++ b/clients/client-python/gravitino/filesystem/gvfs.py @@ -32,6 +32,8 @@ from gravitino.audit.fileset_audit_constants import FilesetAuditConstants from gravitino.audit.fileset_data_operation import FilesetDataOperation from gravitino.audit.internal_client_type import InternalClientType +from gravitino.auth.default_oauth2_token_provider import DefaultOAuth2TokenProvider +from gravitino.auth.oauth2_token_provider import OAuth2TokenProvider from gravitino.auth.simple_auth_provider import SimpleAuthProvider from gravitino.catalog.fileset_catalog import FilesetCatalog from gravitino.client.gravitino_client import GravitinoClient @@ -92,16 +94,41 @@ def __init__( """ self._metalake = metalake_name auth_type = ( - GVFSConfig.DEFAULT_AUTH_TYPE + GVFSConfig.SIMPLE_AUTH_TYPE if options is None - else options.get(GVFSConfig.AUTH_TYPE, GVFSConfig.DEFAULT_AUTH_TYPE) + else 
options.get(GVFSConfig.AUTH_TYPE, GVFSConfig.SIMPLE_AUTH_TYPE) ) - if auth_type == GVFSConfig.DEFAULT_AUTH_TYPE: + if auth_type == GVFSConfig.SIMPLE_AUTH_TYPE: self._client = GravitinoClient( uri=server_uri, metalake_name=metalake_name, auth_data_provider=SimpleAuthProvider(), ) + elif auth_type == GVFSConfig.OAUTH2_AUTH_TYPE: + oauth2_server_uri = options.get(GVFSConfig.OAUTH2_SERVER_URI) + self._check_auth_config( + auth_type, GVFSConfig.OAUTH2_SERVER_URI, oauth2_server_uri + ) + + oauth2_credential = options.get(GVFSConfig.OAUTH2_CREDENTIAL) + self._check_auth_config( + auth_type, GVFSConfig.OAUTH2_CREDENTIAL, oauth2_credential + ) + + oauth2_path = options.get(GVFSConfig.OAUTH2_PATH) + self._check_auth_config(auth_type, GVFSConfig.OAUTH2_PATH, oauth2_path) + + oauth2_scope = options.get(GVFSConfig.OAUTH2_SCOPE) + self._check_auth_config(auth_type, GVFSConfig.OAUTH2_SCOPE, oauth2_scope) + + oauth2_token_provider: OAuth2TokenProvider = DefaultOAuth2TokenProvider( + oauth2_server_uri, oauth2_credential, oauth2_path, oauth2_scope + ) + self._client = GravitinoClient( + uri=server_uri, + metalake_name=metalake_name, + auth_data_provider=oauth2_token_provider, + ) else: raise GravitinoRuntimeException( f"Authentication type {auth_type} is not supported." @@ -686,6 +713,19 @@ def _strip_storage_protocol(storage_type: StorageType, path: str): f"Storage type:{storage_type} doesn't support now." ) + @staticmethod + def _check_auth_config(auth_type: str, config_key: str, config_value: str): + """Check if the config value is null. + :param auth_type: The auth type + :param config_key: The config key + :param config_value: The config value + """ + if config_value is None: + raise GravitinoRuntimeException( + f"{config_key} should not be null" + f" if {GVFSConfig.AUTH_TYPE} is set to {auth_type}." 
+ ) + def _get_fileset_catalog(self, catalog_ident: NameIdentifier): read_lock = self._catalog_cache_lock.gen_rlock() try: diff --git a/clients/client-python/gravitino/filesystem/gvfs_config.py b/clients/client-python/gravitino/filesystem/gvfs_config.py index be072a357b6..eb5733b56be 100644 --- a/clients/client-python/gravitino/filesystem/gvfs_config.py +++ b/clients/client-python/gravitino/filesystem/gvfs_config.py @@ -24,4 +24,10 @@ class GVFSConfig: DEFAULT_CACHE_EXPIRED_TIME = 3600 AUTH_TYPE = "auth_type" - DEFAULT_AUTH_TYPE = "simple" + SIMPLE_AUTH_TYPE = "simple" + + OAUTH2_AUTH_TYPE = "oauth2" + OAUTH2_SERVER_URI = "oauth2_server_uri" + OAUTH2_CREDENTIAL = "oauth2_credential" + OAUTH2_PATH = "oauth2_path" + OAUTH2_SCOPE = "oauth2_scope" diff --git a/clients/client-python/tests/unittests/test_gvfs_with_local.py b/clients/client-python/tests/unittests/test_gvfs_with_local.py index 22bdccd8c57..b4ce39e571a 100644 --- a/clients/client-python/tests/unittests/test_gvfs_with_local.py +++ b/clients/client-python/tests/unittests/test_gvfs_with_local.py @@ -34,9 +34,19 @@ from gravitino import gvfs, NameIdentifier from gravitino.auth.auth_constants import AuthConstants -from gravitino.exceptions.base import GravitinoRuntimeException +from gravitino.exceptions.base import ( + GravitinoRuntimeException, + IllegalArgumentException, + BadRequestException, +) from gravitino.filesystem.gvfs_config import GVFSConfig from tests.unittests import mock_base +from tests.unittests.auth.mock_base import ( + mock_jwt, + GENERATED_TIME, + mock_authentication_with_error_authentication_type, + mock_authentication_invalid_grant_error, +) def generate_unique_random_string(length): @@ -106,6 +116,75 @@ def test_simple_auth(self, *mock_methods): if current_user is not None: os.environ["user.name"] = current_user + def test_oauth2_auth(self, *mock_methods): + fs_options = { + GVFSConfig.AUTH_TYPE: GVFSConfig.OAUTH2_AUTH_TYPE, + GVFSConfig.OAUTH2_SERVER_URI: "http://127.0.0.1:1082", + 
GVFSConfig.OAUTH2_CREDENTIAL: "xx:xx", + GVFSConfig.OAUTH2_SCOPE: "test", + GVFSConfig.OAUTH2_PATH: "token/test", + } + # test auth normally + mocked_jwt = mock_jwt( + sub="gravitino", exp=GENERATED_TIME + 10000, aud="service1" + ) + with patch( + "gravitino.auth.default_oauth2_token_provider.DefaultOAuth2TokenProvider._get_access_token", + return_value=mocked_jwt, + ), patch( + "gravitino.auth.default_oauth2_token_provider.DefaultOAuth2TokenProvider._fetch_token", + return_value=mocked_jwt, + ): + fileset_storage_location = f"{self._fileset_dir}/test_oauth2_auth" + fileset_virtual_location = "fileset/fileset_catalog/tmp/test_oauth2_auth" + actual_path = fileset_storage_location + with patch( + "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location", + return_value=actual_path, + ): + local_fs = LocalFileSystem() + local_fs.mkdir(fileset_storage_location) + sub_dir_path = f"{fileset_storage_location}/test_1" + local_fs.mkdir(sub_dir_path) + self.assertTrue(local_fs.exists(sub_dir_path)) + sub_file_path = f"{fileset_storage_location}/test_file_1.par" + local_fs.touch(sub_file_path) + self.assertTrue(local_fs.exists(sub_file_path)) + fs = gvfs.GravitinoVirtualFileSystem( + server_uri="http://localhost:9090", + metalake_name="metalake_demo", + options=fs_options, + skip_instance_cache=True, + ) + # should not raise exception + self.assertTrue(fs.exists(fileset_virtual_location)) + + # test error authentication type + with patch( + "gravitino.utils.http_client.HTTPClient.post_form", + return_value=mock_authentication_with_error_authentication_type(), + ): + with self.assertRaises(IllegalArgumentException): + gvfs.GravitinoVirtualFileSystem( + server_uri="http://localhost:9090", + metalake_name="metalake_demo", + options=fs_options, + skip_instance_cache=True, + ) + + # test bad request + with patch( + "gravitino.utils.http_client.HTTPClient._make_request", + return_value=mock_authentication_invalid_grant_error(), + ): + with 
self.assertRaises(BadRequestException): + gvfs.GravitinoVirtualFileSystem( + server_uri="http://localhost:9090", + metalake_name="metalake_demo", + options=fs_options, + skip_instance_cache=True, + ) + def test_ls(self, *mock_methods): fileset_storage_location = f"{self._fileset_dir}/test_ls" fileset_virtual_location = "fileset/fileset_catalog/tmp/test_ls" diff --git a/docs/how-to-use-gvfs.md b/docs/how-to-use-gvfs.md index 3a116928ad0..7a98271d41c 100644 --- a/docs/how-to-use-gvfs.md +++ b/docs/how-to-use-gvfs.md @@ -10,7 +10,7 @@ license: "This software is licensed under the Apache License version 2." directories, with `fileset` you can manage non-tabular data through Gravitino. For details, you can read [How to manage fileset metadata using Gravitino](./manage-fileset-metadata-using-gravitino.md). -To use `Fileset` managed by Gravitino, Gravitino provides a virtual file system layer called +To use `fileset` managed by Gravitino, Gravitino provides a virtual file system layer called the Gravitino Virtual File System (GVFS): * In Java, it's built on top of the Hadoop Compatible File System(HCFS) interface. * In Python, it's built on top of the [fsspec](https://filesystem-spec.readthedocs.io/en/stable/index.html) @@ -335,13 +335,17 @@ to recompile the native libraries like `libhdfs` and others, and completely repl ### Configuration -| Configuration item | Description | Default value | Required | Since version | -|----------------------|----------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------|---------------| -| `server_uri` | The Gravitino server uri, e.g. `http://localhost:8090`. | (none) | Yes | 0.6.0 |. | (none) | Yes | 0.6.0 | -| `metalake_name` | The metalake name which the fileset belongs to. | (none) | Yes | 0.6.0 |. | (none) | Yes | 0.6.0 | . | (none) | Yes | 0.6.0 | -| `cache_size` | The cache capacity of the Gravitino Virtual File System. 
| `20` | No | 0.6.0 |. | (none) | Yes | 0.6.0 | . | (none) | Yes | 0.6.0 | -| `cache_expired_time` | The value of time that the cache expires after accessing in the Gravitino Virtual File System. The value is in `seconds`. | `3600` | No | 0.6.0 |. -| `auth_type` | The auth type to initialize the Gravitino client to use with the Gravitino Virtual File System. Currently only supports `simple` auth types. | `simple` | No | 0.6.0 |. +| Configuration item | Description | Default value | Required | Since version | +|----------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|-----------------------------------|---------------| +| `server_uri` | The Gravitino server uri, e.g. `http://localhost:8090`. | (none) | Yes | 0.6.0 | +| `metalake_name` | The metalake name which the fileset belongs to. | (none) | Yes | 0.6.0 | +| `cache_size` | The cache capacity of the Gravitino Virtual File System. | `20` | No | 0.6.0 | +| `cache_expired_time` | The value of time that the cache expires after accessing in the Gravitino Virtual File System. The value is in `seconds`. | `3600` | No | 0.6.0 | +| `auth_type` | The auth type to initialize the Gravitino client to use with the Gravitino Virtual File System. Currently supports `simple` and `oauth2` auth types. | `simple` | No | 0.6.0 | +| `oauth2_server_uri` | The auth server URI for the Gravitino client when using `oauth2` auth type. | (none) | Yes if you use `oauth2` auth type | 0.7.0 | +| `oauth2_credential` | The auth credential for the Gravitino client when using `oauth2` auth type. | (none) | Yes if you use `oauth2` auth type | 0.7.0 | +| `oauth2_path` | The auth server path for the Gravitino client when using `oauth2` auth type.
Please remove the first slash `/` from the path, for example `oauth/token`. | (none) | Yes if you use `oauth2` auth type | 0.7.0 | +| `oauth2_scope` | The auth scope for the Gravitino client when using `oauth2` auth type with the Gravitino Virtual File System. | (none) | Yes if you use `oauth2` auth type | 0.7.0 | You can configure these properties when obtaining the `Gravitino Virtual FileSystem` in Python like this: @@ -538,7 +542,7 @@ print(documents) ### Authentication -Currently, Gravitino Virtual File System in Python only supports one kind of authentication types to access Gravitino server: `simple`. +Currently, Gravitino Virtual File System in Python supports two kinds of authentication types to access Gravitino server: `simple` and `oauth2`. The type of `simple` is the default authentication type in Gravitino Virtual File System in Python. @@ -556,4 +560,25 @@ from gravitino import gvfs options = {"auth_type": "simple"} fs = gvfs.GravitinoVirtualFileSystem(server_uri="http://localhost:8090", metalake_name="test_metalake", options=options) print(fs.ls("gvfs://fileset/fileset_catlaog/tmp/test_fileset")) +``` + +##### Using `OAuth` authentication + +First, make sure that your Gravitino server is also configured to use the `oauth2` authentication mode, +and you have an OAuth server to fetch the token: [Security](security/security.md). + +Then, you can configure the authentication like this: + +```python +from gravitino import gvfs + +options = { + "auth_type": "oauth2", + "oauth2_server_uri": "http://127.0.0.1:1082", + "oauth2_credential": "xx:xx", + "oauth2_scope": "test", + "oauth2_path": "token/test", +} +fs = gvfs.GravitinoVirtualFileSystem(server_uri="http://localhost:8090", metalake_name="test_metalake", options=options) +print(fs.ls("gvfs://fileset/fileset_catalog/tmp/test_fileset")) ``` \ No newline at end of file