diff --git a/clients/client-python/gravitino/filesystem/gvfs.py b/clients/client-python/gravitino/filesystem/gvfs.py index 8d98d0a0412..e5a565ce0d6 100644 --- a/clients/client-python/gravitino/filesystem/gvfs.py +++ b/clients/client-python/gravitino/filesystem/gvfs.py @@ -32,6 +32,8 @@ from gravitino.audit.fileset_audit_constants import FilesetAuditConstants from gravitino.audit.fileset_data_operation import FilesetDataOperation from gravitino.audit.internal_client_type import InternalClientType +from gravitino.auth.default_oauth2_token_provider import DefaultOAuth2TokenProvider +from gravitino.auth.oauth2_token_provider import OAuth2TokenProvider from gravitino.auth.simple_auth_provider import SimpleAuthProvider from gravitino.catalog.fileset_catalog import FilesetCatalog from gravitino.client.gravitino_client import GravitinoClient @@ -92,16 +94,41 @@ def __init__( """ self._metalake = metalake_name auth_type = ( - GVFSConfig.DEFAULT_AUTH_TYPE + GVFSConfig.SIMPLE_AUTH_TYPE if options is None - else options.get(GVFSConfig.AUTH_TYPE, GVFSConfig.DEFAULT_AUTH_TYPE) + else options.get(GVFSConfig.AUTH_TYPE, GVFSConfig.SIMPLE_AUTH_TYPE) ) - if auth_type == GVFSConfig.DEFAULT_AUTH_TYPE: + if auth_type == GVFSConfig.SIMPLE_AUTH_TYPE: self._client = GravitinoClient( uri=server_uri, metalake_name=metalake_name, auth_data_provider=SimpleAuthProvider(), ) + elif auth_type == GVFSConfig.OAUTH2_AUTH_TYPE: + oauth2_server_uri = options.get(GVFSConfig.OAUTH2_SERVER_URI) + self._check_auth_config( + auth_type, GVFSConfig.OAUTH2_SERVER_URI, oauth2_server_uri + ) + + oauth2_credential = options.get(GVFSConfig.OAUTH2_CREDENTIAL) + self._check_auth_config( + auth_type, GVFSConfig.OAUTH2_CREDENTIAL, oauth2_credential + ) + + oauth2_path = options.get(GVFSConfig.OAUTH2_PATH) + self._check_auth_config(auth_type, GVFSConfig.OAUTH2_PATH, oauth2_path) + + oauth2_scope = options.get(GVFSConfig.OAUTH2_SCOPE) + self._check_auth_config(auth_type, GVFSConfig.OAUTH2_SCOPE, oauth2_scope) + + 
oauth2_token_provider: OAuth2TokenProvider = DefaultOAuth2TokenProvider( + oauth2_server_uri, oauth2_credential, oauth2_path, oauth2_scope + ) + self._client = GravitinoClient( + uri=server_uri, + metalake_name=metalake_name, + auth_data_provider=oauth2_token_provider, + ) else: raise GravitinoRuntimeException( f"Authentication type {auth_type} is not supported." @@ -686,6 +713,19 @@ def _strip_storage_protocol(storage_type: StorageType, path: str): f"Storage type:{storage_type} doesn't support now." ) + @staticmethod + def _check_auth_config(auth_type: str, config_key: str, config_value: str): + """Raise GravitinoRuntimeException if a required auth config value is None. + :param auth_type: The auth type, reported in the error message + :param config_key: The config key being checked, reported in the error message + :param config_value: The config value; only None is rejected (empty strings pass) + """ + if config_value is None: + raise GravitinoRuntimeException( + f"{config_key} should not be null" + f" if {GVFSConfig.AUTH_TYPE} is set to {auth_type}." + ) + def _get_fileset_catalog(self, catalog_ident: NameIdentifier): read_lock = self._catalog_cache_lock.gen_rlock() try: diff --git a/clients/client-python/gravitino/filesystem/gvfs_config.py b/clients/client-python/gravitino/filesystem/gvfs_config.py index be072a357b6..eb5733b56be 100644 --- a/clients/client-python/gravitino/filesystem/gvfs_config.py +++ b/clients/client-python/gravitino/filesystem/gvfs_config.py @@ -24,4 +24,10 @@ class GVFSConfig: DEFAULT_CACHE_EXPIRED_TIME = 3600 AUTH_TYPE = "auth_type" - DEFAULT_AUTH_TYPE = "simple" + SIMPLE_AUTH_TYPE = "simple" + + OAUTH2_AUTH_TYPE = "oauth2" + OAUTH2_SERVER_URI = "oauth2_server_uri" + OAUTH2_CREDENTIAL = "oauth2_credential" + OAUTH2_PATH = "oauth2_path" + OAUTH2_SCOPE = "oauth2_scope" diff --git a/clients/client-python/tests/unittests/test_gvfs_with_local.py b/clients/client-python/tests/unittests/test_gvfs_with_local.py index 22bdccd8c57..b4ce39e571a 100644 --- a/clients/client-python/tests/unittests/test_gvfs_with_local.py +++ b/clients/client-python/tests/unittests/test_gvfs_with_local.py @@ -34,9 +34,19 @@ 
from gravitino import gvfs, NameIdentifier from gravitino.auth.auth_constants import AuthConstants -from gravitino.exceptions.base import GravitinoRuntimeException +from gravitino.exceptions.base import ( + GravitinoRuntimeException, + IllegalArgumentException, + BadRequestException, +) from gravitino.filesystem.gvfs_config import GVFSConfig from tests.unittests import mock_base +from tests.unittests.auth.mock_base import ( + mock_jwt, + GENERATED_TIME, + mock_authentication_with_error_authentication_type, + mock_authentication_invalid_grant_error, +) def generate_unique_random_string(length): @@ -106,6 +116,75 @@ def test_simple_auth(self, *mock_methods): if current_user is not None: os.environ["user.name"] = current_user + def test_oauth2_auth(self, *mock_methods): + fs_options = { + GVFSConfig.AUTH_TYPE: GVFSConfig.OAUTH2_AUTH_TYPE, + GVFSConfig.OAUTH2_SERVER_URI: "http://127.0.0.1:1082", + GVFSConfig.OAUTH2_CREDENTIAL: "xx:xx", + GVFSConfig.OAUTH2_SCOPE: "test", + GVFSConfig.OAUTH2_PATH: "token/test", + } + # happy path: token retrieval is patched to return a mocked JWT + mocked_jwt = mock_jwt( + sub="gravitino", exp=GENERATED_TIME + 10000, aud="service1" + ) + with patch( + "gravitino.auth.default_oauth2_token_provider.DefaultOAuth2TokenProvider._get_access_token", + return_value=mocked_jwt, + ), patch( + "gravitino.auth.default_oauth2_token_provider.DefaultOAuth2TokenProvider._fetch_token", + return_value=mocked_jwt, + ): + fileset_storage_location = f"{self._fileset_dir}/test_oauth2_auth" + fileset_virtual_location = "fileset/fileset_catalog/tmp/test_oauth2_auth" + actual_path = fileset_storage_location + with patch( + "gravitino.catalog.fileset_catalog.FilesetCatalog.get_file_location", + return_value=actual_path, + ): + local_fs = LocalFileSystem() + local_fs.mkdir(fileset_storage_location) + sub_dir_path = f"{fileset_storage_location}/test_1" + local_fs.mkdir(sub_dir_path) + self.assertTrue(local_fs.exists(sub_dir_path)) + sub_file_path = f"{fileset_storage_location}/test_file_1.par" + 
local_fs.touch(sub_file_path) + self.assertTrue(local_fs.exists(sub_file_path)) + fs = gvfs.GravitinoVirtualFileSystem( + server_uri="http://localhost:9090", + metalake_name="metalake_demo", + options=fs_options, + skip_instance_cache=True, + ) + # construction must not raise and the virtual path must be reported as existing + self.assertTrue(fs.exists(fileset_virtual_location)) + + # post_form patched with the error-authentication-type mock: construction must raise IllegalArgumentException + with patch( + "gravitino.utils.http_client.HTTPClient.post_form", + return_value=mock_authentication_with_error_authentication_type(), + ): + with self.assertRaises(IllegalArgumentException): + gvfs.GravitinoVirtualFileSystem( + server_uri="http://localhost:9090", + metalake_name="metalake_demo", + options=fs_options, + skip_instance_cache=True, + ) + + # _make_request patched with the invalid-grant mock: construction must raise BadRequestException + with patch( + "gravitino.utils.http_client.HTTPClient._make_request", + return_value=mock_authentication_invalid_grant_error(), + ): + with self.assertRaises(BadRequestException): + gvfs.GravitinoVirtualFileSystem( + server_uri="http://localhost:9090", + metalake_name="metalake_demo", + options=fs_options, + skip_instance_cache=True, + ) + def test_ls(self, *mock_methods): fileset_storage_location = f"{self._fileset_dir}/test_ls" fileset_virtual_location = "fileset/fileset_catalog/tmp/test_ls" diff --git a/docs/how-to-use-gvfs.md b/docs/how-to-use-gvfs.md index 3a116928ad0..7a98271d41c 100644 --- a/docs/how-to-use-gvfs.md +++ b/docs/how-to-use-gvfs.md @@ -10,7 +10,7 @@ license: "This software is licensed under the Apache License version 2." directories, with `fileset` you can manage non-tabular data through Gravitino. For details, you can read [How to manage fileset metadata using Gravitino](./manage-fileset-metadata-using-gravitino.md). 
-To use `Fileset` managed by Gravitino, Gravitino provides a virtual file system layer called +To use `fileset` managed by Gravitino, Gravitino provides a virtual file system layer called the Gravitino Virtual File System (GVFS): * In Java, it's built on top of the Hadoop Compatible File System(HCFS) interface. * In Python, it's built on top of the [fsspec](https://filesystem-spec.readthedocs.io/en/stable/index.html) @@ -335,13 +335,17 @@ to recompile the native libraries like `libhdfs` and others, and completely repl ### Configuration -| Configuration item | Description | Default value | Required | Since version | -|----------------------|----------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------|---------------| -| `server_uri` | The Gravitino server uri, e.g. `http://localhost:8090`. | (none) | Yes | 0.6.0 |. | (none) | Yes | 0.6.0 | -| `metalake_name` | The metalake name which the fileset belongs to. | (none) | Yes | 0.6.0 |. | (none) | Yes | 0.6.0 | . | (none) | Yes | 0.6.0 | -| `cache_size` | The cache capacity of the Gravitino Virtual File System. | `20` | No | 0.6.0 |. | (none) | Yes | 0.6.0 | . | (none) | Yes | 0.6.0 | -| `cache_expired_time` | The value of time that the cache expires after accessing in the Gravitino Virtual File System. The value is in `seconds`. | `3600` | No | 0.6.0 |. -| `auth_type` | The auth type to initialize the Gravitino client to use with the Gravitino Virtual File System. Currently only supports `simple` auth types. | `simple` | No | 0.6.0 |. +| Configuration item | Description | Default value | Required | Since version | +|----------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|-----------------------------------|---------------| +| `server_uri` | The Gravitino server uri, e.g. 
`http://localhost:8090`. | (none) | Yes | 0.6.0 | +| `metalake_name` | The metalake name which the fileset belongs to. | (none) | Yes | 0.6.0 | +| `cache_size` | The cache capacity of the Gravitino Virtual File System. | `20` | No | 0.6.0 | +| `cache_expired_time` | The value of time that the cache expires after accessing in the Gravitino Virtual File System. The value is in `seconds`. | `3600` | No | 0.6.0 | +| `auth_type` | The auth type to initialize the Gravitino client to use with the Gravitino Virtual File System. Currently supports `simple` and `oauth2` auth types. | `simple` | No | 0.6.0 | +| `oauth2_server_uri` | The auth server URI for the Gravitino client when using `oauth2` auth type. | (none) | Yes if you use `oauth2` auth type | 0.7.0 | +| `oauth2_credential` | The auth credential for the Gravitino client when using `oauth2` auth type. | (none) | Yes if you use `oauth2` auth type | 0.7.0 | +| `oauth2_path` | The auth server path for the Gravitino client when using `oauth2` auth type. Please remove the first slash `/` from the path, for example `oauth/token`. | (none) | Yes if you use `oauth2` auth type | 0.7.0 | +| `oauth2_scope` | The auth scope for the Gravitino client when using `oauth2` auth type with the Gravitino Virtual File System. | (none) | Yes if you use `oauth2` auth type | 0.7.0 | You can configure these properties when obtaining the `Gravitino Virtual FileSystem` in Python like this: @@ -538,7 +542,7 @@ print(documents) ### Authentication -Currently, Gravitino Virtual File System in Python only supports one kind of authentication types to access Gravitino server: `simple`. +Currently, Gravitino Virtual File System in Python supports two authentication types to access Gravitino server: `simple` and `oauth2`. 
The type of `simple` is the default authentication type in Gravitino Virtual File System in Python. @@ -556,4 +560,26 @@ from gravitino import gvfs options = {"auth_type": "simple"} fs = gvfs.GravitinoVirtualFileSystem(server_uri="http://localhost:8090", metalake_name="test_metalake", options=options) print(fs.ls("gvfs://fileset/fileset_catlaog/tmp/test_fileset")) +``` + +##### Using `OAuth` authentication + +First, make sure that your Gravitino server is also configured to use the `oauth2` authentication mode, +and you have an OAuth server to fetch the token: [Security](security/security.md). + +Then, you can configure the authentication like this: + +```python +from gravitino import gvfs +from gravitino.filesystem.gvfs_config import GVFSConfig + +options = { + GVFSConfig.AUTH_TYPE: GVFSConfig.OAUTH2_AUTH_TYPE, + GVFSConfig.OAUTH2_SERVER_URI: "http://127.0.0.1:1082", + GVFSConfig.OAUTH2_CREDENTIAL: "xx:xx", + GVFSConfig.OAUTH2_SCOPE: "test", + GVFSConfig.OAUTH2_PATH: "token/test", +} +fs = gvfs.GravitinoVirtualFileSystem(server_uri="http://localhost:8090", metalake_name="test_metalake", options=options) +print(fs.ls("gvfs://fileset/fileset_catalog/tmp/test_fileset")) ``` \ No newline at end of file