Skip to content

Commit

Permalink
[apache#4000] improvement(client-python): Support simple auth for PyG…
Browse files Browse the repository at this point in the history
…VFS (apache#4001)

### What changes were proposed in this pull request?

Support simple auth for the Gravitino client in PyGVFS. The integration tests
depend on these PRs: apache#3876 and apache#3931. Once apache#3876 and apache#3931 are merged, I will add
integration tests and docs for this PR.

### Why are the changes needed?

Fix: apache#4000 

### How was this patch tested?

Add UTs and ITs.

---------

Co-authored-by: xiaojiebao <[email protected]>
  • Loading branch information
xloya and xiaojiebao authored Jul 10, 2024
1 parent 1422217 commit 0f18b63
Show file tree
Hide file tree
Showing 5 changed files with 157 additions and 18 deletions.
43 changes: 37 additions & 6 deletions clients/client-python/gravitino/filesystem/gvfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,10 @@
from readerwriterlock import rwlock
from gravitino.api.catalog import Catalog
from gravitino.api.fileset import Fileset
from gravitino.auth.simple_auth_provider import SimpleAuthProvider
from gravitino.client.gravitino_client import GravitinoClient
from gravitino.exceptions.base import GravitinoRuntimeException
from gravitino.filesystem.gvfs_config import GVFSConfig
from gravitino.name_identifier import NameIdentifier

PROTOCOL_NAME = "gvfs"
Expand Down Expand Up @@ -94,15 +96,44 @@ class GravitinoVirtualFileSystem(fsspec.AbstractFileSystem):

def __init__(
self,
server_uri=None,
metalake_name=None,
cache_size=20,
cache_expired_time=3600,
server_uri: str = None,
metalake_name: str = None,
options: Dict = None,
**kwargs,
):
"""Initialize the GravitinoVirtualFileSystem.
:param server_uri: Gravitino server URI
:param metalake_name: Gravitino metalake name
:param options: Options for the GravitinoVirtualFileSystem
:param kwargs: Extra args for super filesystem
"""
self._metalake = metalake_name
self._client = GravitinoClient(
uri=server_uri, metalake_name=metalake_name, check_version=False
auth_type = (
GVFSConfig.DEFAULT_AUTH_TYPE
if options is None
else options.get(GVFSConfig.AUTH_TYPE, GVFSConfig.DEFAULT_AUTH_TYPE)
)
if auth_type == GVFSConfig.DEFAULT_AUTH_TYPE:
self._client = GravitinoClient(
uri=server_uri,
metalake_name=metalake_name,
auth_data_provider=SimpleAuthProvider(),
)
else:
raise GravitinoRuntimeException(
f"Authentication type {auth_type} is not supported."
)
cache_size = (
GVFSConfig.DEFAULT_CACHE_SIZE
if options is None
else options.get(GVFSConfig.CACHE_SIZE, GVFSConfig.DEFAULT_CACHE_SIZE)
)
cache_expired_time = (
GVFSConfig.DEFAULT_CACHE_EXPIRED_TIME
if options is None
else options.get(
GVFSConfig.CACHE_EXPIRED_TIME, GVFSConfig.DEFAULT_CACHE_EXPIRED_TIME
)
)
self._cache = TTLCache(maxsize=cache_size, ttl=cache_expired_time)
self._cache_lock = rwlock.RWLockFair()
Expand Down
29 changes: 29 additions & 0 deletions clients/client-python/gravitino/filesystem/gvfs_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
"""


class GVFSConfig:
    """Option keys and default values for the Gravitino Virtual File System.

    The string constants are the keys looked up in the ``options`` dict;
    each ``DEFAULT_*`` constant is the fallback used for the matching key.
    """

    # Keys recognised in the ``options`` dict.
    CACHE_SIZE = "cache_size"
    CACHE_EXPIRED_TIME = "cache_expired_time"
    AUTH_TYPE = "auth_type"

    # Fallback values, in the same order as the keys above.
    DEFAULT_CACHE_SIZE = 20
    DEFAULT_CACHE_EXPIRED_TIME = 3600  # documented as seconds
    DEFAULT_AUTH_TYPE = "simple"
24 changes: 24 additions & 0 deletions clients/client-python/tests/integration/test_gvfs_with_hdfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
under the License.
"""

# pylint: disable=protected-access

import base64
import logging
import os
import platform
Expand All @@ -40,6 +43,7 @@
Catalog,
Fileset,
)
from gravitino.auth.auth_constants import AuthConstants
from gravitino.exceptions.base import GravitinoRuntimeException
from tests.integration.integration_test_env import IntegrationTestEnv
from tests.integration.hdfs_container import HDFSContainer
Expand Down Expand Up @@ -186,6 +190,26 @@ def _clean_test_data(cls):
except Exception as e:
logger.error("Clean test data failed: %s", e)

def test_simple_auth(self):
    """Check that ``auth_type=simple`` yields a basic-auth token for the user.

    The test overrides the ``user.name`` environment variable, builds a
    GravitinoVirtualFileSystem with simple auth, and expects the client's
    token to decode to ``"<user>:dummy"``.
    """
    options = {"auth_type": "simple"}
    # Remember the previous value (None when unset) so it can be restored.
    previous_user = os.environ.get("user.name")
    user = "test_gvfs"
    # NOTE(review): presumably the simple auth provider reads this variable
    # when it is constructed - confirm against SimpleAuthProvider.
    os.environ["user.name"] = user
    try:
        fs = gvfs.GravitinoVirtualFileSystem(
            server_uri="http://localhost:8090",
            metalake_name=self.metalake_name,
            options=options,
        )
        token = fs._client._rest_client.auth_data_provider.get_token_data()
        # Strip the basic-auth header prefix, then decode the base64 payload.
        token_string = base64.b64decode(
            token.decode("utf-8")[len(AuthConstants.AUTHORIZATION_BASIC_HEADER) :]
        ).decode("utf-8")
        self.assertEqual(f"{user}:dummy", token_string)
    finally:
        # Always undo the override, even when the test fails, so the fake
        # user does not leak into other tests in the same process.
        if previous_user is None:
            os.environ.pop("user.name", None)
        else:
            os.environ["user.name"] = previous_user

def test_ls(self):
ls_dir = self.fileset_gvfs_location + "/test_ls"
ls_actual_dir = self.fileset_storage_location + "/test_ls"
Expand Down
36 changes: 32 additions & 4 deletions clients/client-python/tests/unittests/test_gvfs_with_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@
under the License.
"""

# pylint: disable=protected-access
# pylint: disable=protected-access,too-many-lines

import base64
import os
import random
import string
import time
Expand All @@ -34,6 +36,7 @@

from gravitino import gvfs
from gravitino import NameIdentifier
from gravitino.auth.auth_constants import AuthConstants
from gravitino.dto.audit_dto import AuditDTO
from gravitino.dto.fileset_dto import FilesetDTO
from gravitino.filesystem.gvfs import FilesetContext, StorageType
Expand Down Expand Up @@ -77,12 +80,11 @@ def test_cache(self, *mock_methods):
fileset_virtual_location = "fileset/fileset_catalog/tmp/test_cache"
local_fs.mkdir(fileset_storage_location)
self.assertTrue(local_fs.exists(fileset_storage_location))

options = {"cache_size": 1, "cache_expired_time": 2}
fs = gvfs.GravitinoVirtualFileSystem(
server_uri="http://localhost:9090",
metalake_name="metalake_demo",
cache_size=1,
cache_expired_time=1,
options=options,
)
self.assertTrue(fs.exists(fileset_virtual_location))
# wait 2 seconds
Expand All @@ -95,6 +97,32 @@ def test_cache(self, *mock_methods):
)
)

@patch(
    "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset",
    return_value=mock_base.mock_load_fileset(
        "test_simple_auth", f"{_fileset_dir}/test_simple_auth"
    ),
)
def test_simple_auth(self, mock_method1, mock_method2, mock_method3, mock_method4):
    """Check that ``auth_type=simple`` yields a basic-auth token for the user.

    The test overrides the ``user.name`` environment variable, builds a
    GravitinoVirtualFileSystem with simple auth, and expects the client's
    token to decode to ``"<user>:dummy"``. The ``mock_method*`` arguments
    are mocks injected by ``patch`` decorators and are not used directly.
    """
    options = {"auth_type": "simple"}
    # Remember the previous value (None when unset) so it can be restored.
    previous_user = os.environ.get("user.name")
    user = "test_gvfs"
    # NOTE(review): presumably the simple auth provider reads this variable
    # when it is constructed - confirm against SimpleAuthProvider.
    os.environ["user.name"] = user
    try:
        fs = gvfs.GravitinoVirtualFileSystem(
            server_uri="http://localhost:9090",
            metalake_name="metalake_demo",
            options=options,
        )
        token = fs._client._rest_client.auth_data_provider.get_token_data()
        # Strip the basic-auth header prefix, then decode the base64 payload.
        token_string = base64.b64decode(
            token.decode("utf-8")[len(AuthConstants.AUTHORIZATION_BASIC_HEADER) :]
        ).decode("utf-8")
        self.assertEqual(f"{user}:dummy", token_string)
    finally:
        # Always undo the override, even when the test fails, so the fake
        # user does not leak into other tests in the same process.
        if previous_user is None:
            os.environ.pop("user.name", None)
        else:
            os.environ["user.name"] = previous_user

@patch(
"gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset",
return_value=mock_base.mock_load_fileset("test_ls", f"{_fileset_dir}/test_ls"),
Expand Down
43 changes: 35 additions & 8 deletions docs/how-to-use-gvfs.md
Original file line number Diff line number Diff line change
Expand Up @@ -335,20 +335,25 @@ to recompile the native libraries like `libhdfs` and others, and completely repl

### Configuration

| Configuration item | Description | Default value | Required | Since version |
|----------------------|---------------------------------------------------------------------------------------------------------------------------|---------------|----------|---------------|
| `server_uri` | The Gravitino server uri, e.g. `http://localhost:8090`. | (none) | Yes | 0.6.0 |
| `metalake_name` | The metalake name which the fileset belongs to. | (none) | Yes | 0.6.0 |
| `cache_size` | The cache capacity of the Gravitino Virtual File System. | `20` | No | 0.6.0 |
| `cache_expired_time` | The value of time that the cache expires after accessing in the Gravitino Virtual File System. The value is in `seconds`. | `3600` | No | 0.6.0 |
| Configuration item | Description | Default value | Required | Since version |
|----------------------|----------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------|---------------|
| `server_uri` | The Gravitino server uri, e.g. `http://localhost:8090`. | (none) | Yes | 0.6.0 |
| `metalake_name` | The metalake name which the fileset belongs to. | (none) | Yes | 0.6.0 |
| `cache_size` | The cache capacity of the Gravitino Virtual File System. | `20` | No | 0.6.0 |
| `cache_expired_time` | The value of time that the cache expires after accessing in the Gravitino Virtual File System. The value is in `seconds`. | `3600` | No | 0.6.0 |
| `auth_type` | The auth type used to initialize the Gravitino client for the Gravitino Virtual File System. Currently only the `simple` auth type is supported. | `simple` | No | 0.6.0 |


You can configure these properties when obtaining the `Gravitino Virtual FileSystem` in Python like this:

```python
from gravitino import gvfs

fs = gvfs.GravitinoVirtualFileSystem(server_uri="http://localhost:8090", metalake_name="test_metalake")
options = {
"cache_size": 20,
"cache_expired_time": 3600,
"auth_type": "simple"
}
fs = gvfs.GravitinoVirtualFileSystem(server_uri="http://localhost:8090", metalake_name="test_metalake", options=options)
```

### Usage examples
Expand Down Expand Up @@ -530,3 +535,25 @@ reader = SimpleDirectoryReader(
documents = reader.load_data()
print(documents)
```

### Authentication

Currently, the Gravitino Virtual File System in Python supports only one authentication type for accessing the Gravitino server: `simple`.

`simple` is the default authentication type of the Gravitino Virtual File System in Python.

#### How to use authentication

##### Using `simple` authentication

First, make sure that your Gravitino server is also configured to use the `simple` authentication mode.

Then, you can configure the authentication like this:

```python
from gravitino import gvfs

options = {"auth_type": "simple"}
fs = gvfs.GravitinoVirtualFileSystem(server_uri="http://localhost:8090", metalake_name="test_metalake", options=options)
print(fs.ls("gvfs://fileset/fileset_catalog/tmp/test_fileset"))
```

0 comments on commit 0f18b63

Please sign in to comment.