Skip to content

Commit

Permalink
[#4279] feat(client-python): Add the getFileLocation interface code…
Browse files Browse the repository at this point in the history
… skeleton in Python Client (#4373)

### What changes were proposed in this pull request?

Added an interface code skeleton to the Python client for obtaining the file
location, so that the client can report the information the server needs for
auditing, and so that some of the checking logic in the Python GVFS can be
simplified later.
Depends on #4320.

### Why are the changes needed?

Fix: #4279 

### How was this patch tested?

Added unit tests (UTs) and integration tests (ITs).
  • Loading branch information
xloya authored Sep 24, 2024
1 parent 92a0ec8 commit 39f3ca5
Show file tree
Hide file tree
Showing 12 changed files with 431 additions and 13 deletions.
16 changes: 16 additions & 0 deletions clients/client-python/gravitino/audit/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
71 changes: 71 additions & 0 deletions clients/client-python/gravitino/audit/caller_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import threading
from typing import Dict

# Per-thread storage; CallerContextHolder keeps the current thread's
# CallerContext in its ``caller_context`` attribute.
caller_context_holder = threading.local()


class CallerContext:
    """A class defining the caller context for auditing coarse-grained operations."""

    _context: Dict[str, str]

    def __init__(self, context: Dict[str, str]):
        """Initialize the CallerContext.

        Args:
            context: The context dict. It is stored by reference, not copied.
        """
        self._context = context

    def context(self) -> Dict[str, str]:
        """Return the context dict held by this caller context.

        Returns:
            The same dict object that was passed to the constructor.
        """
        return self._context


class CallerContextHolder:
    """A thread local holder for the CallerContext.

    Every method operates on the ``caller_context`` attribute of the
    module-level ``threading.local`` object, so each thread only sees the
    context it set itself.
    """

    @staticmethod
    def set(context: CallerContext):
        """Set the CallerContext for the current thread.

        Args:
            context: The CallerContext to set.
        """
        caller_context_holder.caller_context = context

    @staticmethod
    def get():
        """Get the CallerContext of the current thread.

        Returns:
            The CallerContext, or None if none is set on this thread.
        """
        return getattr(caller_context_holder, "caller_context", None)

    @staticmethod
    def remove():
        """Remove the CallerContext of the current thread.

        Does nothing when no context is set, so it is safe to call from a
        ``finally`` block or more than once.
        """
        # A bare ``del`` raises AttributeError when the attribute was never
        # set on this thread (or was already removed); guard against that.
        if hasattr(caller_context_holder, "caller_context"):
            del caller_context_holder.caller_context
28 changes: 28 additions & 0 deletions clients/client-python/gravitino/audit/fileset_audit_constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.


class FilesetAuditConstants:
    """Constants used for auditing fileset data operations."""

    # Name of the HTTP header used to pass the internal client type.
    HTTP_HEADER_INTERNAL_CLIENT_TYPE = "InternalClientType"

    # Name of the HTTP header used to pass the fileset data operation.
    HTTP_HEADER_FILESET_DATA_OPERATION = "FilesetDataOperation"
81 changes: 81 additions & 0 deletions clients/client-python/gravitino/audit/fileset_data_operation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from enum import Enum


class FilesetDataOperation(Enum):
    """An enum of the fileset data operations that are supported."""

    # Creates a new file.
    CREATE = "CREATE"

    # Opens a file.
    OPEN = "OPEN"

    # Appends some content into a file.
    APPEND = "APPEND"

    # Renames a file or a directory.
    RENAME = "RENAME"

    # Deletes a file or a directory.
    DELETE = "DELETE"

    # Gets a file status from a file or a directory.
    GET_FILE_STATUS = "GET_FILE_STATUS"

    # Lists file statuses under a directory.
    LIST_STATUS = "LIST_STATUS"

    # Creates a directory.
    MKDIRS = "MKDIRS"

    # Checks if a file or a directory exists.
    EXISTS = "EXISTS"

    # Gets the created time of a file.
    CREATED_TIME = "CREATED_TIME"

    # Gets the modified time of a file.
    MODIFIED_TIME = "MODIFIED_TIME"

    # Copies a file.
    COPY_FILE = "COPY_FILE"

    # Gets the content of a file.
    CAT_FILE = "CAT_FILE"

    # Copies a remote file to local.
    GET_FILE = "GET_FILE"

    # Unknown data operation.
    UNKNOWN = "UNKNOWN"
35 changes: 35 additions & 0 deletions clients/client-python/gravitino/audit/internal_client_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from enum import Enum


class InternalClientType(Enum):
    """An enum of the internal client types that are supported."""

    # The client type is
    # `org.apache.gravitino.filesystem.hadoop.GravitinoVirtualFileSystem`,
    # which is in the filesystem-hadoop3 module.
    HADOOP_GVFS = "HADOOP_GVFS"

    # The client type is `gravitino.filesystem.gvfs.GravitinoVirtualFileSystem`,
    # which is in the client-python module.
    PYTHON_GVFS = "PYTHON_GVFS"

    # The client type is unknown.
    UNKNOWN = "UNKNOWN"
12 changes: 12 additions & 0 deletions clients/client-python/gravitino/catalog/fileset_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,18 @@ def drop_fileset(self, ident: NameIdentifier) -> bool:

return drop_resp.dropped()

def get_file_location(self, ident: NameIdentifier, sub_path: str) -> str:
    """Get the actual location of a file or directory.

    The location is meant to be based on the storage location of the
    Fileset and the sub path. This is an interface skeleton only: the
    method currently always raises.

    Args:
        ident: A fileset identifier, which should be "schema.fileset" format.
        sub_path: The sub path of the file or directory.

    Returns:
        The actual location of the file or directory.

    Raises:
        NotImplementedError: Always, until the method is implemented.
    """
    not_ready = NotImplementedError("Not implemented yet")
    raise not_ready

@staticmethod
def check_fileset_namespace(namespace: Namespace):
Namespace.check(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from dataclasses import dataclass, field

from dataclasses_json import config
from gravitino.dto.responses.base_response import BaseResponse
from gravitino.exceptions.base import IllegalArgumentException


@dataclass
class FileLocationResponse(BaseResponse):
    """Response carrying the actual file location."""

    # Serialized under the JSON field name "fileLocation".
    _file_location: str = field(metadata=config(field_name="fileLocation"))

    def file_location(self) -> str:
        """Return the file location held by this response."""
        return self._file_location

    def validate(self):
        """Validate the response data.

        Runs the base class validation first, then checks the file location.

        Raises:
            IllegalArgumentException: If the file location is None or empty.
        """
        super().validate()
        # Guard clause: a present, non-empty location is the only valid state.
        if self._file_location is not None and len(self.file_location()) != 0:
            return
        raise IllegalArgumentException("file location must not be null")
16 changes: 16 additions & 0 deletions clients/client-python/tests/unittests/audit/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
64 changes: 64 additions & 0 deletions clients/client-python/tests/unittests/audit/test_caller_context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import threading
import unittest
from typing import Dict

from gravitino.audit.caller_context import CallerContextHolder, CallerContext


class TestCallerContext(unittest.TestCase):
    """Checks that CallerContextHolder keeps a separate context per thread."""

    def test_caller_context(self):
        """Set, read back and remove a context on two worker threads."""
        thread_names_and_values = [
            ("Thread1", {"k1": "v1", "k2": "v2"}),
            ("Thread2", {"k11": "v11", "k21": "v21"}),
        ]
        # An exception raised inside a worker thread (including a failed
        # assertion) is not propagated to the thread that started it, so it
        # would never fail this test. Collect worker errors here and assert
        # on them from the main thread instead.
        failures = []

        def run_checks(thread_name, context):
            try:
                self._set_thread_local_context(thread_name, context)
            except Exception as exc:  # reported on the main thread below
                failures.append((thread_name, exc))

        test_threads = [
            threading.Thread(
                target=run_checks, name=thread_name, args=(thread_name, value)
            )
            for thread_name, value in thread_names_and_values
        ]
        for t in test_threads:
            t.start()
        for t in test_threads:
            t.join()

        self.assertEqual([], failures)

    def _set_thread_local_context(self, thread_name, context: Dict[str, str]):
        """Set a context on the current thread, verify it, then remove it.

        Args:
            thread_name: Name of the calling thread, used in failure messages.
            context: The context dict to store for the current thread.
        """
        # Snapshot the expected content so the comparison does not depend on
        # the held context being the very same dict object.
        expected = dict(context)
        caller_context: CallerContext = CallerContext(context)
        CallerContextHolder.set(caller_context)

        try:
            self.assertEqual(
                CallerContextHolder.get().context(),
                expected,
                f"unexpected caller context on {thread_name}",
            )
        finally:
            CallerContextHolder.remove()

        self.assertIsNone(CallerContextHolder.get())
Loading

0 comments on commit 39f3ca5

Please sign in to comment.