diff --git a/LICENSE b/LICENSE
index d80ec880eca..2b0c487bad5 100644
--- a/LICENSE
+++ b/LICENSE
@@ -283,6 +283,7 @@ Apache Arrow
     ./dev/ci/util_free_space.sh
+    ./clients/client-python/gravitino/filesystem/gvfs.py

   This product bundles a third-party component under the
   MIT License.
@@ -290,3 +291,12 @@ Kyligence/kylinpy
     ./clients/client-python/gravitino/utils/exceptions.py
     ./clients/client-python/gravitino/utils/http_client.py
+
+  elarivie/pyReaderWriterLock
+    ./clients/client-python/gravitino/filesystem/gvfs.py
+
+  This product bundles a third-party component under the
+  BSD License.
+
+  fsspec
+    ./clients/client-python/gravitino/filesystem/gvfs.py
diff --git a/LICENSE.bin b/LICENSE.bin
index 60db5658127..80e101c6729 100644
--- a/LICENSE.bin
+++ b/LICENSE.bin
@@ -355,6 +355,7 @@ XNIO API
   WildFly
   Confluent Kafka Streams Examples
+  Apache Arrow

   This product bundles various third-party components also under the
   Apache Software Foundation License 1.1
@@ -382,6 +383,7 @@ ParaNamer
   RE2/J
   ZSTD JNI
+  fsspec

   This product bundles various third-party components also under the
   MIT license
@@ -393,6 +395,7 @@ Protocol Buffers
   Treelayout
   Kyligence/kylinpy
+  elarivie/pyReaderWriterLock

   This product bundles various third-party components also under the
   Common Development and Distribution License 1.0
diff --git a/clients/client-python/gravitino/filesystem/gvfs.py b/clients/client-python/gravitino/filesystem/gvfs.py
index 5690cfc8ac4..516ef22f399 100644
--- a/clients/client-python/gravitino/filesystem/gvfs.py
+++ b/clients/client-python/gravitino/filesystem/gvfs.py
@@ -18,6 +18,7 @@
 from gravitino.api.catalog import Catalog
 from gravitino.api.fileset import Fileset
 from gravitino.client.gravitino_client import GravitinoClient
+from gravitino.exceptions.gravitino_runtime_exception import GravitinoRuntimeException
 from gravitino.name_identifier import NameIdentifier

 PROTOCOL_NAME = "gvfs"
@@ -25,7 +26,7 @@

 class StorageType(Enum):
     HDFS = "hdfs"
-    FILE = "file"
+    LOCAL = "file"


 class FilesetContext:
@@ -41,26 +42,26 @@ def __init__(
         storage_type: StorageType,
         actual_path: str,
     ):
-        self.name_identifier = name_identifier
-        self.fileset = fileset
-        self.fs = fs
-        self.storage_type = storage_type
-        self.actual_path = actual_path
+        self._name_identifier = name_identifier
+        self._fileset = fileset
+        self._fs = fs
+        self._storage_type = storage_type
+        self._actual_path = actual_path

     def get_name_identifier(self):
-        return self.name_identifier
+        return self._name_identifier

     def get_fileset(self):
-        return self.fileset
+        return self._fileset

     def get_fs(self):
-        return self.fs
+        return self._fs

     def get_actual_path(self):
-        return self.actual_path
+        return self._actual_path

     def get_storage_type(self):
-        return self.storage_type
+        return self._storage_type


 class GravitinoVirtualFileSystem(fsspec.AbstractFileSystem):
@@ -72,23 +73,29 @@ class GravitinoVirtualFileSystem(fsspec.AbstractFileSystem):
     access the underlying storage.
     """

+    # Override the parent variable
     protocol = PROTOCOL_NAME
     _identifier_pattern = re.compile("^fileset/([^/]+)/([^/]+)/([^/]+)(?:/[^/]+)*/?$")

     def __init__(self, server_uri=None, metalake_name=None, **kwargs):
-        self.metalake = metalake_name
-        self.client = GravitinoClient(uri=server_uri, metalake_name=metalake_name)
-        self.cache: Dict[NameIdentifier, Tuple] = {}
-        self.cache_lock = rwlock.RWLockFair()
+        self._metalake = metalake_name
+        self._client = GravitinoClient(
+            uri=server_uri, metalake_name=metalake_name, check_version=False
+        )
+        self._cache: Dict[NameIdentifier, Tuple] = {}
+        self._cache_lock = rwlock.RWLockFair()
         super().__init__(**kwargs)

     @property
     def fsid(self):
-        raise RuntimeError("Unsupported method now.")
+        return PROTOCOL_NAME

     def sign(self, path, expiration=None, **kwargs):
-        raise RuntimeError("Unsupported method now.")
+        """Creating a signed URL representing the given path is not supported in gvfs."""
+        raise GravitinoRuntimeException(
+            "Sign is not implemented for Gravitino Virtual FileSystem."
+        )

     def ls(self, path, detail=True, **kwargs):
         """List the files and directories info of the path.
@@ -100,10 +107,10 @@ def ls(self, path, detail=True, **kwargs):
         context: FilesetContext = self._get_fileset_context(path)
         if detail:
             entries = [
-                self._convert_info(entry, context)
-                for entry in context.fs.ls(
+                self._convert_actual_info(entry, context)
+                for entry in context.get_fs().ls(
                     self._strip_storage_protocol(
-                        context.storage_type, context.actual_path
+                        context.get_storage_type(), context.get_actual_path()
                     ),
                     detail=True,
                 )
@@ -111,8 +118,10 @@
             return entries
         entries = [
             self._convert_actual_path(entry_path, context)
-            for entry_path in context.fs.ls(
-                self._strip_storage_protocol(context.storage_type, context.actual_path),
+            for entry_path in context.get_fs().ls(
+                self._strip_storage_protocol(
+                    context.get_storage_type(), context.get_actual_path()
+                ),
                 detail=False,
             )
         ]
@@ -125,10 +134,12 @@ def info(self, path, **kwargs):
         :return A file info dict
         """
         context: FilesetContext = self._get_fileset_context(path)
-        actual_info: Dict = context.fs.info(
-            self._strip_storage_protocol(context.storage_type, context.actual_path)
+        actual_info: Dict = context.get_fs().info(
+            self._strip_storage_protocol(
+                context.get_storage_type(), context.get_actual_path()
+            )
         )
-        return self._convert_info(actual_info, context)
+        return self._convert_actual_info(actual_info, context)

     def exists(self, path, **kwargs):
         """Check if a file or a directory exists.
@@ -137,8 +148,10 @@ def exists(self, path, **kwargs):
         :return A file or directory exists, a boolean
         """
         context: FilesetContext = self._get_fileset_context(path)
-        return context.fs.exists(
-            self._strip_storage_protocol(context.storage_type, context.actual_path)
+        return context.get_fs().exists(
+            self._strip_storage_protocol(
+                context.get_storage_type(), context.get_actual_path()
+            )
         )
@@ -147,70 +160,93 @@
     def cp_file(self, path1, path2, **kwargs):
         """Copy a file.
         :param path1: Virtual src fileset path
         :param path2: Virtual dst fileset path, should be consistent with the src path fileset identifier
         :param kwargs: Extra args
         """
-        path1 = self._pre_process_path(path1)
-        path2 = self._pre_process_path(path2)
-        src_identifier: NameIdentifier = self._extract_identifier(path1)
-        dst_identifier: NameIdentifier = self._extract_identifier(path2)
+        src_path = self._pre_process_path(path1)
+        dst_path = self._pre_process_path(path2)
+        src_identifier: NameIdentifier = self._extract_identifier(src_path)
+        dst_identifier: NameIdentifier = self._extract_identifier(dst_path)
         if src_identifier != dst_identifier:
-            raise RuntimeError(
+            raise GravitinoRuntimeException(
                 f"Destination file path identifier: `{dst_identifier}` should be same with src file path "
                 f"identifier: `{src_identifier}`."
             )
-        src_context: FilesetContext = self._get_fileset_context(path1)
+        src_context: FilesetContext = self._get_fileset_context(src_path)
         if self._check_mount_single_file(
-            src_context.fileset, src_context.fs, src_context.storage_type
+            src_context.get_fileset(),
+            src_context.get_fs(),
+            src_context.get_storage_type(),
         ):
-            raise RuntimeError(
+            raise GravitinoRuntimeException(
                 f"Cannot cp file of the fileset: {src_identifier} which only mounts to a single file."
             )
-        dst_context: FilesetContext = self._get_fileset_context(path2)
+        dst_context: FilesetContext = self._get_fileset_context(dst_path)

-        src_context.fs.cp_file(
+        src_context.get_fs().cp_file(
             self._strip_storage_protocol(
-                src_context.storage_type, src_context.actual_path
+                src_context.get_storage_type(), src_context.get_actual_path()
             ),
             self._strip_storage_protocol(
-                dst_context.storage_type, dst_context.actual_path
+                dst_context.get_storage_type(), dst_context.get_actual_path()
             ),
         )

-    # pylint: disable=W0221
-    def mv(self, path1, path2, **kwargs):
+    def mv(self, path1, path2, recursive=False, maxdepth=None, **kwargs):
         """Move a file to another directory.
         This can move a file to another existing directory.
         If the target path directory does not exist, an exception will be thrown.
         :param path1: Virtual src fileset path
         :param path2: Virtual dst fileset path, should be consistent with the src path fileset identifier
+        :param recursive: Whether to move recursively
+        :param maxdepth: Maximum depth of recursive move
         :param kwargs: Extra args
         """
-        path1 = self._pre_process_path(path1)
-        path2 = self._pre_process_path(path2)
-        src_identifier: NameIdentifier = self._extract_identifier(path1)
-        dst_identifier: NameIdentifier = self._extract_identifier(path2)
+        src_path = self._pre_process_path(path1)
+        dst_path = self._pre_process_path(path2)
+        src_identifier: NameIdentifier = self._extract_identifier(src_path)
+        dst_identifier: NameIdentifier = self._extract_identifier(dst_path)
         if src_identifier != dst_identifier:
-            raise RuntimeError(
+            raise GravitinoRuntimeException(
                 f"Destination file path identifier: `{dst_identifier}`"
                 f" should be same with src file path identifier: `{src_identifier}`."
             )
-        src_context: FilesetContext = self._get_fileset_context(path1)
+        src_context: FilesetContext = self._get_fileset_context(src_path)
         if self._check_mount_single_file(
-            src_context.fileset, src_context.fs, src_context.storage_type
+            src_context.get_fileset(),
+            src_context.get_fs(),
+            src_context.get_storage_type(),
         ):
-            raise RuntimeError(
+            raise GravitinoRuntimeException(
                 f"Cannot cp file of the fileset: {src_identifier} which only mounts to a single file."
             )
-        dst_context: FilesetContext = self._get_fileset_context(path2)
-        src_context.fs.move(
-            self._strip_storage_protocol(
-                src_context.storage_type, src_context.actual_path
-            ),
-            self._strip_storage_protocol(
-                dst_context.storage_type, dst_context.actual_path
-            ),
-        )
+        dst_context: FilesetContext = self._get_fileset_context(dst_path)
+        if src_context.get_storage_type() == StorageType.HDFS:
+            src_context.get_fs().mv(
+                self._strip_storage_protocol(
+                    src_context.get_storage_type(), src_context.get_actual_path()
+                ),
+                self._strip_storage_protocol(
+                    dst_context.get_storage_type(), dst_context.get_actual_path()
+                ),
+            )
+        elif src_context.get_storage_type() == StorageType.LOCAL:
+            src_context.get_fs().mv(
+                self._strip_storage_protocol(
+                    src_context.get_storage_type(), src_context.get_actual_path()
+                ),
+                self._strip_storage_protocol(
+                    dst_context.get_storage_type(), dst_context.get_actual_path()
+                ),
+                recursive,
+                maxdepth,
+            )
+        else:
+            raise GravitinoRuntimeException(
+                f"Storage type:{src_context.get_storage_type()} is not currently supported."
+            )

     def _rm(self, path):
-        raise RuntimeError("Deprecated method, use `rm_file` method instead.")
+        raise GravitinoRuntimeException(
+            "Deprecated method, use `rm_file` method instead."
+        )

     def rm(self, path, recursive=False, maxdepth=None):
         """Remove a file or directory.
@@ -220,8 +256,10 @@ def rm(self, path, recursive=False, maxdepth=None):
         :param maxdepth: The maximum depth to remove the directory recursively.
         """
         context: FilesetContext = self._get_fileset_context(path)
-        context.fs.rm(
-            self._strip_storage_protocol(context.storage_type, context.actual_path),
+        context.get_fs().rm(
+            self._strip_storage_protocol(
+                context.get_storage_type(), context.get_actual_path()
+            ),
             recursive,
             maxdepth,
         )
@@ -231,8 +269,10 @@ def rm_file(self, path):
         :param path: Virtual fileset path
         """
         context: FilesetContext = self._get_fileset_context(path)
-        context.fs.rm_file(
-            self._strip_storage_protocol(context.storage_type, context.actual_path)
+        context.get_fs().rm_file(
+            self._strip_storage_protocol(
+                context.get_storage_type(), context.get_actual_path()
+            )
         )
@@ -240,64 +280,88 @@ def rmdir(self, path):
         """Remove a directory.
         :param path: Virtual fileset path
         """
         context: FilesetContext = self._get_fileset_context(path)
-        context.fs.rmdir(
-            self._strip_storage_protocol(context.storage_type, context.actual_path)
+        context.get_fs().rmdir(
+            self._strip_storage_protocol(
+                context.get_storage_type(), context.get_actual_path()
+            )
         )

-    # pylint: disable=W0221
-    def _open(self, path, mode="rb", block_size=None, seekable=True, **kwargs):
+    def open(
+        self,
+        path,
+        mode="rb",
+        block_size=None,
+        cache_options=None,
+        compression=None,
+        **kwargs,
+    ):
         """Open a file to read/write/append.
         :param path: Virtual fileset path
         :param mode: The mode now supports: rb(read), wb(write), ab(append).
             See builtin ``open()``
         :param block_size: Some indication of buffering - this is a value in bytes
-        :param seekable: Some filesystem supports seek, if so, can set this param to True
+        :param cache_options: Extra arguments to pass through to the cache
+        :param compression: If given, open file using compression codec
         :param kwargs: Extra args
         :return A file-like object from the filesystem
         """
         context: FilesetContext = self._get_fileset_context(path)
-        if context.storage_type == StorageType.HDFS:
-            return context.fs._open(
-                self._strip_storage_protocol(context.storage_type, context.actual_path),
-                mode,
-                block_size,
-                seekable,
-                **kwargs,
-            )
-        if context.storage_type == StorageType.FILE:
-            return context.fs._open(
-                self._strip_storage_protocol(context.storage_type, context.actual_path),
-                mode,
-                block_size,
-                **kwargs,
-            )
-        raise RuntimeError(f"Storage type:{context.storage_type} doesn't support now.")
+        return context.get_fs().open(
+            self._strip_storage_protocol(
+                context.get_storage_type(), context.get_actual_path()
+            ),
+            mode,
+            block_size,
+            cache_options,
+            compression,
+            **kwargs,
+        )

     def mkdir(self, path, create_parents=True, **kwargs):
         """Make a directory.
+        If create_parents=True, this is equivalent to ``makedirs``.
+
         :param path: Virtual fileset path
         :param create_parents: Create parent directories if missing when set to True
         :param kwargs: Extra args
         """
         context: FilesetContext = self._get_fileset_context(path)
-        context.fs.mkdir(
-            self._strip_storage_protocol(context.storage_type, context.actual_path),
+        context.get_fs().mkdir(
+            self._strip_storage_protocol(
+                context.get_storage_type(), context.get_actual_path()
+            ),
             create_parents,
             **kwargs,
         )

     def makedirs(self, path, exist_ok=True):
-        """Make a directory.
+        """Make a directory recursively.
         :param path: Virtual fileset path
         :param exist_ok: Continue if a directory already exists
         """
         context: FilesetContext = self._get_fileset_context(path)
-        context.fs.makedirs(
-            self._strip_storage_protocol(context.storage_type, context.actual_path),
+        context.get_fs().makedirs(
+            self._strip_storage_protocol(
+                context.get_storage_type(), context.get_actual_path()
+            ),
             exist_ok,
         )

     def created(self, path):
-        raise RuntimeError("Unsupported method now.")
+        """Return the created timestamp of a file as a datetime.datetime.
+        Only supported for `fsspec.LocalFileSystem` now.
+        :param path: Virtual fileset path
+        :return Created time(datetime.datetime)
+        """
+        context: FilesetContext = self._get_fileset_context(path)
+        if context.get_storage_type() == StorageType.LOCAL:
+            return context.get_fs().created(
+                self._strip_storage_protocol(
+                    context.get_storage_type(), context.get_actual_path()
+                )
+            )
+        raise GravitinoRuntimeException(
+            f"Storage type:{context.get_storage_type()} is not currently supported."
+        )

     def modified(self, path):
         """Returns the modified time of the path file if it exists.
@@ -305,8 +369,10 @@ def modified(self, path):
         :param path: Virtual fileset path
         :return Modified time(datetime.datetime)
         """
         context: FilesetContext = self._get_fileset_context(path)
-        return context.fs.modified(
-            self._strip_storage_protocol(context.storage_type, context.actual_path)
+        return context.get_fs().modified(
+            self._strip_storage_protocol(
+                context.get_storage_type(), context.get_actual_path()
+            )
         )

     def cat_file(self, path, start=None, end=None, **kwargs):
@@ -321,22 +387,34 @@
         :return File content
         """
         context: FilesetContext = self._get_fileset_context(path)
-        return context.fs.cat_file(
-            self._strip_storage_protocol(context.storage_type, context.actual_path),
+        return context.get_fs().cat_file(
+            self._strip_storage_protocol(
+                context.get_storage_type(), context.get_actual_path()
+            ),
             start,
             end,
             **kwargs,
         )

-    def get_file(self, rpath, lpath, **kwargs):
+    def get_file(self, rpath, lpath, callback=None, outfile=None, **kwargs):
         """Copy single remote file to local.
         :param rpath: Remote file path
         :param lpath: Local file path
+        :param callback: The callback class
+        :param outfile: The output file path
         :param kwargs: Extra args
         """
+        if not lpath.startswith(f"{StorageType.LOCAL.value}:") and not lpath.startswith(
+            "/"
+        ):
+            raise GravitinoRuntimeException(
+                "Copying a remote gvfs file to another remote file is not supported."
+            )
         context: FilesetContext = self._get_fileset_context(rpath)
-        context.fs.get_file(
-            self._strip_storage_protocol(context.storage_type, context.actual_path),
+        context.get_fs().get_file(
+            self._strip_storage_protocol(
+                context.get_storage_type(), context.get_actual_path()
+            ),
             lpath,
             **kwargs,
         )
@@ -348,28 +426,28 @@ def _convert_actual_path(self, path, context: FilesetContext):
         :param context: Fileset context
         :return A virtual path
         """
-        if context.storage_type == StorageType.HDFS:
-            actual_prefix = infer_storage_options(context.fileset.storage_location())[
-                "path"
-            ]
-        elif context.storage_type == StorageType.FILE:
-            actual_prefix = context.fileset.storage_location()[
-                len(f"{StorageType.FILE.value}:") :
+        if context.get_storage_type() == StorageType.HDFS:
+            actual_prefix = infer_storage_options(
+                context.get_fileset().storage_location()
+            )["path"]
+        elif context.get_storage_type() == StorageType.LOCAL:
+            actual_prefix = context.get_fileset().storage_location()[
+                len(f"{StorageType.LOCAL.value}:") :
             ]
         else:
-            raise RuntimeError(
-                f"Storage type:{context.storage_type} doesn't support now."
+            raise GravitinoRuntimeException(
+                f"Storage type:{context.get_storage_type()} is not currently supported."
             )

         if not path.startswith(actual_prefix):
-            raise RuntimeError(
+            raise GravitinoRuntimeException(
                 f"Path {path} does not start with valid prefix {actual_prefix}."
             )
-        virtual_location = self._get_virtual_location(context.name_identifier)
+        virtual_location = self._get_virtual_location(context.get_name_identifier())
         return f"{path.replace(actual_prefix, virtual_location)}"

-    def _convert_info(self, entry: Dict, context: FilesetContext):
-        """Convert a file info from actual entry to virtual entry.
+    def _convert_actual_info(self, entry: Dict, context: FilesetContext):
+        """Convert a file info from an actual entry to a virtual entry.
         :param entry: A dict of the actual file info
         :param context: Fileset context
         :return A dict of the virtual file info
         """
@@ -389,11 +467,11 @@ def _get_fileset_context(self, virtual_path: str):
         """
         virtual_path: str = self._pre_process_path(virtual_path)
         identifier: NameIdentifier = self._extract_identifier(virtual_path)
-        read_lock = self.cache_lock.gen_rlock()
+        read_lock = self._cache_lock.gen_rlock()
         try:
             read_lock.acquire()
             cache_value: Tuple[Fileset, AbstractFileSystem, StorageType] = (
-                self.cache.get(identifier)
+                self._cache.get(identifier)
             )
             if cache_value is not None:
                 actual_path = self._get_actual_path_by_ident(
@@ -413,10 +491,12 @@
         finally:
             read_lock.release()

-        write_lock = self.cache_lock.gen_wlock()
+        write_lock = self._cache_lock.gen_wlock()
         try:
             write_lock.acquire()
-            cache_value: Tuple[Fileset, AbstractFileSystem] = self.cache.get(identifier)
+            cache_value: Tuple[Fileset, AbstractFileSystem] = self._cache.get(
+                identifier
+            )
             if cache_value is not None:
                 actual_path = self._get_actual_path_by_ident(
                     identifier,
@@ -437,17 +517,17 @@
             if storage_location.startswith(f"{StorageType.HDFS.value}://"):
                 fs = ArrowFSWrapper(HadoopFileSystem.from_uri(storage_location))
                 storage_type = StorageType.HDFS
-            elif storage_location.startswith(f"{StorageType.FILE.value}:/"):
+            elif storage_location.startswith(f"{StorageType.LOCAL.value}:/"):
                 fs = LocalFileSystem()
-                storage_type = StorageType.FILE
+                storage_type = StorageType.LOCAL
             else:
-                raise ValueError(
+                raise GravitinoRuntimeException(
                     f"Storage under the fileset: `{identifier}` doesn't support now."
                 )
             actual_path = self._get_actual_path_by_ident(
                 identifier, fileset, fs, storage_type, virtual_path
             )
-            self.cache[identifier] = (fileset, fs, storage_type)
+            self._cache[identifier] = (fileset, fs, storage_type)
             context = FilesetContext(identifier, fileset, fs, storage_type, actual_path)
             return context
         finally:
@@ -459,21 +539,26 @@ def _extract_identifier(self, path):
         :return The fileset identifier
         """
         if path is None:
-            raise RuntimeError("path which need be extracted cannot be null or empty.")
+            raise GravitinoRuntimeException(
+                "path to be extracted cannot be null or empty."
+            )
         match = self._identifier_pattern.match(path)
         if match and len(match.groups()) == 3:
             return NameIdentifier.of_fileset(
-                self.metalake, match.group(1), match.group(2), match.group(3)
+                self._metalake, match.group(1), match.group(2), match.group(3)
             )
-        raise RuntimeError(f"path: `{path}` doesn't contains valid identifier.")
+        raise GravitinoRuntimeException(
+            f"path: `{path}` doesn't contain a valid identifier."
+        )

     def _load_fileset_from_server(self, identifier: NameIdentifier) -> Fileset:
         """Load the fileset from the server.
+        If the fileset is not found on the server, a `NoSuchFilesetException` will be raised.
         :param identifier: The fileset identifier
         :return The fileset
         """
-        catalog: Catalog = self.client.load_catalog(
+        catalog: Catalog = self._client.load_catalog(
             NameIdentifier.of_catalog(
                 identifier.namespace().level(0), identifier.namespace().level(1)
             )
         )
@@ -500,7 +585,7 @@
         storage_location = fileset.storage_location()
         if self._check_mount_single_file(fileset, fs, storage_type):
             if virtual_path != virtual_location:
-                raise RuntimeError(
+                raise GravitinoRuntimeException(
                     f"Path: {virtual_path} should be same with the virtual location: {virtual_location}"
                     " when the fileset only mounts a single file."
                 )
@@ -534,25 +619,27 @@
         return result["type"] == "file"

     @staticmethod
-    def _pre_process_path(path):
+    def _pre_process_path(virtual_path):
         """Pre-process the path.
         We will uniformly process `gvfs://fileset/{catalog}/{schema}/{fileset_name}/xxx`
         into the format of `fileset/{catalog}/{schema}/{fileset_name}/xxx`.
         This is because some implementations of `PyArrow` and `fsspec` can only recognize this format.
-        :param path: The virtual path
+        :param virtual_path: The virtual path
         :return The pre-processed path
         """
-        if isinstance(path, PurePosixPath):
-            path = path.as_posix()
+        if isinstance(virtual_path, PurePosixPath):
+            pre_processed_path = virtual_path.as_posix()
+        else:
+            pre_processed_path = virtual_path
         gvfs_prefix = f"{PROTOCOL_NAME}://"
-        if path.startswith(gvfs_prefix):
-            path = path[len(gvfs_prefix) :]
-        if not path.startswith("fileset/"):
-            raise RuntimeError(
-                f"Invalid path:`{path}`. Expected path to start with `fileset/`."
+        if pre_processed_path.startswith(gvfs_prefix):
+            pre_processed_path = pre_processed_path[len(gvfs_prefix) :]
+        if not pre_processed_path.startswith("fileset/"):
+            raise GravitinoRuntimeException(
+                f"Invalid path: `{pre_processed_path}`. Expected path to start with `fileset/`."
                 " Example: fileset/{fileset_catalog}/{schema}/{fileset_name}/{sub_path}."
             )
-        return path
+        return pre_processed_path

     @staticmethod
     def _strip_storage_protocol(storage_type: StorageType, path: str):
@@ -560,15 +647,19 @@
         Before passing the path to the underlying file system for processing,
         pre-process the protocol information in the path.
         Some file systems require special processing.
+        For HDFS, we can pass the path like 'hdfs://{host}:{port}/xxx'.
+        For Local, we can pass the path like '/tmp/xxx'.
         :param storage_type: The storage type
         :param path: The path
         :return: The stripped path
         """
         if storage_type == StorageType.HDFS:
             return path
-        if storage_type == StorageType.FILE:
-            return path[len(f"{StorageType.FILE.value}:") :]
-        raise RuntimeError(f"Storage type:{storage_type} doesn't support now.")
+        if storage_type == StorageType.LOCAL:
+            return path[len(f"{StorageType.LOCAL.value}:") :]
+        raise GravitinoRuntimeException(
+            f"Storage type:{storage_type} is not currently supported."
+        )


 fsspec.register_implementation(PROTOCOL_NAME, GravitinoVirtualFileSystem)
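Because the patch registers the filesystem class under the `gvfs` protocol (see `fsspec.register_implementation` above), callers can also obtain it through the generic `fsspec.filesystem` factory instead of constructing `GravitinoVirtualFileSystem` directly. A minimal usage sketch, assuming a hypothetical server at `http://localhost:8090`, a metalake named `test_metalake`, and an existing fileset `fileset_catalog/tmp/test_fileset`:

    import fsspec
    from gravitino import gvfs  # importing the module registers the "gvfs" protocol

    # Keyword arguments are forwarded to GravitinoVirtualFileSystem.__init__.
    fs = fsspec.filesystem(
        "gvfs", server_uri="http://localhost:8090", metalake_name="test_metalake"
    )

    # Virtual paths follow fileset/{fileset_catalog}/{schema}/{fileset_name}/{sub_path},
    # with or without the gvfs:// prefix; _pre_process_path strips the prefix.
    print(fs.ls("fileset/fileset_catalog/tmp/test_fileset", detail=False))
    with fs.open("gvfs://fileset/fileset_catalog/tmp/test_fileset/data.txt", "rb") as f:
        print(f.read())
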
diff --git a/clients/client-python/requirements-dev.txt b/clients/client-python/requirements-dev.txt
index 0d667441daa..ced6d28877d 100644
--- a/clients/client-python/requirements-dev.txt
+++ b/clients/client-python/requirements-dev.txt
@@ -6,3 +6,6 @@ pylint==3.2.2
 black==24.4.2
 twine==5.1.0
 coverage==7.5.1
+pandas==2.0.3
+pyarrow==15.0.2
+llama-index==0.10.40
\ No newline at end of file
diff --git a/clients/client-python/requirements.txt b/clients/client-python/requirements.txt
index 08b9ca80529..35ab5aac827 100644
--- a/clients/client-python/requirements.txt
+++ b/clients/client-python/requirements.txt
@@ -4,6 +4,6 @@
 # the tools to publish the python client to Pypi
 requests
 dataclasses-json
-readerwriterlock
-fsspec>=2024.3.1
-pyarrow>=15.0.2
\ No newline at end of file
+readerwriterlock==1.0.9
+fsspec==2024.3.1
+pyarrow
\ No newline at end of file
diff --git a/clients/client-python/tests/unittests/mock_base.py b/clients/client-python/tests/unittests/mock_base.py
index c673af63bc1..dd9bc3382fe 100644
--- a/clients/client-python/tests/unittests/mock_base.py
+++ b/clients/client-python/tests/unittests/mock_base.py
@@ -74,6 +74,10 @@ def mock_data(cls):
         "gravitino.client.gravitino_metalake.GravitinoMetalake.load_catalog",
         return_value=mock_load_fileset_catalog(),
     )
+    @patch(
+        "gravitino.client.gravitino_client_base.GravitinoClientBase.check_version",
+        return_value=True,
+    )
     class Wrapper(cls):
         pass
diff --git a/clients/client-python/tests/unittests/test_gvfs_with_local.py b/clients/client-python/tests/unittests/test_gvfs_with_local.py
index 3b70b8fc491..43436d94c85 100644
--- a/clients/client-python/tests/unittests/test_gvfs_with_local.py
+++ b/clients/client-python/tests/unittests/test_gvfs_with_local.py
@@ -7,6 +7,10 @@
 import string
 import unittest
 import mock_base
+import pandas
+import pyarrow as pa
+import pyarrow.dataset as dt
+import pyarrow.parquet as pq

 from unittest.mock import patch
 from gravitino import gvfs
@@ -14,7 +18,9 @@
 from gravitino.dto.audit_dto import AuditDTO
 from gravitino.dto.fileset_dto import FilesetDTO
 from gravitino.filesystem.gvfs import FilesetContext, StorageType
+from gravitino.exceptions.gravitino_runtime_exception import GravitinoRuntimeException
 from fsspec.implementations.local import LocalFileSystem
+from llama_index.core import SimpleDirectoryReader


 def generate_unique_random_string(length):
@@ -44,7 +50,7 @@ def tearDown(self) -> None:
         "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset",
         return_value=mock_base.mock_load_fileset("test_ls", f"{_fileset_dir}/test_ls"),
     )
-    def test_ls(self, mock_method1, mock_method2, mock_method3):
+    def test_ls(self, mock_method1, mock_method2, mock_method3, mock_method4):
         local_fs = LocalFileSystem()
         fileset_storage_location = f"{self._fileset_dir}/test_ls"
         fileset_virtual_location = "fileset/fileset_catalog/tmp/test_ls"
@@ -90,7 +96,7 @@ def test_ls(self, mock_method1, mock_method2, mock_method3):
             "test_info", f"{_fileset_dir}/test_info"
         ),
     )
-    def test_info(self, mock_method1, mock_method2, mock_method3):
+    def test_info(self, mock_method1, mock_method2, mock_method3, mock_method4):
         local_fs = LocalFileSystem()
         fileset_storage_location = f"{self._fileset_dir}/test_info"
         fileset_virtual_location = "fileset/fileset_catalog/tmp/test_info"
@@ -121,7 +127,7 @@
             "test_exist", f"{_fileset_dir}/test_exist"
         ),
     )
-    def test_exist(self, mock_method1, mock_method2, mock_method3):
+    def test_exist(self, mock_method1, mock_method2, mock_method3, mock_method4):
         local_fs = LocalFileSystem()
         fileset_storage_location = f"{self._fileset_dir}/test_exist"
         fileset_virtual_location = "fileset/fileset_catalog/tmp/test_exist"
@@ -150,7 +156,7 @@ def test_exist(self, mock_method1, mock_method2, mock_method3):
             "test_cp_file", f"{_fileset_dir}/test_cp_file"
         ),
     )
-    def test_cp_file(self, mock_method1, mock_method2, mock_method3):
+    def test_cp_file(self, mock_method1, mock_method2, mock_method3, mock_method4):
         local_fs = LocalFileSystem()
         fileset_storage_location = f"{self._fileset_dir}/test_cp_file"
         local_fs.mkdir(fileset_storage_location)
@@ -172,7 +178,7 @@ def test_cp_file(self, mock_method1, mock_method2, mock_method3):
         self.assertTrue(fs.exists(file_virtual_path))

         cp_file_virtual_path = fileset_virtual_location + "/test_cp_file_1.par"
-        fs.cp(file_virtual_path, cp_file_virtual_path)
+        fs.cp_file(file_virtual_path, cp_file_virtual_path)
         self.assertTrue(fs.exists(cp_file_virtual_path))
         with local_fs.open(sub_file_path, "rb") as f:
             result = f.read()
@@ -182,22 +188,22 @@ def test_cp_file(self, mock_method1, mock_method2, mock_method3):
         cp_file_invalid_virtual_path = (
             "fileset/fileset_catalog/tmp/invalid_fileset/test_cp_file_1.par"
         )
-        with self.assertRaises(RuntimeError):
-            fs.cp(file_virtual_path, cp_file_invalid_virtual_path)
+        with self.assertRaises(GravitinoRuntimeException):
+            fs.cp_file(file_virtual_path, cp_file_invalid_virtual_path)

         # test mount a single file
         local_fs.rm(path=fileset_storage_location, recursive=True)
         self.assertFalse(local_fs.exists(fileset_storage_location))
         local_fs.touch(fileset_storage_location)
         self.assertTrue(local_fs.exists(fileset_storage_location))
-        with self.assertRaises(RuntimeError):
-            fs.cp(file_virtual_path, cp_file_virtual_path)
+        with self.assertRaises(GravitinoRuntimeException):
+            fs.cp_file(file_virtual_path, cp_file_virtual_path)

     @patch(
         "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset",
         return_value=mock_base.mock_load_fileset("test_mv", f"{_fileset_dir}/test_mv"),
     )
-    def test_mv(self, mock_method1, mock_method2, mock_method3):
+    def test_mv(self, mock_method1, mock_method2, mock_method3, mock_method4):
         local_fs = LocalFileSystem()
         fileset_storage_location = f"{self._fileset_dir}/test_mv"
         local_fs.mkdir(fileset_storage_location)
@@ -239,7 +245,7 @@ def test_mv(self, mock_method1, mock_method2, mock_method3):
         mv_file_invalid_virtual_path = (
             "fileset/fileset_catalog/tmp/invalid_fileset/test_cp_file_1.par"
         )
-        with self.assertRaises(RuntimeError):
+        with self.assertRaises(GravitinoRuntimeException):
             fs.mv(path1=file_virtual_path, path2=mv_file_invalid_virtual_path)

         # test mount a single file
@@ -247,14 +253,14 @@ def test_mv(self, mock_method1, mock_method2, mock_method3):
         self.assertFalse(local_fs.exists(fileset_storage_location))
         local_fs.touch(fileset_storage_location)
         self.assertTrue(local_fs.exists(fileset_storage_location))
-        with self.assertRaises(RuntimeError):
+        with self.assertRaises(GravitinoRuntimeException):
             fs.mv(file_virtual_path, mv_file_virtual_path)

     @patch(
         "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset",
         return_value=mock_base.mock_load_fileset("test_rm", f"{_fileset_dir}/test_rm"),
     )
-    def test_rm(self, mock_method1, mock_method2, mock_method3):
+    def test_rm(self, mock_method1, mock_method2, mock_method3, mock_method4):
         local_fs = LocalFileSystem()
         fileset_storage_location = f"{self._fileset_dir}/test_rm"
         local_fs.mkdir(fileset_storage_location)
@@ -296,7 +302,7 @@ def test_rm(self, mock_method1, mock_method2, mock_method3):
             "test_rm_file", f"{_fileset_dir}/test_rm_file"
         ),
     )
-    def test_rm_file(self, mock_method1, mock_method2, mock_method3):
+    def test_rm_file(self, mock_method1, mock_method2, mock_method3, mock_method4):
         local_fs = LocalFileSystem()
         fileset_storage_location = f"{self._fileset_dir}/test_rm_file"
         local_fs.mkdir(fileset_storage_location)
@@ -334,7 +340,7 @@ def test_rm_file(self, mock_method1, mock_method2, mock_method3):
             "test_rmdir", f"{_fileset_dir}/test_rmdir"
         ),
     )
-    def test_rmdir(self, mock_method1, mock_method2, mock_method3):
+    def test_rmdir(self, mock_method1, mock_method2, mock_method3, mock_method4):
         local_fs = LocalFileSystem()
         fileset_storage_location = f"{self._fileset_dir}/test_rmdir"
         local_fs.mkdir(fileset_storage_location)
@@ -372,7 +378,7 @@ def test_rmdir(self, mock_method1, mock_method2, mock_method3):
             "test_open", f"{_fileset_dir}/test_open"
         ),
     )
-    def test_open(self, mock_method1, mock_method2, mock_method3):
+    def test_open(self, mock_method1, mock_method2, mock_method3, mock_method4):
         local_fs = LocalFileSystem()
         fileset_storage_location = f"{self._fileset_dir}/test_open"
         local_fs.mkdir(fileset_storage_location)
@@ -407,7 +413,7 @@ def test_open(self, mock_method1, mock_method2, mock_method3):
         dir_virtual_path = fileset_virtual_location + "/sub_dir"
         self.assertTrue(fs.exists(dir_virtual_path))
         with self.assertRaises(IsADirectoryError):
-            fs._open(dir_virtual_path)
+            fs.open(dir_virtual_path)

     @patch(
         "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset",
@@ -415,7 +421,7 @@
             "test_mkdir", f"{_fileset_dir}/test_mkdir"
         ),
     )
-    def test_mkdir(self, mock_method1, mock_method2, mock_method3):
+    def test_mkdir(self, mock_method1, mock_method2, mock_method3, mock_method4):
         local_fs = LocalFileSystem()
         fileset_storage_location = f"{self._fileset_dir}/test_mkdir"
         local_fs.mkdir(fileset_storage_location)
@@ -455,7 +461,7 @@ def test_mkdir(self, mock_method1, mock_method2, mock_method3):
             "test_makedirs", f"{_fileset_dir}/test_makedirs"
         ),
     )
-    def test_makedirs(self, mock_method1, mock_method2, mock_method3):
+    def test_makedirs(self, mock_method1, mock_method2, mock_method3, mock_method4):
         local_fs = LocalFileSystem()
         fileset_storage_location = f"{self._fileset_dir}/test_makedirs"
         local_fs.mkdir(fileset_storage_location)
@@ -483,13 +489,40 @@ def test_makedirs(self, mock_method1, mock_method2, mock_method3):
         fs.makedirs(parent_not_exist_virtual_path)
         self.assertTrue(fs.exists(parent_not_exist_virtual_path))

+    @patch(
+        "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset",
+        return_value=mock_base.mock_load_fileset(
+            "test_created", f"{_fileset_dir}/test_created"
+        ),
+    )
+    def test_created(self, mock_method1, mock_method2, mock_method3, mock_method4):
+        local_fs = LocalFileSystem()
+        fileset_storage_location = f"{self._fileset_dir}/test_created"
+        local_fs.mkdir(fileset_storage_location)
+
+        fileset_virtual_location = "fileset/fileset_catalog/tmp/test_created"
+
+        sub_dir_path = f"{fileset_storage_location}/sub_dir"
+        local_fs.mkdirs(sub_dir_path)
+        self.assertTrue(local_fs.exists(sub_dir_path))
+
+        fs = gvfs.GravitinoVirtualFileSystem(
+            server_uri="http://localhost:9090", metalake_name="metalake_demo"
+        )
+        self.assertTrue(fs.exists(fileset_virtual_location))
+
+        # test the created time of an existing dir
+        dir_virtual_path = fileset_virtual_location + "/sub_dir"
+        self.assertTrue(fs.exists(dir_virtual_path))
+        self.assertIsNotNone(fs.created(dir_virtual_path))
+
     @patch(
         "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset",
         return_value=mock_base.mock_load_fileset(
             "test_modified", f"{_fileset_dir}/test_modified"
         ),
     )
-    def test_modified(self, mock_method1, mock_method2, mock_method3):
+    def test_modified(self, mock_method1, mock_method2, mock_method3, mock_method4):
         local_fs = LocalFileSystem()
         fileset_storage_location = f"{self._fileset_dir}/test_modified"
         local_fs.mkdir(fileset_storage_location)
@@ -516,7 +549,7 @@ def test_modified(self, mock_method1, mock_method2, mock_method3):
             "test_cat_file", f"{_fileset_dir}/test_cat_file"
         ),
     )
-    def test_cat_file(self, mock_method1, mock_method2, mock_method3):
+    def test_cat_file(self, mock_method1, mock_method2, mock_method3, mock_method4):
         local_fs = LocalFileSystem()
         fileset_storage_location = f"{self._fileset_dir}/test_cat_file"
         local_fs.mkdir(fileset_storage_location)
@@ -559,7 +592,7 @@ def test_cat_file(self, mock_method1, mock_method2, mock_method3):
             "test_get_file", f"{_fileset_dir}/test_get_file"
         ),
     )
-    def test_get_file(self, mock_method1, mock_method2, mock_method3):
+    def test_get_file(self, mock_method1, mock_method2, mock_method3, mock_method4):
         local_fs = LocalFileSystem()
         fileset_storage_location = f"{self._fileset_dir}/test_get_file"
         local_fs.mkdir(fileset_storage_location)
@@ -600,7 +633,12 @@ def test_get_file(self, mock_method1, mock_method2, mock_method3):
         fs.get_file(dir_virtual_path, local_path)
         self.assertTrue(local_fs.exists(local_path))

-    def test_convert_actual_path(self, mock_method1, mock_method2):
+        # test get a file to a remote file
+        remote_path = "gvfs://" + fileset_virtual_location + "/test_file_2.par"
+        with self.assertRaises(GravitinoRuntimeException):
+            fs.get_file(file_virtual_path, remote_path)
+
+    def test_convert_actual_path(self, mock_method1, mock_method2, mock_method3):
         # test convert actual hdfs path
         audit_dto = AuditDTO(
             _creator="test",
@@ -631,7 +669,7 @@ def test_convert_actual_path(self, mock_method1, mock_method2):
         )
         # test actual path not start with storage location
         actual_path = "/not_start_with_storage/ttt"
-        with self.assertRaises(RuntimeError):
+        with self.assertRaises(GravitinoRuntimeException):
             fs._convert_actual_path(actual_path, mock_hdfs_context)

         # test actual path start with storage location
@@ -660,7 +698,7 @@ def test_convert_actual_path(self, mock_method1, mock_method2):
             name_identifier=NameIdentifier.of_fileset(
                 "test_metalake", "test_catalog", "test_schema", "test_f1"
             ),
-            storage_type=StorageType.FILE,
+            storage_type=StorageType.LOCAL,
             fileset=local_fileset,
             actual_path=local_fileset.storage_location() + "/actual_path",
             fs=LocalFileSystem(),
@@ -671,7 +709,7 @@
         )
         # test actual path not start with storage location
         actual_path = "/not_start_with_storage/ttt"
-        with self.assertRaises(RuntimeError):
+        with self.assertRaises(GravitinoRuntimeException):
             fs._convert_actual_path(actual_path, mock_local_context)

         # test actual path start with storage location
@@ -681,7 +719,7 @@
             "fileset/test_catalog/test_schema/test_f1/actual_path", virtual_path
         )

-    def test_convert_info(self, mock_method1, mock_method2):
+    def test_convert_info(self, mock_method1, mock_method2, mock_method3):
         # test convert actual hdfs path
         audit_dto = AuditDTO(
             _creator="test",
@@ -712,7 +750,7 @@ def test_convert_info(self, mock_method1, mock_method2):
         )
         # test actual path not start with storage location
         actual_path = "/not_start_with_storage/ttt"
-        with self.assertRaises(RuntimeError):
+        with self.assertRaises(GravitinoRuntimeException):
             fs._convert_actual_path(actual_path, mock_hdfs_context)

         # test actual path start with storage location
@@ -741,7 +779,7 @@ def test_convert_info(self, mock_method1, mock_method2):
             name_identifier=NameIdentifier.of_fileset(
                 "test_metalake", "test_catalog", "test_schema", "test_f1"
             ),
-            storage_type=StorageType.FILE,
+            storage_type=StorageType.LOCAL,
             fileset=local_fileset,
             actual_path=local_fileset.storage_location() + "/actual_path",
             fs=LocalFileSystem(),
@@ -752,7 +790,7 @@
         )
         # test actual path not start with storage location
         actual_path = "/not_start_with_storage/ttt"
-        with self.assertRaises(RuntimeError):
+        with self.assertRaises(GravitinoRuntimeException):
             fs._convert_actual_path(actual_path, mock_local_context)

         # test actual path start with storage location
@@ -762,15 +800,15 @@
             "fileset/test_catalog/test_schema/test_f1/actual_path", virtual_path
         )

-    def test_extract_identifier(self, mock_method1, mock_method2):
+    def test_extract_identifier(self, mock_method1, mock_method2, mock_method3):
         fs = gvfs.GravitinoVirtualFileSystem(
             server_uri="http://localhost:9090", metalake_name="metalake_demo"
         )
-        with self.assertRaises(RuntimeError):
+        with self.assertRaises(GravitinoRuntimeException):
             fs._extract_identifier(path=None)

         invalid_path = "s3://bucket_1/test_catalog/schema/fileset/ttt"
-        with self.assertRaises(RuntimeError):
+        with self.assertRaises(GravitinoRuntimeException):
             fs._extract_identifier(path=invalid_path)

         valid_path = "fileset/test_catalog/schema/fileset/ttt"
@@ -779,3 +817,140 @@ def test_extract_identifier(self, mock_method1, mock_method2):
         self.assertEqual("test_catalog", identifier.namespace().level(1))
         self.assertEqual("schema", identifier.namespace().level(2))
         self.assertEqual("fileset", identifier.name())
+
+    @patch(
+        "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset",
+        return_value=mock_base.mock_load_fileset(
+            "test_pandas", f"{_fileset_dir}/test_pandas"
+        ),
+    )
+    def test_pandas(self, mock_method1, mock_method2, mock_method3, mock_method4):
+        local_fs = LocalFileSystem()
+        fileset_storage_location = f"{self._fileset_dir}/test_pandas"
+        local_fs.mkdir(fileset_storage_location)
+
+        fileset_virtual_location = "gvfs://fileset/fileset_catalog/tmp/test_pandas"
+        data = pandas.DataFrame({"Name": ["A", "B", "C", "D"], "ID": [20, 21, 19, 18]})
+        fs = gvfs.GravitinoVirtualFileSystem(
+            server_uri="http://localhost:8090", metalake_name="test_metalake"
+        )
+        # to parquet
+        data.to_parquet(fileset_virtual_location + "/test.parquet", filesystem=fs)
+        self.assertTrue(local_fs.exists(fileset_storage_location + "/test.parquet"))
+
+        # read parquet
+        ds1 = pandas.read_parquet(
+            path=fileset_virtual_location + "/test.parquet", filesystem=fs
+        )
+        self.assertTrue(data.equals(ds1))
+        storage_options = {
+            "server_uri": "http://localhost:8090",
+            "metalake_name": "test_metalake",
+        }
+        # to csv
+        data.to_csv(
+            fileset_virtual_location + "/test.csv",
+            index=False,
+            storage_options=storage_options,
+        )
+        self.assertTrue(local_fs.exists(fileset_storage_location + "/test.csv"))
+
+        # read csv
+        ds2 = pandas.read_csv(
+            fileset_virtual_location + "/test.csv", storage_options=storage_options
+        )
+        self.assertTrue(data.equals(ds2))
+
+    @patch(
+        "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset",
+        return_value=mock_base.mock_load_fileset(
+            "test_pyarrow", f"{_fileset_dir}/test_pyarrow"
+        ),
+    )
+    def test_pyarrow(self, mock_method1, mock_method2, mock_method3, mock_method4):
+        local_fs = LocalFileSystem()
+        fileset_storage_location = f"{self._fileset_dir}/test_pyarrow"
+        local_fs.mkdir(fileset_storage_location)
+
+        fileset_virtual_location = "gvfs://fileset/fileset_catalog/tmp/test_pyarrow"
+        data = pandas.DataFrame({"Name": ["A", "B", "C", "D"], "ID": [20, 21, 19, 18]})
+        fs = gvfs.GravitinoVirtualFileSystem(
+            server_uri="http://localhost:8090", metalake_name="test_metalake"
+        )
+
+        # to parquet
+        data.to_parquet(fileset_virtual_location + "/test.parquet", filesystem=fs)
+        self.assertTrue(local_fs.exists(fileset_storage_location + "/test.parquet"))
+
+        # read as arrow dataset
+        arrow_dataset = dt.dataset(
+            fileset_virtual_location + "/test.parquet", filesystem=fs
+        )
+        arrow_tb_1 = arrow_dataset.to_table()
+
+        arrow_tb_2 = pa.Table.from_pandas(data)
+        self.assertTrue(arrow_tb_1.equals(arrow_tb_2))
+
+        # read as arrow parquet dataset
+        arrow_tb_3 = pq.read_table(
+            fileset_virtual_location + "/test.parquet", filesystem=fs
+        )
+        self.assertTrue(arrow_tb_3.equals(arrow_tb_2))
+
+    @patch(
+        "gravitino.catalog.fileset_catalog.FilesetCatalog.load_fileset",
+        return_value=mock_base.mock_load_fileset(
+            "test_llama_index", f"{_fileset_dir}/test_llama_index"
+        ),
+    )
+    def test_llama_index(self, mock_method1, mock_method2, mock_method3, mock_method4):
+        local_fs = LocalFileSystem()
+        fileset_storage_location = f"{self._fileset_dir}/test_llama_index"
+        local_fs.mkdir(fileset_storage_location)
+
+        fileset_virtual_location = "gvfs://fileset/fileset_catalog/tmp/test_llama_index"
+        data = pandas.DataFrame({"Name": ["A", "B", "C", "D"], "ID": [20, 21, 19, 18]})
+        fs = gvfs.GravitinoVirtualFileSystem(
+            server_uri="http://localhost:8090", metalake_name="test_metalake"
+        )
+
+        storage_options = {
+            "server_uri": "http://localhost:8090",
+            "metalake_name": "test_metalake",
+        }
+        # to csv
+        data.to_csv(
+            fileset_virtual_location + "/test.csv",
+            index=False,
+            storage_options=storage_options,
+        )
+        self.assertTrue(local_fs.exists(fileset_storage_location + "/test.csv"))
+
+        data.to_csv(
+            fileset_virtual_location + "/sub_dir/test1.csv",
+            index=False,
+            storage_options=storage_options,
+        )
+        self.assertTrue(
+            local_fs.exists(fileset_storage_location + "/sub_dir/test1.csv")
+        )
+
+        reader = SimpleDirectoryReader(
+            input_dir="fileset/fileset_catalog/tmp/test_llama_index",
+            fs=fs,
+            recursive=True,  # recursively searches all subdirectories
+        )
+        documents = reader.load_data()
+        self.assertEqual(len(documents), 2)
+        doc_1 = documents[0]
+        result_1 = [line.strip().split(", ") for line in doc_1.text.split("\n")]
+        self.assertEqual(4, len(result_1))
+        for row in result_1:
+            if row[0] == "A":
+                self.assertEqual(row[1], "20")
+            elif row[0] == "B":
+                self.assertEqual(row[1], "21")
+            elif row[0] == "C":
+                self.assertEqual(row[1], "19")
+            elif row[0] == "D":
+                self.assertEqual(row[1], "18")
diff --git a/licenses/fsspec.txt b/licenses/fsspec.txt
new file mode 100644
index 00000000000..699b2d5f83f
--- /dev/null
+++ b/licenses/fsspec.txt
@@ -0,0 +1,29 @@
+BSD 3-Clause License
+
+Copyright (c) 2018, Martin Durant
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/licenses/pyreaderwriterlock.txt b/licenses/pyreaderwriterlock.txt
new file mode 100644
index 00000000000..2199652f2dd
--- /dev/null
+++ b/licenses/pyreaderwriterlock.txt
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2018 Éric Larivière
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
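For reference, the pandas round-trip exercised in `test_pandas` above works the same way against a live deployment; a sketch, again assuming a hypothetical server at `http://localhost:8090`, a metalake named `test_metalake`, and an existing fileset `fileset_catalog/tmp/demo`:

    import pandas
    from gravitino import gvfs

    fs = gvfs.GravitinoVirtualFileSystem(
        server_uri="http://localhost:8090", metalake_name="test_metalake"
    )
    data = pandas.DataFrame({"Name": ["A", "B"], "ID": [20, 21]})

    # Write a parquet file through the virtual filesystem, then read it back.
    data.to_parquet("gvfs://fileset/fileset_catalog/tmp/demo/test.parquet", filesystem=fs)
    ds = pandas.read_parquet(
        path="gvfs://fileset/fileset_catalog/tmp/demo/test.parquet", filesystem=fs
    )
    assert data.equals(ds)

Passing `storage_options={"server_uri": ..., "metalake_name": ...}` instead of a prebuilt filesystem also works, as the csv round-trip in the same test shows: pandas then constructs the filesystem itself from the `gvfs://` prefix.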