From 4b109d8c1c16067819b8395e6ea2a9ad988da9eb Mon Sep 17 00:00:00 2001 From: Rahul Desai Date: Wed, 14 Feb 2024 13:12:44 -0800 Subject: [PATCH 01/15] boilerplate --- latch/types/path.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 latch/types/path.py diff --git a/latch/types/path.py b/latch/types/path.py new file mode 100644 index 00000000..8436cf7b --- /dev/null +++ b/latch/types/path.py @@ -0,0 +1,16 @@ +import io +from pathlib import Path +from typing import Optional, Union + +from latch.types.json import JsonValue + + +class LPath: + def __init__(self, path: Path): + self.path = path + + def download(self, dst: Optional[Union[Path, io.IOBase]]) -> Optional[Path]: + pass + + def upload(self, src: Union[Path, bytes, JsonValue, io.IOBase]) -> None: + pass From 53eb087ccb00619de2db32eee7eff81784ab5dac Mon Sep 17 00:00:00 2001 From: Rahul Desai Date: Thu, 15 Feb 2024 16:28:35 -0800 Subject: [PATCH 02/15] implement metadata functions --- latch/path/__init__.py | 1 + latch/path/path.py | 79 ++++++++++++++++++++++++++++ latch/types/path.py | 16 ------ latch_cli/services/cp/ldata_utils.py | 3 ++ 4 files changed, 83 insertions(+), 16 deletions(-) create mode 100644 latch/path/__init__.py create mode 100644 latch/path/path.py delete mode 100644 latch/types/path.py diff --git a/latch/path/__init__.py b/latch/path/__init__.py new file mode 100644 index 00000000..e06b6445 --- /dev/null +++ b/latch/path/__init__.py @@ -0,0 +1 @@ +from latch.path.path import LPath diff --git a/latch/path/path.py b/latch/path/path.py new file mode 100644 index 00000000..f9ca669b --- /dev/null +++ b/latch/path/path.py @@ -0,0 +1,79 @@ +import io +from enum import Enum +from pathlib import Path +from typing import Generator, Optional, Union + +from gql import gql +from latch_sdk_gql.execute import execute + +from latch.types.json import JsonValue +from latch_cli.services.cp.ldata_utils import LDataNodeType, get_node_data +from latch_cli.utils.path import is_remote_path + + +class LPath: + def __init__(self, path: str): + if not is_remote_path(path): + raise ValueError(f"Invalid LPath: {path} is not a Latch path") + self._path = path + + @property + def node_id(self) -> str: + # todo: currently raises click exception which is wrong + # todo: this function should be moved to this directory + node_data = get_node_data(self._path).data[self._path] + return node_data.id + + @property + def exists(self) -> bool: + try: + node_data = get_node_data(self._path).data[self._path] + except Exception: # todo: this should be a specific exception + return False + return not node_data.removed + + @property + def type(self) -> LDataNodeType: + node_data = get_node_data(self._path).data[self._path] + return node_data.type + + def _fetch_metadata(self): + data = execute( + gql(""" + query NodeMetadataQuery($id: BigInt!) 
{ + ldataNode(id: $id) { + ldataObjectMeta { + contentSize + contentType + } + } + } + """), + variables={"id": self.node_id}, + )["ldataNode"] + if data is None: + raise FileNotFoundError(f"{self._path} not found") + + assert "ldataObjectMeta" in data + return data["ldataObjectMeta"] + + @property + def size(self) -> float: + metadata = self._fetch_metadata() + assert "contentSize" in metadata + return metadata["contentSize"] + + @property + def content_type(self) -> str: + metadata = self._fetch_metadata() + assert "contentType" in metadata + return metadata["contentType"] + + def iterdir(self) -> Generator[Path, None, None]: + pass + + def download(self, dst: Optional[Union[Path, io.IOBase]]) -> Optional[Path]: + pass + + def upload(self, src: Union[Path, io.IOBase, bytes, JsonValue]) -> None: + pass diff --git a/latch/types/path.py b/latch/types/path.py deleted file mode 100644 index 8436cf7b..00000000 --- a/latch/types/path.py +++ /dev/null @@ -1,16 +0,0 @@ -import io -from pathlib import Path -from typing import Optional, Union - -from latch.types.json import JsonValue - - -class LPath: - def __init__(self, path: Path): - self.path = path - - def download(self, dst: Optional[Union[Path, io.IOBase]]) -> Optional[Path]: - pass - - def upload(self, src: Union[Path, bytes, JsonValue, io.IOBase]) -> None: - pass diff --git a/latch_cli/services/cp/ldata_utils.py b/latch_cli/services/cp/ldata_utils.py index 53de0968..c6af12eb 100644 --- a/latch_cli/services/cp/ldata_utils.py +++ b/latch_cli/services/cp/ldata_utils.py @@ -49,6 +49,7 @@ class NodeData: id: str name: str type: LDataNodeType + removed: bool is_parent: bool @@ -83,6 +84,7 @@ def get_node_data( id name type + removed } } } @@ -141,6 +143,7 @@ def get_node_data( id=final_link_target["id"], name=final_link_target["name"], type=LDataNodeType(final_link_target["type"].lower()), + removed=final_link_target["removed"], is_parent=is_parent, ) except (TypeError, ValueError) as e: From 0eb5042516ded147c42a3f0f07c23906aedbe05d Mon Sep 17 00:00:00 2001 From: Rahul Desai Date: Fri, 16 Feb 2024 14:49:25 -0800 Subject: [PATCH 03/15] refactor upload/download logic to ldata/transfer directory --- latch/ldata/__init__.py | 0 latch/ldata/node.py | 242 +++++++++++ latch/ldata/path.py | 268 ++++++++++++ latch/ldata/transfer/__init__.py | 2 + .../cp => latch/ldata/transfer}/download.py | 33 +- .../cp => latch/ldata/transfer}/manager.py | 6 +- .../cp => latch/ldata/transfer}/progress.py | 7 + .../cp => latch/ldata/transfer}/throttle.py | 0 .../cp => latch/ldata/transfer}/upload.py | 42 +- latch/ldata/transfer/utils.py | 31 ++ latch/path/__init__.py | 1 - latch/path/path.py | 79 ---- latch_cli/services/cp/autocomplete.py | 2 +- latch_cli/services/cp/config.py | 14 - latch_cli/services/cp/exceptions.py | 1 - latch_cli/services/cp/glob.py | 2 +- latch_cli/services/cp/ldata_utils.py | 303 ------------- latch_cli/services/cp/main.py | 60 ++- latch_cli/services/cp/remote_copy.py | 107 ----- latch_cli/services/cp/utils.py | 178 ++++++-- latch_cli/services/ls.py | 2 +- latch_cli/services/move.py | 8 +- latch_cli/utils/ldata.py | 398 ------------------ latch_cli/utils/path.py | 1 - 24 files changed, 801 insertions(+), 986 deletions(-) create mode 100644 latch/ldata/__init__.py create mode 100644 latch/ldata/node.py create mode 100644 latch/ldata/path.py create mode 100644 latch/ldata/transfer/__init__.py rename {latch_cli/services/cp => latch/ldata/transfer}/download.py (89%) rename {latch_cli/services/cp => latch/ldata/transfer}/manager.py (57%) rename 
{latch_cli/services/cp => latch/ldata/transfer}/progress.py (97%) rename {latch_cli/services/cp => latch/ldata/transfer}/throttle.py (100%) rename {latch_cli/services/cp => latch/ldata/transfer}/upload.py (93%) create mode 100644 latch/ldata/transfer/utils.py delete mode 100644 latch/path/__init__.py delete mode 100644 latch/path/path.py delete mode 100644 latch_cli/services/cp/config.py delete mode 100644 latch_cli/services/cp/exceptions.py delete mode 100644 latch_cli/services/cp/ldata_utils.py delete mode 100644 latch_cli/services/cp/remote_copy.py delete mode 100644 latch_cli/utils/ldata.py diff --git a/latch/ldata/__init__.py b/latch/ldata/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/latch/ldata/node.py b/latch/ldata/node.py new file mode 100644 index 00000000..094a0d4c --- /dev/null +++ b/latch/ldata/node.py @@ -0,0 +1,242 @@ +from dataclasses import dataclass +from enum import Enum +from typing import Dict, List, TypedDict + +try: + from functools import cache +except ImportError: + from functools import lru_cache as cache + +import gql +import graphql.language as l +from latch_sdk_gql.execute import execute +from latch_sdk_gql.utils import _name_node, _parse_selection + +from latch_cli.utils.path import get_path_error, normalize_path + +AccId = int + + +class LDataNodeType(str, Enum): + account_root = "account_root" + dir = "dir" + obj = "obj" + mount = "mount" + link = "link" + + +class FinalLinkTargetPayload(TypedDict): + id: str + type: str + name: str + + +class LdataNodePayload(TypedDict): + finalLinkTarget: FinalLinkTargetPayload + + +class LdataResolvePathToNodePayload(TypedDict): + path: str + ldataNode: LdataNodePayload + + +class AccountInfoCurrentPayload(TypedDict): + id: str + + +@dataclass(frozen=True) +class NodeData: + id: str + name: str + type: LDataNodeType + removed: bool + is_parent: bool + + +@dataclass(frozen=True) +class GetNodeDataResult: + acc_id: str + data: Dict[str, NodeData] + + +def get_node_data( + *remote_paths: str, allow_resolve_to_parent: bool = False +) -> GetNodeDataResult: + normalized: Dict[str, str] = {} + + acc_sel = _parse_selection(""" + accountInfoCurrent { + id + } + """) + assert isinstance(acc_sel, l.FieldNode) + + sels: List[l.FieldNode] = [acc_sel] + + for i, remote_path in enumerate(remote_paths): + normalized[remote_path] = normalize_path(remote_path) + + sel = _parse_selection(""" + ldataResolvePathToNode(path: {}) { + path + ldataNode { + finalLinkTarget { + id + name + type + removed + } + } + } + """) + assert isinstance(sel, l.FieldNode) + + val = l.StringValueNode() + val.value = normalized[remote_path] + + args = l.ArgumentNode() + args.name = _name_node("path") + args.value = val + + sel.alias = _name_node(f"q{i}") + sel.arguments = (args,) + + sels.append(sel) + + sel_set = l.SelectionSetNode() + sel_set.selections = tuple(sels) + + doc = l.parse(""" + query GetNodeType { + placeholder + } + """) + + assert len(doc.definitions) == 1 + query = doc.definitions[0] + + assert isinstance(query, l.OperationDefinitionNode) + query.selection_set = sel_set + + res = execute(doc) + + acc_info: AccountInfoCurrentPayload = res["accountInfoCurrent"] + acc_id = acc_info["id"] + + ret: Dict[str, NodeData] = {} + for i, remote_path in enumerate(remote_paths): + node: LdataResolvePathToNodePayload = res[f"q{i}"] + + try: + final_link_target = node["ldataNode"]["finalLinkTarget"] + remaining = node["path"] + + is_parent = remaining is not None and remaining != "" + + if not allow_resolve_to_parent and is_parent: + 
raise ValueError("node does not exist")
+
+            if remaining is not None and "/" in remaining:
+                raise ValueError("node and parent do not exist")
+
+            ret[remote_path] = NodeData(
+                id=final_link_target["id"],
+                name=final_link_target["name"],
+                type=LDataNodeType(final_link_target["type"].lower()),
+                removed=final_link_target["removed"],
+                is_parent=is_parent,
+            )
+        except (TypeError, ValueError) as e:
+            raise FileNotFoundError(get_path_error(remote_path, "not found", acc_id)) from e
+
+    return GetNodeDataResult(acc_id, ret)
+
+
+@dataclass(frozen=True)
+class NodeMetadata:
+    id: str
+    size: int
+    content_type: str
+
+
+def get_node_metadata(node_id: str) -> NodeMetadata:
+    data = execute(
+        gql.gql("""
+        query NodeMetadataQuery($id: BigInt!) {
+            ldataNode(id: $id) {
+                removed
+                ldataObjectMeta {
+                    contentSize
+                    contentType
+                }
+            }
+        }
+        """),
+        variables={"id": node_id},
+    )["ldataNode"]
+    if data is None or data["removed"]:
+        raise FileNotFoundError
+
+    return NodeMetadata(
+        id=node_id,
+        size=data["ldataObjectMeta"]["contentSize"],
+        content_type=data["ldataObjectMeta"]["contentType"],
+    )
+
+
+class PermLevel(str, Enum):
+    NONE = "none"
+    VIEWER = "viewer"
+    MEMBER = "member"
+    ADMIN = "admin"
+    OWNER = "owner"
+
+
+@dataclass(frozen=True)
+class LDataPerms:
+    id: str
+    shared: bool
+    share_invites: Dict[str, PermLevel]
+    share_perms: Dict[AccId, PermLevel]
+
+
+def get_node_perms(node_id: str) -> LDataPerms:
+    data = execute(
+        gql.gql("""
+        query NodePermissionsQuery($id: BigInt!) {
+            ldataNode(id: $id) {
+                id
+                removed
+                ldataSharePermissionsByObjectId {
+                    nodes {
+                        receiverId
+                        level
+                    }
+                }
+                ldataShareInvitesByObjectId {
+                    nodes {
+                        receiverEmail
+                        level
+                    }
+                }
+                shared
+            }
+        }
+        """),
+        variables={"id": node_id},
+    )["ldataNode"]
+    if data is None or data["removed"]:
+        raise FileNotFoundError
+
+    return LDataPerms(
+        id=node_id,
+        shared=data["shared"],
+        share_invites={
+            node["receiverEmail"]: node["level"]
+            for node in data["ldataShareInvitesByObjectId"]["nodes"]
+        },
+        share_perms={
+            int(node["receiverId"]): node["level"]
+            for node in data["ldataSharePermissionsByObjectId"]["nodes"]
+        },
+    )
diff --git a/latch/ldata/path.py b/latch/ldata/path.py
new file mode 100644
index 00000000..5375aa34
--- /dev/null
+++ b/latch/ldata/path.py
@@ -0,0 +1,268 @@
+import io
+from pathlib import Path
+from typing import Generator, Optional, Union
+from urllib.parse import urljoin
+
+import gql
+from gql.transport.exceptions import TransportQueryError
+from latch_sdk_config.latch import NUCLEUS_URL
+from latch_sdk_gql.execute import execute
+
+from latch.ldata.node import (
+    LDataNodeType,
+    LDataPerms,
+    PermLevel,
+    get_node_data,
+    get_node_metadata,
+    get_node_perms,
+)
+from latch.ldata.transfer import download, upload
+from latch.types.json import JsonValue
+from latch_cli.tinyrequests import post
+from latch_cli.utils import get_auth_header, urljoins
+from latch_cli.utils.path import get_name_from_path, get_path_error, is_remote_path
+
+
+class LPath:
+    def __init__(self, path: str):
+        if not is_remote_path(path):
+            raise ValueError(f"Invalid LPath: {path} is not a Latch path")
+        self.path = path
+        self._node_id = None
+
+    @property
+    def node_id(self) -> str:
+        if self._node_id is None:
+            self._node_id = get_node_data(self.path).data[self.path].id
+        return self._node_id
+
+    @property
+    def exists(self) -> bool:
+        try:
+            node_data = get_node_data(self.path).data[self.path]
+        except FileNotFoundError:
+            return False
+        return not node_data.removed
+
+    @property
+    def name(self) -> str: 
return get_node_data(self.path).data[self.path].name + + @property + def type(self) -> LDataNodeType: + return get_node_data(self.path).data[self.path].type + + def is_dir(self) -> bool: + return self.type is LDataNodeType.dir + + @property + def size(self) -> float: + metadata = get_node_metadata(self.node_id) + return metadata.size + + @property + def content_type(self) -> str: + metadata = get_node_metadata(self.node_id) + return metadata.content_type + + def iterdir(self) -> Generator[Path, None, None]: + data = execute( + gql.gql(""" + query LDataChildren($argPath: String!) { + ldataResolvePathData(argPath: $argPath) { + finalLinkTarget { + childLdataTreeEdges(filter: { child: { removed: { equalTo: false } } }) { + nodes { + child { + name + } + } + } + } + } + }"""), + {"argPath": self.path}, + )["ldataResolvePathData"] + + if data is None: + raise ValueError(f"No directory found at path: {self.path}") + + for node in data["finalLinkTarget"]["childLdataTreeEdges"]["nodes"]: + yield urljoins(self.path, node["child"]["name"]) + + def rmr(self) -> None: + execute( + gql.gql(""" + mutation LDataRmr($nodeId: BigInt!) { + ldataRmr(input: { argNodeId: $nodeId }) { + clientMutationId + } + } + """), + {"nodeId": self.node_id}, + ) + + def copy(self, dst: Union["LPath", str]) -> None: + dst = str(dst) + node_data = get_node_data(self.path, dst, allow_resolve_to_parent=True) + + src_data = node_data.data[self.path] + dst_data = node_data.data[dst] + acc_id = node_data.acc_id + + path_by_id = {v.id: k for k, v in node_data.data.items()} + + if src_data.is_parent: + raise FileNotFoundError(get_path_error(self.path, "not found", acc_id)) + + new_name = None + if dst_data.is_parent: + new_name = get_name_from_path(dst) + elif dst_data.type in {LDataNodeType.obj, LDataNodeType.link}: + raise FileExistsError( + get_path_error(dst, "object already exists at path.", acc_id) + ) + + try: + execute( + gql.gql(""" + mutation Copy( + $argSrcNode: BigInt! + $argDstParent: BigInt! 
+ $argNewName: String + ) { + ldataCopy( + input: { + argSrcNode: $argSrcNode + argDstParent: $argDstParent + argNewName: $argNewName + } + ) { + clientMutationId + } + }"""), + { + "argSrcNode": src_data.id, + "argDstParent": dst_data.id, + "argNewName": new_name, + }, + ) + except TransportQueryError as e: + if e.errors is None or len(e.errors) == 0: + raise e + + msg: str = e.errors[0]["message"] + + if msg.startswith("Permission denied on node"): + node_id = msg.rsplit(" ", 1)[1] + path = path_by_id[node_id] + + raise ValueError(get_path_error(path, "permission denied.", acc_id)) + elif msg == "Refusing to make node its own parent": + raise ValueError( + get_path_error(dst, f"is a parent of {self.path}.", acc_id) + ) + elif msg == "Refusing to parent node to an object node": + raise ValueError(get_path_error(dst, f"object exists at path.", acc_id)) + elif msg == "Refusing to move a share link (or into a share link)": + raise ValueError( + get_path_error( + self.path if src_data.type is LDataNodeType.link else dst, + f"is a share link.", + acc_id, + ) + ) + elif msg.startswith("Refusing to copy account root"): + raise ValueError( + get_path_error(self.path, "is an account root.", acc_id) + ) + elif msg.startswith("Refusing to copy removed node"): + raise ValueError(get_path_error(self.path, "not found.", acc_id)) + elif msg.startswith("Refusing to copy already in-transit node"): + raise ValueError( + get_path_error(self.path, "copy already in progress.", acc_id) + ) + elif msg == "Conflicting object in destination": + raise ValueError(get_path_error(dst, "object exists at path.", acc_id)) + + raise ValueError(get_path_error(self.path, str(e), acc_id)) + + def download(self, dst: Optional[Union[Path, io.IOBase]]) -> Optional[Path]: + # todo: perform different actions depending on dst type + return download( + self.path, + dst, + ) + + def read_bytes(self) -> bytes: + # todo: implement + pass + + def read_text(self) -> str: + # todo: implement + pass + + def read_json(self) -> JsonValue: + # todo: implement + pass + + def read_chunks(self, chunk_size: int) -> Generator[bytes, None, None]: + # todo: implement + pass + + def read_lines(self): + # todo: implement + pass + + def read_at(self, offset: int, amount: int) -> bytes: + # todo: implement + pass + + def upload(self, src: Union[Path, io.IOBase, bytes, JsonValue]) -> str: + # todo: implement + pass + + @property + def perms(self) -> LDataPerms: + return get_node_perms(self.node_id) + + def share_with(self, email: str, perm_level: PermLevel) -> None: + resp = post( + url=urljoin(NUCLEUS_URL, "/ldata/send-share-email"), + json={ + "node_id": self.node_id, + "perm_level": str(perm_level), + "receiver_email": email, + }, + headers={"Authorization": get_auth_header()}, + ) + resp.raise_for_status() + + def _toggle_share_link(self, enable: bool) -> None: + execute( + gql.gql(""" + mutation LDataShare($nodeId: BigInt!, $value: Boolean!) 
{
+                ldataShare(input: { argNodeId: $nodeId, argValue: $value }) {
+                    clientMutationId
+                }
+            }
+            """),
+            {"nodeId": self.node_id, "value": enable},
+        )
+
+    def enable_share_link(self) -> None:
+        self._toggle_share_link(True)
+
+    def disable_share_link(self) -> None:
+        self._toggle_share_link(False)
+
+    def __str__(self) -> str:
+        return self.path
+
+    def __truediv__(self, other: Union[Path, str]) -> "LPath":
+        return LPath(f"{Path(self.path) / other}")
+
+
+if __name__ == "__main__":
+    # add tests here
+    pass
diff --git a/latch/ldata/transfer/__init__.py b/latch/ldata/transfer/__init__.py
new file mode 100644
index 00000000..5c4374b0
--- /dev/null
+++ b/latch/ldata/transfer/__init__.py
@@ -0,0 +1,2 @@
+from latch.ldata.transfer.download import download
+from latch.ldata.transfer.upload import upload
diff --git a/latch_cli/services/cp/download.py b/latch/ldata/transfer/download.py
similarity index 89%
rename from latch_cli/services/cp/download.py
rename to latch/ldata/transfer/download.py
index 357c7c73..3dd3cf0f 100644
--- a/latch_cli/services/cp/download.py
+++ b/latch/ldata/transfer/download.py
@@ -9,13 +9,12 @@
 import click
 from latch_sdk_config.latch import config as latch_config
 
+from latch.ldata.node import LDataNodeType, get_node_data
+from latch.ldata.transfer.manager import TransferStateManager
+from latch.ldata.transfer.progress import Progress, ProgressBars, get_free_index
+from latch.ldata.transfer.utils import get_max_workers, human_readable_time
 from latch_cli import tinyrequests
 from latch_cli.constants import Units
-from latch_cli.services.cp.config import CPConfig, Progress
-from latch_cli.services.cp.ldata_utils import LDataNodeType, get_node_data
-from latch_cli.services.cp.manager import CPStateManager
-from latch_cli.services.cp.progress import ProgressBars, get_free_index
-from latch_cli.services.cp.utils import get_max_workers, human_readable_time
 from latch_cli.utils import get_auth_header, with_si_suffix
 from latch_cli.utils.path import normalize_path
 
@@ -35,9 +34,7 @@ class DownloadJob:
 
 
 def download(
-    src: str,
-    dest: Path,
-    config: CPConfig,
+    src: str, dest: Path, progress: Progress = Progress.tasks, verbose: bool = False
 ):
     if not dest.parent.exists():
         click.secho(
@@ -48,7 +45,11 @@
         raise click.exceptions.Exit(1)
 
     normalized = normalize_path(src)
-    data = get_node_data(src)
+    try:
+        data = get_node_data(src)
+    except FileNotFoundError as e:
+        click.echo(str(e))
+        raise click.exceptions.Exit(1) from e
 
     node_data = data.data[src]
     click.secho(f"Downloading {node_data.name}", fg="blue")
@@ -127,23 +128,23 @@
 
     num_files = len(confirmed_jobs)
 
-    if config.progress == Progress.none:
+    if progress == Progress.none:
         num_bars = 0
         show_total_progress = False
-    if config.progress == Progress.total:
+    elif progress == Progress.total:
         num_bars = 0
         show_total_progress = True
     else:
         num_bars = min(get_max_workers(), num_files)
         show_total_progress = True
 
-    with CPStateManager() as manager:
+    with TransferStateManager() as manager:
         progress_bars: ProgressBars
         with closing(
             manager.ProgressBars(
                 num_bars,
                 show_total_progress=show_total_progress,
-                verbose=config.verbose,
+                verbose=verbose,
             )
         ) as progress_bars:
             progress_bars.set_total(num_files, "Copying Files")
@@ -169,18 +170,18 @@
     if dest.exists() and dest.is_dir():
         dest = dest / node_data.name
 
-    if config.progress == Progress.none:
+    if progress == Progress.none:
         num_bars = 0
     else:
         num_bars = 1
 
-    with CPStateManager() as manager:
+    with TransferStateManager() as manager:
         progress_bars: 
ProgressBars
        with closing(
            manager.ProgressBars(
                num_bars,
                show_total_progress=False,
-                verbose=config.verbose,
+                verbose=verbose,
            )
        ) as progress_bars:
            start = time.monotonic()
diff --git a/latch_cli/services/cp/manager.py b/latch/ldata/transfer/manager.py
similarity index 57%
rename from latch_cli/services/cp/manager.py
rename to latch/ldata/transfer/manager.py
index b3da8d20..01796068 100644
--- a/latch_cli/services/cp/manager.py
+++ b/latch/ldata/transfer/manager.py
@@ -5,10 +5,10 @@
 from .throttle import Throttle
 
 
-class CPStateManager(SyncManager):
+class TransferStateManager(SyncManager):
     ProgressBars: Type[ProgressBars]
     Throttle: Type[Throttle]
 
 
-CPStateManager.register("ProgressBars", ProgressBars)
-CPStateManager.register("Throttle", Throttle)
+TransferStateManager.register("ProgressBars", ProgressBars)
+TransferStateManager.register("Throttle", Throttle)
diff --git a/latch_cli/services/cp/progress.py b/latch/ldata/transfer/progress.py
similarity index 97%
rename from latch_cli/services/cp/progress.py
rename to latch/ldata/transfer/progress.py
index 80feba58..506182a0 100644
--- a/latch_cli/services/cp/progress.py
+++ b/latch/ldata/transfer/progress.py
@@ -1,4 +1,5 @@
 from contextlib import contextmanager
+from enum import Enum
 from multiprocessing import BoundedSemaphore
 from typing import Dict, List, Optional
 
@@ -15,6 +16,12 @@ def get_progress_bar():
     )
 
 
+class Progress(Enum):
+    none = "none"
+    total = "total"
+    tasks = "tasks"
+
+
 class ProgressBars:
     def __init__(
         self,
diff --git a/latch_cli/services/cp/throttle.py b/latch/ldata/transfer/throttle.py
similarity index 100%
rename from latch_cli/services/cp/throttle.py
rename to latch/ldata/transfer/throttle.py
diff --git a/latch_cli/services/cp/upload.py b/latch/ldata/transfer/upload.py
similarity index 93%
rename from latch_cli/services/cp/upload.py
rename to latch/ldata/transfer/upload.py
index 7f7952f3..2eff9048 100644
--- a/latch_cli/services/cp/upload.py
+++ b/latch/ldata/transfer/upload.py
@@ -16,13 +16,12 @@
 from latch_sdk_config.latch import config as latch_config
 from typing_extensions import TypeAlias
 
+from latch.ldata.node import LDataNodeType, get_node_data
+from latch.ldata.transfer.manager import TransferStateManager
+from latch.ldata.transfer.progress import Progress, ProgressBars
+from latch.ldata.transfer.throttle import Throttle
+from latch.ldata.transfer.utils import get_max_workers, human_readable_time
 from latch_cli import tinyrequests
 from latch_cli.constants import latch_constants, units
-from latch_cli.services.cp.config import CPConfig, Progress
-from latch_cli.services.cp.ldata_utils import LDataNodeType, get_node_data
-from latch_cli.services.cp.manager import CPStateManager
-from latch_cli.services.cp.progress import ProgressBars
-from latch_cli.services.cp.throttle import Throttle
-from latch_cli.services.cp.utils import get_max_workers, human_readable_time
 from latch_cli.utils import get_auth_header, urljoins, with_si_suffix
 from latch_cli.utils.path import normalize_path
@@ -52,17 +51,22 @@ class UploadJob:
 def upload(
     src: str,  # pathlib.Path strips trailing slashes but we want to keep them here as they determine cp behavior
     dest: str,
-    config: CPConfig,
+    progress: Progress = Progress.tasks,
+    verbose: bool = False,
 ):
     src_path = Path(src)
     if not src_path.exists():
         click.secho(f"Could not find {src_path}: no such file or directory.", fg="red")
         raise click.exceptions.Exit(1)
 
-    if config.progress != Progress.none:
+    if progress != Progress.none: 
click.secho(f"Uploading {src_path.name}", fg="blue") - node_data = get_node_data(dest, allow_resolve_to_parent=True) + try: + node_data = get_node_data(dest, allow_resolve_to_parent=True) + except FileNotFoundError as e: + click.echo(str(e)) + raise click.exceptions.Exit(1) from e dest_data = node_data.data[dest] normalized = normalize_path(dest) @@ -82,13 +92,13 @@ def upload( click.secho(f"{normalized} is not a directory.", fg="red") raise click.exceptions.Exit(1) - if config.progress == Progress.none: + if progress == Progress.none: num_bars = 0 show_total_progress = False elif not src_path.is_dir(): num_bars = 1 show_total_progress = False - elif config.progress == Progress.total: + elif progress == Progress.total: num_bars = 0 show_total_progress = True else: @@ -96,7 +106,7 @@ def upload( show_total_progress = True with ProcessPoolExecutor(max_workers=get_max_workers()) as exec: - with CPStateManager() as man: + with TransferStateManager() as man: parts_by_src: "PartsBySrcType" = man.dict() upload_info_by_src: "UploadInfoBySrcType" = man.dict() @@ -134,7 +144,7 @@ def upload( with closing( man.ProgressBars( 0, - show_total_progress=(config.progress != Progress.none), + show_total_progress=(progress != Progress.none), ) ) as url_generation_bar: url_generation_bar.set_total(num_files, "Generating URLs") @@ -169,7 +179,7 @@ def upload( man.ProgressBars( min(num_bars, num_files), show_total_progress=show_total_progress, - verbose=config.verbose, + verbose=verbose, ) ) as chunk_upload_bars: chunk_upload_bars.set_total(num_files, "Uploading Files") @@ -209,7 +219,7 @@ def upload( wait(chunk_futs) - if config.progress != Progress.none: + if progress != Progress.none: print("\x1b[0GFinalizing uploads...") else: if dest_exists and dest_is_dir: @@ -223,7 +233,7 @@ def upload( man.ProgressBars( num_bars, show_total_progress=show_total_progress, - verbose=config.verbose, + verbose=verbose, ) ) as progress_bars: pbar_index = progress_bars.get_free_task_bar_index() @@ -260,7 +270,7 @@ def upload( end = time.monotonic() total_time = end - start - if config.progress != Progress.none: + if progress != Progress.none: click.clear() click.echo( f"""{click.style("Upload Complete", fg="green")} diff --git a/latch/ldata/transfer/utils.py b/latch/ldata/transfer/utils.py new file mode 100644 index 00000000..cdb115b6 --- /dev/null +++ b/latch/ldata/transfer/utils.py @@ -0,0 +1,31 @@ +import os +from typing import List + + +def get_max_workers() -> int: + try: + max_workers = len(os.sched_getaffinity(0)) * 4 + except AttributeError: + cpu = os.cpu_count() + if cpu is not None: + max_workers = cpu * 4 + else: + max_workers = 16 + + return min(max_workers, 16) + + +def human_readable_time(t_seconds: float) -> str: + s = t_seconds % 60 + m = (t_seconds // 60) % 60 + h = t_seconds // 60 // 60 + + x: List[str] = [] + if h > 0: + x.append(f"{int(h):d}h") + if m > 0: + x.append(f"{int(m):d}m") + if s > 0: + x.append(f"{s:.2f}s") + + return " ".join(x) diff --git a/latch/path/__init__.py b/latch/path/__init__.py deleted file mode 100644 index e06b6445..00000000 --- a/latch/path/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from latch.path.path import LPath diff --git a/latch/path/path.py b/latch/path/path.py deleted file mode 100644 index f9ca669b..00000000 --- a/latch/path/path.py +++ /dev/null @@ -1,79 +0,0 @@ -import io -from enum import Enum -from pathlib import Path -from typing import Generator, Optional, Union - -from gql import gql -from latch_sdk_gql.execute import execute - -from latch.types.json import JsonValue 
-from latch_cli.services.cp.ldata_utils import LDataNodeType, get_node_data -from latch_cli.utils.path import is_remote_path - - -class LPath: - def __init__(self, path: str): - if not is_remote_path(path): - raise ValueError(f"Invalid LPath: {path} is not a Latch path") - self._path = path - - @property - def node_id(self) -> str: - # todo: currently raises click exception which is wrong - # todo: this function should be moved to this directory - node_data = get_node_data(self._path).data[self._path] - return node_data.id - - @property - def exists(self) -> bool: - try: - node_data = get_node_data(self._path).data[self._path] - except Exception: # todo: this should be a specific exception - return False - return not node_data.removed - - @property - def type(self) -> LDataNodeType: - node_data = get_node_data(self._path).data[self._path] - return node_data.type - - def _fetch_metadata(self): - data = execute( - gql(""" - query NodeMetadataQuery($id: BigInt!) { - ldataNode(id: $id) { - ldataObjectMeta { - contentSize - contentType - } - } - } - """), - variables={"id": self.node_id}, - )["ldataNode"] - if data is None: - raise FileNotFoundError(f"{self._path} not found") - - assert "ldataObjectMeta" in data - return data["ldataObjectMeta"] - - @property - def size(self) -> float: - metadata = self._fetch_metadata() - assert "contentSize" in metadata - return metadata["contentSize"] - - @property - def content_type(self) -> str: - metadata = self._fetch_metadata() - assert "contentType" in metadata - return metadata["contentType"] - - def iterdir(self) -> Generator[Path, None, None]: - pass - - def download(self, dst: Optional[Union[Path, io.IOBase]]) -> Optional[Path]: - pass - - def upload(self, src: Union[Path, io.IOBase, bytes, JsonValue]) -> None: - pass diff --git a/latch_cli/services/cp/autocomplete.py b/latch_cli/services/cp/autocomplete.py index 24acc6e7..d608ce80 100644 --- a/latch_cli/services/cp/autocomplete.py +++ b/latch_cli/services/cp/autocomplete.py @@ -7,7 +7,7 @@ import click import click.shell_completion as sc -from latch_cli.services.cp.ldata_utils import ( +from latch_cli.services.cp.utils import ( _get_immediate_children_of_node, _get_known_domains_for_account, ) diff --git a/latch_cli/services/cp/config.py b/latch_cli/services/cp/config.py deleted file mode 100644 index e1bc52b5..00000000 --- a/latch_cli/services/cp/config.py +++ /dev/null @@ -1,14 +0,0 @@ -from dataclasses import dataclass -from enum import Enum - - -class Progress(Enum): - none = "none" - total = "total" - tasks = "tasks" - - -@dataclass(frozen=True) -class CPConfig: - progress: Progress - verbose: bool diff --git a/latch_cli/services/cp/exceptions.py b/latch_cli/services/cp/exceptions.py deleted file mode 100644 index a57e099f..00000000 --- a/latch_cli/services/cp/exceptions.py +++ /dev/null @@ -1 +0,0 @@ -class PathResolutionError(ValueError): ... 
diff --git a/latch_cli/services/cp/glob.py b/latch_cli/services/cp/glob.py index a691ff50..edfe45a6 100644 --- a/latch_cli/services/cp/glob.py +++ b/latch_cli/services/cp/glob.py @@ -1,7 +1,7 @@ import re from typing import List -from latch_cli.services.cp.ldata_utils import _get_immediate_children_of_node +from latch_cli.services.cp.utils import _get_immediate_children_of_node from latch_cli.utils import urljoins diff --git a/latch_cli/services/cp/ldata_utils.py b/latch_cli/services/cp/ldata_utils.py deleted file mode 100644 index c6af12eb..00000000 --- a/latch_cli/services/cp/ldata_utils.py +++ /dev/null @@ -1,303 +0,0 @@ -from dataclasses import dataclass -from enum import Enum -from typing import Dict, List, TypedDict - -import click - -try: - from functools import cache -except ImportError: - from functools import lru_cache as cache - -import gql -import graphql.language as l -from latch_sdk_gql.execute import execute -from latch_sdk_gql.utils import _name_node, _parse_selection - -from latch_cli.utils.path import get_path_error, normalize_path - - -class LDataNodeType(str, Enum): - account_root = "account_root" - dir = "dir" - obj = "obj" - mount = "mount" - link = "link" - - -class FinalLinkTargetPayload(TypedDict): - id: str - type: str - name: str - - -class LdataNodePayload(TypedDict): - finalLinkTarget: FinalLinkTargetPayload - - -class LdataResolvePathToNodePayload(TypedDict): - path: str - ldataNode: LdataNodePayload - - -class AccountInfoCurrentPayload(TypedDict): - id: str - - -@dataclass(frozen=True) -class NodeData: - id: str - name: str - type: LDataNodeType - removed: bool - is_parent: bool - - -@dataclass(frozen=True) -class GetNodeDataResult: - acc_id: str - data: Dict[str, NodeData] - - -def get_node_data( - *remote_paths: str, allow_resolve_to_parent: bool = False -) -> GetNodeDataResult: - normalized: Dict[str, str] = {} - - acc_sel = _parse_selection(""" - accountInfoCurrent { - id - } - """) - assert isinstance(acc_sel, l.FieldNode) - - sels: List[l.FieldNode] = [acc_sel] - - for i, remote_path in enumerate(remote_paths): - normalized[remote_path] = normalize_path(remote_path) - - sel = _parse_selection(""" - ldataResolvePathToNode(path: {}) { - path - ldataNode { - finalLinkTarget { - id - name - type - removed - } - } - } - """) - assert isinstance(sel, l.FieldNode) - - val = l.StringValueNode() - val.value = normalized[remote_path] - - args = l.ArgumentNode() - args.name = _name_node("path") - args.value = val - - sel.alias = _name_node(f"q{i}") - sel.arguments = (args,) - - sels.append(sel) - - sel_set = l.SelectionSetNode() - sel_set.selections = tuple(sels) - - doc = l.parse(""" - query GetNodeType { - placeholder - } - """) - - assert len(doc.definitions) == 1 - query = doc.definitions[0] - - assert isinstance(query, l.OperationDefinitionNode) - query.selection_set = sel_set - - res = execute(doc) - - acc_info: AccountInfoCurrentPayload = res["accountInfoCurrent"] - acc_id = acc_info["id"] - - ret: Dict[str, NodeData] = {} - for i, remote_path in enumerate(remote_paths): - node: LdataResolvePathToNodePayload = res[f"q{i}"] - - try: - final_link_target = node["ldataNode"]["finalLinkTarget"] - remaining = node["path"] - - is_parent = remaining is not None and remaining != "" - - if not allow_resolve_to_parent and is_parent: - raise ValueError("node does not exist") - - if remaining is not None and "/" in remaining: - raise ValueError("node and parent does not exist") - - ret[remote_path] = NodeData( - id=final_link_target["id"], - 
name=final_link_target["name"], - type=LDataNodeType(final_link_target["type"].lower()), - removed=final_link_target["removed"], - is_parent=is_parent, - ) - except (TypeError, ValueError) as e: - click.echo(get_path_error(remote_path, "not found", acc_id)) - raise click.exceptions.Exit(1) from e - - return GetNodeDataResult(acc_id, ret) - - -class Child(TypedDict): - name: str - - -class ChildLdataTreeEdgesNode(TypedDict): - child: Child - - -class ChildLdataTreeEdges(TypedDict): - nodes: List[ChildLdataTreeEdgesNode] - - -class LdataResolvePathData(TypedDict): - childLdataTreeEdges: ChildLdataTreeEdges - - -@cache -def _get_immediate_children_of_node(path: str) -> List[str]: - lrpd: LdataResolvePathData = execute( - gql.gql(""" - query MyQuery($argPath: String!) { - ldataResolvePathData(argPath: $argPath) { - childLdataTreeEdges( - filter: {child: {removed: {equalTo: false}}} - ) { - nodes { - child { - name - } - } - } - } - } - """), - {"argPath": path}, - )["ldataResolvePathData"] - - if lrpd is None: - return [] - - res: List[str] = [] - for node in lrpd["childLdataTreeEdges"]["nodes"]: - res.append(node["child"]["name"]) - - return res - - -class Team(TypedDict): - accountId: str - - -class TeamMembersByUserIdNode(TypedDict): - team: Team - - -class TeamMembersByUserId(TypedDict): - nodes: List[TeamMembersByUserIdNode] - - -class TeamInfosByOwnerId(TypedDict): - nodes: List[Team] - - -class UserInfoByAccountId(TypedDict): - defaultAccount: str - teamMembersByUserId: TeamMembersByUserId - teamInfosByOwnerId: TeamInfosByOwnerId - - -class Bucket(TypedDict): - bucketName: str - - -class LdataS3MountAccessProvensByGeneratedUsing(TypedDict): - nodes: List[Bucket] - - -class LdataS3MountConfiguratorRolesByAccountIdNode(TypedDict): - ldataS3MountAccessProvensByGeneratedUsing: LdataS3MountAccessProvensByGeneratedUsing - - -class LdataS3MountConfiguratorRolesByAccountId(TypedDict): - nodes: List[LdataS3MountConfiguratorRolesByAccountIdNode] - - -class AccountInfoCurrent(TypedDict): - userInfoByAccountId: UserInfoByAccountId - ldataS3MountConfiguratorRolesByAccountId: LdataS3MountConfiguratorRolesByAccountId - - -@cache -def _get_known_domains_for_account() -> List[str]: - aic: AccountInfoCurrent = execute(gql.gql(""" - query DomainCompletionQuery { - accountInfoCurrent { - userInfoByAccountId { - defaultAccount - teamMembersByUserId( - filter: { team: { account: { removed: { equalTo: false } } } } - ) { - nodes { - team { - accountId - } - } - } - teamInfosByOwnerId(filter: { account: { removed: { equalTo: false } } }) { - nodes { - accountId - } - } - } - ldataS3MountConfiguratorRolesByAccountId { - nodes { - ldataS3MountAccessProvensByGeneratedUsing { - nodes { - bucketName - } - } - } - } - } - } - """))["accountInfoCurrent"] - - ui = aic["userInfoByAccountId"] - - res: List[str] = [""] # "" is for latch:/// - - accs: List[int] = [int(ui["defaultAccount"])] - accs.extend( - int(tm["team"]["accountId"]) for tm in ui["teamMembersByUserId"]["nodes"] - ) - accs.extend(int(ti["accountId"]) for ti in ui["teamInfosByOwnerId"]["nodes"]) - accs.sort() - for x in accs: - res.append(f"{x}.account") - res.append(f"shared.{x}.account") - - buckets = [ - map["bucketName"] - for role in aic["ldataS3MountConfiguratorRolesByAccountId"]["nodes"] - for map in role["ldataS3MountAccessProvensByGeneratedUsing"]["nodes"] - ] - buckets.sort() - res.extend(f"{x}.mount" for x in buckets) - - return res diff --git a/latch_cli/services/cp/main.py b/latch_cli/services/cp/main.py index 98ca4c62..89db098d 100644 --- 
a/latch_cli/services/cp/main.py +++ b/latch_cli/services/cp/main.py @@ -4,14 +4,48 @@ import click -from latch_cli.services.cp.config import CPConfig, Progress -from latch_cli.services.cp.download import download +from latch.ldata.path import LPath +from latch.ldata.transfer.progress import Progress from latch_cli.services.cp.glob import expand_pattern -from latch_cli.services.cp.remote_copy import remote_copy -from latch_cli.services.cp.upload import upload from latch_cli.utils.path import is_remote_path +# todo(ayush): figure out how to do progress for this +def remote_copy( + src: LPath, + dest: LPath, +): + click.clear() + + try: + src.copy(dest) + except Exception as e: + click.echo(str(e)) + raise click.exceptions.Exit(1) from e + + click.echo(f""" +{click.style("Copy Requested.", fg="green")} + +{click.style("Source: ", fg="blue")}{(src)} +{click.style("Destination: ", fg="blue")}{(dest)}""") + + +def upload(src: str, dest: LPath, progress: Progress, verbose: bool): + try: + dest.upload(src, progress=progress, verbose=verbose) + except Exception as e: + click.echo(str(e)) + raise click.exceptions.Exit(1) from e + + +def download(src: LPath, dest: str, progress: Progress, verbose: bool): + try: + src.download(dest, progress=progress, verbose=verbose) + except Exception as e: + click.echo(str(e)) + raise click.exceptions.Exit(1) from e + + # todo(ayush): come up with a better behavior scheme than unix cp def cp( srcs: List[str], @@ -23,26 +57,24 @@ def cp( ): dest_remote = is_remote_path(dest) - config = CPConfig( - progress=progress, - verbose=verbose, - ) - for src in srcs: src_remote = is_remote_path(src) if src_remote and not dest_remote: if expand_globs: - [download(p, Path(dest), config) for p in expand_pattern(src)] + [ + download(LPath(p), Path(dest), progress, verbose) + for p in expand_pattern(src) + ] else: - download(src, Path(dest), config) + download(LPath(src), Path(dest), progress, verbose) elif not src_remote and dest_remote: - upload(src, dest, config) + upload(src, LPath(dest), progress, verbose) elif src_remote and dest_remote: if expand_globs: - [remote_copy(p, dest) for p in expand_pattern(src)] + [remote_copy(LPath(p), LPath(dest)) for p in expand_pattern(src)] else: - remote_copy(src, dest) + remote_copy(LPath(src), LPath(dest)) else: click.secho( dedent(f""" diff --git a/latch_cli/services/cp/remote_copy.py b/latch_cli/services/cp/remote_copy.py deleted file mode 100644 index fda20d14..00000000 --- a/latch_cli/services/cp/remote_copy.py +++ /dev/null @@ -1,107 +0,0 @@ -import click -import gql -from gql.transport.exceptions import TransportQueryError -from latch_sdk_gql.execute import execute - -from latch_cli.services.cp.ldata_utils import LDataNodeType, get_node_data -from latch_cli.utils.path import get_name_from_path, get_path_error - - -# todo(ayush): figure out how to do progress for this -def remote_copy( - src: str, - dest: str, -): - click.clear() - - node_data = get_node_data(src, dest, allow_resolve_to_parent=True) - - src_data = node_data.data[src] - dest_data = node_data.data[dest] - acc_id = node_data.acc_id - - path_by_id = {v.id: k for k, v in node_data.data.items()} - - if src_data.is_parent: - click.echo(get_path_error(src, "not found", acc_id)) - raise click.exceptions.Exit(1) - - new_name = None - if dest_data.is_parent: - new_name = get_name_from_path(dest) - elif dest_data.type in {LDataNodeType.obj, LDataNodeType.link}: - click.echo(get_path_error(dest, "object already exists at path.", acc_id)) - raise click.exceptions.Exit(1) - - try: 
- execute( - gql.gql(""" - mutation Copy( - $argSrcNode: BigInt! - $argDstParent: BigInt! - $argNewName: String - ) { - ldataCopy( - input: { - argSrcNode: $argSrcNode - argDstParent: $argDstParent - argNewName: $argNewName - } - ) { - clientMutationId - } - }"""), - { - "argSrcNode": src_data.id, - "argDstParent": dest_data.id, - "argNewName": new_name, - }, - ) - except TransportQueryError as e: - if e.errors is None or len(e.errors) == 0: - click.echo(get_path_error(src, str(e), acc_id)) - raise click.exceptions.Exit(1) from e - - msg: str = e.errors[0]["message"] - - if msg.startswith("Permission denied on node"): - node_id = msg.rsplit(" ", 1)[1] - path = path_by_id[node_id] - - click.echo(get_path_error(path, "permission denied.", acc_id)) - raise click.exceptions.Exit(1) from e - elif msg == "Refusing to make node its own parent": - click.echo(get_path_error(dest, f"is a parent of {src}.", acc_id)) - raise click.exceptions.Exit(1) from e - elif msg == "Refusing to parent node to an object node": - click.echo(get_path_error(dest, f"object exists at path.", acc_id)) - raise click.exceptions.Exit(1) from e - elif msg == "Refusing to move a share link (or into a share link)": - if src_data.type is LDataNodeType.link: - path = src - else: - path = dest - - click.echo(get_path_error(path, f"is a share link.", acc_id)) - raise click.exceptions.Exit(1) from e - elif msg.startswith("Refusing to copy account root"): - click.echo(get_path_error(src, "is an account root.", acc_id)) - raise click.exceptions.Exit(1) from e - elif msg.startswith("Refusing to copy removed node"): - click.echo(get_path_error(src, "not found.", acc_id)) - raise click.exceptions.Exit(1) from e - elif msg.startswith("Refusing to copy already in-transit node"): - click.echo(get_path_error(src, "copy already in progress.", acc_id)) - raise click.exceptions.Exit(1) from e - elif msg == "Conflicting object in destination": - click.echo(get_path_error(dest, "object exists at path.", acc_id)) - raise click.exceptions.Exit(1) from e - - click.echo(get_path_error(src, str(e), acc_id)) - raise click.exceptions.Exit(1) from e - - click.echo(f""" -{click.style("Copy Requested.", fg="green")} - -{click.style("Source: ", fg="blue")}{(src)} -{click.style("Destination: ", fg="blue")}{(dest)}""") diff --git a/latch_cli/services/cp/utils.py b/latch_cli/services/cp/utils.py index c32add04..1fb0c19c 100644 --- a/latch_cli/services/cp/utils.py +++ b/latch_cli/services/cp/utils.py @@ -1,37 +1,159 @@ -import os -from typing import List +from typing import List, TypedDict +try: + from functools import cache +except ImportError: + from functools import lru_cache as cache -def get_max_workers() -> int: - try: - max_workers = len(os.sched_getaffinity(0)) * 4 - except AttributeError: - cpu = os.cpu_count() - if cpu is not None: - max_workers = cpu * 4 - else: - max_workers = 16 +import gql +from latch_sdk_gql.execute import execute - return min(max_workers, 16) +class Child(TypedDict): + name: str -def pluralize(singular: str, plural: str, selector: int) -> str: - if selector == 1: - return singular - return plural +class ChildLdataTreeEdgesNode(TypedDict): + child: Child -def human_readable_time(t_seconds: float) -> str: - s = t_seconds % 60 - m = (t_seconds // 60) % 60 - h = t_seconds // 60 // 60 - x: List[str] = [] - if h > 0: - x.append(f"{int(h):d}h") - if m > 0: - x.append(f"{int(m):d}m") - if s > 0: - x.append(f"{s:.2f}s") +class ChildLdataTreeEdges(TypedDict): + nodes: List[ChildLdataTreeEdgesNode] - return " ".join(x) + +class 
LdataResolvePathData(TypedDict): + childLdataTreeEdges: ChildLdataTreeEdges + + +@cache +def _get_immediate_children_of_node(path: str) -> List[str]: + lrpd: LdataResolvePathData = execute( + gql.gql(""" + query MyQuery($argPath: String!) { + ldataResolvePathData(argPath: $argPath) { + childLdataTreeEdges( + filter: {child: {removed: {equalTo: false}}} + ) { + nodes { + child { + name + } + } + } + } + } + """), + {"argPath": path}, + )["ldataResolvePathData"] + + if lrpd is None: + return [] + + res: List[str] = [] + for node in lrpd["childLdataTreeEdges"]["nodes"]: + res.append(node["child"]["name"]) + + return res + + +class Team(TypedDict): + accountId: str + + +class TeamMembersByUserIdNode(TypedDict): + team: Team + + +class TeamMembersByUserId(TypedDict): + nodes: List[TeamMembersByUserIdNode] + + +class TeamInfosByOwnerId(TypedDict): + nodes: List[Team] + + +class UserInfoByAccountId(TypedDict): + defaultAccount: str + teamMembersByUserId: TeamMembersByUserId + teamInfosByOwnerId: TeamInfosByOwnerId + + +class Bucket(TypedDict): + bucketName: str + + +class LdataS3MountAccessProvensByGeneratedUsing(TypedDict): + nodes: List[Bucket] + + +class LdataS3MountConfiguratorRolesByAccountIdNode(TypedDict): + ldataS3MountAccessProvensByGeneratedUsing: LdataS3MountAccessProvensByGeneratedUsing + + +class LdataS3MountConfiguratorRolesByAccountId(TypedDict): + nodes: List[LdataS3MountConfiguratorRolesByAccountIdNode] + + +class AccountInfoCurrent(TypedDict): + userInfoByAccountId: UserInfoByAccountId + ldataS3MountConfiguratorRolesByAccountId: LdataS3MountConfiguratorRolesByAccountId + + +@cache +def _get_known_domains_for_account() -> List[str]: + aic: AccountInfoCurrent = execute(gql.gql(""" + query DomainCompletionQuery { + accountInfoCurrent { + userInfoByAccountId { + defaultAccount + teamMembersByUserId( + filter: { team: { account: { removed: { equalTo: false } } } } + ) { + nodes { + team { + accountId + } + } + } + teamInfosByOwnerId(filter: { account: { removed: { equalTo: false } } }) { + nodes { + accountId + } + } + } + ldataS3MountConfiguratorRolesByAccountId { + nodes { + ldataS3MountAccessProvensByGeneratedUsing { + nodes { + bucketName + } + } + } + } + } + } + """))["accountInfoCurrent"] + + ui = aic["userInfoByAccountId"] + + res: List[str] = [""] # "" is for latch:/// + + accs: List[int] = [int(ui["defaultAccount"])] + accs.extend( + int(tm["team"]["accountId"]) for tm in ui["teamMembersByUserId"]["nodes"] + ) + accs.extend(int(ti["accountId"]) for ti in ui["teamInfosByOwnerId"]["nodes"]) + accs.sort() + for x in accs: + res.append(f"{x}.account") + res.append(f"shared.{x}.account") + + buckets = [ + map["bucketName"] + for role in aic["ldataS3MountConfiguratorRolesByAccountId"]["nodes"] + for map in role["ldataS3MountAccessProvensByGeneratedUsing"]["nodes"] + ] + buckets.sort() + res.extend(f"{x}.mount" for x in buckets) + + return res diff --git a/latch_cli/services/ls.py b/latch_cli/services/ls.py index 1c9ab433..dd8779a4 100644 --- a/latch_cli/services/ls.py +++ b/latch_cli/services/ls.py @@ -10,8 +10,8 @@ import gql from latch_sdk_gql.execute import execute +from latch.lpath.utils import LDataNodeType from latch_cli.click_utils import bold -from latch_cli.services.cp.ldata_utils import LDataNodeType from latch_cli.utils import with_si_suffix from latch_cli.utils.path import normalize_path diff --git a/latch_cli/services/move.py b/latch_cli/services/move.py index a655b725..700b29ad 100644 --- a/latch_cli/services/move.py +++ b/latch_cli/services/move.py @@ -5,8 +5,8 @@ 
from gql.transport.exceptions import TransportQueryError from latch_sdk_gql.execute import execute +from latch.lpath.utils import LDataNodeType, get_node_data from latch_cli.services.cp.glob import expand_pattern -from latch_cli.services.cp.ldata_utils import LDataNodeType, get_node_data from latch_cli.utils.path import get_name_from_path, get_path_error, is_remote_path @@ -33,7 +33,11 @@ def move( click.echo(f"Could not find any files that match pattern {src}. Exiting.") raise click.exceptions.Exit(0) - node_data = get_node_data(*srcs, dest, allow_resolve_to_parent=True) + try: + node_data = get_node_data(*srcs, dest, allow_resolve_to_parent=True) + except FileNotFoundError as e: + click.echo(str(e)) + raise click.exceptions.Exit(1) from e dest_data = node_data.data[dest] acc_id = node_data.acc_id diff --git a/latch_cli/utils/ldata.py b/latch_cli/utils/ldata.py deleted file mode 100644 index e7e74540..00000000 --- a/latch_cli/utils/ldata.py +++ /dev/null @@ -1,398 +0,0 @@ -from dataclasses import dataclass -from datetime import datetime -from enum import Enum -from typing import Dict, List, Optional, TypedDict - -import click -import dateutil.parser as dp - -from latch_cli.utils import urljoins - -try: - from functools import cache -except ImportError: - from functools import lru_cache as cache - -import gql -import graphql.language as l -from latch_sdk_gql.execute import execute -from latch_sdk_gql.utils import _name_node, _parse_selection - -from latch_cli.utils.path import get_path_error, normalize_path - - -class LDataNodeType(str, Enum): - account_root = "account_root" - dir = "dir" - obj = "obj" - mount = "mount" - link = "link" - - -class FinalLinkTargetPayload(TypedDict): - id: str - type: str - name: str - ldataObjectMeta: Optional["LdataObjectMeta"] - - -class LdataNodePayload(TypedDict): - finalLinkTarget: FinalLinkTargetPayload - - -class LdataResolvePathToNodePayload(TypedDict): - path: str - ldataNode: LdataNodePayload - - -class AccountInfoCurrentPayload(TypedDict): - id: str - - -@dataclass(frozen=True) -class NodeData: - id: str - name: str - type: LDataNodeType - is_parent: bool - modify_time: Optional[datetime] - - -@dataclass(frozen=True) -class GetNodeDataResult: - acc_id: str - data: Dict[str, NodeData] - - -def get_node_data( - *remote_paths: str, allow_resolve_to_parent: bool = False -) -> GetNodeDataResult: - normalized: Dict[str, str] = {} - - acc_sel = _parse_selection(""" - accountInfoCurrent { - id - } - """) - assert isinstance(acc_sel, l.FieldNode) - - sels: List[l.FieldNode] = [acc_sel] - - for i, remote_path in enumerate(remote_paths): - normalized[remote_path] = normalize_path(remote_path) - - sel = _parse_selection(""" - ldataResolvePathToNode(path: {}) { - path - ldataNode { - finalLinkTarget { - id - name - type - ldataObjectMeta { - modifyTime - } - } - } - } - """) - assert isinstance(sel, l.FieldNode) - - val = l.StringValueNode() - val.value = normalized[remote_path] - - args = l.ArgumentNode() - args.name = _name_node("path") - args.value = val - - sel.alias = _name_node(f"q{i}") - sel.arguments = (args,) - - sels.append(sel) - - sel_set = l.SelectionSetNode() - sel_set.selections = tuple(sels) - - doc = l.parse(""" - query GetNodeType { - placeholder - } - """) - - assert len(doc.definitions) == 1 - query = doc.definitions[0] - - assert isinstance(query, l.OperationDefinitionNode) - query.selection_set = sel_set - - res = execute(doc) - - acc_info: AccountInfoCurrentPayload = res["accountInfoCurrent"] - acc_id = acc_info["id"] - - ret: Dict[str, 
NodeData] = {} - for i, remote_path in enumerate(remote_paths): - node: LdataResolvePathToNodePayload = res[f"q{i}"] - - try: - final_link_target = node["ldataNode"]["finalLinkTarget"] - lom = final_link_target["ldataObjectMeta"] - remaining = node["path"] - - is_parent = remaining is not None and remaining != "" - - if not allow_resolve_to_parent and is_parent: - raise ValueError("node does not exist") - - if remaining is not None and "/" in remaining: - raise ValueError("node and parent does not exist") - - modify_time: Optional[datetime] = None - if lom is not None and lom["modifyTime"] is not None: - modify_time = dp.isoparse(lom["modifyTime"]) - - ret[remote_path] = NodeData( - id=final_link_target["id"], - name=final_link_target["name"], - type=LDataNodeType(final_link_target["type"].lower()), - is_parent=is_parent, - modify_time=modify_time, - ) - except (TypeError, ValueError) as e: - click.echo(get_path_error(remote_path, "not found", acc_id)) - raise click.exceptions.Exit(1) from e - - return GetNodeDataResult(acc_id, ret) - - -class Child(TypedDict): - name: str - - -class ChildLdataTreeEdgesNode(TypedDict): - child: Child - - -class ChildLdataTreeEdges(TypedDict): - nodes: List[ChildLdataTreeEdgesNode] - - -class LdataResolvePathData(TypedDict): - childLdataTreeEdges: ChildLdataTreeEdges - - -@cache -def _get_immediate_children_of_node(path: str) -> List[str]: - lrpd: LdataResolvePathData = execute( - gql.gql(""" - query MyQuery($argPath: String!) { - ldataResolvePathData(argPath: $argPath) { - childLdataTreeEdges( - filter: {child: {removed: {equalTo: false}}} - ) { - nodes { - child { - name - } - } - } - } - } - """), - {"argPath": path}, - )["ldataResolvePathData"] - - res: List[str] = [] - for node in lrpd["childLdataTreeEdges"]["nodes"]: - res.append(node["child"]["name"]) - - return res - - -class Team(TypedDict): - accountId: str - - -class TeamMembersByUserIdNode(TypedDict): - team: Team - - -class TeamMembersByUserId(TypedDict): - nodes: List[TeamMembersByUserIdNode] - - -class TeamInfosByOwnerId(TypedDict): - nodes: List[Team] - - -class UserInfoByAccountId(TypedDict): - defaultAccount: str - teamMembersByUserId: TeamMembersByUserId - teamInfosByOwnerId: TeamInfosByOwnerId - - -class Bucket(TypedDict): - bucketName: str - - -class LdataS3MountAccessProvensByGeneratedUsing(TypedDict): - nodes: List[Bucket] - - -class LdataS3MountConfiguratorRolesByAccountIdNode(TypedDict): - ldataS3MountAccessProvensByGeneratedUsing: LdataS3MountAccessProvensByGeneratedUsing - - -class LdataS3MountConfiguratorRolesByAccountId(TypedDict): - nodes: List[LdataS3MountConfiguratorRolesByAccountIdNode] - - -class AccountInfoCurrent(TypedDict): - userInfoByAccountId: UserInfoByAccountId - ldataS3MountConfiguratorRolesByAccountId: LdataS3MountConfiguratorRolesByAccountId - - -@cache -def _get_known_domains_for_account() -> List[str]: - aic: AccountInfoCurrent = execute(gql.gql(""" - query DomainCompletionQuery { - accountInfoCurrent { - userInfoByAccountId { - defaultAccount - teamMembersByUserId( - filter: { team: { account: { removed: { equalTo: false } } } } - ) { - nodes { - team { - accountId - } - } - } - teamInfosByOwnerId(filter: { account: { removed: { equalTo: false } } }) { - nodes { - accountId - } - } - } - ldataS3MountConfiguratorRolesByAccountId { - nodes { - ldataS3MountAccessProvensByGeneratedUsing { - nodes { - bucketName - } - } - } - } - } - } - """))["accountInfoCurrent"] - - ui = aic["userInfoByAccountId"] - - res: List[str] = [""] # "" is for latch:/// - - accs: 
List[int] = [int(ui["defaultAccount"])] - accs.extend( - int(tm["team"]["accountId"]) for tm in ui["teamMembersByUserId"]["nodes"] - ) - accs.extend(int(ti["accountId"]) for ti in ui["teamInfosByOwnerId"]["nodes"]) - accs.sort() - for x in accs: - res.append(f"{x}.account") - res.append(f"shared.{x}.account") - - buckets = [ - map["bucketName"] - for role in aic["ldataS3MountConfiguratorRolesByAccountId"]["nodes"] - for map in role["ldataS3MountAccessProvensByGeneratedUsing"]["nodes"] - ] - buckets.sort() - res.extend(f"{x}.mount" for x in buckets) - - return res - - -class LdataObjectMeta(TypedDict): - modifyTime: Optional[str] - - -class InnerFinalLinkTarget(TypedDict): - id: str - ldataObjectMeta: Optional[LdataObjectMeta] - - -class NodeDescendantData(TypedDict): - finalLinkTarget: InnerFinalLinkTarget - - -class Node(TypedDict): - relPath: str - node: NodeDescendantData - - -class Descendants(TypedDict): - nodes: List[Node] - - -class OuterFinalLinkTarget(TypedDict): - descendants: Descendants - - -class NodeDescendantsLdataResolvePathData(TypedDict): - finalLinkTarget: OuterFinalLinkTarget - - -@dataclass(frozen=True) -class NodeDescendant: - id: str - modify_time: Optional[datetime] - - -@dataclass(frozen=True) -class GetNodeDescendantsResult: - nodes: Dict[str, NodeDescendant] - - -@cache -def get_node_descendants(path: str) -> GetNodeDescendantsResult: - res: Dict[str, NodeDescendant] = {} - data: Optional[NodeDescendantsLdataResolvePathData] = execute( - gql.gql(""" - query NodeDescendants($argPath: String!) { - ldataResolvePathData(argPath: $argPath) { - finalLinkTarget { - descendants { - nodes { - relPath - node { - finalLinkTarget { - id - ldataObjectMeta { - modifyTime - } - } - } - } - } - } - } - } - """), - {"argPath": path}, - )["ldataResolvePathData"] - - if data is None: - return GetNodeDescendantsResult(nodes=res) - - for descendant in data["finalLinkTarget"]["descendants"]["nodes"]: - rel_path = descendant["relPath"] - flt = descendant["node"]["finalLinkTarget"] - lom = flt["ldataObjectMeta"] - - modify_time: Optional[datetime] = None - if lom is not None and lom["modifyTime"] is not None: - modify_time = dp.isoparse(lom["modifyTime"]) - - res[rel_path] = NodeDescendant(id=flt["id"], modify_time=modify_time) - - return GetNodeDescendantsResult(nodes=res) diff --git a/latch_cli/utils/path.py b/latch_cli/utils/path.py index 8bd6f350..0d552e7f 100644 --- a/latch_cli/utils/path.py +++ b/latch_cli/utils/path.py @@ -6,7 +6,6 @@ import click from latch_sdk_config.user import user_config -from latch_cli.services.cp.exceptions import PathResolutionError from latch_cli.utils import get_auth_header latch_url_regex = re.compile(r"^(latch)?://") From bc2a1f31c4a8bdb0bc5c4984f9ba5437dd192f0b Mon Sep 17 00:00:00 2001 From: Rahul Desai Date: Fri, 16 Feb 2024 18:15:34 -0800 Subject: [PATCH 04/15] cleanup --- latch/ldata/node.py | 7 +- latch/ldata/path.py | 149 +++++----------------------- latch/ldata/transfer/__init__.py | 1 + latch/ldata/transfer/download.py | 53 +++++----- latch/ldata/transfer/remote_copy.py | 94 ++++++++++++++++++ latch/ldata/transfer/upload.py | 61 ++++-------- latch_cli/main.py | 2 +- latch_cli/services/cp/main.py | 89 ++++++----------- latch_cli/services/ls.py | 2 +- latch_cli/services/move.py | 2 +- 10 files changed, 196 insertions(+), 264 deletions(-) create mode 100644 latch/ldata/transfer/remote_copy.py diff --git a/latch/ldata/node.py b/latch/ldata/node.py index 094a0d4c..e0f0cde0 100644 --- a/latch/ldata/node.py +++ b/latch/ldata/node.py @@ -2,11 +2,6 @@ 
from enum import Enum from typing import Dict, List, TypedDict -try: - from functools import cache -except ImportError: - from functools import lru_cache as cache - import gql import graphql.language as l from latch_sdk_gql.execute import execute @@ -179,7 +174,7 @@ def get_node_metadata(node_id: str) -> NodeMetadata: return NodeMetadata( id=node_id, - size=data["ldataObjectMeta"]["contentSize"], + size=int(data["ldataObjectMeta"]["contentSize"]), content_type=data["ldataObjectMeta"]["contentType"], ) diff --git a/latch/ldata/path.py b/latch/ldata/path.py index 5375aa34..7bb64efa 100644 --- a/latch/ldata/path.py +++ b/latch/ldata/path.py @@ -1,10 +1,9 @@ -import io from pathlib import Path from typing import Generator, Optional, Union from urllib.parse import urljoin +from uuid import uuid4 import gql -from gql.transport.exceptions import TransportQueryError from latch_sdk_config.latch import NUCLEUS_URL from latch_sdk_gql.execute import execute @@ -16,11 +15,11 @@ get_node_metadata, get_node_perms, ) -from latch.ldata.transfer import download, upload -from latch.types.json import JsonValue +from latch.ldata.transfer import download, remote_copy, upload +from latch.ldata.transfer.progress import Progress from latch_cli.tinyrequests import post from latch_cli.utils import get_auth_header, urljoins -from latch_cli.utils.path import get_name_from_path, get_path_error, is_remote_path +from latch_cli.utils.path import is_remote_path class LPath: @@ -53,7 +52,11 @@ def type(self) -> LDataNodeType: return get_node_data(self.path).data[self.path].type def is_dir(self) -> bool: - return self.type is LDataNodeType.dir + return self.type in { + LDataNodeType.dir, + LDataNodeType.account_root, + LDataNodeType.mount, + } @property def size(self) -> float: @@ -66,6 +69,8 @@ def content_type(self) -> str: return metadata.content_type def iterdir(self) -> Generator[Path, None, None]: + if not self.is_dir(): + raise ValueError(f"Not a directory: {self.path}") data = execute( gql.gql(""" query LDataChildren($argPath: String!) { @@ -103,124 +108,21 @@ def rmr(self) -> None: ) def copy(self, dst: Union["LPath", str]) -> None: - dst = str(dst) - node_data = get_node_data(self.path, dst, allow_resolve_to_parent=True) + remote_copy(self.path, str(dst)) - src_data = node_data.data[self.path] - dst_data = node_data.data[dst] - acc_id = node_data.acc_id + def upload(self, src: Path, progress=Progress.tasks, verbose=False) -> None: + upload(src, self.path, progress, verbose) - path_by_id = {v.id: k for k, v in node_data.data.items()} + def download( + self, dst: Optional[Path] = None, progress=Progress.tasks, verbose=False + ) -> Path: + if dst is None: + dir = Path(".") / "downloads" / str(uuid4()) + dir.mkdir(parents=True, exist_ok=True) + dst = dir / self.name - if src_data.is_parent: - raise FileNotFoundError(get_path_error(self.path, "not found", acc_id)) - - new_name = None - if dst_data.is_parent: - new_name = get_name_from_path(dst) - elif dst_data.type in {LDataNodeType.obj, LDataNodeType.link}: - raise FileExistsError( - get_path_error(dst, "object already exists at path.", acc_id) - ) - - try: - execute( - gql.gql(""" - mutation Copy( - $argSrcNode: BigInt! - $argDstParent: BigInt! 
- $argNewName: String - ) { - ldataCopy( - input: { - argSrcNode: $argSrcNode - argDstParent: $argDstParent - argNewName: $argNewName - } - ) { - clientMutationId - } - }"""), - { - "argSrcNode": src_data.id, - "argDstParent": dst_data.id, - "argNewName": new_name, - }, - ) - except TransportQueryError as e: - if e.errors is None or len(e.errors) == 0: - raise e - - msg: str = e.errors[0]["message"] - - if msg.startswith("Permission denied on node"): - node_id = msg.rsplit(" ", 1)[1] - path = path_by_id[node_id] - - raise ValueError(get_path_error(path, "permission denied.", acc_id)) - elif msg == "Refusing to make node its own parent": - raise ValueError( - get_path_error(dst, f"is a parent of {self.path}.", acc_id) - ) - elif msg == "Refusing to parent node to an object node": - raise ValueError(get_path_error(dst, f"object exists at path.", acc_id)) - elif msg == "Refusing to move a share link (or into a share link)": - raise ValueError( - get_path_error( - self.path if src_data.type is LDataNodeType.link else dst, - f"is a share link.", - acc_id, - ) - ) - elif msg.startswith("Refusing to copy account root"): - raise ValueError( - get_path_error(self.path, "is an account root.", acc_id) - ) - elif msg.startswith("Refusing to copy removed node"): - raise ValueError(get_path_error(self.path, "not found.", acc_id)) - elif msg.startswith("Refusing to copy already in-transit node"): - raise ValueError( - get_path_error(self.path, "copy already in progress.", acc_id) - ) - elif msg == "Conflicting object in destination": - raise ValueError(get_path_error(dst, "object exists at path.", acc_id)) - - raise ValueError(get_path_error(self.path, str(e), acc_id)) - - def download(self, dst: Optional[Union[Path, io.IOBase]]) -> Optional[Path]: - # todo: perform different actions depending on dst type - return download( - self.path, - dst, - ) - - def read_bytes(self) -> bytes: - # todo: implement - pass - - def read_text(self) -> str: - # todo: implement - pass - - def read_json(self) -> JsonValue: - # todo: implement - pass - - def read_chunks(self, chunk_size: int) -> Generator[bytes, None, None]: - # todo: implement - pass - - def read_lines(self): - # todo: implement - pass - - def read_at(self, offset: int, amount: int) -> bytes: - # todo: implement - pass - - def upload(self, src: Union[Path, io.IOBase, bytes, JsonValue]) -> str: - # todo: implement - pass + download(self.path, dst, progress, verbose, confirm_overwrite=False) + return dst @property def perms(self) -> LDataPerms: @@ -264,5 +166,6 @@ def __truediv__(self, other: Union[Path, str]) -> "LPath": if __name__ == "__main__": - # add tests here - pass + # tests + file_path = LPath("latch://24030.account/test_dir/B.txt") + file_path.share_with("rahuljaydesai@gmail.com", PermLevel.VIEWER) diff --git a/latch/ldata/transfer/__init__.py b/latch/ldata/transfer/__init__.py index 5c4374b0..ba7905ab 100644 --- a/latch/ldata/transfer/__init__.py +++ b/latch/ldata/transfer/__init__.py @@ -1,2 +1,3 @@ from latch.ldata.transfer.download import download +from latch.ldata.transfer.remote_copy import remote_copy from latch.ldata.transfer.upload import upload diff --git a/latch/ldata/transfer/download.py b/latch/ldata/transfer/download.py index 3dd3cf0f..6830a97b 100644 --- a/latch/ldata/transfer/download.py +++ b/latch/ldata/transfer/download.py @@ -2,8 +2,10 @@ from concurrent.futures import ProcessPoolExecutor from contextlib import closing from dataclasses import dataclass +from http.client import HTTPException from itertools import repeat from 
pathlib import Path +from textwrap import dedent from typing import Dict, List, Set, TypedDict import click @@ -34,25 +36,24 @@ class DownloadJob: def download( - src: str, dest: Path, progress: Progress = Progress.tasks, verbose: bool = False -): + src: str, + dest: Path, + progress: Progress, + verbose: bool, + confirm_overwrite: bool = True, +) -> None: if not dest.parent.exists(): - click.secho( + raise ValueError( f"Invalid copy destination {dest}. Parent directory {dest.parent} does not" - " exist.", - fg="red", + " exist." ) - raise click.exceptions.Exit(1) normalized = normalize_path(src) - try: - data = get_node_data(src) - except FileNotFoundError as e: - click.echo(str(e)) - raise click.exceptions.Exit(1) from e + data = get_node_data(src) node_data = data.data[src] - click.secho(f"Downloading {node_data.name}", fg="blue") + if progress != Progress.none: + click.secho(f"Downloading {node_data.name}", fg="blue") can_have_children = node_data.type in { LDataNodeType.account_root, @@ -70,14 +71,11 @@ def download( headers={"Authorization": get_auth_header()}, json={"path": normalized}, ) - if res.status_code != 200: - click.secho( + raise HTTPException( f"failed to fetch presigned url(s) for path {src} with code" - f" {res.status_code}: {res.json()['error']}", - fg="red", + f" {res.status_code}: {res.json()['error']}" ) - raise click.exceptions.Exit(1) json_data = res.json() if can_have_children: @@ -89,11 +87,9 @@ def download( try: dest.mkdir(exist_ok=True) except FileNotFoundError as e: - click.secho(f"No such download destination {dest}", fg="red") - raise click.exceptions.Exit(1) from e + raise ValueError(f"No such download destination {dest}") except (FileExistsError, NotADirectoryError) as e: - click.secho(f"Download destination {dest} is not a directory", fg="red") - raise click.exceptions.Exit(1) from e + raise ValueError(f"Download destination {dest} is not a directory") unconfirmed_jobs: List[DownloadJob] = [] confirmed_jobs: List[DownloadJob] = [] @@ -116,7 +112,7 @@ def download( job.dest.parent.mkdir(parents=True, exist_ok=True) confirmed_jobs.append(job) except FileExistsError: - if click.confirm( + if confirm_overwrite and click.confirm( f"A file already exists at {job.dest.parent}. 
Overwrite?", default=False, ): @@ -124,6 +120,9 @@ def download( job.dest.parent.mkdir(parents=True, exist_ok=True) confirmed_jobs.append(job) else: + click.secho( + f"Skipping {job.dest.parent}, file already exists", fg="yellow" + ) rejected_jobs.add(job.dest.parent) num_files = len(confirmed_jobs) @@ -193,12 +192,10 @@ def download( total_time = end - start - click.echo( - f"""{click.style("Download Complete", fg="green")} - -{click.style("Time Elapsed: ", fg="blue")}{human_readable_time(total_time)} -{click.style("Files Downloaded: ", fg="blue")}{num_files} ({with_si_suffix(total_bytes)})""" - ) + click.echo(dedent(f"""{click.style("Download Complete", fg="green")} + {click.style("Time Elapsed: ", fg="blue")}{human_readable_time(total_time)} + {click.style("Files Downloaded: ", fg="blue")}{num_files} ({with_si_suffix(total_bytes)}) + """)) # dest will always be a path which includes the copied file as its leaf diff --git a/latch/ldata/transfer/remote_copy.py b/latch/ldata/transfer/remote_copy.py new file mode 100644 index 00000000..4f1b49c8 --- /dev/null +++ b/latch/ldata/transfer/remote_copy.py @@ -0,0 +1,94 @@ +from textwrap import dedent + +import click +import gql +from gql.transport.exceptions import TransportQueryError +from latch_sdk_gql.execute import execute + +from latch.ldata.node import LDataNodeType, get_node_data +from latch_cli.utils.path import get_name_from_path, get_path_error + + +def remote_copy(src: str, dst: str) -> None: + node_data = get_node_data(src, dst, allow_resolve_to_parent=True) + + src_data = node_data.data[src] + dst_data = node_data.data[dst] + acc_id = node_data.acc_id + + path_by_id = {v.id: k for k, v in node_data.data.items()} + + if src_data.is_parent: + raise FileNotFoundError(get_path_error(src, "not found", acc_id)) + + new_name = None + if dst_data.is_parent: + new_name = get_name_from_path(dst) + elif dst_data.type in {LDataNodeType.obj, LDataNodeType.link}: + raise FileExistsError( + get_path_error(dst, "object already exists at path.", acc_id) + ) + + try: + execute( + gql.gql(""" + mutation Copy( + $argSrcNode: BigInt! + $argDstParent: BigInt! 
+ $argNewName: String + ) { + ldataCopy( + input: { + argSrcNode: $argSrcNode + argDstParent: $argDstParent + argNewName: $argNewName + } + ) { + clientMutationId + } + }"""), + { + "argSrcNode": src_data.id, + "argDstParent": dst_data.id, + "argNewName": new_name, + }, + ) + except TransportQueryError as e: + if e.errors is None or len(e.errors) == 0: + raise e + + msg: str = e.errors[0]["message"] + + if msg.startswith("Permission denied on node"): + node_id = msg.rsplit(" ", 1)[1] + path = path_by_id[node_id] + + raise ValueError(get_path_error(path, "permission denied.", acc_id)) + elif msg == "Refusing to make node its own parent": + raise ValueError(get_path_error(dst, f"is a parent of {src}.", acc_id)) + elif msg == "Refusing to parent node to an object node": + raise ValueError(get_path_error(dst, f"object exists at path.", acc_id)) + elif msg == "Refusing to move a share link (or into a share link)": + raise ValueError( + get_path_error( + src if src_data.type is LDataNodeType.link else dst, + f"is a share link.", + acc_id, + ) + ) + elif msg.startswith("Refusing to copy account root"): + raise ValueError(get_path_error(src, "is an account root.", acc_id)) + elif msg.startswith("Refusing to copy removed node"): + raise ValueError(get_path_error(src, "not found.", acc_id)) + elif msg.startswith("Refusing to copy already in-transit node"): + raise ValueError(get_path_error(src, "copy already in progress.", acc_id)) + elif msg == "Conflicting object in destination": + raise ValueError(get_path_error(dst, "object exists at path.", acc_id)) + + raise ValueError(get_path_error(src, str(e), acc_id)) + + click.echo(dedent(f""" + {click.style("Copy Requested.", fg="green")} + + {click.style("Source: ", fg="blue")}{(src)} + {click.style("Destination: ", fg="blue")}{(dst)}""")) diff --git a/latch/ldata/transfer/upload.py b/latch/ldata/transfer/upload.py index 2eff9048..2a303fdf 100644 --- a/latch/ldata/transfer/upload.py +++ b/latch/ldata/transfer/upload.py @@ -6,6 +6,7 @@ from concurrent.futures import Future, ProcessPoolExecutor, as_completed, wait from contextlib import closing from dataclasses import dataclass +from http.client import HTTPException from json import JSONDecodeError from multiprocessing.managers import DictProxy, ListProxy from pathlib import Path @@ -20,15 +21,9 @@ from latch.ldata.transfer.manager import TransferStateManager from latch.ldata.transfer.progress import Progress, ProgressBars from latch.ldata.transfer.throttle import Throttle -from latch.ldata.transfer.utils import ( - LDataNodeType, - get_max_workers, - get_node_data, - human_readable_time, -) +from latch.ldata.transfer.utils import get_max_workers, human_readable_time from latch_cli import tinyrequests from latch_cli.constants import latch_constants, units -from latch_cli.services.cp.utils import get_max_workers, human_readable_time from latch_cli.utils import get_auth_header, urljoins, with_si_suffix from latch_cli.utils.path import normalize_path @@ -57,22 +52,17 @@ class UploadJob: def upload( src: str, # pathlib.Path strips trailing slashes but we want to keep them here as they determine cp behavior dest: str, - progress: Progress = Progress.tasks, - verbose: bool = False, -): + progress: Progress, + verbose: bool, +) -> None: src_path = Path(src) if not src_path.exists(): - click.secho(f"Could not find {src_path}: no such file or directory.", fg="red") - raise click.exceptions.Exit(1) + raise ValueError(f"Could not find {src_path}: no such file or directory.") if progress != Progress.none: 
click.secho(f"Uploading {src_path.name}", fg="blue") - try: - node_data = get_node_data(dest, allow_resolve_to_parent=True) - except FileNotFoundError as e: - click.echo(str(e)) - raise click.exceptions.Exit(1) from e + node_data = get_node_data(dest, allow_resolve_to_parent=True) dest_data = node_data.data[dest] normalized = normalize_path(dest) @@ -86,11 +76,9 @@ def upload( if not dest_is_dir: if not dest_exists: # path is latch:///a/b/file_1/file_2 - click.secho(f"No such file or directory: {dest}", fg="red") - raise click.exceptions.Exit(1) + raise ValueError(f"No such file or directory: {dest}") if src_path.is_dir(): - click.secho(f"{normalized} is not a directory.", fg="red") - raise click.exceptions.Exit(1) + raise ValueError(f"{normalized} is not a directory.") if progress == Progress.none: num_bars = 0 @@ -271,7 +259,6 @@ def upload( end = time.monotonic() total_time = end - start if progress != Progress.none: - click.clear() click.echo( f"""{click.style("Upload Complete", fg="green")} @@ -301,9 +288,7 @@ def start_upload( latency_q: Optional["LatencyQueueType"] = None, ) -> Optional[StartUploadReturnType]: if not src.exists(): - raise click.exceptions.Exit( - click.style(f"Could not find {src}: no such file or link", fg="red") - ) + raise ValueError(f"Could not find {src}: no such file or link") if src.is_symlink(): src = src.resolve() @@ -321,12 +306,9 @@ def start_upload( file_size = src.stat().st_size if file_size > latch_constants.maximum_upload_size: - raise click.exceptions.Exit( - click.style( - f"File is {with_si_suffix(file_size)} which exceeds the maximum" - " upload size (5TiB)", - fg="red", - ) + raise ValueError( + f"File is {with_si_suffix(file_size)} which exceeds the maximum" + " upload size (5TiB)", ) part_count = min( @@ -380,12 +362,7 @@ def start_upload( **data, part_count=part_count, part_size=part_size, src=src, dest=dest ) - raise click.exceptions.Exit( - click.style( - f"Unable to generate upload URL for {src}", - fg="red", - ) - ) + raise RuntimeError(f"Unable to generate upload URL for {src}") @dataclass(frozen=True) @@ -414,8 +391,8 @@ def upload_file_chunk( res = tinyrequests.put(url, data=data) if res.status_code != 200: - raise click.exceptions.Exit( - click.style(f"Failed to upload part {part_index} of {src}", fg="red") + raise HTTPException( + f"Failed to upload part {part_index} of {src}: {res.status_code}" ) ret = CompletedPart( @@ -473,11 +450,7 @@ def end_upload( ) if res.status_code != 200: - raise click.exceptions.Exit( - click.style( - f"Unable to complete file upload: {res.json()['error']}", fg="red" - ) - ) + raise HTTPException(f"Unable to complete file upload: {res.json()['error']}") if progress_bars is not None: progress_bars.update_total_progress(1) diff --git a/latch_cli/main.py b/latch_cli/main.py index d6f3452a..93382d14 100644 --- a/latch_cli/main.py +++ b/latch_cli/main.py @@ -11,11 +11,11 @@ from typing_extensions import ParamSpec import latch_cli.click_utils +from latch.ldata.transfer.progress import Progress from latch_cli.click_utils import EnumChoice from latch_cli.exceptions.handler import CrashHandler from latch_cli.services.cp.autocomplete import complete as cp_complete from latch_cli.services.cp.autocomplete import remote_complete -from latch_cli.services.cp.config import Progress from latch_cli.services.init.init import template_flag_to_option from latch_cli.services.local_dev import TaskSize from latch_cli.utils import ( diff --git a/latch_cli/services/cp/main.py b/latch_cli/services/cp/main.py index 89db098d..b775250d 
100644
--- a/latch_cli/services/cp/main.py
+++ b/latch_cli/services/cp/main.py
@@ -10,42 +10,6 @@
 from latch_cli.utils.path import is_remote_path
 
 
-# todo(ayush): figure out how to do progress for this
-def remote_copy(
-    src: LPath,
-    dest: LPath,
-):
-    click.clear()
-
-    try:
-        src.copy(dest)
-    except Exception as e:
-        click.echo(str(e))
-        raise click.exceptions.Exit(1) from e
-
-    click.echo(f"""
-{click.style("Copy Requested.", fg="green")}
-
-{click.style("Source: ", fg="blue")}{(src)}
-{click.style("Destination: ", fg="blue")}{(dest)}""")
-
-
-def upload(src: str, dest: LPath, progress: Progress, verbose: bool):
-    try:
-        dest.upload(src, progress=progress, verbose=verbose)
-    except Exception as e:
-        click.echo(str(e))
-        raise click.exceptions.Exit(1) from e
-
-
-def download(src: LPath, dest: str, progress: Progress, verbose: bool):
-    try:
-        src.download(dest, progress=progress, verbose=verbose)
-    except Exception as e:
-        click.echo(str(e))
-        raise click.exceptions.Exit(1) from e
-
-
 # todo(ayush): come up with a better behavior scheme than unix cp
 def cp(
     srcs: List[str],
@@ -60,28 +24,33 @@ def cp(
     for src in srcs:
         src_remote = is_remote_path(src)
 
-        if src_remote and not dest_remote:
-            if expand_globs:
-                [
-                    download(LPath(p), Path(dest), progress, verbose)
-                    for p in expand_pattern(src)
-                ]
-            else:
-                download(LPath(src), Path(dest), progress, verbose)
-        elif not src_remote and dest_remote:
-            upload(src, LPath(dest), progress, verbose)
-        elif src_remote and dest_remote:
-            if expand_globs:
-                [remote_copy(LPath(p), LPath(dest)) for p in expand_pattern(src)]
+        try:
+            if src_remote and not dest_remote:
+                if expand_globs:
+                    [
+                        LPath(p).download(
+                            Path(dest), progress=progress, verbose=verbose
+                        )
+                        for p in expand_pattern(src)
+                    ]
+                else:
+                    LPath(src).download(Path(dest), progress=progress, verbose=verbose)
+            elif not src_remote and dest_remote:
+                LPath(dest).upload(src, progress=progress, verbose=verbose)
+            elif src_remote and dest_remote:
+                if expand_globs:
+                    [LPath(p).copy(dest) for p in expand_pattern(src)]
+                else:
+                    LPath(src).copy(dest)
             else:
-                remote_copy(LPath(src), LPath(dest))
-        else:
-            click.secho(
-                dedent(f"""
-                `latch cp` cannot be used for purely local file copying.
-
-                Please ensure at least one of your arguments is a remote path (beginning with `latch://`)
-                """).strip("\n"),
-                fg="red",
-            )
-            raise click.exceptions.Exit(1)
+                raise ValueError(
+                    dedent(f"""
+                    `latch cp` cannot be used for purely local file copying.
+
+                    Please ensure at least one of your arguments is a remote path (beginning with `latch://`)
+                    """).strip("\n")
+                )
+        except Exception as e:
+            click.secho(str(e), fg="red")
+            raise click.exceptions.Exit(1) from e
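
The rewritten `cp` above routes every branch through the `LPath` API, so the same operations can be driven straight from Python. A minimal sketch of that mapping (an editorial illustration against this patch, not part of the series; all `latch:///...` paths below are placeholders):

    from pathlib import Path

    from latch.ldata.path import LPath
    from latch.ldata.transfer.progress import Progress

    # remote -> local, the equivalent of `latch cp latch:///samples/a.fastq .`
    LPath("latch:///samples/a.fastq").download(
        Path("a.fastq"), progress=Progress.tasks, verbose=False
    )

    # local -> remote
    LPath("latch:///samples/").upload("a.fastq", progress=Progress.tasks, verbose=False)

    # remote -> remote, performed server side with no data moving through the client
    LPath("latch:///samples/a.fastq").copy("latch:///backup/a.fastq")
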
diff --git a/latch_cli/services/ls.py b/latch_cli/services/ls.py
index dd8779a4..d692b628 100644
--- a/latch_cli/services/ls.py
+++ b/latch_cli/services/ls.py
@@ -10,7 +10,7 @@
 import gql
 from latch_sdk_gql.execute import execute
 
-from latch.lpath.utils import LDataNodeType
+from latch.ldata.transfer.utils import LDataNodeType
 from latch_cli.click_utils import bold
 from latch_cli.utils import with_si_suffix
 from latch_cli.utils.path import normalize_path
diff --git a/latch_cli/services/move.py b/latch_cli/services/move.py
index 700b29ad..5f73783b 100644
--- a/latch_cli/services/move.py
+++ b/latch_cli/services/move.py
@@ -5,7 +5,7 @@
 from gql.transport.exceptions import TransportQueryError
 from latch_sdk_gql.execute import execute
 
-from latch.lpath.utils import LDataNodeType, get_node_data
+from latch.ldata.node import LDataNodeType, get_node_data
 from latch_cli.services.cp.glob import expand_pattern
 from latch_cli.utils.path import get_name_from_path, get_path_error, is_remote_path
 

From 8d1672cf87b55d94e09a43b50c0a239d2cba92f0 Mon Sep 17 00:00:00 2001
From: Rahul Desai
Date: Sat, 17 Feb 2024 09:29:40 -0800
Subject: [PATCH 05/15] bug fix

---
 latch_cli/services/ls.py   | 2 +-
 latch_cli/services/sync.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/latch_cli/services/ls.py b/latch_cli/services/ls.py
index d692b628..791d40f7 100644
--- a/latch_cli/services/ls.py
+++ b/latch_cli/services/ls.py
@@ -10,7 +10,7 @@
 import gql
 from latch_sdk_gql.execute import execute
 
-from latch.ldata.transfer.utils import LDataNodeType
+from latch.ldata.node import LDataNodeType
 from latch_cli.click_utils import bold
 from latch_cli.utils import with_si_suffix
 from latch_cli.utils.path import normalize_path
diff --git a/latch_cli/services/sync.py b/latch_cli/services/sync.py
index 08281a94..4f478ae7 100644
--- a/latch_cli/services/sync.py
+++ b/latch_cli/services/sync.py
@@ -11,7 +11,7 @@
 from gql.transport.exceptions import TransportQueryError
 from latch_sdk_gql.execute import JsonValue, execute
 
-import latch_cli.services.cp.upload as upl
+import latch.ldata.transfer.upload as upl
 
 
 def upload_file(src: Path, dest: str):

From dca9c495a21beb1c0c9d8007637431b8cf549318 Mon Sep 17 00:00:00 2001
From: Rahul Desai
Date: Sat, 17 Feb 2024 14:43:03 -0800
Subject: [PATCH 06/15] implement flyte type transformer for lPath

---
 latch/ldata/path.py | 59 +++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 54 insertions(+), 5 deletions(-)

diff --git a/latch/ldata/path.py b/latch/ldata/path.py
index 7bb64efa..a682a893 100644
--- a/latch/ldata/path.py
+++ b/latch/ldata/path.py
@@ -1,9 +1,19 @@
 from pathlib import Path
-from typing import Generator, Optional, Union
+from typing import Generator, Optional, Type, Union
 from urllib.parse import urljoin
 from uuid import uuid4
 
 import gql
+from flytekit import (
+    Blob,
+    BlobMetadata,
+    BlobType,
+    FlyteContext,
+    Literal,
+    LiteralType,
+    Scalar,
+)
+from flytekit.extend import TypeEngine, TypeTransformer
 from latch_sdk_config.latch import NUCLEUS_URL
 from latch_sdk_gql.execute import execute
 
@@ -165,7 +175,46 @@ def __truediv__(self, other: Union[Path, str]) -> "LPath":
         return LPath(f"{Path(self.path) / other}")
 
 
-if __name__ == "__main__":
-    # tests
-    file_path = LPath("latch://24030.account/test_dir/B.txt")
-    file_path.share_with("rahuljaydesai@gmail.com", PermLevel.VIEWER)
+class LPathTransformer(TypeTransformer[LPath]):
+    def __init__(self):
+        super(LPathTransformer, self).__init__(name="lpath-transformer", t=LPath)
+
+    def get_literal_type(self, t: Type[LPath]) -> LiteralType:
+        return LiteralType(
+            blob=BlobType(
+                # note: there is no way to check here whether the LPath is a file or a dir
+                format="binary",
+                dimensionality=BlobType.BlobDimensionality.SINGLE,
+            )
+        )
+
+    def to_literal(
+        self,
+        ctx: FlyteContext,
+        python_val: LPath,
+        python_type: Type[LPath],
+        expected: LiteralType,
+    ) -> Literal:
+        dimensionality = (
+            BlobType.BlobDimensionality.MULTIPART
+            if python_val.is_dir()
+            else BlobType.BlobDimensionality.SINGLE
+        )
+        return Literal(
+            scalar=Scalar(
+                blob=Blob(
+                    uri=python_val.path,
+                    metadata=BlobMetadata(
+                        format="binary", dimensionality=dimensionality
+                    ),
+                )
+            )
+        )
+
+    def to_python_value(
+        self, ctx: FlyteContext, lv: Literal, expected_python_type: Type[LPath]
+    ):
+        return LPath(path=lv.scalar.blob.uri)
+
+
+TypeEngine.register(LPathTransformer())
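
With `LPathTransformer` registered, `LPath` values can be passed directly between workflow tasks as Flyte blob literals. A short sketch of the intended round trip (illustrative only; it assumes the `latch` package's `small_task` decorator is importable as shown, and `sort_bam` plus every path here are hypothetical):

    from pathlib import Path

    from latch import small_task
    from latch.ldata.path import LPath

    @small_task
    def sort_bam(bam: LPath) -> LPath:
        # the transformer serializes this LPath argument into a blob literal on
        # the way in and reconstructs it from the blob uri on the way out
        local: Path = bam.download()
        sorted_bam = local.with_suffix(".sorted.bam")
        # ... run the actual sort on `local`, writing `sorted_bam` ...
        out = LPath("latch:///outputs") / sorted_bam.name
        out.upload(sorted_bam)
        return out
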
From 9c6d7c3ef0fa4c610c4e1a09475af43227911fd8 Mon Sep 17 00:00:00 2001
From: Rahul Desai
Date: Sat, 17 Feb 2024 16:48:57 -0800
Subject: [PATCH 07/15] implement caching

---
 latch/ldata/path.py                 | 260 ++++++++++++++++------------
 latch/ldata/transfer/__init__.py    |   3 -
 latch/ldata/transfer/download.py    |  11 +-
 latch/ldata/{ => transfer}/node.py  | 103 +----------
 latch/ldata/transfer/remote_copy.py |   2 +-
 latch/ldata/transfer/upload.py      |   2 +-
 latch_cli/services/ls.py            |   2 +-
 latch_cli/services/move.py          |   2 +-
 8 files changed, 170 insertions(+), 215 deletions(-)
 rename latch/ldata/{ => transfer}/node.py (60%)

diff --git a/latch/ldata/path.py b/latch/ldata/path.py
index a682a893..e41dcd47 100644
--- a/latch/ldata/path.py
+++ b/latch/ldata/path.py
@@ -1,7 +1,7 @@
+import re
+from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Generator, Optional, Type, Union
-from urllib.parse import urljoin
-from uuid import uuid4
+from typing import Generator, Optional, Type
 
 import gql
 from flytekit import (
@@ -14,78 +14,141 @@
     Scalar,
 )
 from flytekit.extend import TypeEngine, TypeTransformer
-from latch_sdk_config.latch import NUCLEUS_URL
 from latch_sdk_gql.execute import execute
 
-from latch.ldata.node import (
-    LDataNodeType,
-    LDataPerms,
-    PermLevel,
-    get_node_data,
-    get_node_metadata,
-    get_node_perms,
-)
-from latch.ldata.transfer import download, remote_copy, upload
+from latch.ldata.transfer.download import download
+from latch.ldata.transfer.node import LDataNodeType
 from latch.ldata.transfer.progress import Progress
-from latch_cli.tinyrequests import post
-from latch_cli.utils import get_auth_header, urljoins
-from latch_cli.utils.path import is_remote_path
+from latch.ldata.transfer.remote_copy import remote_copy
+from latch.ldata.transfer.upload import upload
+from latch_cli.utils import urljoins
+
+node_id_regex = re.compile(r"^latch://(?P<id>[0-9]+)\.node$")
+
+dir_types = {
+    LDataNodeType.dir,
+    LDataNodeType.account_root,
+    LDataNodeType.mount,
+}
+
+
+@dataclass
+class _Cache:
+    """Internal cache class to organize information for a `LPath`."""
+
+    path: Optional[str] = None
+    node_id: Optional[str] = None
+    name: Optional[str] = None
+    type: Optional[LDataNodeType] = None
+    size: Optional[int] = None
+    content_type: Optional[str] = None
+
+
+@dataclass
 class LPath:
+    _cache: _Cache = field(
+        default_factory=lambda: _Cache(),
+        init=False,
+        repr=False,
+        hash=False,
+        compare=False,
+    )
+
+
+    path: str
+
     def __init__(self, path: str):
-        if not is_remote_path(path):
+        if not path.startswith("latch://"):
             raise ValueError(f"Invalid LPath: {path} is not a Latch path")
         self.path = path
-        self._node_id = None
-
-    @property
-    def node_id(self) -> str:
-        if self._node_id is None:
-            self._node_id = get_node_data(self.path).data[self.path].id
-        return self._node_id
-
-    @property
-    def exists(self) -> bool:
-        try:
-            node_data = get_node_data(self.path).data[self.path]
-        except FileNotFoundError:
-            return False
-        return not node_data.removed
-
-    @property
-    def name(self) -> str:
-        return get_node_data(self.path).data[self.path].name
-
-    @property
-    def type(self) -> LDataNodeType:
-        return get_node_data(self.path).data[self.path].type
+        self._download_idx = 0
+
+    def load(self):
+        """(Re-)populate this LPath instance's cache.
+
+        Future calls to most getters will return immediately without making a network request.
+
+        Always makes a network request.
+        """
+        data = execute(
+            gql.gql("""
+            query GetNodeData($path: String!) {
+                ldataResolvePathToNode(path: $path) {
+                    path
+                    ldataNode {
+                        finalLinkTarget {
+                            id
+                            name
+                            type
+                            removed
+                            ldataObjectMeta {
+                                contentSize
+                                contentType
+                            }
+                        }
+                    }
+                }
+            }"""),
+            {"path": self.path},
+        )["ldataResolvePathToNode"]
+
+        self._cache.path = self.path
+
+        final_link_target = data["ldataNode"]["finalLinkTarget"]
+        self._cache.node_id = final_link_target["id"]
+        self._cache.name = final_link_target["name"]
+        self._cache.type = LDataNodeType(final_link_target["type"].lower())
+        self._cache.size = int(final_link_target["ldataObjectMeta"]["contentSize"])
+        self._cache.content_type = final_link_target["ldataObjectMeta"]["contentType"]
+
+    def node_id(self, *, load_if_missing: bool = True) -> Optional[str]:
+        match = node_id_regex.match(self.path)
+        if match:
+            return match.group("id")
+
+        if self._cache.node_id is None or self._cache.path != self.path:
+            if not load_if_missing:
+                return None
+            self.load()
+        return self._cache.node_id
+
+    def name(self, *, load_if_missing: bool = True) -> Optional[str]:
+        if self._cache.name is None or self._cache.path != self.path:
+            if not load_if_missing:
+                return None
+            self.load()
+        return self._cache.name
+
+    def type(self, *, load_if_missing: bool = True) -> Optional[LDataNodeType]:
+        if self._cache.type is None or self._cache.path != self.path:
+            if not load_if_missing:
+                return None
+            self.load()
+        return self._cache.type
+
+    def size(self, *, load_if_missing: bool = True) -> Optional[float]:
+        if self._cache.size is None or self._cache.path != self.path:
+            if not load_if_missing:
+                return None
+            self.load()
+        return self._cache.size
+
+    def content_type(self, *, load_if_missing: bool = True) -> Optional[str]:
+        if self._cache.content_type is None or self._cache.path != self.path:
+            if not load_if_missing:
+                return None
+            self.load()
+        return self._cache.content_type
 
     def is_dir(self) -> bool:
-        return self.type in {
-            LDataNodeType.dir,
-            LDataNodeType.account_root,
-            LDataNodeType.mount,
-        }
-
-    @property
-    def size(self) -> float:
-        metadata = get_node_metadata(self.node_id)
-        return metadata.size
-
-    @property
-    def content_type(self) -> str:
-        metadata = get_node_metadata(self.node_id)
-        return metadata.content_type
-
-    def iterdir(self) -> Generator[Path, None, None]:
-        if not self.is_dir():
-            raise ValueError(f"Not a directory: {self.path}")
+        return self.type() in dir_types
+
+    def iterdir(self) -> Generator["LPath", None, None]:
         data = execute(
             gql.gql("""
             query LDataChildren($argPath: String!) 
{ ldataResolvePathData(argPath: $argPath) { finalLinkTarget { + type childLdataTreeEdges(filter: { child: { removed: { equalTo: false } } }) { nodes { child { @@ -100,10 +163,12 @@ def iterdir(self) -> Generator[Path, None, None]: )["ldataResolvePathData"] if data is None: - raise ValueError(f"No directory found at path: {self.path}") + raise FileNotFoundError(f"No such Latch file or directory: {self.path}") + if data["finalLinkTarget"]["type"].lower() not in dir_types: + raise ValueError(f"{self.path} is not a directory") for node in data["finalLinkTarget"]["childLdataTreeEdges"]["nodes"]: - yield urljoins(self.path, node["child"]["name"]) + yield LPath(urljoins(self.path, node["child"]["name"])) def rmr(self) -> None: execute( @@ -117,62 +182,39 @@ def rmr(self) -> None: {"nodeId": self.node_id}, ) - def copy(self, dst: Union["LPath", str]) -> None: - remote_copy(self.path, str(dst)) + def copy(self, dst: "LPath") -> None: + remote_copy(self.path, dst.path) - def upload(self, src: Path, progress=Progress.tasks, verbose=False) -> None: - upload(src, self.path, progress, verbose) + def upload(self, src: Path, *, show_progress_bar: bool = False) -> None: + upload( + src, + self.path, + progress=Progress.tasks if show_progress_bar else Progress.none, + verbose=show_progress_bar, + ) def download( - self, dst: Optional[Path] = None, progress=Progress.tasks, verbose=False + self, dst: Optional[Path] = None, *, show_progress_bar: bool = False ) -> Path: if dst is None: - dir = Path(".") / "downloads" / str(uuid4()) + dir = Path.home() / "lpath" / str(self._download_idx) + self._download_idx += 1 dir.mkdir(parents=True, exist_ok=True) - dst = dir / self.name - - download(self.path, dst, progress, verbose, confirm_overwrite=False) - return dst - - @property - def perms(self) -> LDataPerms: - return get_node_perms(self.node_id) - - def share_with(self, email: str, perm_level: PermLevel) -> None: - resp = post( - url=urljoin(NUCLEUS_URL, "/ldata/send-share-email"), - json={ - "node_id": self.node_id, - "perm_level": str(perm_level), - "receiver_email": email, - }, - headers={"Authorization": get_auth_header()}, + dst = dir / self.name() + + download( + self.path, + dst, + progress=Progress.tasks if show_progress_bar else Progress.none, + verbose=show_progress_bar, + confirm_overwrite=False, ) - resp.raise_for_status() - - def _toggle_share_link(self, enable: bool) -> None: - execute( - gql.gql(""" - mutation LDataShare($nodeId: BigInt!, $value: Boolean!) 
{ - ldataShare(input: { argNodeId: $nodeId, argValue: $value }) { - clientMutationId - } - } - """), - {"nodeId": self.node_id, "value": enable}, - ) - - def enable_share_link(self) -> None: - self._toggle_share_link(True) - - def disable_share_link(self) -> None: - self._toggle_share_link(False) - - def __str__(self) -> str: - return self.path + return dst - def __truediv__(self, other: Union[Path, str]) -> "LPath": - return LPath(f"{Path(self.path) / other}") + def __truediv__(self, other: object) -> "LPath": + if not isinstance(other, (LPath, str)): + return NotImplemented + return LPath(urljoins(self.path, other)) class LPathTransformer(TypeTransformer[LPath]): diff --git a/latch/ldata/transfer/__init__.py b/latch/ldata/transfer/__init__.py index ba7905ab..e69de29b 100644 --- a/latch/ldata/transfer/__init__.py +++ b/latch/ldata/transfer/__init__.py @@ -1,3 +0,0 @@ -from latch.ldata.transfer.download import download -from latch.ldata.transfer.remote_copy import remote_copy -from latch.ldata.transfer.upload import upload diff --git a/latch/ldata/transfer/download.py b/latch/ldata/transfer/download.py index 6830a97b..e7dce538 100644 --- a/latch/ldata/transfer/download.py +++ b/latch/ldata/transfer/download.py @@ -11,8 +11,8 @@ import click from latch_sdk_config.latch import config as latch_config -from latch.ldata.node import LDataNodeType, get_node_data from latch.ldata.transfer.manager import TransferStateManager +from latch.ldata.transfer.node import LDataNodeType, get_node_data from latch.ldata.transfer.progress import Progress, ProgressBars, get_free_index from latch.ldata.transfer.utils import get_max_workers, human_readable_time from latch_cli import tinyrequests @@ -192,10 +192,11 @@ def download( total_time = end - start - click.echo(dedent(f"""{click.style("Download Complete", fg="green")} - {click.style("Time Elapsed: ", fg="blue")}{human_readable_time(total_time)} - {click.style("Files Downloaded: ", fg="blue")}{num_files} ({with_si_suffix(total_bytes)}) - """)) + if progress != Progress.none: + click.echo(dedent(f"""{click.style("Download Complete", fg="green")} + {click.style("Time Elapsed: ", fg="blue")}{human_readable_time(total_time)} + {click.style("Files Downloaded: ", fg="blue")}{num_files} ({with_si_suffix(total_bytes)}) + """)) # dest will always be a path which includes the copied file as its leaf diff --git a/latch/ldata/node.py b/latch/ldata/transfer/node.py similarity index 60% rename from latch/ldata/node.py rename to latch/ldata/transfer/node.py index e0f0cde0..28f0b0d1 100644 --- a/latch/ldata/node.py +++ b/latch/ldata/transfer/node.py @@ -6,10 +6,11 @@ import graphql.language as l from latch_sdk_gql.execute import execute from latch_sdk_gql.utils import _name_node, _parse_selection +from typing_extensions import TypeAlias from latch_cli.utils.path import get_path_error, normalize_path -AccId = int +AccId: TypeAlias = int class LDataNodeType(str, Enum): @@ -20,10 +21,17 @@ class LDataNodeType(str, Enum): link = "link" +class LDataObjectMeta(TypedDict): + contentSize: str + contentType: str + + class FinalLinkTargetPayload(TypedDict): id: str type: str name: str + removed: bool + ldataObjectMeta: LDataObjectMeta class LdataNodePayload(TypedDict): @@ -44,7 +52,6 @@ class NodeData: id: str name: str type: LDataNodeType - removed: bool is_parent: bool @@ -79,7 +86,6 @@ def get_node_data( id name type - removed } } } @@ -138,100 +144,9 @@ def get_node_data( id=final_link_target["id"], name=final_link_target["name"], 
type=LDataNodeType(final_link_target["type"].lower()), - removed=final_link_target["removed"], is_parent=is_parent, ) except (TypeError, ValueError) as e: raise FileNotFoundError(get_path_error(remote_path, "not found", acc_id)) return GetNodeDataResult(acc_id, ret) - - -@dataclass(frozen=True) -class NodeMetadata: - id: str - size: int - content_type: str - - -def get_node_metadata(node_id: str) -> NodeMetadata: - data = execute( - gql.gql(""" - query NodeMetadataQuery($id: BigInt!) { - ldataNode(id: $id) { - removed - ldataObjectMeta { - contentSize - contentType - } - } - } - """), - variables={"id": node_id}, - )["ldataNode"] - if data is None or data["removed"]: - raise FileNotFoundError - - return NodeMetadata( - id=node_id, - size=int(data["ldataObjectMeta"]["contentSize"]), - content_type=data["ldataObjectMeta"]["contentType"], - ) - - -class PermLevel(str, Enum): - NONE = "none" - VIEWER = "viewer" - MEMBER = "member" - ADMIN = "admin" - OWNER = "owner" - - -@dataclass(frozen=True) -class LDataPerms: - id: str - shared: bool - share_invites: Dict[str, PermLevel] - share_perms: Dict[AccId, PermLevel] - - -def get_node_perms(node_id: str) -> LDataPerms: - data = execute( - gql.gql(""" - query NodePermissionsQuery($id: BigInt!) { - ldataNode(id: $id) { - id - removed - ldataSharePermissionsByObjectId { - nodes { - receiverId - level - } - } - ldataShareInvitesByObjectId { - nodes { - receiverEmail - level - } - } - shared - } - } - """), - variables={"id": node_id}, - )["ldataNode"] - if data is None or data["removed"]: - raise FileNotFoundError - - return LDataPerms( - id=node_id, - shared=data["shared"], - share_invites={ - node["reveiverEmail"]: node["level"] - for node in data["ldataShareInvitesByObjectId"]["nodes"] - }, - share_perms={ - int(node["receiverId"]): node["level"] - for node in data["ldataSharePermissionsByObjectId"]["nodes"] - }, - ) diff --git a/latch/ldata/transfer/remote_copy.py b/latch/ldata/transfer/remote_copy.py index 4f1b49c8..f2afc4e0 100644 --- a/latch/ldata/transfer/remote_copy.py +++ b/latch/ldata/transfer/remote_copy.py @@ -5,7 +5,7 @@ from gql.transport.exceptions import TransportQueryError from latch_sdk_gql.execute import execute -from latch.ldata.node import LDataNodeType, get_node_data +from latch.ldata.transfer.node import LDataNodeType, get_node_data from latch_cli.utils.path import get_name_from_path, get_path_error diff --git a/latch/ldata/transfer/upload.py b/latch/ldata/transfer/upload.py index 2a303fdf..42588265 100644 --- a/latch/ldata/transfer/upload.py +++ b/latch/ldata/transfer/upload.py @@ -17,8 +17,8 @@ from latch_sdk_config.latch import config as latch_config from typing_extensions import TypeAlias -from latch.ldata.node import LDataNodeType, get_node_data from latch.ldata.transfer.manager import TransferStateManager +from latch.ldata.transfer.node import LDataNodeType, get_node_data from latch.ldata.transfer.progress import Progress, ProgressBars from latch.ldata.transfer.throttle import Throttle from latch.ldata.transfer.utils import get_max_workers, human_readable_time diff --git a/latch_cli/services/ls.py b/latch_cli/services/ls.py index 791d40f7..94690bdb 100644 --- a/latch_cli/services/ls.py +++ b/latch_cli/services/ls.py @@ -10,7 +10,7 @@ import gql from latch_sdk_gql.execute import execute -from latch.ldata.node import LDataNodeType +from latch.ldata.transfer.node import LDataNodeType from latch_cli.click_utils import bold from latch_cli.utils import with_si_suffix from latch_cli.utils.path import normalize_path diff --git 
a/latch_cli/services/move.py b/latch_cli/services/move.py
index 5f73783b..3878bf8d 100644
--- a/latch_cli/services/move.py
+++ b/latch_cli/services/move.py
@@ -5,7 +5,7 @@
 from gql.transport.exceptions import TransportQueryError
 from latch_sdk_gql.execute import execute
 
-from latch.ldata.node import LDataNodeType, get_node_data
+from latch.ldata.transfer.node import LDataNodeType, get_node_data
 from latch_cli.services.cp.glob import expand_pattern
 from latch_cli.utils.path import get_name_from_path, get_path_error, is_remote_path
 

From 9f014e3561d784c8d6d140cc1a13acca7e27cd71 Mon Sep 17 00:00:00 2001
From: Rahul Desai
Date: Mon, 19 Feb 2024 12:43:55 -0800
Subject: [PATCH 08/15] move node/transfer to private api

---
 latch/ldata/path.py                 |  23 ++++--
 latch/ldata/transfer/download.py    |  77 ++++++++---------
 latch/ldata/transfer/manager.py     |  14 ++--
 latch/ldata/transfer/node.py        |  39 ++++-----
 latch/ldata/transfer/progress.py    |  10 +--
 latch/ldata/transfer/remote_copy.py |   7 +-
 latch/ldata/transfer/throttle.py    |   2 +-
 latch/ldata/transfer/upload.py      | 123 ++++++++++++++--------------
 latch/ldata/transfer/utils.py       |   4 +-
 latch_cli/main.py                   |   2 +-
 latch_cli/services/cp/main.py       |  22 ++---
 latch_cli/services/ls.py            |   2 +-
 latch_cli/services/move.py          |   5 +-
 latch_cli/services/sync.py          |   4 +-
 14 files changed, 168 insertions(+), 166 deletions(-)

diff --git a/latch/ldata/path.py b/latch/ldata/path.py
index e41dcd47..a2c69eb4 100644
--- a/latch/ldata/path.py
+++ b/latch/ldata/path.py
@@ -1,5 +1,6 @@
 import re
 from dataclasses import dataclass, field
+from enum import Enum
 from pathlib import Path
 from typing import Generator, Optional, Type
 
 import gql
@@ -16,15 +17,23 @@
 from flytekit.extend import TypeEngine, TypeTransformer
 from latch_sdk_gql.execute import execute
 
-from latch.ldata.transfer.download import download
+from latch.ldata.transfer.download import _download
 from latch.ldata.transfer.progress import Progress
-from latch.ldata.transfer.remote_copy import remote_copy
-from latch.ldata.transfer.upload import upload
+from latch.ldata.transfer.remote_copy import _remote_copy
+from latch.ldata.transfer.upload import _upload
 from latch_cli.utils import urljoins
 
 node_id_regex = re.compile(r"^latch://(?P<id>[0-9]+)\.node$")
 
+
+class LDataNodeType(str, Enum):
+    account_root = "account_root"
+    dir = "dir"
+    obj = "obj"
+    mount = "mount"
+    link = "link"
+
+
 dir_types = {
     LDataNodeType.dir,
@@ -183,10 +192,10 @@ def rmr(self) -> None:
         )
 
     def copy(self, dst: "LPath") -> None:
-        remote_copy(self.path, dst.path)
+        _remote_copy(self.path, dst.path)
 
     def upload(self, src: Path, *, show_progress_bar: bool = False) -> None:
-        upload(
+        _upload(
             src,
             self.path,
             progress=Progress.tasks if show_progress_bar else Progress.none,
@@ -202,7 +211,7 @@ def download(
             dir.mkdir(parents=True, exist_ok=True)
             dst = dir / self.name()
 
-        download(
+        _download(
             self.path,
             dst,
             progress=Progress.tasks if show_progress_bar else Progress.none,
diff --git a/latch/ldata/transfer/download.py b/latch/ldata/transfer/download.py
index e7dce538..63ff719f 100644
--- a/latch/ldata/transfer/download.py
+++ b/latch/ldata/transfer/download.py
@@ -11,31 +11,33 @@
 import click
 from latch_sdk_config.latch import config as latch_config
 
-from latch.ldata.transfer.manager import TransferStateManager
-from latch.ldata.transfer.node import LDataNodeType, get_node_data
-from latch.ldata.transfer.progress import Progress, ProgressBars, get_free_index
-from latch.ldata.transfer.utils import 
get_max_workers, human_readable_time +from latch.ldata.path import LDataNodeType from latch_cli import tinyrequests from latch_cli.constants import Units from latch_cli.utils import get_auth_header, with_si_suffix from latch_cli.utils.path import normalize_path +from .manager import _TransferStateManager +from .node import _get_node_data +from .progress import Progress, _get_free_index, _ProgressBars +from .utils import _get_max_workers, _human_readable_time -class GetSignedUrlData(TypedDict): + +class _GetSignedUrlData(TypedDict): url: str -class GetSignedUrlsRecursiveData(TypedDict): +class _GetSignedUrlsRecursiveData(TypedDict): urls: Dict[str, str] @dataclass(frozen=True, unsafe_hash=True) -class DownloadJob: +class _DownloadJob: signed_url: str dest: Path -def download( +def _download( src: str, dest: Path, progress: Progress, @@ -49,11 +51,11 @@ def download( ) normalized = normalize_path(src) - data = get_node_data(src) + data = _get_node_data(src) node_data = data.data[src] - if progress != Progress.none: - click.secho(f"Downloading {node_data.name}", fg="blue") + if verbose: + print(f"Downloading {node_data.name}") can_have_children = node_data.type in { LDataNodeType.account_root, @@ -79,7 +81,7 @@ def download( json_data = res.json() if can_have_children: - dir_data: GetSignedUrlsRecursiveData = json_data["data"] + dir_data: _GetSignedUrlsRecursiveData = json_data["data"] if dest.exists() and not normalized.endswith("/"): dest = dest / node_data.name @@ -91,12 +93,12 @@ def download( except (FileExistsError, NotADirectoryError) as e: raise ValueError(f"Download destination {dest} is not a directory") - unconfirmed_jobs: List[DownloadJob] = [] - confirmed_jobs: List[DownloadJob] = [] + unconfirmed_jobs: List[_DownloadJob] = [] + confirmed_jobs: List[_DownloadJob] = [] rejected_jobs: Set[Path] = set() for rel_path, url in dir_data["urls"].items(): - unconfirmed_jobs.append(DownloadJob(url, dest / rel_path)) + unconfirmed_jobs.append(_DownloadJob(url, dest / rel_path)) for job in unconfirmed_jobs: reject_job = False @@ -120,9 +122,7 @@ def download( job.dest.parent.mkdir(parents=True, exist_ok=True) confirmed_jobs.append(job) else: - click.secho( - f"Skipping {job.dest.parent}, file already exists", fg="yellow" - ) + print(f"Skipping {job.dest.parent}, file already exists") rejected_jobs.add(job.dest.parent) num_files = len(confirmed_jobs) @@ -134,11 +134,11 @@ def download( num_bars = 0 show_total_progress = True else: - num_bars = min(get_max_workers(), num_files) + num_bars = min(_get_max_workers(), num_files) show_total_progress = True - with TransferStateManager() as manager: - progress_bars: ProgressBars + with _TransferStateManager() as manager: + progress_bars: _ProgressBars with closing( manager.ProgressBars( num_bars, @@ -151,10 +151,10 @@ def download( start = time.monotonic() # todo(ayush): benchmark this against asyncio - with ProcessPoolExecutor(max_workers=get_max_workers()) as executor: + with ProcessPoolExecutor(max_workers=_get_max_workers()) as executor: total_bytes = sum( executor.map( - download_file, + _download_file, confirmed_jobs, repeat(progress_bars), ) @@ -162,7 +162,7 @@ def download( end = time.monotonic() else: - file_data: GetSignedUrlData = json_data["data"] + file_data: _GetSignedUrlData = json_data["data"] num_files = 1 @@ -174,8 +174,8 @@ def download( else: num_bars = 1 - with TransferStateManager() as manager: - progress_bars: ProgressBars + with _TransferStateManager() as manager: + progress_bars: _ProgressBars with closing( 
manager.ProgressBars( num_bars, @@ -184,26 +184,27 @@ def download( ) ) as progress_bars: start = time.monotonic() - total_bytes = download_file( - DownloadJob(file_data["url"], dest), + total_bytes = _download_file( + _DownloadJob(file_data["url"], dest), progress_bars, ) end = time.monotonic() total_time = end - start - if progress != Progress.none: - click.echo(dedent(f"""{click.style("Download Complete", fg="green")} - {click.style("Time Elapsed: ", fg="blue")}{human_readable_time(total_time)} - {click.style("Files Downloaded: ", fg="blue")}{num_files} ({with_si_suffix(total_bytes)}) - """)) + if verbose: + print(dedent(f""" + Download Complete + Time Elapsed: {_human_readable_time(total_time)} + Files Downloaded: {num_files} ({with_si_suffix(total_bytes)}) + """)) # dest will always be a path which includes the copied file as its leaf # e.g. download_file("a/b.txt", Path("c/d.txt")) will copy the content of 'b.txt' into 'd.txt' -def download_file( - job: DownloadJob, - progress_bars: ProgressBars, +def _download_file( + job: _DownloadJob, + progress_bars: _ProgressBars, ) -> int: # todo(ayush): benchmark parallelized downloads using the range header with open(job.dest, "wb") as f: @@ -212,7 +213,7 @@ def download_file( total_bytes = res.headers.get("Content-Length") assert total_bytes is not None, "Must have a content-length header" - with get_free_index(progress_bars) as pbar_index: + with _get_free_index(progress_bars) as pbar_index: progress_bars.set( index=pbar_index, total=int(total_bytes), desc=job.dest.name ) @@ -229,7 +230,7 @@ def download_file( progress_bars.update_total_progress(1) progress_bars.write( f"Downloaded {job.dest.name} ({with_si_suffix(int(total_bytes))})" - f" in {human_readable_time(end - start)}" + f" in {_human_readable_time(end - start)}" ) return int(total_bytes) diff --git a/latch/ldata/transfer/manager.py b/latch/ldata/transfer/manager.py index 01796068..025e0aed 100644 --- a/latch/ldata/transfer/manager.py +++ b/latch/ldata/transfer/manager.py @@ -1,14 +1,14 @@ from multiprocessing.managers import SyncManager from typing import Type -from .progress import ProgressBars -from .throttle import Throttle +from .progress import _ProgressBars +from .throttle import _Throttle -class TransferStateManager(SyncManager): - ProgressBars: Type[ProgressBars] - Throttle: Type[Throttle] +class _TransferStateManager(SyncManager): + ProgressBars: Type[_ProgressBars] + Throttle: Type[_Throttle] -TransferStateManager.register("ProgressBars", ProgressBars) -TransferStateManager.register("Throttle", Throttle) +_TransferStateManager.register("ProgressBars", _ProgressBars) +_TransferStateManager.register("Throttle", _Throttle) diff --git a/latch/ldata/transfer/node.py b/latch/ldata/transfer/node.py index 28f0b0d1..dd8c6257 100644 --- a/latch/ldata/transfer/node.py +++ b/latch/ldata/transfer/node.py @@ -1,49 +1,40 @@ from dataclasses import dataclass -from enum import Enum from typing import Dict, List, TypedDict -import gql import graphql.language as l from latch_sdk_gql.execute import execute from latch_sdk_gql.utils import _name_node, _parse_selection from typing_extensions import TypeAlias +from latch.ldata.path import LDataNodeType from latch_cli.utils.path import get_path_error, normalize_path AccId: TypeAlias = int -class LDataNodeType(str, Enum): - account_root = "account_root" - dir = "dir" - obj = "obj" - mount = "mount" - link = "link" - - -class LDataObjectMeta(TypedDict): +class _LDataObjectMeta(TypedDict): contentSize: str contentType: str -class 
FinalLinkTargetPayload(TypedDict): +class _FinalLinkTargetPayload(TypedDict): id: str type: str name: str removed: bool - ldataObjectMeta: LDataObjectMeta + ldataObjectMeta: _LDataObjectMeta -class LdataNodePayload(TypedDict): - finalLinkTarget: FinalLinkTargetPayload +class _LdataNodePayload(TypedDict): + finalLinkTarget: _FinalLinkTargetPayload -class LdataResolvePathToNodePayload(TypedDict): +class _LdataResolvePathToNodePayload(TypedDict): path: str - ldataNode: LdataNodePayload + ldataNode: _LdataNodePayload -class AccountInfoCurrentPayload(TypedDict): +class _AccountInfoCurrentPayload(TypedDict): id: str @@ -56,14 +47,14 @@ class NodeData: @dataclass(frozen=True) -class GetNodeDataResult: +class _GetNodeDataResult: acc_id: str data: Dict[str, NodeData] -def get_node_data( +def _get_node_data( *remote_paths: str, allow_resolve_to_parent: bool = False -) -> GetNodeDataResult: +) -> _GetNodeDataResult: normalized: Dict[str, str] = {} acc_sel = _parse_selection(""" @@ -121,12 +112,12 @@ def get_node_data( res = execute(doc) - acc_info: AccountInfoCurrentPayload = res["accountInfoCurrent"] + acc_info: _AccountInfoCurrentPayload = res["accountInfoCurrent"] acc_id = acc_info["id"] ret: Dict[str, NodeData] = {} for i, remote_path in enumerate(remote_paths): - node: LdataResolvePathToNodePayload = res[f"q{i}"] + node: _LdataResolvePathToNodePayload = res[f"q{i}"] try: final_link_target = node["ldataNode"]["finalLinkTarget"] @@ -149,4 +140,4 @@ def get_node_data( except (TypeError, ValueError) as e: raise FileNotFoundError(get_path_error(remote_path, "not found", acc_id)) - return GetNodeDataResult(acc_id, ret) + return _GetNodeDataResult(acc_id, ret) diff --git a/latch/ldata/transfer/progress.py b/latch/ldata/transfer/progress.py index 506182a0..f0f56560 100644 --- a/latch/ldata/transfer/progress.py +++ b/latch/ldata/transfer/progress.py @@ -6,7 +6,7 @@ import tqdm -def get_progress_bar(): +def _get_progress_bar(): return tqdm.tqdm( total=0, leave=False, @@ -22,7 +22,7 @@ class Progress(Enum): tasks = "tasks" -class ProgressBars: +class _ProgressBars: def __init__( self, num_task_bars: int, @@ -31,7 +31,7 @@ def __init__( verbose: bool = False, ): if show_total_progress: - self.total_bar = get_progress_bar() + self.total_bar = _get_progress_bar() self.total_bar.desc = "Copying Files" self.total_bar.colour = "green" self.total_bar.unit = "" @@ -43,7 +43,7 @@ def __init__( self.verbose = verbose self.task_bars: List[tqdm.tqdm] = [ - get_progress_bar() for _ in range(num_task_bars) + _get_progress_bar() for _ in range(num_task_bars) ] self.free_indices = {i for i in range(num_task_bars)} self.task_bar_sema = BoundedSemaphore(num_task_bars) @@ -135,7 +135,7 @@ def close(self): @contextmanager -def get_free_index(progress_bars: ProgressBars): +def _get_free_index(progress_bars: _ProgressBars): try: pbar_index = progress_bars.get_free_task_bar_index() yield pbar_index diff --git a/latch/ldata/transfer/remote_copy.py b/latch/ldata/transfer/remote_copy.py index f2afc4e0..c609e08f 100644 --- a/latch/ldata/transfer/remote_copy.py +++ b/latch/ldata/transfer/remote_copy.py @@ -5,12 +5,13 @@ from gql.transport.exceptions import TransportQueryError from latch_sdk_gql.execute import execute -from latch.ldata.transfer.node import LDataNodeType, get_node_data +from latch.ldata.path import LDataNodeType +from latch.ldata.transfer.node import _get_node_data from latch_cli.utils.path import get_name_from_path, get_path_error -def remote_copy(src: str, dst: str) -> None: - node_data = get_node_data(src, dst, 
allow_resolve_to_parent=True) +def _remote_copy(src: str, dst: str) -> None: + node_data = _get_node_data(src, dst, allow_resolve_to_parent=True) src_data = node_data.data[src] dst_data = node_data.data[dst] diff --git a/latch/ldata/transfer/throttle.py b/latch/ldata/transfer/throttle.py index 30d95ecf..17be4bcf 100644 --- a/latch/ldata/transfer/throttle.py +++ b/latch/ldata/transfer/throttle.py @@ -2,7 +2,7 @@ @dataclass -class Throttle: +class _Throttle: delay: float = 0 def get_delay(self): diff --git a/latch/ldata/transfer/upload.py b/latch/ldata/transfer/upload.py index 42588265..96fd55d8 100644 --- a/latch/ldata/transfer/upload.py +++ b/latch/ldata/transfer/upload.py @@ -11,17 +11,18 @@ from multiprocessing.managers import DictProxy, ListProxy from pathlib import Path from queue import Queue +from textwrap import dedent from typing import TYPE_CHECKING, List, Optional, TypedDict -import click from latch_sdk_config.latch import config as latch_config from typing_extensions import TypeAlias -from latch.ldata.transfer.manager import TransferStateManager -from latch.ldata.transfer.node import LDataNodeType, get_node_data -from latch.ldata.transfer.progress import Progress, ProgressBars -from latch.ldata.transfer.throttle import Throttle -from latch.ldata.transfer.utils import get_max_workers, human_readable_time +from latch.ldata.path import LDataNodeType +from latch.ldata.transfer.manager import _TransferStateManager +from latch.ldata.transfer.node import _get_node_data +from latch.ldata.transfer.progress import Progress, _ProgressBars +from latch.ldata.transfer.throttle import _Throttle +from latch.ldata.transfer.utils import _get_max_workers, _human_readable_time from latch_cli import tinyrequests from latch_cli.constants import latch_constants, units from latch_cli.utils import get_auth_header, urljoins, with_si_suffix @@ -30,26 +31,26 @@ if TYPE_CHECKING: PathQueueType: TypeAlias = "Queue[Optional[Path]]" LatencyQueueType: TypeAlias = "Queue[Optional[float]]" - PartsBySrcType: TypeAlias = DictProxy[Path, ListProxy["CompletedPart"]] - UploadInfoBySrcType: TypeAlias = DictProxy[Path, "StartUploadReturnType"] + PartsBySrcType: TypeAlias = DictProxy[Path, ListProxy["_CompletedPart"]] + UploadInfoBySrcType: TypeAlias = DictProxy[Path, "_StartUploadReturnType"] -class EmptyUploadData(TypedDict): +class _EmptyUploadData(TypedDict): version_id: str -class StartUploadData(TypedDict): +class _StartUploadData(TypedDict): upload_id: str urls: List[str] @dataclass(frozen=True) -class UploadJob: +class _UploadJob: src: Path dest: str -def upload( +def _upload( src: str, # pathlib.Path strips trailing slashes but we want to keep them here as they determine cp behavior dest: str, progress: Progress, @@ -59,10 +60,10 @@ def upload( if not src_path.exists(): raise ValueError(f"Could not find {src_path}: no such file or directory.") - if progress != Progress.none: - click.secho(f"Uploading {src_path.name}", fg="blue") + if verbose: + print(f"Uploading {src_path.name}") - node_data = get_node_data(dest, allow_resolve_to_parent=True) + node_data = _get_node_data(dest, allow_resolve_to_parent=True) dest_data = node_data.data[dest] normalized = normalize_path(dest) @@ -86,27 +87,24 @@ def upload( elif not src_path.is_dir(): num_bars = 1 show_total_progress = False - elif progress == Progress.total: - num_bars = 0 - show_total_progress = True else: - num_bars = get_max_workers() + num_bars = _get_max_workers() show_total_progress = True - with ProcessPoolExecutor(max_workers=get_max_workers()) as exec: - 
with TransferStateManager() as man: + with ProcessPoolExecutor(max_workers=_get_max_workers()) as exec: + with _TransferStateManager() as man: parts_by_src: "PartsBySrcType" = man.dict() upload_info_by_src: "UploadInfoBySrcType" = man.dict() - throttle: Throttle = man.Throttle() + throttle: _Throttle = man.Throttle() latency_q: "LatencyQueueType" = man.Queue() - throttle_listener = exec.submit(throttler, throttle, latency_q) + throttle_listener = exec.submit(_throttler, throttle, latency_q) if src_path.is_dir(): if dest_exists and not src.endswith("/"): normalized = urljoins(normalized, src_path.name) - jobs: List[UploadJob] = [] + jobs: List[_UploadJob] = [] total_bytes = 0 for dir_path, _, file_names in os.walk(src_path, followlinks=True): @@ -115,7 +113,7 @@ def upload( parts_by_src[rel_path] = man.list() jobs.append( - UploadJob( + _UploadJob( rel_path, urljoins( normalized, @@ -128,7 +126,7 @@ def upload( num_files = len(jobs) - url_generation_bar: ProgressBars + url_generation_bar: _ProgressBars with closing( man.ProgressBars( 0, @@ -137,15 +135,15 @@ def upload( ) as url_generation_bar: url_generation_bar.set_total(num_files, "Generating URLs") - start_upload_futs: List[Future[Optional[StartUploadReturnType]]] = ( - [] - ) + start_upload_futs: List[ + Future[Optional[_StartUploadReturnType]] + ] = [] start = time.monotonic() for job in jobs: start_upload_futs.append( exec.submit( - start_upload, + _start_upload, job.src, job.dest, url_generation_bar, @@ -162,7 +160,7 @@ def upload( latency_q.put(None) wait([throttle_listener]) - chunk_upload_bars: ProgressBars + chunk_upload_bars: _ProgressBars with closing( man.ProgressBars( min(num_bars, num_files), @@ -173,7 +171,7 @@ def upload( chunk_upload_bars.set_total(num_files, "Uploading Files") # todo(ayush): async-ify - chunk_futs: List[Future[CompletedPart]] = [] + chunk_futs: List[Future[_CompletedPart]] = [] for data in start_upload_futs: res = data.result() @@ -192,7 +190,7 @@ def upload( for part_index, url in enumerate(res.urls): chunk_futs.append( exec.submit( - upload_file_chunk, + _upload_file_chunk, src=res.src, url=url, part_index=part_index, @@ -207,7 +205,7 @@ def upload( wait(chunk_futs) - if progress != Progress.none: + if verbose: print("\x1b[0GFinalizing uploads...") else: if dest_exists and dest_is_dir: @@ -216,7 +214,7 @@ def upload( num_files = 1 total_bytes = src_path.stat().st_size - progress_bars: ProgressBars + progress_bars: _ProgressBars with closing( man.ProgressBars( num_bars, @@ -227,18 +225,18 @@ def upload( pbar_index = progress_bars.get_free_task_bar_index() start = time.monotonic() - res = start_upload(src_path, normalized) + res = _start_upload(src_path, normalized) if res is not None: progress_bars.set( pbar_index, res.src.stat().st_size, res.src.name ) - chunk_futs: List[Future[CompletedPart]] = [] + chunk_futs: List[Future[_CompletedPart]] = [] for part_index, url in enumerate(res.urls): chunk_futs.append( exec.submit( - upload_file_chunk, + _upload_file_chunk, src_path, url, part_index, @@ -250,7 +248,7 @@ def upload( wait(chunk_futs) - end_upload( + _end_upload( normalized, res.upload_id, [fut.result() for fut in chunk_futs], @@ -258,17 +256,16 @@ def upload( end = time.monotonic() total_time = end - start - if progress != Progress.none: - click.echo( - f"""{click.style("Upload Complete", fg="green")} - -{click.style("Time Elapsed: ", fg="blue")}{human_readable_time(total_time)} -{click.style("Files Uploaded: ", fg="blue")}{num_files} ({with_si_suffix(total_bytes)})""" - ) + if verbose: + 
print(dedent(f""" + Upload Complete + Time Elapsed: {_human_readable_time(total_time)} + Files Uploaded: {num_files} ({with_si_suffix(total_bytes)}) + """)) @dataclass(frozen=True) -class StartUploadReturnType: +class _StartUploadReturnType: upload_id: str urls: List[str] part_count: int @@ -280,13 +277,13 @@ class StartUploadReturnType: MAX_RETRIES = 5 -def start_upload( +def _start_upload( src: Path, dest: str, - progress_bars: Optional[ProgressBars] = None, - throttle: Optional[Throttle] = None, + progress_bars: Optional[_ProgressBars] = None, + throttle: Optional[_Throttle] = None, latency_q: Optional["LatencyQueueType"] = None, -) -> Optional[StartUploadReturnType]: +) -> Optional[_StartUploadReturnType]: if not src.exists(): raise ValueError(f"Could not find {src}: no such file or link") @@ -356,9 +353,9 @@ def start_upload( if "version_id" in json_data["data"]: return # file is empty, so no need to upload any content - data: StartUploadData = json_data["data"] + data: _StartUploadData = json_data["data"] - return StartUploadReturnType( + return _StartUploadReturnType( **data, part_count=part_count, part_size=part_size, src=src, dest=dest ) @@ -366,23 +363,23 @@ def start_upload( @dataclass(frozen=True) -class CompletedPart: +class _CompletedPart: src: Path etag: str part_number: int -def upload_file_chunk( +def _upload_file_chunk( src: Path, url: str, part_index: int, part_size: int, - progress_bars: Optional[ProgressBars] = None, + progress_bars: Optional[_ProgressBars] = None, pbar_index: Optional[int] = None, parts_by_source: Optional["PartsBySrcType"] = None, upload_id: Optional[str] = None, dest: Optional[str] = None, -) -> CompletedPart: +) -> _CompletedPart: time.sleep(0.1 * random.random()) with open(src, "rb") as f: @@ -395,7 +392,7 @@ def upload_file_chunk( f"Failed to upload part {part_index} of {src}: {res.status_code}" ) - ret = CompletedPart( + ret = _CompletedPart( src=src, etag=res.headers["ETag"], part_number=part_index + 1, @@ -418,7 +415,7 @@ def upload_file_chunk( and parts_by_source is not None and upload_id is not None ): - end_upload( + _end_upload( dest=dest, upload_id=upload_id, parts=list(parts_by_source[src]), @@ -427,11 +424,11 @@ def upload_file_chunk( return ret -def end_upload( +def _end_upload( dest: str, upload_id: str, - parts: List[CompletedPart], - progress_bars: Optional[ProgressBars] = None, + parts: List[_CompletedPart], + progress_bars: Optional[_ProgressBars] = None, ): res = tinyrequests.post( latch_config.api.data.end_upload, @@ -456,7 +453,7 @@ def end_upload( progress_bars.update_total_progress(1) -def throttler(t: Throttle, q: "LatencyQueueType"): +def _throttler(t: _Throttle, q: "LatencyQueueType"): ema = 0 # todo(ayush): these params were tuned via naive grid search uploading a diff --git a/latch/ldata/transfer/utils.py b/latch/ldata/transfer/utils.py index cdb115b6..e86d1771 100644 --- a/latch/ldata/transfer/utils.py +++ b/latch/ldata/transfer/utils.py @@ -2,7 +2,7 @@ from typing import List -def get_max_workers() -> int: +def _get_max_workers() -> int: try: max_workers = len(os.sched_getaffinity(0)) * 4 except AttributeError: @@ -15,7 +15,7 @@ def get_max_workers() -> int: return min(max_workers, 16) -def human_readable_time(t_seconds: float) -> str: +def _human_readable_time(t_seconds: float) -> str: s = t_seconds % 60 m = (t_seconds // 60) % 60 h = t_seconds // 60 // 60 diff --git a/latch_cli/main.py b/latch_cli/main.py index 93382d14..26cc57f6 100644 --- a/latch_cli/main.py +++ b/latch_cli/main.py @@ -11,7 +11,7 @@ from 
typing_extensions import ParamSpec import latch_cli.click_utils -from latch.ldata.transfer.progress import Progress +from latch.ldata.transfer import Progress from latch_cli.click_utils import EnumChoice from latch_cli.exceptions.handler import CrashHandler from latch_cli.services.cp.autocomplete import complete as cp_complete diff --git a/latch_cli/services/cp/main.py b/latch_cli/services/cp/main.py index b775250d..4494aba8 100644 --- a/latch_cli/services/cp/main.py +++ b/latch_cli/services/cp/main.py @@ -4,8 +4,10 @@ import click -from latch.ldata.path import LPath -from latch.ldata.transfer.progress import Progress +from latch.ldata.transfer import _Progress +from latch.ldata.transfer.download import _download +from latch.ldata.transfer.remote_copy import _remote_copy +from latch.ldata.transfer.upload import _upload from latch_cli.services.cp.glob import expand_pattern from latch_cli.utils.path import is_remote_path @@ -15,7 +17,7 @@ def cp( srcs: List[str], dest: str, *, - progress: Progress, + progress: _Progress, verbose: bool, expand_globs: bool, ): @@ -28,20 +30,20 @@ def cp( if src_remote and not dest_remote: if expand_globs: [ - LPath(p).download( - Path(dest), progress=progress, verbose=verbose - ) + _download(p, Path(dest), progress=progress, verbose=verbose) for p in expand_pattern(src) ] else: - LPath(src).download(Path(dest), progress=progress, verbose=verbose) + _download( + src, Path(dest), show_progress_bar=progress, verbose=verbose + ) elif not src_remote and dest_remote: - LPath(dest).upload(src, progress=progress, verbose=verbose) + _upload(src, dest, progress=progress, verbose=verbose) elif src_remote and dest_remote: if expand_globs: - [LPath(p).copy(dest) for p in expand_pattern(src)] + [_remote_copy(p, dest) for p in expand_pattern(src)] else: - LPath(src).copy(dest) + _remote_copy(src, dest) else: raise ValueError( dedent(f""" diff --git a/latch_cli/services/ls.py b/latch_cli/services/ls.py index 94690bdb..65fe96da 100644 --- a/latch_cli/services/ls.py +++ b/latch_cli/services/ls.py @@ -10,7 +10,7 @@ import gql from latch_sdk_gql.execute import execute -from latch.ldata.transfer.node import LDataNodeType +from latch.ldata.path import LDataNodeType from latch_cli.click_utils import bold from latch_cli.utils import with_si_suffix from latch_cli.utils.path import normalize_path diff --git a/latch_cli/services/move.py b/latch_cli/services/move.py index 3878bf8d..86395062 100644 --- a/latch_cli/services/move.py +++ b/latch_cli/services/move.py @@ -5,7 +5,8 @@ from gql.transport.exceptions import TransportQueryError from latch_sdk_gql.execute import execute -from latch.ldata.transfer.node import LDataNodeType, get_node_data +from latch.ldata.path import LDataNodeType +from latch.ldata.transfer.node import _get_node_data from latch_cli.services.cp.glob import expand_pattern from latch_cli.utils.path import get_name_from_path, get_path_error, is_remote_path @@ -34,7 +35,7 @@ def move( raise click.exceptions.Exit(0) try: - node_data = get_node_data(*srcs, dest, allow_resolve_to_parent=True) + node_data = _get_node_data(*srcs, dest, allow_resolve_to_parent=True) except FileNotFoundError as e: click.echo(str(e)) raise click.exceptions.Exit(1) from e diff --git a/latch_cli/services/sync.py b/latch_cli/services/sync.py index 4f478ae7..61ffb8a1 100644 --- a/latch_cli/services/sync.py +++ b/latch_cli/services/sync.py @@ -15,7 +15,7 @@ def upload_file(src: Path, dest: str): - start = upl.start_upload(src, dest) + start = upl._start_upload(src, dest) if start is None: return @@ 
-30,7 +30,7 @@ def upload_file(src: Path, dest: str): ) ) - upl.end_upload(dest, start.upload_id, parts) + upl._end_upload(dest, start.upload_id, parts) def check_src(p: Path, *, indent: str = "") -> Optional[Tuple[Path, os.stat_result]]: From c64ed099ed2d6f4b4dc362e3406cf52f72b37ec8 Mon Sep 17 00:00:00 2001 From: Rahul Desai Date: Mon, 19 Feb 2024 13:36:24 -0800 Subject: [PATCH 09/15] bug fixes --- latch/ldata/path.py | 50 +++++++++++++++++------------ latch/ldata/transfer/download.py | 2 +- latch/ldata/transfer/node.py | 10 +++++- latch/ldata/transfer/remote_copy.py | 16 ++++----- latch/ldata/transfer/upload.py | 12 +++---- latch_cli/main.py | 2 +- latch_cli/services/cp/main.py | 8 ++--- latch_cli/services/ls.py | 2 +- latch_cli/services/move.py | 3 +- 9 files changed, 59 insertions(+), 46 deletions(-) diff --git a/latch/ldata/path.py b/latch/ldata/path.py index a2c69eb4..72fbf0cb 100644 --- a/latch/ldata/path.py +++ b/latch/ldata/path.py @@ -1,8 +1,7 @@ import re from dataclasses import dataclass, field -from enum import Enum from pathlib import Path -from typing import Generator, Optional, Type +from typing import Generator, Optional, Type, Union import gql from flytekit import ( @@ -17,21 +16,15 @@ from flytekit.extend import TypeEngine, TypeTransformer from latch_sdk_gql.execute import execute -from latch.ldata.transfer.download import _download +from latch.ldata.transfer.node import LDataNodeType from latch.ldata.transfer.progress import Progress -from latch.ldata.transfer.remote_copy import _remote_copy -from latch.ldata.transfer.upload import _upload from latch_cli.utils import urljoins -node_id_regex = re.compile(r"^latch://(?P[0-9]+)\.node$") - +from .transfer.download import _download +from .transfer.remote_copy import _remote_copy +from .transfer.upload import _upload -class LDataNodeType(str, Enum): - account_root = "account_root" - dir = "dir" - obj = "obj" - mount = "mount" - link = "link" +node_id_regex = re.compile(r"^latch://(?P[0-9]+)\.node$") dir_types = { @@ -55,6 +48,7 @@ class _Cache: @dataclass class LPath: + _cache: _Cache = field( default_factory=lambda: _Cache(), init=False, @@ -65,13 +59,15 @@ class LPath: path: str - def __init__(self, path: str): + def __init__(self, path: Union[str, "LPath"]): + if isinstance(path, LPath): + path = path.path if not path.startswith("latch://"): raise ValueError(f"Invalid LPath: {path} is not a Latch path") self.path = path self._download_idx = 0 - def load(self): + def load(self) -> None: """(Re-)populate this LPath's instance's cache. Future calls to most getters will return immediately without making a network request. @@ -81,7 +77,7 @@ def load(self): data = execute( gql.gql(""" query GetNodeData($path: String!) 
{ - ldataResolvePathToNode(path: {}) { + ldataResolvePathToNode(path: $path) { path ldataNode { finalLinkTarget { @@ -100,19 +96,31 @@ def load(self): {"path": self.path}, )["ldataResolvePathToNode"] + if data is None or data["ldataNode"] is None: + raise FileNotFoundError(f"No such Latch file or directory: {self.path}") + self._cache.path = self.path final_link_target = data["ldataNode"]["finalLinkTarget"] self._cache.node_id = final_link_target["id"] self._cache.name = final_link_target["name"] self._cache.type = LDataNodeType(final_link_target["type"].lower()) - self._cache.size = int(final_link_target["ldataObjectMeta"]["contentSize"]) - self._cache.content_type = final_link_target["ldataObjectMeta"]["contentType"] + + meta = final_link_target["ldataObjectMeta"] + if meta is not None: + self._cache.size = ( + -1 + if meta["contentSize"] is None + else int(final_link_target["ldataObjectMeta"]["contentSize"]) + ) + self._cache.content_type = final_link_target["ldataObjectMeta"][ + "contentType" + ] def node_id(self, *, load_if_missing: bool = True) -> str: match = node_id_regex.match(self.path) if match: - self._node_id = match.group("id") + return match.group("id") if self._cache.node_id is None or self._cache.path != self.path: if not load_if_missing: @@ -191,8 +199,8 @@ def rmr(self) -> None: {"nodeId": self.node_id}, ) - def copy(self, dst: "LPath") -> None: - _remote_copy(self.path, dst.path) + def copy(self, dst: "LPath", *, verbose: bool = False) -> None: + _remote_copy(self.path, dst.path, verbose=verbose) def upload(self, src: Path, *, show_progress_bar: bool = False) -> None: _upload( diff --git a/latch/ldata/transfer/download.py b/latch/ldata/transfer/download.py index 63ff719f..8d7317f3 100644 --- a/latch/ldata/transfer/download.py +++ b/latch/ldata/transfer/download.py @@ -11,7 +11,7 @@ import click from latch_sdk_config.latch import config as latch_config -from latch.ldata.path import LDataNodeType +from latch.ldata.transfer.node import LDataNodeType from latch_cli import tinyrequests from latch_cli.constants import Units from latch_cli.utils import get_auth_header, with_si_suffix diff --git a/latch/ldata/transfer/node.py b/latch/ldata/transfer/node.py index dd8c6257..3dbf4964 100644 --- a/latch/ldata/transfer/node.py +++ b/latch/ldata/transfer/node.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from enum import Enum from typing import Dict, List, TypedDict import graphql.language as l @@ -6,12 +7,19 @@ from latch_sdk_gql.utils import _name_node, _parse_selection from typing_extensions import TypeAlias -from latch.ldata.path import LDataNodeType from latch_cli.utils.path import get_path_error, normalize_path AccId: TypeAlias = int +class LDataNodeType(str, Enum): + account_root = "account_root" + dir = "dir" + obj = "obj" + mount = "mount" + link = "link" + + class _LDataObjectMeta(TypedDict): contentSize: str contentType: str diff --git a/latch/ldata/transfer/remote_copy.py b/latch/ldata/transfer/remote_copy.py index c609e08f..500e7eb0 100644 --- a/latch/ldata/transfer/remote_copy.py +++ b/latch/ldata/transfer/remote_copy.py @@ -1,16 +1,15 @@ from textwrap import dedent -import click import gql from gql.transport.exceptions import TransportQueryError from latch_sdk_gql.execute import execute -from latch.ldata.path import LDataNodeType -from latch.ldata.transfer.node import _get_node_data from latch_cli.utils.path import get_name_from_path, get_path_error +from .node import LDataNodeType, _get_node_data -def _remote_copy(src: str, dst: str) -> None: + +def 
_remote_copy(src: str, dst: str, *, verbose: bool = False) -> None: node_data = _get_node_data(src, dst, allow_resolve_to_parent=True) src_data = node_data.data[src] @@ -88,8 +87,7 @@ def _remote_copy(src: str, dst: str) -> None: raise ValueError(get_path_error(src, str(e), acc_id)) - click.echo(dedent(f""" - {click.style("Copy Requested.", fg="green")} - - {click.style("Source: ", fg="blue")}{(src)} - {click.style("Destination: ", fg="blue")}{(dst)}""")) + print(dedent(f""" + Copy Requested. + Source: {(src)} + Destination: {(dst)}""")) diff --git a/latch/ldata/transfer/upload.py b/latch/ldata/transfer/upload.py index 96fd55d8..4dd72861 100644 --- a/latch/ldata/transfer/upload.py +++ b/latch/ldata/transfer/upload.py @@ -17,17 +17,17 @@ from latch_sdk_config.latch import config as latch_config from typing_extensions import TypeAlias -from latch.ldata.path import LDataNodeType -from latch.ldata.transfer.manager import _TransferStateManager -from latch.ldata.transfer.node import _get_node_data -from latch.ldata.transfer.progress import Progress, _ProgressBars -from latch.ldata.transfer.throttle import _Throttle -from latch.ldata.transfer.utils import _get_max_workers, _human_readable_time from latch_cli import tinyrequests from latch_cli.constants import latch_constants, units from latch_cli.utils import get_auth_header, urljoins, with_si_suffix from latch_cli.utils.path import normalize_path +from .manager import _TransferStateManager +from .node import LDataNodeType, _get_node_data +from .progress import Progress, _ProgressBars +from .throttle import _Throttle +from .utils import _get_max_workers, _human_readable_time + if TYPE_CHECKING: PathQueueType: TypeAlias = "Queue[Optional[Path]]" LatencyQueueType: TypeAlias = "Queue[Optional[float]]" diff --git a/latch_cli/main.py b/latch_cli/main.py index 26cc57f6..93382d14 100644 --- a/latch_cli/main.py +++ b/latch_cli/main.py @@ -11,7 +11,7 @@ from typing_extensions import ParamSpec import latch_cli.click_utils -from latch.ldata.transfer import Progress +from latch.ldata.transfer.progress import Progress from latch_cli.click_utils import EnumChoice from latch_cli.exceptions.handler import CrashHandler from latch_cli.services.cp.autocomplete import complete as cp_complete diff --git a/latch_cli/services/cp/main.py b/latch_cli/services/cp/main.py index 4494aba8..b8a96db5 100644 --- a/latch_cli/services/cp/main.py +++ b/latch_cli/services/cp/main.py @@ -4,8 +4,8 @@ import click -from latch.ldata.transfer import _Progress from latch.ldata.transfer.download import _download +from latch.ldata.transfer.progress import Progress from latch.ldata.transfer.remote_copy import _remote_copy from latch.ldata.transfer.upload import _upload from latch_cli.services.cp.glob import expand_pattern @@ -17,7 +17,7 @@ def cp( srcs: List[str], dest: str, *, - progress: _Progress, + progress: Progress, verbose: bool, expand_globs: bool, ): @@ -41,9 +41,9 @@ def cp( _upload(src, dest, progress=progress, verbose=verbose) elif src_remote and dest_remote: if expand_globs: - [_remote_copy(p, dest) for p in expand_pattern(src)] + [_remote_copy(p, dest, verbose=True) for p in expand_pattern(src)] else: - _remote_copy(src, dest) + _remote_copy(src, dest, verbose=True) else: raise ValueError( dedent(f""" diff --git a/latch_cli/services/ls.py b/latch_cli/services/ls.py index 65fe96da..94690bdb 100644 --- a/latch_cli/services/ls.py +++ b/latch_cli/services/ls.py @@ -10,7 +10,7 @@ import gql from latch_sdk_gql.execute import execute -from latch.ldata.path import LDataNodeType +from 
latch.ldata.transfer.node import LDataNodeType from latch_cli.click_utils import bold from latch_cli.utils import with_si_suffix from latch_cli.utils.path import normalize_path diff --git a/latch_cli/services/move.py b/latch_cli/services/move.py index 86395062..a0ff9963 100644 --- a/latch_cli/services/move.py +++ b/latch_cli/services/move.py @@ -5,8 +5,7 @@ from gql.transport.exceptions import TransportQueryError from latch_sdk_gql.execute import execute -from latch.ldata.path import LDataNodeType -from latch.ldata.transfer.node import _get_node_data +from latch.ldata.transfer.node import LDataNodeType, _get_node_data from latch_cli.services.cp.glob import expand_pattern from latch_cli.utils.path import get_name_from_path, get_path_error, is_remote_path From 49364d67ba2b62ffd7097da988270caa1f08b4d2 Mon Sep 17 00:00:00 2001 From: Rahul Desai Date: Mon, 19 Feb 2024 14:45:01 -0800 Subject: [PATCH 10/15] more bug fixes --- latch/ldata/path.py | 31 ++++++++++++------------------- latch/ldata/transfer/download.py | 2 +- latch/ldata/transfer/upload.py | 4 ---- 3 files changed, 13 insertions(+), 24 deletions(-) diff --git a/latch/ldata/path.py b/latch/ldata/path.py index 72fbf0cb..b59c83d8 100644 --- a/latch/ldata/path.py +++ b/latch/ldata/path.py @@ -1,3 +1,4 @@ +import os import re from dataclasses import dataclass, field from pathlib import Path @@ -50,8 +51,6 @@ class _Cache: class LPath: _cache: _Cache = field( - default_factory=lambda: _Cache(), - init=False, repr=False, hash=False, compare=False, @@ -65,6 +64,7 @@ def __init__(self, path: Union[str, "LPath"]): if not path.startswith("latch://"): raise ValueError(f"Invalid LPath: {path} is not a Latch path") self.path = path + self._cache = _Cache() self._download_idx = 0 def load(self) -> None: @@ -204,7 +204,7 @@ def copy(self, dst: "LPath", *, verbose: bool = False) -> None: def upload(self, src: Path, *, show_progress_bar: bool = False) -> None: _upload( - src, + os.fspath(src), self.path, progress=Progress.tasks if show_progress_bar else Progress.none, verbose=show_progress_bar, @@ -235,17 +235,18 @@ def __truediv__(self, other: object) -> "LPath": class LPathTransformer(TypeTransformer[LPath]): + _TYPE_INFO = BlobType( + # there is no way to know if the LPath is a file or directory ahead to time, + # so just set dimensionality to SINGLE + format="binary", + dimensionality=BlobType.BlobDimensionality.SINGLE, + ) + def __init__(self): super(LPathTransformer, self).__init__(name="lpath-transformer", t=LPath) def get_literal_type(self, t: Type[LPath]) -> LiteralType: - return LiteralType( - blob=BlobType( - # this is sus, but there is no way to check if the LPath is a file or dir - format="binary", - dimensionality=BlobType.BlobDimensionality.SINGLE, - ) - ) + return LiteralType(blob=self._TYPE_INFO) def to_literal( self, @@ -254,18 +255,10 @@ def to_literal( python_type: Type[LPath], expected: LiteralType, ) -> Literal: - dimensionality = ( - BlobType.BlobDimensionality.MULTIPART - if python_val.is_dir() - else BlobType.BlobDimensionality.SINGLE - ) return Literal( scalar=Scalar( blob=Blob( - uri=python_val.path, - metadata=BlobMetadata( - format="binary", dimensionality=dimensionality - ), + uri=python_val.path, metadata=BlobMetadata(type=self._TYPE_INFO) ) ) ) diff --git a/latch/ldata/transfer/download.py b/latch/ldata/transfer/download.py index 8d7317f3..74d94653 100644 --- a/latch/ldata/transfer/download.py +++ b/latch/ldata/transfer/download.py @@ -130,7 +130,7 @@ def _download( if progress == Progress.none: num_bars = 0 
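 # note: with a bare `if` here, Progress.none set its values and then still
 # hit the `else` of the following `if` (since none != total), re-enabling
 # progress bars; the switch to `elif` below makes the chain mutually exclusive.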
show_total_progress = False - if progress == Progress.total: + elif progress == Progress.total: num_bars = 0 show_total_progress = True else: diff --git a/latch/ldata/transfer/upload.py b/latch/ldata/transfer/upload.py index 4dd72861..2f5e5ee7 100644 --- a/latch/ldata/transfer/upload.py +++ b/latch/ldata/transfer/upload.py @@ -35,10 +35,6 @@ UploadInfoBySrcType: TypeAlias = DictProxy[Path, "_StartUploadReturnType"] -class _EmptyUploadData(TypedDict): - version_id: str - - class _StartUploadData(TypedDict): upload_id: str urls: List[str] From 25b6598d14087df0db4a28fcc07a8bcc3a08aa42 Mon Sep 17 00:00:00 2001 From: Rahul Desai Date: Mon, 19 Feb 2024 15:16:24 -0800 Subject: [PATCH 11/15] resolve pr comments --- latch/ldata/transfer/download.py | 15 +++++++++------ latch/ldata/transfer/upload.py | 21 ++++++++++++++------- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/latch/ldata/transfer/download.py b/latch/ldata/transfer/download.py index 74d94653..3cf7a969 100644 --- a/latch/ldata/transfer/download.py +++ b/latch/ldata/transfer/download.py @@ -2,7 +2,6 @@ from concurrent.futures import ProcessPoolExecutor from contextlib import closing from dataclasses import dataclass -from http.client import HTTPException from itertools import repeat from pathlib import Path from textwrap import dedent @@ -46,7 +45,7 @@ def _download( ) -> None: if not dest.parent.exists(): raise ValueError( - f"Invalid copy destination {dest}. Parent directory {dest.parent} does not" + f"invalid copy destination {dest}. Parent directory {dest.parent} does not" " exist." ) @@ -73,11 +72,15 @@ def _download( headers={"Authorization": get_auth_header()}, json={"path": normalized}, ) + if res.status_code != 200: - raise HTTPException( - f"failed to fetch presigned url(s) for path {src} with code" - f" {res.status_code}: {res.json()['error']}" - ) + err = res.json()["error"] + msg = f"failed to fetch presigned url(s) for path {src}" + if res.status_code == 400: + raise ValueError(f"{msg}: download request invalid: {err}") + if res.status_code == 401: + raise RuntimeError(f"authorization token invalid: {err}") + raise RuntimeError(f"{msg} with code {res.status_code}: {res.json()['error']}") json_data = res.json() if can_have_children: diff --git a/latch/ldata/transfer/upload.py b/latch/ldata/transfer/upload.py index 2f5e5ee7..7432952c 100644 --- a/latch/ldata/transfer/upload.py +++ b/latch/ldata/transfer/upload.py @@ -54,7 +54,7 @@ def _upload( ) -> None: src_path = Path(src) if not src_path.exists(): - raise ValueError(f"Could not find {src_path}: no such file or directory.") + raise ValueError(f"could not find {src_path}: no such file or directory.") if verbose: print(f"Uploading {src_path.name}") @@ -73,7 +73,7 @@ def _upload( if not dest_is_dir: if not dest_exists: # path is latch:///a/b/file_1/file_2 - raise ValueError(f"No such file or directory: {dest}") + raise ValueError(f"no such file or directory: {dest}") if src_path.is_dir(): raise ValueError(f"{normalized} is not a directory.") @@ -281,7 +281,7 @@ def _start_upload( latency_q: Optional["LatencyQueueType"] = None, ) -> Optional[_StartUploadReturnType]: if not src.exists(): - raise ValueError(f"Could not find {src}: no such file or link") + raise ValueError(f"could not find {src}: no such file or link") if src.is_symlink(): src = src.resolve() @@ -300,7 +300,7 @@ def _start_upload( file_size = src.stat().st_size if file_size > latch_constants.maximum_upload_size: raise ValueError( - f"File is {with_si_suffix(file_size)} which exceeds the maximum" + 
f"file is {with_si_suffix(file_size)} which exceeds the maximum" " upload size (5TiB)", ) @@ -355,7 +355,7 @@ def _start_upload( **data, part_count=part_count, part_size=part_size, src=src, dest=dest ) - raise RuntimeError(f"Unable to generate upload URL for {src}") + raise RuntimeError(f"unable to generate upload URL for {src}") @dataclass(frozen=True) @@ -385,7 +385,7 @@ def _upload_file_chunk( res = tinyrequests.put(url, data=data) if res.status_code != 200: raise HTTPException( - f"Failed to upload part {part_index} of {src}: {res.status_code}" + f"failed to upload part {part_index} of {src}: {res.status_code}" ) ret = _CompletedPart( @@ -443,7 +443,14 @@ def _end_upload( ) if res.status_code != 200: - raise HTTPException(f"Unable to complete file upload: {res.json()['error']}") + err = res.json()["error"] + if res.status_code == 400: + raise ValueError(f"upload request invalid: {err}") + if res.status_code == 401: + raise RuntimeError(f"authorization failed: {err}") + raise RuntimeError( + f"end upload request failed with code {res.status_code}: {err}" + ) if progress_bars is not None: progress_bars.update_total_progress(1) From 00ab83f105df5021e38554bbbf987d58dff23359 Mon Sep 17 00:00:00 2001 From: Rahul Desai Date: Tue, 20 Feb 2024 11:58:55 -0800 Subject: [PATCH 12/15] cleanup --- latch/ldata/path.py | 83 +++++++++++++++++------------ latch/ldata/transfer/download.py | 20 +++---- latch/ldata/transfer/remote_copy.py | 12 +++-- latch/ldata/transfer/upload.py | 17 +++--- latch_cli/services/cp/main.py | 7 ++- 5 files changed, 80 insertions(+), 59 deletions(-) diff --git a/latch/ldata/path.py b/latch/ldata/path.py index b59c83d8..5a171da5 100644 --- a/latch/ldata/path.py +++ b/latch/ldata/path.py @@ -51,6 +51,16 @@ class _Cache: class LPath: _cache: _Cache = field( + default_factory=lambda: _Cache(), + init=False, + repr=False, + hash=False, + compare=False, + ) + + _download_idx: int = field( + default=0, + init=False, repr=False, hash=False, compare=False, @@ -58,16 +68,13 @@ class LPath: path: str - def __init__(self, path: Union[str, "LPath"]): - if isinstance(path, LPath): - path = path.path - if not path.startswith("latch://"): - raise ValueError(f"Invalid LPath: {path} is not a Latch path") - self.path = path - self._cache = _Cache() - self._download_idx = 0 + def __post_init__(self): + if isinstance(self.path, LPath): + self.path = self.path.path + if not self.path.startswith("latch://"): + raise ValueError(f"Invalid LPath: {self.path} is not a Latch path") - def load(self) -> None: + def load_metadata(self) -> None: """(Re-)populate this LPath's instance's cache. Future calls to most getters will return immediately without making a network request. 
@@ -109,15 +116,11 @@ def load(self) -> None: meta = final_link_target["ldataObjectMeta"] if meta is not None: self._cache.size = ( - -1 - if meta["contentSize"] is None - else int(final_link_target["ldataObjectMeta"]["contentSize"]) + -1 if meta["contentSize"] is None else int(meta["contentSize"]) ) - self._cache.content_type = final_link_target["ldataObjectMeta"][ - "contentType" - ] + self._cache.content_type = meta["contentType"] - def node_id(self, *, load_if_missing: bool = True) -> str: + def node_id(self, *, load_if_missing: bool = True) -> Optional[str]: match = node_id_regex.match(self.path) if match: return match.group("id") @@ -125,41 +128,47 @@ def node_id(self, *, load_if_missing: bool = True) -> str: if self._cache.node_id is None or self._cache.path != self.path: if not load_if_missing: return None - self.load() + self.load_metadata() return self._cache.node_id - def name(self, *, load_if_missing: bool = True) -> str: + def name(self, *, load_if_missing: bool = True) -> Optional[str]: if self._cache.name is None or self._cache.path != self.path: if not load_if_missing: return None - self.load() + self.load_metadata() return self._cache.name - def type(self, *, load_if_missing: bool = True) -> LDataNodeType: + def type(self, *, load_if_missing: bool = True) -> Optional[LDataNodeType]: if self._cache.type is None or self._cache.path != self.path: if not load_if_missing: return None - self.load() + self.load_metadata() return self._cache.type - def size(self, *, load_if_missing: bool = True) -> float: + def size(self, *, load_if_missing: bool = True) -> Optional[int]: if self._cache.size is None or self._cache.path != self.path: if not load_if_missing: return None - self.load() + self.load_metadata() return self._cache.size - def content_type(self, *, load_if_missing: bool = True) -> str: + def content_type(self, *, load_if_missing: bool = True) -> Optional[str]: if self._cache.content_type is None or self._cache.path != self.path: if not load_if_missing: return None - self.load() + self.load_metadata() return self._cache.content_type - def is_dir(self) -> bool: - return self.type() in dir_types + def is_dir(self, *, load_if_missing: bool = True) -> bool: + return self.type(load_if_missing=load_if_missing) in dir_types def iterdir(self) -> Generator["LPath", None, None]: + """Yield LPaths objects contained within the directory. + + Should only be called on directories. Does not recursively list directories. + + Always makes a network request. + """ data = execute( gql.gql(""" query LDataChildren($argPath: String!) { @@ -188,6 +197,10 @@ def iterdir(self) -> Generator["LPath", None, None]: yield LPath(urljoins(self.path, node["child"]["name"])) def rmr(self) -> None: + """Recursively delete files at this instance's path. + + Always makes a network request. + """ execute( gql.gql(""" mutation LDataRmr($nodeId: BigInt!) 
{ @@ -199,15 +212,15 @@ def rmr(self) -> None: {"nodeId": self.node_id}, ) - def copy(self, dst: "LPath", *, verbose: bool = False) -> None: - _remote_copy(self.path, dst.path, verbose=verbose) + def copy_to(self, dst: "LPath", *, show_summary: bool = False) -> None: + _remote_copy(self.path, dst.path, show_summary=show_summary) - def upload(self, src: Path, *, show_progress_bar: bool = False) -> None: + def upload_from(self, src: Path, *, show_progress_bar: bool = False) -> None: _upload( os.fspath(src), self.path, progress=Progress.tasks if show_progress_bar else Progress.none, - verbose=show_progress_bar, + verbose=False, ) def download( @@ -223,7 +236,7 @@ def download( self.path, dst, progress=Progress.tasks if show_progress_bar else Progress.none, - verbose=show_progress_bar, + verbose=False, confirm_overwrite=False, ) return dst @@ -236,14 +249,14 @@ def __truediv__(self, other: object) -> "LPath": class LPathTransformer(TypeTransformer[LPath]): _TYPE_INFO = BlobType( - # there is no way to know if the LPath is a file or directory ahead to time, - # so just set dimensionality to SINGLE + # rahul: there is no way to know if the LPath is a file or directory + # ahead to time, so just set dimensionality to SINGLE format="binary", dimensionality=BlobType.BlobDimensionality.SINGLE, ) def __init__(self): - super(LPathTransformer, self).__init__(name="lpath-transformer", t=LPath) + super().__init__(name="lpath-transformer", t=LPath) def get_literal_type(self, t: Type[LPath]) -> LiteralType: return LiteralType(blob=self._TYPE_INFO) diff --git a/latch/ldata/transfer/download.py b/latch/ldata/transfer/download.py index 3cf7a969..0349ef8d 100644 --- a/latch/ldata/transfer/download.py +++ b/latch/ldata/transfer/download.py @@ -53,8 +53,8 @@ def _download( data = _get_node_data(src) node_data = data.data[src] - if verbose: - print(f"Downloading {node_data.name}") + if progress != Progress.none: + click.secho(f"Downloading {node_data.name}", fg="blue") can_have_children = node_data.type in { LDataNodeType.account_root, @@ -125,7 +125,9 @@ def _download( job.dest.parent.mkdir(parents=True, exist_ok=True) confirmed_jobs.append(job) else: - print(f"Skipping {job.dest.parent}, file already exists") + click.secho( + f"Skipping {job.dest.parent}, file already exists", fg="yellow" + ) rejected_jobs.add(job.dest.parent) num_files = len(confirmed_jobs) @@ -195,12 +197,12 @@ def _download( total_time = end - start - if verbose: - print(dedent(f""" - Download Complete - Time Elapsed: {_human_readable_time(total_time)} - Files Downloaded: {num_files} ({with_si_suffix(total_bytes)}) - """)) + if progress != Progress.none: + click.echo(dedent(f""" + {click.style("Download Complete", fg="green")} + {click.style("Time Elapsed: ", fg="blue")}{_human_readable_time(total_time)} + {click.style("Files Downloaded: ", fg="blue")}{num_files} ({with_si_suffix(total_bytes)}) + """)) # dest will always be a path which includes the copied file as its leaf diff --git a/latch/ldata/transfer/remote_copy.py b/latch/ldata/transfer/remote_copy.py index 500e7eb0..ccc23091 100644 --- a/latch/ldata/transfer/remote_copy.py +++ b/latch/ldata/transfer/remote_copy.py @@ -1,5 +1,6 @@ from textwrap import dedent +import click import gql from gql.transport.exceptions import TransportQueryError from latch_sdk_gql.execute import execute @@ -9,7 +10,7 @@ from .node import LDataNodeType, _get_node_data -def _remote_copy(src: str, dst: str, *, verbose: bool = False) -> None: +def _remote_copy(src: str, dst: str, *, show_summary: bool = False) 
-> None: node_data = _get_node_data(src, dst, allow_resolve_to_parent=True) src_data = node_data.data[src] @@ -87,7 +88,8 @@ def _remote_copy(src: str, dst: str, *, verbose: bool = False) -> None: raise ValueError(get_path_error(src, str(e), acc_id)) - print(dedent(f""" - Copy Requested. - Source: {(src)} - Destination: {(dst)}""")) + if show_summary: + click.echo(dedent(f""" + {click.style("Copy Requested.", fg="green")} + {click.style("Source: ", fg="blue")}{(src)} + {click.style("Destination: ", fg="blue")}{(dst)}""")) diff --git a/latch/ldata/transfer/upload.py b/latch/ldata/transfer/upload.py index 7432952c..43bfca16 100644 --- a/latch/ldata/transfer/upload.py +++ b/latch/ldata/transfer/upload.py @@ -14,6 +14,7 @@ from textwrap import dedent from typing import TYPE_CHECKING, List, Optional, TypedDict +import click from latch_sdk_config.latch import config as latch_config from typing_extensions import TypeAlias @@ -56,8 +57,8 @@ def _upload( if not src_path.exists(): raise ValueError(f"could not find {src_path}: no such file or directory.") - if verbose: - print(f"Uploading {src_path.name}") + if progress != Progress.none: + click.secho(f"Uploading {src_path.name}", fg="blue") node_data = _get_node_data(dest, allow_resolve_to_parent=True) dest_data = node_data.data[dest] @@ -201,7 +202,7 @@ def _upload( wait(chunk_futs) - if verbose: + if progress != Progress.none: print("\x1b[0GFinalizing uploads...") else: if dest_exists and dest_is_dir: @@ -252,11 +253,11 @@ def _upload( end = time.monotonic() total_time = end - start - if verbose: - print(dedent(f""" - Upload Complete - Time Elapsed: {_human_readable_time(total_time)} - Files Uploaded: {num_files} ({with_si_suffix(total_bytes)}) + if progress != Progress.none: + click.echo(dedent(f""" + {click.style("Upload Complete", fg="green")} + {click.style("Time Elapsed: ", fg="blue")}{_human_readable_time(total_time)} + {click.style("Files Uploaded: ", fg="blue")}{num_files} ({with_si_suffix(total_bytes)}) """)) diff --git a/latch_cli/services/cp/main.py b/latch_cli/services/cp/main.py index b8a96db5..071301ca 100644 --- a/latch_cli/services/cp/main.py +++ b/latch_cli/services/cp/main.py @@ -41,9 +41,12 @@ def cp( _upload(src, dest, progress=progress, verbose=verbose) elif src_remote and dest_remote: if expand_globs: - [_remote_copy(p, dest, verbose=True) for p in expand_pattern(src)] + [ + _remote_copy(p, dest, show_summary=True) + for p in expand_pattern(src) + ] else: - _remote_copy(src, dest, verbose=True) + _remote_copy(src, dest, show_summary=True) else: raise ValueError( dedent(f""" From 7728c6c84757df43934c363c67b7fd5f1049e41e Mon Sep 17 00:00:00 2001 From: Rahul Desai Date: Tue, 20 Feb 2024 12:46:04 -0800 Subject: [PATCH 13/15] make LPath object immutable --- latch/ldata/path.py | 42 ++++++++++++++---------------------------- 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/latch/ldata/path.py b/latch/ldata/path.py index 5a171da5..3c771ada 100644 --- a/latch/ldata/path.py +++ b/latch/ldata/path.py @@ -47,7 +47,10 @@ class _Cache: content_type: Optional[str] = None -@dataclass +download_idx = 0 + + +@dataclass(frozen=True) class LPath: _cache: _Cache = field( @@ -58,19 +61,11 @@ class LPath: compare=False, ) - _download_idx: int = field( - default=0, - init=False, - repr=False, - hash=False, - compare=False, - ) - path: str def __post_init__(self): if isinstance(self.path, LPath): - self.path = self.path.path + raise ValueError("LPath cannot be initialized with another LPath") if not self.path.startswith("latch://"): 
raise ValueError(f"Invalid LPath: {self.path} is not a Latch path") @@ -125,37 +120,27 @@ def node_id(self, *, load_if_missing: bool = True) -> Optional[str]: if match: return match.group("id") - if self._cache.node_id is None or self._cache.path != self.path: - if not load_if_missing: - return None + if self._cache.node_id is None and load_if_missing: self.load_metadata() return self._cache.node_id def name(self, *, load_if_missing: bool = True) -> Optional[str]: - if self._cache.name is None or self._cache.path != self.path: - if not load_if_missing: - return None + if self._cache.name is None and load_if_missing: self.load_metadata() return self._cache.name def type(self, *, load_if_missing: bool = True) -> Optional[LDataNodeType]: - if self._cache.type is None or self._cache.path != self.path: - if not load_if_missing: - return None + if self._cache.type is None and load_if_missing: self.load_metadata() return self._cache.type def size(self, *, load_if_missing: bool = True) -> Optional[int]: - if self._cache.size is None or self._cache.path != self.path: - if not load_if_missing: - return None + if self._cache.size is None and load_if_missing: self.load_metadata() return self._cache.size def content_type(self, *, load_if_missing: bool = True) -> Optional[str]: - if self._cache.content_type is None or self._cache.path != self.path: - if not load_if_missing: - return None + if self._cache.content_type is None and load_if_missing: self.load_metadata() return self._cache.content_type @@ -227,10 +212,11 @@ def download( self, dst: Optional[Path] = None, *, show_progress_bar: bool = False ) -> Path: if dst is None: - dir = Path.home() / "lpath" / str(self._download_idx) - self._download_idx += 1 + global download_idx + dir = Path.home() / "lpath" dir.mkdir(parents=True, exist_ok=True) - dst = dir / self.name() + dst = dir / f"{download_idx}_{self.name()}" + download_idx += 1 _download( self.path, From 4d6ad1e10734ce821f4c9c0e213acaec10c7ba3a Mon Sep 17 00:00:00 2001 From: Rahul Desai Date: Tue, 20 Feb 2024 15:34:18 -0800 Subject: [PATCH 14/15] transfer -> _transfer + use mkdtemp --- .../ldata/{transfer => _transfer}/__init__.py | 0 .../ldata/{transfer => _transfer}/download.py | 60 +++++----- latch/ldata/_transfer/manager.py | 14 +++ latch/ldata/{transfer => _transfer}/node.py | 40 +++---- .../ldata/{transfer => _transfer}/progress.py | 10 +- .../{transfer => _transfer}/remote_copy.py | 7 +- .../ldata/{transfer => _transfer}/throttle.py | 2 +- latch/ldata/{transfer => _transfer}/upload.py | 99 ++++++++--------- latch/ldata/{transfer => _transfer}/utils.py | 4 +- latch/ldata/path.py | 104 ++++++++++-------- latch/ldata/transfer/manager.py | 14 --- latch/ldata/type.py | 9 ++ latch_cli/main.py | 6 +- latch_cli/services/cp/main.py | 8 +- latch_cli/services/ls.py | 2 +- latch_cli/services/move.py | 3 +- latch_cli/services/sync.py | 10 +- 17 files changed, 206 insertions(+), 186 deletions(-) rename latch/ldata/{transfer => _transfer}/__init__.py (100%) rename latch/ldata/{transfer => _transfer}/download.py (82%) create mode 100644 latch/ldata/_transfer/manager.py rename latch/ldata/{transfer => _transfer}/node.py (78%) rename latch/ldata/{transfer => _transfer}/progress.py (94%) rename latch/ldata/{transfer => _transfer}/remote_copy.py (93%) rename latch/ldata/{transfer => _transfer}/throttle.py (91%) rename latch/ldata/{transfer => _transfer}/upload.py (85%) rename latch/ldata/{transfer => _transfer}/utils.py (87%) delete mode 100644 latch/ldata/transfer/manager.py create mode 100644 
latch/ldata/type.py diff --git a/latch/ldata/transfer/__init__.py b/latch/ldata/_transfer/__init__.py similarity index 100% rename from latch/ldata/transfer/__init__.py rename to latch/ldata/_transfer/__init__.py diff --git a/latch/ldata/transfer/download.py b/latch/ldata/_transfer/download.py similarity index 82% rename from latch/ldata/transfer/download.py rename to latch/ldata/_transfer/download.py index 0349ef8d..d4bfaadd 100644 --- a/latch/ldata/transfer/download.py +++ b/latch/ldata/_transfer/download.py @@ -10,33 +10,33 @@ import click from latch_sdk_config.latch import config as latch_config -from latch.ldata.transfer.node import LDataNodeType +from latch.ldata.type import LDataNodeType from latch_cli import tinyrequests from latch_cli.constants import Units from latch_cli.utils import get_auth_header, with_si_suffix from latch_cli.utils.path import normalize_path -from .manager import _TransferStateManager -from .node import _get_node_data -from .progress import Progress, _get_free_index, _ProgressBars -from .utils import _get_max_workers, _human_readable_time +from .manager import TransferStateManager +from .node import get_node_data +from .progress import Progress, ProgressBars, get_free_index +from .utils import get_max_workers, human_readable_time -class _GetSignedUrlData(TypedDict): +class GetSignedUrlData(TypedDict): url: str -class _GetSignedUrlsRecursiveData(TypedDict): +class GetSignedUrlsRecursiveData(TypedDict): urls: Dict[str, str] @dataclass(frozen=True, unsafe_hash=True) -class _DownloadJob: +class DownloadJob: signed_url: str dest: Path -def _download( +def download( src: str, dest: Path, progress: Progress, @@ -50,7 +50,7 @@ def _download( ) normalized = normalize_path(src) - data = _get_node_data(src) + data = get_node_data(src) node_data = data.data[src] if progress != Progress.none: @@ -84,7 +84,7 @@ def _download( json_data = res.json() if can_have_children: - dir_data: _GetSignedUrlsRecursiveData = json_data["data"] + dir_data: GetSignedUrlsRecursiveData = json_data["data"] if dest.exists() and not normalized.endswith("/"): dest = dest / node_data.name @@ -96,12 +96,12 @@ def _download( except (FileExistsError, NotADirectoryError) as e: raise ValueError(f"Download destination {dest} is not a directory") - unconfirmed_jobs: List[_DownloadJob] = [] - confirmed_jobs: List[_DownloadJob] = [] + unconfirmed_jobs: List[DownloadJob] = [] + confirmed_jobs: List[DownloadJob] = [] rejected_jobs: Set[Path] = set() for rel_path, url in dir_data["urls"].items(): - unconfirmed_jobs.append(_DownloadJob(url, dest / rel_path)) + unconfirmed_jobs.append(DownloadJob(url, dest / rel_path)) for job in unconfirmed_jobs: reject_job = False @@ -139,11 +139,11 @@ def _download( num_bars = 0 show_total_progress = True else: - num_bars = min(_get_max_workers(), num_files) + num_bars = min(get_max_workers(), num_files) show_total_progress = True - with _TransferStateManager() as manager: - progress_bars: _ProgressBars + with TransferStateManager() as manager: + progress_bars: ProgressBars with closing( manager.ProgressBars( num_bars, @@ -156,10 +156,10 @@ def _download( start = time.monotonic() # todo(ayush): benchmark this against asyncio - with ProcessPoolExecutor(max_workers=_get_max_workers()) as executor: + with ProcessPoolExecutor(max_workers=get_max_workers()) as executor: total_bytes = sum( executor.map( - _download_file, + download_file, confirmed_jobs, repeat(progress_bars), ) @@ -167,7 +167,7 @@ def _download( end = time.monotonic() else: - file_data: _GetSignedUrlData = 
json_data["data"] + file_data: GetSignedUrlData = json_data["data"] num_files = 1 @@ -179,8 +179,8 @@ def _download( else: num_bars = 1 - with _TransferStateManager() as manager: - progress_bars: _ProgressBars + with TransferStateManager() as manager: + progress_bars: ProgressBars with closing( manager.ProgressBars( num_bars, @@ -189,8 +189,8 @@ def _download( ) ) as progress_bars: start = time.monotonic() - total_bytes = _download_file( - _DownloadJob(file_data["url"], dest), + total_bytes = download_file( + DownloadJob(file_data["url"], dest), progress_bars, ) end = time.monotonic() @@ -200,16 +200,16 @@ def _download( if progress != Progress.none: click.echo(dedent(f""" {click.style("Download Complete", fg="green")} - {click.style("Time Elapsed: ", fg="blue")}{_human_readable_time(total_time)} + {click.style("Time Elapsed: ", fg="blue")}{human_readable_time(total_time)} {click.style("Files Downloaded: ", fg="blue")}{num_files} ({with_si_suffix(total_bytes)}) """)) # dest will always be a path which includes the copied file as its leaf # e.g. download_file("a/b.txt", Path("c/d.txt")) will copy the content of 'b.txt' into 'd.txt' -def _download_file( - job: _DownloadJob, - progress_bars: _ProgressBars, +def download_file( + job: DownloadJob, + progress_bars: ProgressBars, ) -> int: # todo(ayush): benchmark parallelized downloads using the range header with open(job.dest, "wb") as f: @@ -218,7 +218,7 @@ def _download_file( total_bytes = res.headers.get("Content-Length") assert total_bytes is not None, "Must have a content-length header" - with _get_free_index(progress_bars) as pbar_index: + with get_free_index(progress_bars) as pbar_index: progress_bars.set( index=pbar_index, total=int(total_bytes), desc=job.dest.name ) @@ -235,7 +235,7 @@ def _download_file( progress_bars.update_total_progress(1) progress_bars.write( f"Downloaded {job.dest.name} ({with_si_suffix(int(total_bytes))})" - f" in {_human_readable_time(end - start)}" + f" in {human_readable_time(end - start)}" ) return int(total_bytes) diff --git a/latch/ldata/_transfer/manager.py b/latch/ldata/_transfer/manager.py new file mode 100644 index 00000000..01796068 --- /dev/null +++ b/latch/ldata/_transfer/manager.py @@ -0,0 +1,14 @@ +from multiprocessing.managers import SyncManager +from typing import Type + +from .progress import ProgressBars +from .throttle import Throttle + + +class TransferStateManager(SyncManager): + ProgressBars: Type[ProgressBars] + Throttle: Type[Throttle] + + +TransferStateManager.register("ProgressBars", ProgressBars) +TransferStateManager.register("Throttle", Throttle) diff --git a/latch/ldata/transfer/node.py b/latch/ldata/_transfer/node.py similarity index 78% rename from latch/ldata/transfer/node.py rename to latch/ldata/_transfer/node.py index 3dbf4964..37006eec 100644 --- a/latch/ldata/transfer/node.py +++ b/latch/ldata/_transfer/node.py @@ -1,5 +1,4 @@ from dataclasses import dataclass -from enum import Enum from typing import Dict, List, TypedDict import graphql.language as l @@ -7,42 +6,39 @@ from latch_sdk_gql.utils import _name_node, _parse_selection from typing_extensions import TypeAlias +from latch.ldata.type import LDataNodeType from latch_cli.utils.path import get_path_error, normalize_path AccId: TypeAlias = int -class LDataNodeType(str, Enum): - account_root = "account_root" - dir = "dir" - obj = "obj" - mount = "mount" - link = "link" +class LatchPathNotFound(RuntimeError): + pass -class _LDataObjectMeta(TypedDict): +class LDataObjectMeta(TypedDict): contentSize: str contentType: str 
-class _FinalLinkTargetPayload(TypedDict): +class FinalLinkTargetPayload(TypedDict): id: str type: str name: str removed: bool - ldataObjectMeta: _LDataObjectMeta + ldataObjectMeta: LDataObjectMeta -class _LdataNodePayload(TypedDict): - finalLinkTarget: _FinalLinkTargetPayload +class LdataNodePayload(TypedDict): + finalLinkTarget: FinalLinkTargetPayload -class _LdataResolvePathToNodePayload(TypedDict): +class LdataResolvePathToNodePayload(TypedDict): path: str - ldataNode: _LdataNodePayload + ldataNode: LdataNodePayload -class _AccountInfoCurrentPayload(TypedDict): +class AccountInfoCurrentPayload(TypedDict): id: str @@ -55,14 +51,14 @@ class NodeData: @dataclass(frozen=True) -class _GetNodeDataResult: +class GetNodeDataResult: acc_id: str data: Dict[str, NodeData] -def _get_node_data( +def get_node_data( *remote_paths: str, allow_resolve_to_parent: bool = False -) -> _GetNodeDataResult: +) -> GetNodeDataResult: normalized: Dict[str, str] = {} acc_sel = _parse_selection(""" @@ -120,12 +116,12 @@ def _get_node_data( res = execute(doc) - acc_info: _AccountInfoCurrentPayload = res["accountInfoCurrent"] + acc_info: AccountInfoCurrentPayload = res["accountInfoCurrent"] acc_id = acc_info["id"] ret: Dict[str, NodeData] = {} for i, remote_path in enumerate(remote_paths): - node: _LdataResolvePathToNodePayload = res[f"q{i}"] + node: LdataResolvePathToNodePayload = res[f"q{i}"] try: final_link_target = node["ldataNode"]["finalLinkTarget"] @@ -146,6 +142,6 @@ def _get_node_data( is_parent=is_parent, ) except (TypeError, ValueError) as e: - raise FileNotFoundError(get_path_error(remote_path, "not found", acc_id)) + raise LatchPathNotFound(get_path_error(remote_path, "not found", acc_id)) - return _GetNodeDataResult(acc_id, ret) + return GetNodeDataResult(acc_id, ret) diff --git a/latch/ldata/transfer/progress.py b/latch/ldata/_transfer/progress.py similarity index 94% rename from latch/ldata/transfer/progress.py rename to latch/ldata/_transfer/progress.py index f0f56560..506182a0 100644 --- a/latch/ldata/transfer/progress.py +++ b/latch/ldata/_transfer/progress.py @@ -6,7 +6,7 @@ import tqdm -def _get_progress_bar(): +def get_progress_bar(): return tqdm.tqdm( total=0, leave=False, @@ -22,7 +22,7 @@ class Progress(Enum): tasks = "tasks" -class _ProgressBars: +class ProgressBars: def __init__( self, num_task_bars: int, @@ -31,7 +31,7 @@ def __init__( verbose: bool = False, ): if show_total_progress: - self.total_bar = _get_progress_bar() + self.total_bar = get_progress_bar() self.total_bar.desc = "Copying Files" self.total_bar.colour = "green" self.total_bar.unit = "" @@ -43,7 +43,7 @@ def __init__( self.verbose = verbose self.task_bars: List[tqdm.tqdm] = [ - _get_progress_bar() for _ in range(num_task_bars) + get_progress_bar() for _ in range(num_task_bars) ] self.free_indices = {i for i in range(num_task_bars)} self.task_bar_sema = BoundedSemaphore(num_task_bars) @@ -135,7 +135,7 @@ def close(self): @contextmanager -def _get_free_index(progress_bars: _ProgressBars): +def get_free_index(progress_bars: ProgressBars): try: pbar_index = progress_bars.get_free_task_bar_index() yield pbar_index diff --git a/latch/ldata/transfer/remote_copy.py b/latch/ldata/_transfer/remote_copy.py similarity index 93% rename from latch/ldata/transfer/remote_copy.py rename to latch/ldata/_transfer/remote_copy.py index ccc23091..9c8e774d 100644 --- a/latch/ldata/transfer/remote_copy.py +++ b/latch/ldata/_transfer/remote_copy.py @@ -5,13 +5,14 @@ from gql.transport.exceptions import TransportQueryError from 
latch_sdk_gql.execute import execute
+from latch.ldata.type import LDataNodeType
 from latch_cli.utils.path import get_name_from_path, get_path_error
 
-from .node import LDataNodeType, _get_node_data
+from .node import get_node_data
 
 
-def _remote_copy(src: str, dst: str, *, show_summary: bool = False) -> None:
-    node_data = _get_node_data(src, dst, allow_resolve_to_parent=True)
+def remote_copy(src: str, dst: str, *, show_summary: bool = False) -> None:
+    node_data = get_node_data(src, dst, allow_resolve_to_parent=True)
 
     src_data = node_data.data[src]
     dst_data = node_data.data[dst]
diff --git a/latch/ldata/transfer/throttle.py b/latch/ldata/_transfer/throttle.py
similarity index 91%
rename from latch/ldata/transfer/throttle.py
rename to latch/ldata/_transfer/throttle.py
index 17be4bcf..30d95ecf 100644
--- a/latch/ldata/transfer/throttle.py
+++ b/latch/ldata/_transfer/throttle.py
@@ -2,7 +2,7 @@
 
 
 @dataclass
-class _Throttle:
+class Throttle:
     delay: float = 0
 
     def get_delay(self):
diff --git a/latch/ldata/transfer/upload.py b/latch/ldata/_transfer/upload.py
similarity index 85%
rename from latch/ldata/transfer/upload.py
rename to latch/ldata/_transfer/upload.py
index 43bfca16..44bc213d 100644
--- a/latch/ldata/transfer/upload.py
+++ b/latch/ldata/_transfer/upload.py
@@ -18,36 +18,37 @@
 from latch_sdk_config.latch import config as latch_config
 from typing_extensions import TypeAlias
 
+from latch.ldata.type import LDataNodeType
 from latch_cli import tinyrequests
 from latch_cli.constants import latch_constants, units
 from latch_cli.utils import get_auth_header, urljoins, with_si_suffix
 from latch_cli.utils.path import normalize_path
 
-from .manager import _TransferStateManager
-from .node import LDataNodeType, _get_node_data
-from .progress import Progress, _ProgressBars
-from .throttle import _Throttle
-from .utils import _get_max_workers, _human_readable_time
+from .manager import TransferStateManager
+from .node import get_node_data
+from .progress import Progress, ProgressBars
+from .throttle import Throttle
+from .utils import get_max_workers, human_readable_time
 
 if TYPE_CHECKING:
     PathQueueType: TypeAlias = "Queue[Optional[Path]]"
     LatencyQueueType: TypeAlias = "Queue[Optional[float]]"
-    PartsBySrcType: TypeAlias = DictProxy[Path, ListProxy["_CompletedPart"]]
-    UploadInfoBySrcType: TypeAlias = DictProxy[Path, "_StartUploadReturnType"]
+    PartsBySrcType: TypeAlias = DictProxy[Path, ListProxy["CompletedPart"]]
+    UploadInfoBySrcType: TypeAlias = DictProxy[Path, "StartUploadReturnType"]
 
 
-class _StartUploadData(TypedDict):
+class StartUploadData(TypedDict):
     upload_id: str
     urls: List[str]
 
 
 @dataclass(frozen=True)
-class _UploadJob:
+class UploadJob:
     src: Path
     dest: str
 
 
-def _upload(
+def upload(
     src: str,  # pathlib.Path strips trailing slashes but we want to keep them here as they determine cp behavior
     dest: str,
     progress: Progress,
@@ -60,7 +61,7 @@
     if progress != Progress.none:
         click.secho(f"Uploading {src_path.name}", fg="blue")
 
-    node_data = _get_node_data(dest, allow_resolve_to_parent=True)
+    node_data = get_node_data(dest, allow_resolve_to_parent=True)
     dest_data = node_data.data[dest]
 
     normalized = normalize_path(dest)
@@ -85,23 +86,23 @@
         num_bars = 1
         show_total_progress = False
     else:
-        num_bars = _get_max_workers()
+        num_bars = get_max_workers()
         show_total_progress = True
 
-    with ProcessPoolExecutor(max_workers=_get_max_workers()) as exec:
-        with _TransferStateManager() as man:
+    with ProcessPoolExecutor(max_workers=get_max_workers()) as exec:
+        with TransferStateManager() as man:
            parts_by_src: "PartsBySrcType" = man.dict()
            upload_info_by_src: "UploadInfoBySrcType" = man.dict()
 
-            throttle: _Throttle = man.Throttle()
+            throttle: Throttle = man.Throttle()
 
             latency_q: "LatencyQueueType" = man.Queue()
-            throttle_listener = exec.submit(_throttler, throttle, latency_q)
+            throttle_listener = exec.submit(throttler, throttle, latency_q)
 
             if src_path.is_dir():
                 if dest_exists and not src.endswith("/"):
                     normalized = urljoins(normalized, src_path.name)
 
-                jobs: List[_UploadJob] = []
+                jobs: List[UploadJob] = []
                 total_bytes = 0
 
                 for dir_path, _, file_names in os.walk(src_path, followlinks=True):
@@ -110,7 +111,7 @@
                         parts_by_src[rel_path] = man.list()
 
                         jobs.append(
-                            _UploadJob(
+                            UploadJob(
                                 rel_path,
                                 urljoins(
                                     normalized,
@@ -123,7 +124,7 @@
 
                 num_files = len(jobs)
 
-                url_generation_bar: _ProgressBars
+                url_generation_bar: ProgressBars
                 with closing(
                     man.ProgressBars(
                         0,
@@ -132,15 +133,15 @@
                 ) as url_generation_bar:
                     url_generation_bar.set_total(num_files, "Generating URLs")
 
-                    start_upload_futs: List[
-                        Future[Optional[_StartUploadReturnType]]
-                    ] = []
+                    start_upload_futs: List[Future[Optional[StartUploadReturnType]]] = (
+                        []
+                    )
 
                     start = time.monotonic()
                     for job in jobs:
                         start_upload_futs.append(
                             exec.submit(
-                                _start_upload,
+                                start_upload,
                                 job.src,
                                 job.dest,
                                 url_generation_bar,
@@ -157,7 +158,7 @@
                     latency_q.put(None)
                     wait([throttle_listener])
 
-                chunk_upload_bars: _ProgressBars
+                chunk_upload_bars: ProgressBars
                 with closing(
                     man.ProgressBars(
                         min(num_bars, num_files),
@@ -168,7 +169,7 @@
                     chunk_upload_bars.set_total(num_files, "Uploading Files")
 
                     # todo(ayush): async-ify
-                    chunk_futs: List[Future[_CompletedPart]] = []
+                    chunk_futs: List[Future[CompletedPart]] = []
 
                     for data in start_upload_futs:
                         res = data.result()
@@ -187,7 +188,7 @@
                         for part_index, url in enumerate(res.urls):
                             chunk_futs.append(
                                 exec.submit(
-                                    _upload_file_chunk,
+                                    upload_file_chunk,
                                     src=res.src,
                                     url=url,
                                     part_index=part_index,
@@ -211,7 +212,7 @@
             num_files = 1
             total_bytes = src_path.stat().st_size
 
-            progress_bars: _ProgressBars
+            progress_bars: ProgressBars
             with closing(
                 man.ProgressBars(
                     num_bars,
@@ -222,18 +223,18 @@
                 pbar_index = progress_bars.get_free_task_bar_index()
 
                 start = time.monotonic()
-                res = _start_upload(src_path, normalized)
+                res = start_upload(src_path, normalized)
 
                 if res is not None:
                     progress_bars.set(
                         pbar_index, res.src.stat().st_size, res.src.name
                     )
 
-                chunk_futs: List[Future[_CompletedPart]] = []
+                chunk_futs: List[Future[CompletedPart]] = []
 
                 for part_index, url in enumerate(res.urls):
                     chunk_futs.append(
                         exec.submit(
-                            _upload_file_chunk,
+                            upload_file_chunk,
                             src_path,
                             url,
                             part_index,
@@ -245,7 +246,7 @@
 
                 wait(chunk_futs)
 
-                _end_upload(
+                end_upload(
                     normalized,
                     res.upload_id,
                     [fut.result() for fut in chunk_futs],
@@ -256,13 +257,13 @@
     if progress != Progress.none:
         click.echo(dedent(f"""
             {click.style("Upload Complete", fg="green")}
-            {click.style("Time Elapsed: ", fg="blue")}{_human_readable_time(total_time)}
+            {click.style("Time Elapsed: ", fg="blue")}{human_readable_time(total_time)}
             {click.style("Files Uploaded: ", fg="blue")}{num_files} ({with_si_suffix(total_bytes)})
         """))
 
 
 @dataclass(frozen=True)
-class _StartUploadReturnType:
+class StartUploadReturnType:
     upload_id: str
     urls: List[str]
     part_count: int
@@ -274,13 +275,13 @@
 MAX_RETRIES = 5
 
 
-def _start_upload(
+def start_upload(
     src: Path,
     dest: str,
-    progress_bars: Optional[_ProgressBars] = None,
-    throttle: Optional[_Throttle] = None,
+    progress_bars: Optional[ProgressBars] = None,
+    throttle: Optional[Throttle] = None,
     latency_q: Optional["LatencyQueueType"] = None,
-) -> Optional[_StartUploadReturnType]:
+) -> Optional[StartUploadReturnType]:
     if not src.exists():
         raise ValueError(f"could not find {src}: no such file or link")
 
@@ -350,9 +351,9 @@
         if "version_id" in json_data["data"]:
             return  # file is empty, so no need to upload any content
 
-        data: _StartUploadData = json_data["data"]
+        data: StartUploadData = json_data["data"]
 
-        return _StartUploadReturnType(
+        return StartUploadReturnType(
             **data, part_count=part_count, part_size=part_size, src=src, dest=dest
         )
 
@@ -360,23 +361,23 @@
 
 
 @dataclass(frozen=True)
-class _CompletedPart:
+class CompletedPart:
     src: Path
     etag: str
     part_number: int
 
 
-def _upload_file_chunk(
+def upload_file_chunk(
     src: Path,
     url: str,
     part_index: int,
     part_size: int,
-    progress_bars: Optional[_ProgressBars] = None,
+    progress_bars: Optional[ProgressBars] = None,
     pbar_index: Optional[int] = None,
     parts_by_source: Optional["PartsBySrcType"] = None,
     upload_id: Optional[str] = None,
     dest: Optional[str] = None,
-) -> _CompletedPart:
+) -> CompletedPart:
     time.sleep(0.1 * random.random())
 
     with open(src, "rb") as f:
@@ -389,7 +390,7 @@
             f"failed to upload part {part_index} of {src}: {res.status_code}"
         )
 
-    ret = _CompletedPart(
+    ret = CompletedPart(
         src=src,
         etag=res.headers["ETag"],
         part_number=part_index + 1,
@@ -412,7 +413,7 @@
         and parts_by_source is not None
         and upload_id is not None
     ):
-        _end_upload(
+        end_upload(
             dest=dest,
             upload_id=upload_id,
             parts=list(parts_by_source[src]),
@@ -421,11 +422,11 @@
     return ret
 
 
-def _end_upload(
+def end_upload(
     dest: str,
     upload_id: str,
-    parts: List[_CompletedPart],
-    progress_bars: Optional[_ProgressBars] = None,
+    parts: List[CompletedPart],
+    progress_bars: Optional[ProgressBars] = None,
 ):
     res = tinyrequests.post(
         latch_config.api.data.end_upload,
@@ -457,7 +458,7 @@
         progress_bars.update_total_progress(1)
 
 
-def _throttler(t: _Throttle, q: "LatencyQueueType"):
+def throttler(t: Throttle, q: "LatencyQueueType"):
     ema = 0
 
     # todo(ayush): these params were tuned via naive grid search uploading a
diff --git a/latch/ldata/transfer/utils.py b/latch/ldata/_transfer/utils.py
similarity index 87%
rename from latch/ldata/transfer/utils.py
rename to latch/ldata/_transfer/utils.py
index e86d1771..cdb115b6 100644
--- a/latch/ldata/transfer/utils.py
+++ b/latch/ldata/_transfer/utils.py
@@ -2,7 +2,7 @@
 from typing import List
 
 
-def _get_max_workers() -> int:
+def get_max_workers() -> int:
     try:
         max_workers = len(os.sched_getaffinity(0)) * 4
     except AttributeError:
@@ -15,7 +15,7 @@
     return min(max_workers, 16)
 
 
-def _human_readable_time(t_seconds: float) -> str:
+def human_readable_time(t_seconds: float) -> str:
     s = t_seconds % 60
     m = (t_seconds // 60) % 60
     h = t_seconds // 60 // 60
diff --git a/latch/ldata/path.py b/latch/ldata/path.py
index 3c771ada..d1e93923 100644
--- a/latch/ldata/path.py
+++ b/latch/ldata/path.py
@@ -1,8 +1,10 @@
 import os
 import re
+import shutil
+import tempfile
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Generator, Optional, Type, Union
+from typing import Iterator, Optional, Type
 
 import gql
 from flytekit import (
@@ -16,19 +18,21 @@
 )
 from flytekit.extend import TypeEngine, TypeTransformer
from latch_sdk_gql.execute import execute
+from typing_extensions import Self
 
-from latch.ldata.transfer.node import LDataNodeType
-from latch.ldata.transfer.progress import Progress
+from latch.ldata.type import LDataNodeType
 from latch_cli.utils import urljoins
 
-from .transfer.download import _download
-from .transfer.remote_copy import _remote_copy
-from .transfer.upload import _upload
+from ._transfer.download import download as _download
+from ._transfer.node import LatchPathNotFound
+from ._transfer.progress import Progress as _Progress
+from ._transfer.remote_copy import remote_copy as _remote_copy
+from ._transfer.upload import upload as _upload
 
 node_id_regex = re.compile(r"^latch://(?P<id>[0-9]+)\.node$")
 
-dir_types = {
+_dir_types = {
     LDataNodeType.dir,
     LDataNodeType.account_root,
     LDataNodeType.mount,
@@ -37,8 +41,6 @@
 
 @dataclass
 class _Cache:
-    """Internal cache class to organize information for a `LPath`."""
-
     path: Optional[str] = None
     node_id: Optional[str] = None
     name: Optional[str] = None
@@ -47,14 +49,19 @@
     content_type: Optional[str] = None
 
 
-download_idx = 0
-
-
 @dataclass(frozen=True)
 class LPath:
+    """Latch Path.
+
+    Represents a remote file/directory path hosted on Latch. Can be used to
+    interact with files and directories in Latch.
+
+    Attributes:
+        path: The Latch path. Must start with "latch://".
+    """
 
     _cache: _Cache = field(
-        default_factory=lambda: _Cache(),
+        default_factory=_Cache,
         init=False,
         repr=False,
         hash=False,
@@ -67,9 +74,9 @@
     def __post_init__(self):
         if isinstance(self.path, LPath):
             raise ValueError("LPath cannot be initialized with another LPath")
         if not self.path.startswith("latch://"):
-            raise ValueError(f"Invalid LPath: {self.path} is not a Latch path")
+            raise ValueError(f"invalid LPath: {self.path} is not a Latch path")
 
-    def load_metadata(self) -> None:
+    def fetch_metadata(self) -> None:
         """(Re-)populate this LPath's instance's cache. Future calls to most
         getters will return immediately without making a network request.
@@ -80,7 +87,6 @@
             gql.gql("""
                 query GetNodeData($path: String!) {
                     ldataResolvePathToNode(path: $path) {
-                        path
                         ldataNode {
                             finalLinkTarget {
                                 id
@@ -99,7 +105,7 @@
         )["ldataResolvePathToNode"]
 
         if data is None or data["ldataNode"] is None:
-            raise FileNotFoundError(f"No such Latch file or directory: {self.path}")
+            raise LatchPathNotFound(f"no such Latch file or directory: {self.path}")
 
         self._cache.path = self.path
 
@@ -111,7 +117,7 @@
         meta = final_link_target["ldataObjectMeta"]
         if meta is not None:
             self._cache.size = (
-                -1 if meta["contentSize"] is None else int(meta["contentSize"])
+                None if meta["contentSize"] is None else int(meta["contentSize"])
             )
             self._cache.content_type = meta["contentType"]
 
@@ -121,33 +127,33 @@
             return match.group("id")
 
         if self._cache.node_id is None and load_if_missing:
-            self.load_metadata()
+            self.fetch_metadata()
         return self._cache.node_id
 
     def name(self, *, load_if_missing: bool = True) -> Optional[str]:
         if self._cache.name is None and load_if_missing:
-            self.load_metadata()
+            self.fetch_metadata()
         return self._cache.name
 
     def type(self, *, load_if_missing: bool = True) -> Optional[LDataNodeType]:
         if self._cache.type is None and load_if_missing:
-            self.load_metadata()
+            self.fetch_metadata()
         return self._cache.type
 
     def size(self, *, load_if_missing: bool = True) -> Optional[int]:
         if self._cache.size is None and load_if_missing:
-            self.load_metadata()
+            self.fetch_metadata()
         return self._cache.size
 
     def content_type(self, *, load_if_missing: bool = True) -> Optional[str]:
         if self._cache.content_type is None and load_if_missing:
-            self.load_metadata()
+            self.fetch_metadata()
         return self._cache.content_type
 
     def is_dir(self, *, load_if_missing: bool = True) -> bool:
-        return self.type(load_if_missing=load_if_missing) in dir_types
+        return self.type(load_if_missing=load_if_missing) in _dir_types
 
-    def iterdir(self) -> Generator["LPath", None, None]:
+    def iterdir(self) -> Iterator[Self]:
         """Yield LPaths objects contained within the directory. Should only be
         called on directories. Does not recursively list directories.
@@ -174,9 +180,9 @@ def iterdir(self) -> Generator["LPath", None, None]: )["ldataResolvePathData"] if data is None: - raise FileNotFoundError(f"No such Latch file or directory: {self.path}") - if data["finalLinkTarget"]["type"].lower() not in dir_types: - raise ValueError(f"{self.path} is not a directory") + raise LatchPathNotFound(f"no such Latch file or directory: {self.path}") + if data["finalLinkTarget"]["type"].lower() not in _dir_types: + raise ValueError(f"not a directory: {self.path}") for node in data["finalLinkTarget"]["childLdataTreeEdges"]["nodes"]: yield LPath(urljoins(self.path, node["child"]["name"])) @@ -194,7 +200,7 @@ def rmr(self) -> None: } } """), - {"nodeId": self.node_id}, + {"nodeId": self.node_id()}, ) def copy_to(self, dst: "LPath", *, show_summary: bool = False) -> None: @@ -204,40 +210,46 @@ def upload_from(self, src: Path, *, show_progress_bar: bool = False) -> None: _upload( os.fspath(src), self.path, - progress=Progress.tasks if show_progress_bar else Progress.none, + progress=_Progress.tasks if show_progress_bar else _Progress.none, verbose=False, ) def download( self, dst: Optional[Path] = None, *, show_progress_bar: bool = False ) -> Path: - if dst is None: - global download_idx - dir = Path.home() / "lpath" - dir.mkdir(parents=True, exist_ok=True) - dst = dir / f"{download_idx}_{self.name()}" - download_idx += 1 - - _download( - self.path, - dst, - progress=Progress.tasks if show_progress_bar else Progress.none, - verbose=False, - confirm_overwrite=False, - ) + temp_dir = None + try: + if dst is None: + temp_dir = Path(tempfile.mkdtemp()) + dst = temp_dir / self.name() + + _download( + self.path, + dst, + progress=_Progress.tasks if show_progress_bar else _Progress.none, + verbose=False, + confirm_overwrite=False, + ) + except Exception as e: + if temp_dir is not None: + shutil.rmtree(temp_dir) + raise e + return dst def __truediv__(self, other: object) -> "LPath": if not isinstance(other, (LPath, str)): return NotImplemented + if isinstance(other, LPath): + other = other.path return LPath(urljoins(self.path, other)) class LPathTransformer(TypeTransformer[LPath]): _TYPE_INFO = BlobType( - # rahul: there is no way to know if the LPath is a file or directory + # todo(rahul): there is no way to know if the LPath is a file or directory # ahead to time, so just set dimensionality to SINGLE - format="binary", + format="", dimensionality=BlobType.BlobDimensionality.SINGLE, ) diff --git a/latch/ldata/transfer/manager.py b/latch/ldata/transfer/manager.py deleted file mode 100644 index 025e0aed..00000000 --- a/latch/ldata/transfer/manager.py +++ /dev/null @@ -1,14 +0,0 @@ -from multiprocessing.managers import SyncManager -from typing import Type - -from .progress import _ProgressBars -from .throttle import _Throttle - - -class _TransferStateManager(SyncManager): - ProgressBars: Type[_ProgressBars] - Throttle: Type[_Throttle] - - -_TransferStateManager.register("ProgressBars", _ProgressBars) -_TransferStateManager.register("Throttle", _Throttle) diff --git a/latch/ldata/type.py b/latch/ldata/type.py new file mode 100644 index 00000000..dcdaefeb --- /dev/null +++ b/latch/ldata/type.py @@ -0,0 +1,9 @@ +from enum import Enum + + +class LDataNodeType(str, Enum): + account_root = "account_root" + dir = "dir" + obj = "obj" + mount = "mount" + link = "link" diff --git a/latch_cli/main.py b/latch_cli/main.py index 93382d14..d39df468 100644 --- a/latch_cli/main.py +++ b/latch_cli/main.py @@ -11,7 +11,7 @@ from typing_extensions import ParamSpec import latch_cli.click_utils 
-from latch.ldata.transfer.progress import Progress
+from latch.ldata._transfer.progress import Progress as _Progress
 from latch_cli.click_utils import EnumChoice
 from latch_cli.exceptions.handler import CrashHandler
 from latch_cli.services.cp.autocomplete import complete as cp_complete
@@ -356,7 +356,7 @@
 @click.option(
     "--progress",
     help="Type of progress information to show while copying",
-    type=EnumChoice(Progress, case_sensitive=False),
+    type=EnumChoice(_Progress, case_sensitive=False),
     default="tasks",
     show_default=True,
 )
@@ -380,7 +380,7 @@
 def cp(
     src: List[str],
     dest: str,
-    progress: Progress,
+    progress: _Progress,
     verbose: bool,
     no_glob: bool,
 ):
diff --git a/latch_cli/services/cp/main.py b/latch_cli/services/cp/main.py
index 071301ca..86f08ccf 100644
--- a/latch_cli/services/cp/main.py
+++ b/latch_cli/services/cp/main.py
@@ -4,10 +4,10 @@
 
 import click
 
-from latch.ldata.transfer.download import _download
-from latch.ldata.transfer.progress import Progress
-from latch.ldata.transfer.remote_copy import _remote_copy
-from latch.ldata.transfer.upload import _upload
+from latch.ldata._transfer.download import download as _download
+from latch.ldata._transfer.progress import Progress
+from latch.ldata._transfer.remote_copy import remote_copy as _remote_copy
+from latch.ldata._transfer.upload import upload as _upload
 from latch_cli.services.cp.glob import expand_pattern
 from latch_cli.utils.path import is_remote_path
 
diff --git a/latch_cli/services/ls.py b/latch_cli/services/ls.py
index 94690bdb..72b8b878 100644
--- a/latch_cli/services/ls.py
+++ b/latch_cli/services/ls.py
@@ -10,7 +10,7 @@
 import gql
 from latch_sdk_gql.execute import execute
 
-from latch.ldata.transfer.node import LDataNodeType
+from latch.ldata.type import LDataNodeType
 from latch_cli.click_utils import bold
 from latch_cli.utils import with_si_suffix
 from latch_cli.utils.path import normalize_path
diff --git a/latch_cli/services/move.py b/latch_cli/services/move.py
index a0ff9963..aaee5185 100644
--- a/latch_cli/services/move.py
+++ b/latch_cli/services/move.py
@@ -5,7 +5,8 @@
 from gql.transport.exceptions import TransportQueryError
 from latch_sdk_gql.execute import execute
 
-from latch.ldata.transfer.node import LDataNodeType, _get_node_data
+from latch.ldata._transfer.node import get_node_data as _get_node_data
+from latch.ldata.type import LDataNodeType
 from latch_cli.services.cp.glob import expand_pattern
 from latch_cli.utils.path import get_name_from_path, get_path_error, is_remote_path
 
diff --git a/latch_cli/services/sync.py b/latch_cli/services/sync.py
index 61ffb8a1..969fce85 100644
--- a/latch_cli/services/sync.py
+++ b/latch_cli/services/sync.py
@@ -11,18 +11,18 @@
 from gql.transport.exceptions import TransportQueryError
 from latch_sdk_gql.execute import JsonValue, execute
 
-import latch.ldata.transfer.upload as upl
+import latch.ldata._transfer.upload as _upl
 
 
 def upload_file(src: Path, dest: str):
-    start = upl._start_upload(src, dest)
+    start = _upl.start_upload(src, dest)
     if start is None:
         return
 
-    parts: List[upl.CompletedPart] = []
+    parts: List[_upl.CompletedPart] = []
     for idx, url in enumerate(start.urls):
         parts.append(
-            upl.upload_file_chunk(
+            _upl.upload_file_chunk(
                 src,
                 url,
                 idx,
@@ -30,7 +30,7 @@
             )
         )
 
-    upl._end_upload(dest, start.upload_id, parts)
+    _upl.end_upload(dest, start.upload_id, parts)
 
 
 def check_src(p: Path, *, indent: str = "") -> Optional[Tuple[Path, os.stat_result]]:

From 5aa56e75b71bff5652d532a3a943e4a85dfe4d15 Mon Sep 17 00:00:00 2001
From: Rahul Desai
Date: Tue, 20 Feb 2024 16:53:37 -0800
Subject: [PATCH 15/15] clean up generated download files on program exit

---
 latch/ldata/path.py | 54 ++++++++++++++++++++++++++++++---------------
 1 file changed, 36 insertions(+), 18 deletions(-)

diff --git a/latch/ldata/path.py b/latch/ldata/path.py
index d1e93923..859aefdd 100644
--- a/latch/ldata/path.py
+++ b/latch/ldata/path.py
@@ -1,7 +1,7 @@
+import atexit
 import os
 import re
 import shutil
-import tempfile
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Iterator, Optional, Type
@@ -38,6 +38,8 @@
     LDataNodeType.mount,
 }
 
+_download_idx = 0
+
 
 @dataclass
 class _Cache:
@@ -204,9 +206,21 @@
         )
 
     def copy_to(self, dst: "LPath", *, show_summary: bool = False) -> None:
+        """Copy the file at this instance's path to the given destination.
+
+        Args:
+            dst: The destination LPath.
+            show_summary: Whether to print a summary of the copy operation.
+        """
         _remote_copy(self.path, dst.path, show_summary=show_summary)
 
     def upload_from(self, src: Path, *, show_progress_bar: bool = False) -> None:
+        """Upload the file at the given source to this instance's path.
+
+        Args:
+            src: The source path.
+            show_progress_bar: Whether to show a progress bar during the upload.
+        """
         _upload(
             os.fspath(src),
             self.path,
@@ -217,24 +231,28 @@
     def download(
         self, dst: Optional[Path] = None, *, show_progress_bar: bool = False
     ) -> Path:
-        temp_dir = None
-        try:
-            if dst is None:
-                temp_dir = Path(tempfile.mkdtemp())
-                dst = temp_dir / self.name()
-
-            _download(
-                self.path,
-                dst,
-                progress=_Progress.tasks if show_progress_bar else _Progress.none,
-                verbose=False,
-                confirm_overwrite=False,
-            )
-        except Exception as e:
-            if temp_dir is not None:
-                shutil.rmtree(temp_dir)
-            raise e
+        """Download the file at this instance's path to the given destination.
+
+        Args:
+            dst: The destination path. If None, a temporary directory is created and the file is
+                downloaded there. The temporary directory is deleted when the program exits.
+            show_progress_bar: Whether to show a progress bar during the download.
+        """
+        if dst is None:
+            global _download_idx
+            tmp_dir = Path.home() / ".latch" / "lpath" / str(_download_idx)
+            _download_idx += 1
+            tmp_dir.mkdir(parents=True, exist_ok=True)
+            atexit.register(lambda p: shutil.rmtree(p), tmp_dir)
+            dst = tmp_dir / self.name()
+
+        _download(
+            self.path,
+            dst,
+            progress=_Progress.tasks if show_progress_bar else _Progress.none,
+            verbose=False,
+            confirm_overwrite=False,
+        )
 
         return dst
 
     def __truediv__(self, other: object) -> "LPath":
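
---

A minimal end-to-end sketch of the LPath API introduced by this series. The
account paths and file names below are hypothetical placeholders, and the
snippet assumes an authenticated `latch` environment:

    from pathlib import Path

    from latch.ldata.path import LPath

    samples = LPath("latch:///samples")  # hypothetical remote directory

    # iterdir() lists direct children only and raises on non-directories
    for child in samples.iterdir():
        print(child.name(), child.type(), child.size())

    # with dst=None, download() writes into a temporary directory that is
    # cleaned up when the program exits (see PATCH 15/15)
    local: Path = (samples / "reads.fastq").download()

    # upload a local file, then copy it to another remote location
    report = samples / "reports" / "summary.txt"
    report.upload_from(Path("./summary.txt"), show_progress_bar=True)
    report.copy_to(LPath("latch:///archive/summary.txt"), show_summary=True)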