diff --git a/jupyter_cache/base.py b/jupyter_cache/base.py index 428df68..3faf9ad 100644 --- a/jupyter_cache/base.py +++ b/jupyter_cache/base.py @@ -51,16 +51,25 @@ def __iter__(self) -> Iterable[Tuple[Path, io.BufferedReader]]: """Yield the relative path and open files (in bytes mode)""" pass + def __repr__(self): + return "{0}(paths={1})".format( + self.__class__.__name__, len(self.relative_paths) + ) + @attr.s(frozen=True, slots=True) class NbBundleIn: """A container for notebooks and their associated data to cache.""" nb: nbf.NotebookNode = attr.ib( - validator=instance_of(nbf.NotebookNode), metadata={"help": "the notebook"} + validator=instance_of(nbf.NotebookNode), + repr=lambda nb: "Notebook(cells={0})".format(len(nb.cells)), + metadata={"help": "the notebook"}, ) uri: str = attr.ib( - validator=instance_of(str), metadata={"help": "the origin URI of the notebook"} + converter=str, + validator=instance_of(str), + metadata={"help": "the origin URI of the notebook"}, ) artifacts: Optional[NbArtifactsAbstract] = attr.ib( kw_only=True, @@ -86,7 +95,9 @@ class NbBundleOut: """A container for notebooks and their associated data that have been cached.""" nb: nbf.NotebookNode = attr.ib( - validator=instance_of(nbf.NotebookNode), metadata={"help": "the notebook"} + validator=instance_of(nbf.NotebookNode), + repr=lambda nb: "Notebook(cells={0})".format(len(nb.cells)), + metadata={"help": "the notebook"}, ) record: NbCacheRecord = attr.ib(metadata={"help": "the cache record"}) artifacts: Optional[NbArtifactsAbstract] = attr.ib( diff --git a/jupyter_cache/cache/db.py b/jupyter_cache/cache/db.py index 17c322d..c828c81 100644 --- a/jupyter_cache/cache/db.py +++ b/jupyter_cache/cache/db.py @@ -11,6 +11,8 @@ from sqlalchemy.exc import IntegrityError from sqlalchemy.sql.expression import desc +from jupyter_cache.cli.utils import shorten_path + OrmBase = declarative_base() @@ -43,6 +45,11 @@ class Setting(OrmBase): key = Column(String(36), nullable=False, unique=True) value = 
Column(JSON()) + def __repr__(self): + return "{0}(pk={1},{2}={3})".format( + self.__class__.__name__, self.pk, self.key, self.value + ) + @staticmethod def set_value(key: str, value, db: Engine): with session_context(db) as session: # type: Session @@ -91,11 +98,24 @@ class NbCacheRecord(OrmBase): DateTime, nullable=False, default=datetime.utcnow, onupdate=datetime.utcnow ) + def __repr__(self): + return "{0}(pk={1})".format(self.__class__.__name__, self.pk) + def to_dict(self): return {k: v for k, v in self.__dict__.items() if not k.startswith("_")} - def __repr__(self): - return "{0}(pk={1})".format(self.__class__.__name__, self.pk) + def format_dict(self, hashkey=False, path_length=None, descript=False): + data = { + "ID": self.pk, + "Origin URI": str(shorten_path(self.uri, path_length)), + "Created": self.created.isoformat(" ", "minutes"), + "Accessed": self.accessed.isoformat(" ", "minutes"), + } + if descript: + data["Description"] = self.description + if hashkey: + data["Hashkey"] = self.hashkey + return data @staticmethod def create_record(uri: str, hashkey: str, db: Engine, **kwargs) -> "NbCacheRecord": @@ -205,6 +225,24 @@ class NbStageRecord(OrmBase): traceback = Column(Text(), nullable=True, default="") created = Column(DateTime, nullable=False, default=datetime.utcnow) + def __repr__(self): + return "{0}(pk={1})".format(self.__class__.__name__, self.pk) + + def to_dict(self): + return {k: v for k, v in self.__dict__.items() if not k.startswith("_")} + + def format_dict(self, cache_record=None, path_length=None, assets=True): + data = { + "ID": self.pk, + "URI": str(shorten_path(self.uri, path_length)), + "Created": self.created.isoformat(" ", "minutes"), + } + if assets: + data["Assets"] = len(self.assets) + if cache_record is not None: + data["Cache ID"] = cache_record.pk + return data + @validates("assets") def validator_assets(self, key, value): return self.validate_assets(value) @@ -227,9 +265,6 @@ def validate_assets(paths, uri=None): raise 
ValueError(f"Asset '{path}' is not in folder '{uri_folder}''") return list(paths) - def to_dict(self): - return {k: v for k, v in self.__dict__.items() if not k.startswith("_")} - @staticmethod def create_record( uri: str, db: Engine, raise_on_exists=True, assets=() diff --git a/jupyter_cache/cli/commands/cmd_cache.py b/jupyter_cache/cli/commands/cmd_cache.py index 3c5d50a..9f392fc 100644 --- a/jupyter_cache/cli/commands/cmd_cache.py +++ b/jupyter_cache/cli/commands/cmd_cache.py @@ -4,7 +4,7 @@ from jupyter_cache.cli.commands.cmd_main import jcache from jupyter_cache.cli import arguments, options -from jupyter_cache.cli.utils import shorten_path, get_cache +from jupyter_cache.cli.utils import get_cache, tabulate_cache_records @jcache.group("cache") @@ -13,19 +13,6 @@ def cmnd_cache(): pass -def format_cache_record(record, hashkeys, path_length): - data = { - "ID": record.pk, - "Origin URI": str(shorten_path(record.uri, path_length)), - "Created": record.created.isoformat(" ", "minutes"), - "Accessed": record.accessed.isoformat(" ", "minutes"), - # "Description": record.description, - } - if hashkeys: - data["Hashkey"] = record.hashkey - return data - - @cmnd_cache.command("list") @options.CACHE_PATH @click.option( @@ -38,8 +25,6 @@ def format_cache_record(record, hashkeys, path_length): @options.PATH_LENGTH def list_caches(cache_path, latest_only, hashkeys, path_length): """List cached notebook records in the cache.""" - import tabulate - db = get_cache(cache_path) records = db.list_cache_records() if not records: @@ -55,13 +40,7 @@ def list_caches(cache_path, latest_only, hashkeys, path_length): latest_records[record.uri] = record records = list(latest_records.values()) click.echo( - tabulate.tabulate( - [ - format_cache_record(r, hashkeys, path_length) - for r in sorted(records, key=lambda r: r.accessed, reverse=True) - ], - headers="keys", - ) + tabulate_cache_records(records, hashkeys=hashkeys, path_length=path_length) ) @@ -78,7 +57,7 @@ def 
show_cache(cache_path, pk): except KeyError: click.secho("ID {} does not exist, Aborting!".format(pk), fg="red") sys.exit(1) - data = format_cache_record(record, True, None) + data = record.format_dict(hashkey=True, path_length=None) click.echo(yaml.safe_dump(data, sort_keys=False), nl=False) with db.cache_artefacts_temppath(pk) as folder: paths = [str(p.relative_to(folder)) for p in folder.glob("**/*") if p.is_file()] diff --git a/jupyter_cache/cli/commands/cmd_stage.py b/jupyter_cache/cli/commands/cmd_stage.py index a7c4af8..e3c4bab 100644 --- a/jupyter_cache/cli/commands/cmd_stage.py +++ b/jupyter_cache/cli/commands/cmd_stage.py @@ -4,7 +4,7 @@ from jupyter_cache.cli.commands.cmd_main import jcache from jupyter_cache.cli import arguments, options -from jupyter_cache.cli.utils import shorten_path, get_cache +from jupyter_cache.cli.utils import get_cache, tabulate_stage_records @jcache.group("stage") @@ -69,19 +69,6 @@ def unstage_nbs_id(cache_path, pks, remove_all): click.secho("Success!", fg="green") -def format_staged_record(record, cache_record, path_length, assets=True): - data = { - "ID": record.pk, - "URI": str(shorten_path(record.uri, path_length)), - "Created": record.created.isoformat(" ", "minutes"), - } - if assets: - data["Assets"] = len(record.assets) - if cache_record: - data["Cache ID"] = cache_record.pk - return data - - @cmnd_stage.command("list") @options.CACHE_PATH @click.option( @@ -93,19 +80,11 @@ def format_staged_record(record, cache_record, path_length, assets=True): @options.PATH_LENGTH def list_staged(cache_path, compare, path_length): """List notebooks staged for possible execution.""" - import tabulate - db = get_cache(cache_path) records = db.list_staged_records() if not records: click.secho("No Staged Notebooks", fg="blue") - rows = [] - for record in sorted(records, key=lambda r: r.created, reverse=True): - cache_record = None - if compare: - cache_record = db.get_cache_record_of_staged(record.uri) - 
rows.append(format_staged_record(record, cache_record, path_length)) - click.echo(tabulate.tabulate(rows, headers="keys")) + click.echo(tabulate_stage_records(records, path_length=path_length, cache=db if compare else None)) @cmnd_stage.command("show") @@ -128,7 +107,7 @@ def show_staged(cache_path, pk, tb): click.secho("ID {} does not exist, Aborting!".format(pk), fg="red") sys.exit(1) cache_record = db.get_cache_record_of_staged(record.uri) - data = format_staged_record(record, cache_record, None, assets=False) + data = record.format_dict(cache_record=cache_record, path_length=None, assets=False) click.echo(yaml.safe_dump(data, sort_keys=False).rstrip()) if record.assets: click.echo(f"Assets:") diff --git a/jupyter_cache/cli/utils.py b/jupyter_cache/cli/utils.py index d036797..a0a61f0 100644 --- a/jupyter_cache/cli/utils.py +++ b/jupyter_cache/cli/utils.py @@ -14,3 +14,42 @@ def get_cache(path): from jupyter_cache.cache.main import JupyterCacheBase return JupyterCacheBase(path) + + +def tabulate_cache_records(records: list, hashkeys=False, path_length=None) -> str: + """Tabulate cache records. + + :param records: list of ``NbCacheRecord`` + :param hashkeys: include a hashkey column + :param path_length: truncate URI paths to x components + """ + import tabulate + + return tabulate.tabulate( + [ + r.format_dict(hashkey=hashkeys, path_length=path_length) + for r in sorted(records, key=lambda r: r.accessed, reverse=True) + ], + headers="keys", + ) + + +def tabulate_stage_records(records: list, path_length=None, cache=None) -> str: + """Tabulate staged records.
+ + :param records: list of ``NbStageRecord`` + :param path_length: truncate URI paths to x components + :param cache: If the cache is given, + we use it to add a column of matched cached pk (if available) + """ + import tabulate + + rows = [] + for record in sorted(records, key=lambda r: r.created, reverse=True): + cache_record = None + if cache is not None: + cache_record = cache.get_cache_record_of_staged(record.uri) + rows.append( + record.format_dict(cache_record=cache_record, path_length=path_length) + ) + return tabulate.tabulate(rows, headers="keys")