Add an "archive" option to the metadata post-processor.

DownloadArchive now takes the key format string (and the kwdict cache key)
directly instead of an extractor, so it can be reused outside of DownloadJob.

Review fix: MetadataPP.__init__ assigns 'self.archive = None' up front, so
that run() does not hit an AttributeError when opening the archive fails.
NOTE(review): the 'index' blob IDs below are the ones recorded with the
original patch; the post-image ID for metadata.py predates this fix.

diff --git a/docs/configuration.rst b/docs/configuration.rst
index 0982f8164f..ed682b2f67 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -599,7 +599,9 @@ Description
     memory requirements are significantly lower when the
     amount of stored IDs gets reasonably large.
 
-    Note: archive paths support regular `format string`_ replacements,
+    Note: Archive files that do not already exist get generated automatically.
+
+    Note: Archive paths support regular `format string`_ replacements,
     but be aware that using external inputs for building local paths
     may pose a security risk.
 
@@ -3139,6 +3141,19 @@ Description
     Note: Only applies for ``"mode": "custom"``.
 
 
+metadata.archive
+----------------
+Type
+    |Path|_
+Description
+    File to store IDs of generated metadata files in,
+    similar to `extractor.*.archive`_.
+
+    ``archive-format`` and ``archive-prefix`` options,
+    akin to `extractor.*.archive-format`_ and `extractor.*.archive-prefix`_,
+    are supported as well.
+
+
 metadata.mtime
 --------------
 Type
diff --git a/gallery_dl/job.py b/gallery_dl/job.py
index 3eebf0be02..ab4e9e6bc7 100644
--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@@ -389,8 +389,10 @@ def get_downloader(self, scheme):
 
     def initialize(self, kwdict=None):
         """Delayed initialization of PathFormat, etc."""
-        cfg = self.extractor.config
-        pathfmt = self.pathfmt = path.PathFormat(self.extractor)
+        extr = self.extractor
+        cfg = extr.config
+
+        pathfmt = self.pathfmt = path.PathFormat(extr)
         if kwdict:
             pathfmt.set_directory(kwdict)
 
@@ -403,17 +405,18 @@ def initialize(self, kwdict=None):
         archive = cfg("archive")
         if archive:
             archive = util.expand_path(archive)
+            archive_format = (cfg("archive-prefix", extr.category) +
+                              cfg("archive-format", extr.archive_fmt))
             try:
                 if "{" in archive:
                     archive = formatter.parse(archive).format_map(kwdict)
-                self.archive = util.DownloadArchive(archive, self.extractor)
+                self.archive = util.DownloadArchive(archive, archive_format)
             except Exception as exc:
-                self.extractor.log.warning(
+                extr.log.warning(
                     "Failed to open download archive at '%s' ('%s: %s')",
                     archive, exc.__class__.__name__, exc)
             else:
-                self.extractor.log.debug(
-                    "Using download archive '%s'", archive)
+                extr.log.debug("Using download archive '%s'", archive)
 
         skip = cfg("skip", True)
         if skip:
@@ -435,7 +438,7 @@ def initialize(self, kwdict=None):
             if self.archive:
                 self.archive.check = pathfmt.exists
 
-        postprocessors = self.extractor.config_accumulate("postprocessors")
+        postprocessors = extr.config_accumulate("postprocessors")
         if postprocessors:
             self.hooks = collections.defaultdict(list)
             pp_log = self.get_logger("postprocessor")
@@ -453,7 +456,7 @@ def initialize(self, kwdict=None):
                     clist = pp_dict.get("blacklist")
                     negate = True
                 if clist and not util.build_extractor_filter(
-                        clist, negate)(self.extractor):
+                        clist, negate)(extr):
                     continue
 
                 name = pp_dict.get("name")
@@ -471,8 +474,7 @@ def initialize(self, kwdict=None):
                     pp_list.append(pp_obj)
 
             if pp_list:
-                self.extractor.log.debug(
-                    "Active postprocessor modules: %s", pp_list)
+                extr.log.debug("Active postprocessor modules: %s", pp_list)
             if "init" in self.hooks:
                 for callback in self.hooks["init"]:
                     callback(pathfmt)
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index e77688841e..5e8f3e9a0d 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -59,9 +59,35 @@ def __init__(self, job, options):
             events = events.split(",")
         job.register_hooks({event: self.run for event in events}, options)
 
+        # default to "no archive"; run() reads this attribute unconditionally
+        self.archive = None
+        archive = options.get("archive")
+        if archive:
+            extr = job.extractor
+            archive = util.expand_path(archive)
+            archive_format = (
+                options.get("archive-prefix", extr.category) +
+                options.get("archive-format", "_MD_" + extr.archive_fmt))
+            try:
+                if "{" in archive:
+                    archive = formatter.parse(archive).format_map(
+                        job.pathfmt.kwdict)
+                self.archive = util.DownloadArchive(
+                    archive, archive_format, "_archive_metadata")
+            except Exception as exc:
+                self.log.warning(
+                    "Failed to open download archive at '%s' ('%s: %s')",
+                    archive, exc.__class__.__name__, exc)
+            else:
+                self.log.debug("Using download archive '%s'", archive)
+
         self.mtime = options.get("mtime")
 
     def run(self, pathfmt):
+        archive = self.archive
+        if archive and archive.check(pathfmt.kwdict):
+            return
+
         directory = self._directory(pathfmt)
         path = directory + self._filename(pathfmt)
 
@@ -73,6 +99,9 @@ def run(self, pathfmt):
             with open(path, "w", encoding="utf-8") as fp:
                 self.write(fp, pathfmt.kwdict)
 
+        if archive:
+            archive.add(pathfmt.kwdict)
+
         if self.mtime:
             mtime = pathfmt.kwdict.get("_mtime")
             if mtime:
diff --git a/gallery_dl/util.py b/gallery_dl/util.py
index e28ca861e5..a5aa5eb21b 100644
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@@ -672,11 +672,14 @@ def __str__(self):
 
 class DownloadArchive():
 
-    def __init__(self, path, extractor):
+    def __init__(self, path, format_string, cache_key="_archive_key"):
         con = sqlite3.connect(path, timeout=60, check_same_thread=False)
         con.isolation_level = None
+
         self.close = con.close
         self.cursor = con.cursor()
+        self.keygen = format_string.format_map
+        self._cache_key = cache_key
 
         try:
             self.cursor.execute("CREATE TABLE IF NOT EXISTS archive "
@@ -685,20 +688,16 @@ def __init__(self, path, extractor):
             # fallback for missing WITHOUT ROWID support (#553)
             self.cursor.execute("CREATE TABLE IF NOT EXISTS archive "
                                 "(entry PRIMARY KEY)")
-        self.keygen = (
-            extractor.config("archive-prefix", extractor.category) +
-            extractor.config("archive-format", extractor.archive_fmt)
-        ).format_map
 
     def check(self, kwdict):
         """Return True if the item described by 'kwdict' exists in archive"""
-        key = kwdict["_archive_key"] = self.keygen(kwdict)
+        key = kwdict[self._cache_key] = self.keygen(kwdict)
         self.cursor.execute(
             "SELECT 1 FROM archive WHERE entry=? LIMIT 1", (key,))
         return self.cursor.fetchone()
 
     def add(self, kwdict):
         """Add item described by 'kwdict' to archive"""
-        key = kwdict.get("_archive_key") or self.keygen(kwdict)
+        key = kwdict.get(self._cache_key) or self.keygen(kwdict)
         self.cursor.execute(
             "INSERT OR IGNORE INTO archive VALUES (?)", (key,))