From a2de48bdfbba80a7e41e96a44c67d51ff36f0810 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20Rowlands=20=28=EB=B3=80=EA=B8=B0=ED=98=B8=29?= Date: Thu, 23 Apr 2020 19:59:40 +0900 Subject: [PATCH] output: use strings instead of PathInfo for performance reasons (#3663) * output: don't use PathInfo when collecting used dir cache * fix windows path handling --- dvc/output/base.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/dvc/output/base.py b/dvc/output/base.py index 2e555119cb..a539e18e7f 100644 --- a/dvc/output/base.py +++ b/dvc/output/base.py @@ -1,4 +1,5 @@ import logging +import os from urllib.parse import urlparse from copy import copy @@ -390,11 +391,21 @@ def _collect_used_dir_cache( else: return cache + path = str(self.path_info) + filter_path = str(filter_info) if filter_info else None + is_win = os.name == "nt" for entry in self.dir_cache: checksum = entry[self.remote.PARAM_CHECKSUM] - info = self.path_info / entry[self.remote.PARAM_RELPATH] - if not filter_info or info.isin_or_eq(filter_info): - cache.add(self.scheme, checksum, str(info)) + entry_relpath = entry[self.remote.PARAM_RELPATH] + if is_win: + entry_relpath = entry_relpath.replace("/", os.sep) + entry_path = os.path.join(path, entry_relpath) + if ( + not filter_path + or entry_path == filter_path + or entry_path.startswith(filter_path + os.sep) + ): + cache.add(self.scheme, checksum, entry_path) return cache