From 190cbc237a0be31d16a522d62766a746708523a2 Mon Sep 17 00:00:00 2001 From: Rhys Short Date: Thu, 30 May 2024 08:40:05 +0100 Subject: [PATCH] Add support for including file path for collation (#30) Add support for including the file path in collated file names. This solves the problem where file path clashes occur if you are collecting files from multiple agents. Adding the --include-file-path option will ensure that the file names are unique. --- harvest/cli.py | 7 +++++++ harvest/collator.py | 18 ++++++++++++++++-- test/test_cli_collate.py | 24 ++++++++++++++++++++++++ test/test_collator.py | 12 ++++++++++++ 4 files changed, 59 insertions(+), 2 deletions(-) diff --git a/harvest/cli.py b/harvest/cli.py index b94a9aa..0cc3f09 100644 --- a/harvest/cli.py +++ b/harvest/cli.py @@ -108,6 +108,12 @@ def _init_arguments(self): metavar="YYYY-MM-DD or YYYYMMDD", default=False, ) + self.add_argument( + "--include-file-path", + help="Should the file path be included in the saved file names", + action="store_true", + dest="include_file_path", + ) def _validate_arguments(self, args): if not args.end: @@ -137,6 +143,7 @@ def _run(self, args): "master", args.repo_path, args.no_validate, + include_file_path=args.include_file_path, ) for file in args.filepath: diff --git a/harvest/collator.py b/harvest/collator.py index fde3fc7..e339165 100644 --- a/harvest/collator.py +++ b/harvest/collator.py @@ -27,7 +27,15 @@ class Collator(object): """Harvest collator to retrieve Git repository content.""" - def __init__(self, repo_url, creds, branch, repo_path=None, validate=True): + def __init__( + self, + repo_url, + creds, + branch, + repo_path=None, + validate=True, + include_file_path=False, + ): """Construct the Collator object.""" parsed = urlparse(repo_url) self.scheme = parsed.scheme @@ -38,6 +46,7 @@ def __init__(self, repo_url, creds, branch, repo_path=None, validate=True): self.repo_path = repo_path self.git_repo = None self.validate = validate + self.include_file_path = 
include_file_path @property def local_path(self): @@ -84,16 +93,21 @@ def read(self, filepath, from_dt, until_dt): raise FileMissingError(f"{filepath} not found between {since} and {until}") return commits - def write(self, filepath, commits): + def write(self, filepath: str, commits): """ Create file artifacts. :param str filepath: The relative path to the file within the repo :param list commits: A list of commits for a given file and date range """ + file_path_include = "" + if self.include_file_path: + file_path_include = "_".join(filepath.rsplit("/")[:-1]) + "_" + for commit in commits: file_name = ( f"./{self._ts_to_str(commit.committed_date)}_" + f"{file_path_include}" f'{filepath.rsplit("/", 1).pop()}' ) with open(file_name, "w+") as f: diff --git a/test/test_cli_collate.py b/test/test_cli_collate.py index 60b8208..36b5adf 100644 --- a/test/test_cli_collate.py +++ b/test/test_cli_collate.py @@ -307,3 +307,27 @@ def test_collate_local(self, mock_read, mock_write): datetime(today.year, today.month, today.day), ) mock_write.assert_called_once_with("my/path/baz.json", ["commit-foo"]) + + @patch("harvest.collator.Collator.write") + @patch("harvest.collator.Collator.read") + def test_collate_include_file_path(self, mock_read, mock_write): + """Ensures collate sub-command works when '--include-file-path' is provided.""" + mock_read.return_value = ["commit-foo"] + self.harvest.run( + [ + "collate", + "local", + "my/path/baz.json", + "--include-file-path", + "--repo-path", + "os/repo/path", + ] + ) + today = datetime.today() + + mock_read.assert_called_once_with( + "my/path/baz.json", + datetime(today.year, today.month, today.day), + datetime(today.year, today.month, today.day), + ) + mock_write.assert_called_once_with("my/path/baz.json", ["commit-foo"]) diff --git a/test/test_collator.py b/test/test_collator.py index a406862..bf5adbb 100644 --- a/test/test_collator.py +++ b/test/test_collator.py @@ -153,6 +153,18 @@ def test_write_functionality(self): 
self.assertIn(call("./20191105_foo.json", "w+"), m.mock_calls) self.assertIn(call("./20191101_foo.json", "w+"), m.mock_calls) + def test_write_includes_file_path(self): + m = mock_open() + with patch("builtins.open", m): + collator = Collator(*self.args, include_file_path=True) + collator.write("raw/foo/foo.json", self.commits) + handle = m() + + self.assertEqual(handle.write.call_count, 3) + self.assertIn(call("./20191106_raw_foo_foo.json", "w+"), m.mock_calls) + self.assertIn(call("./20191105_raw_foo_foo.json", "w+"), m.mock_calls) + self.assertIn(call("./20191101_raw_foo_foo.json", "w+"), m.mock_calls) + @patch("harvest.collator.git.Repo.clone_from") @patch("harvest.collator.os.path.isdir") def test_checkout_clone(self, is_dir_mock, clone_from_mock):