From 7a84e2b2b25825e5fca96e0422ed264b4abead20 Mon Sep 17 00:00:00 2001 From: Rhys Short Date: Wed, 13 Mar 2024 15:30:55 +0000 Subject: [PATCH] Add support for including file path for collation Add support for including the file path in the names of collated files. This solves the problem where file path clashes occur when collecting files from multiple agents. Adding the --include-file-path option will ensure that the file names are unique. --- harvest/cli.py | 7 +++++++ harvest/collator.py | 18 ++++++++++++++++-- test/test_cli_collate.py | 24 ++++++++++++++++++++++++ test/test_collator.py | 12 ++++++++++++ 4 files changed, 59 insertions(+), 2 deletions(-) diff --git a/harvest/cli.py b/harvest/cli.py index b94a9aa..0cc3f09 100644 --- a/harvest/cli.py +++ b/harvest/cli.py @@ -108,6 +108,12 @@ def _init_arguments(self): metavar="YYYY-MM-DD or YYYYMMDD", default=False, ) + self.add_argument( + "--include-file-path", + help="Should the file path be included in the saved file names", + action="store_true", + dest="include_file_path", + ) def _validate_arguments(self, args): if not args.end: @@ -137,6 +143,7 @@ def _run(self, args): "master", args.repo_path, args.no_validate, + include_file_path=args.include_file_path, ) for file in args.filepath: diff --git a/harvest/collator.py b/harvest/collator.py index fde3fc7..e339165 100644 --- a/harvest/collator.py +++ b/harvest/collator.py @@ -27,7 +27,15 @@ class Collator(object): """Harvest collator to retrieve Git repository content.""" - def __init__(self, repo_url, creds, branch, repo_path=None, validate=True): + def __init__( + self, + repo_url, + creds, + branch, + repo_path=None, + validate=True, + include_file_path=False, + ): """Construct the Collator object.""" parsed = urlparse(repo_url) self.scheme = parsed.scheme @@ -38,6 +46,7 @@ def __init__(self, repo_url, creds, branch, repo_path=None, validate=True): self.repo_path = repo_path self.git_repo = None self.validate = validate + self.include_file_path = include_file_path 
@property def local_path(self): @@ -84,16 +93,21 @@ def read(self, filepath, from_dt, until_dt): raise FileMissingError(f"{filepath} not found between {since} and {until}") return commits - def write(self, filepath, commits): + def write(self, filepath: str, commits): """ Create file artifacts. :param str filepath: The relative path to the file within the repo :param list commits: A list of commits for a given file and date range """ + file_path_include = "" + if self.include_file_path: + file_path_include = "_".join(filepath.rsplit("/")[:-1]) + "_" + for commit in commits: file_name = ( f"./{self._ts_to_str(commit.committed_date)}_" + f"{file_path_include}" f'{filepath.rsplit("/", 1).pop()}' ) with open(file_name, "w+") as f: diff --git a/test/test_cli_collate.py b/test/test_cli_collate.py index 60b8208..36b5adf 100644 --- a/test/test_cli_collate.py +++ b/test/test_cli_collate.py @@ -307,3 +307,27 @@ def test_collate_local(self, mock_read, mock_write): datetime(today.year, today.month, today.day), ) mock_write.assert_called_once_with("my/path/baz.json", ["commit-foo"]) + + @patch("harvest.collator.Collator.write") + @patch("harvest.collator.Collator.read") + def test_collate_include_file_path(self, mock_read, mock_write): + """Ensures collate sub-command works when '--include-file-path' is provided.""" + mock_read.return_value = ["commit-foo"] + self.harvest.run( + [ + "collate", + "local", + "my/path/baz.json", + "--include-file-path", + "--repo-path", + "os/repo/path", + ] + ) + today = datetime.today() + + mock_read.assert_called_once_with( + "my/path/baz.json", + datetime(today.year, today.month, today.day), + datetime(today.year, today.month, today.day), + ) + mock_write.assert_called_once_with("my/path/baz.json", ["commit-foo"]) diff --git a/test/test_collator.py b/test/test_collator.py index a406862..bf5adbb 100644 --- a/test/test_collator.py +++ b/test/test_collator.py @@ -153,6 +153,18 @@ def test_write_functionality(self): 
self.assertIn(call("./20191105_foo.json", "w+"), m.mock_calls) self.assertIn(call("./20191101_foo.json", "w+"), m.mock_calls) + def test_write_includes_file_path(self): + m = mock_open() + with patch("builtins.open", m): + collator = Collator(*self.args, include_file_path=True) + collator.write("raw/foo/foo.json", self.commits) + handle = m() + + self.assertEqual(handle.write.call_count, 3) + self.assertIn(call("./20191106_raw_foo_foo.json", "w+"), m.mock_calls) + self.assertIn(call("./20191105_raw_foo_foo.json", "w+"), m.mock_calls) + self.assertIn(call("./20191101_raw_foo_foo.json", "w+"), m.mock_calls) + @patch("harvest.collator.git.Repo.clone_from") @patch("harvest.collator.os.path.isdir") def test_checkout_clone(self, is_dir_mock, clone_from_mock):