diff --git a/lib/pbench/server/api/resources/intake_base.py b/lib/pbench/server/api/resources/intake_base.py index e987d07845..d00709f3eb 100644 --- a/lib/pbench/server/api/resources/intake_base.py +++ b/lib/pbench/server/api/resources/intake_base.py @@ -256,7 +256,7 @@ def _intake( current_app.logger.info( "INTAKE (pre) {} {} for {} to {}", self.name, - filename, + dataset_name, username, tar_full_path, ) @@ -400,16 +400,6 @@ def _intake( f"Unable to create dataset in file system for {tar_full_path}: {exc}" ) from exc - usage = shutil.disk_usage(tar_full_path.parent) - current_app.logger.info( - "INTAKE (post) {} {}: {:.3}% full, {} remaining: dataset is {}", - self.name, - tar_full_path.name, - float(usage.used) / float(usage.total) * 100.0, - humanize.naturalsize(usage.free), - humanize.naturalsize(stream.length), - ) - # From this point, failure will remove the tarball from the cache # manager. recovery.add(tarball.delete) @@ -426,8 +416,12 @@ def _intake( benchmark = Metadata.SERVER_BENCHMARK_UNKNOWN metalog = {"pbench": {"name": dataset.name, "script": benchmark}} metadata[Metadata.SERVER_ARCHIVE] = True + current_app.logger.warning( + "INTAKE marking {} as archive-only because no 'metadata.log' can be found.", + dataset.name, + ) notes.append( - f"Results archive is missing '{dataset.name}/metadata.log'" + f"Results archive is missing '{dataset.name}/metadata.log'." ) attributes["missing_metadata"] = True else: @@ -436,7 +430,7 @@ def _intake( benchmark = p.get("script", Metadata.SERVER_BENCHMARK_UNKNOWN) else: benchmark = Metadata.SERVER_BENCHMARK_UNKNOWN - notes.append(f"Identified benchmark workload {benchmark!r}") + notes.append(f"Identified benchmark workload {benchmark!r}.") Metadata.create(dataset=dataset, key=Metadata.METALOG, value=metalog) except Exception as exc: raise APIInternalError( @@ -456,7 +450,7 @@ def _intake( try: retention = datetime.timedelta(days=retention_days) deletion = dataset.uploaded + retention - notes.append(f"Expected expiration date is {deletion:%Y-%m-%d}") + notes.append(f"Expected expiration date is {deletion:%Y-%m-%d}.") # Make a shallow copy so we can add keys without affecting the # original (which will be recorded in the audit log) @@ -481,11 +475,23 @@ def _intake( # Determine whether we should enable the INDEX operation. should_index = not metadata.get(Metadata.SERVER_ARCHIVE, False) enable_next = [OperationName.INDEX] if should_index else None + if not should_index: + notes.append("Indexing is disabled by 'archive only' setting.") Sync(current_app.logger, OperationName.UPLOAD).update( dataset=dataset, state=OperationState.OK, enabled=enable_next ) if notes: attributes["notes"] = notes + + usage = shutil.disk_usage(tar_full_path.parent) + current_app.logger.info( + "INTAKE (post) {} {}: {:.3}% full, {} remaining: dataset size {}", + self.name, + dataset.name, + float(usage.used) / float(usage.total) * 100.0, + humanize.naturalsize(usage.free), + humanize.naturalsize(stream.length), + ) Audit.create( root=audit, status=AuditStatus.SUCCESS, attributes=attributes ) diff --git a/lib/pbench/test/functional/server/test_datasets.py b/lib/pbench/test/functional/server/test_datasets.py index ec34aa4216..3f35fe0297 100644 --- a/lib/pbench/test/functional/server/test_datasets.py +++ b/lib/pbench/test/functional/server/test_datasets.py @@ -21,9 +21,29 @@ def utc_from_str(date: str) -> datetime: + """Convert a date string to a UTC datetime + + Args: + date: date/time string + + Returns: + UTC datetime object + """ return dateutil.parser.parse(date).replace(tzinfo=timezone.utc) +def expiration() -> str: + """Calculate a datetime for dataset deletion from "now". + + Returns: + A "YYYY-MM-DD" string representing the day when a dataset uploaded + "now" would be deleted. + """ + retention = timedelta(days=730) + d = datetime.now(timezone.utc) + retention + return f"{d:%Y-%m-%d}" + + @dataclass class Tarball: """Record the tarball path and the uploaded access value""" @@ -72,16 +92,15 @@ def test_upload_all(self, server_client: PbenchServerClient, login_user): assert ( response.status_code == HTTPStatus.CREATED ), f"upload returned unexpected status {response.status_code}, {response.text} ({t})" - benchmark = server_client.get_metadata(md5, ["server.benchmark"])[ - "server.benchmark" - ] + metabench = server_client.get_metadata(md5, ["server.benchmark"]) + benchmark = metabench["server.benchmark"] assert response.json() == { "message": "File successfully uploaded", "name": name, "resource_id": md5, "notes": [ - f"Identified benchmark workload {benchmark!r}", - "Expected expiration date is 2025-07-31", + f"Identified benchmark workload {benchmark!r}.", + f"Expected expiration date is {expiration()}.", ], } assert response.headers["location"] == server_client._uri( @@ -186,8 +205,9 @@ def test_archive_only(server_client: PbenchServerClient, login_user): "name": name, "resource_id": md5, "notes": [ - "Identified benchmark workload 'fio'", - "Expected expiration date is 2025-07-31", + "Identified benchmark workload 'fio'.", + f"Expected expiration date is {expiration()}.", + "Indexing is disabled by 'archive only' setting.", ], } assert response.headers["location"] == server_client._uri( @@ -227,14 +247,16 @@ def test_no_metadata(server_client: PbenchServerClient, login_user): assert ( response.status_code == HTTPStatus.CREATED ), f"upload {name} returned unexpected status {response.status_code}, {response.text}" + assert response.json() == { "message": "File successfully uploaded", "name": name, "resource_id": md5, "notes": [ - "Results archive is missing 'nometadata/metadata.log'", - "Identified benchmark workload 'unknown'", - "Expected expiration date is 2025-07-31", + "Results archive is missing 'nometadata/metadata.log'.", + "Identified benchmark workload 'unknown'.", + f"Expected expiration date is {expiration()}.", + "Indexing is disabled by 'archive only' setting.", ], } assert response.headers["location"] == server_client._uri( diff --git a/lib/pbench/test/unit/server/test_relay.py b/lib/pbench/test/unit/server/test_relay.py index 3d360f1007..504ebe89c1 100644 --- a/lib/pbench/test/unit/server/test_relay.py +++ b/lib/pbench/test/unit/server/test_relay.py @@ -88,6 +88,7 @@ def test_missing_authorization_header(self, client, server_config): assert not self.cachemanager_created @responses.activate + @pytest.mark.freeze_time("2023-07-01") def test_relay(self, client, server_config, pbench_drb_token, tarball): """Verify the success path @@ -128,8 +129,8 @@ def test_relay(self, client, server_config, pbench_drb_token, tarball): "name": name, "resource_id": md5, "notes": [ - "Identified benchmark workload 'unknown'", - "Expected expiration date is 2025-07-31", + "Identified benchmark workload 'unknown'.", + "Expected expiration date is 2025-06-30.", ], } assert ( @@ -169,8 +170,8 @@ def test_relay(self, client, server_config, pbench_drb_token, tarball): "access": "private", "metadata": {"global.pbench.test": "data"}, "notes": [ - "Identified benchmark workload 'unknown'", - "Expected expiration date is 2025-07-31", + "Identified benchmark workload 'unknown'.", + "Expected expiration date is 2025-06-30.", ], } diff --git a/lib/pbench/test/unit/server/test_upload.py b/lib/pbench/test/unit/server/test_upload.py index 55f4b7559f..bd125ff7bf 100644 --- a/lib/pbench/test/unit/server/test_upload.py +++ b/lib/pbench/test/unit/server/test_upload.py @@ -441,8 +441,8 @@ def test_upload(self, client, pbench_drb_token, server_config, tarball): "name": name, "resource_id": md5, "notes": [ - "Identified benchmark workload 'unknown'", - "Expected expiration date is 1972-01-01", + "Identified benchmark workload 'unknown'.", + "Expected expiration date is 1972-01-01.", ], } assert ( @@ -496,8 +496,8 @@ def test_upload(self, client, pbench_drb_token, server_config, tarball): "access": "private", "metadata": {"global.pbench.test": "data"}, "notes": [ - "Identified benchmark workload 'unknown'", - "Expected expiration date is 1972-01-01", + "Identified benchmark workload 'unknown'.", + "Expected expiration date is 1972-01-01.", ], } @@ -534,6 +534,7 @@ def test_upload_invalid_metadata( ], } + @pytest.mark.freeze_time("2023-07-01") def test_upload_duplicate(self, client, server_config, pbench_drb_token, tarball): datafile, _, md5 = tarball with datafile.open("rb") as data_fp: @@ -549,8 +550,8 @@ def test_upload_duplicate(self, client, server_config, pbench_drb_token, tarball "name": Dataset.stem(datafile), "resource_id": md5, "notes": [ - "Identified benchmark workload 'unknown'", - "Expected expiration date is 2025-07-31", + "Identified benchmark workload 'unknown'.", + "Expected expiration date is 2025-06-30.", ], } assert ( @@ -707,8 +708,9 @@ def test_upload_archive(self, client, pbench_drb_token, server_config, tarball): "access": "private", "metadata": {"server.archiveonly": True, "server.origin": "test"}, "notes": [ - "Identified benchmark workload 'unknown'", - "Expected expiration date is 1972-01-01", + "Identified benchmark workload 'unknown'.", + "Expected expiration date is 1972-01-01.", + "Indexing is disabled by 'archive only' setting.", ], } @@ -732,9 +734,10 @@ def test_upload_nometa(self, client, pbench_drb_token, server_config, tarball): "name": name, "resource_id": md5, "notes": [ - f"Results archive is missing '{name}/metadata.log'", - "Identified benchmark workload 'unknown'", - "Expected expiration date is 1972-01-01", + f"Results archive is missing '{name}/metadata.log'.", + "Identified benchmark workload 'unknown'.", + "Expected expiration date is 1972-01-01.", + "Indexing is disabled by 'archive only' setting.", ], } assert ( @@ -793,8 +796,9 @@ def test_upload_nometa(self, client, pbench_drb_token, server_config, tarball): "metadata": {"server.archiveonly": True, "server.origin": "test"}, "missing_metadata": True, "notes": [ - f"Results archive is missing '{name}/metadata.log'", - "Identified benchmark workload 'unknown'", - "Expected expiration date is 1972-01-01", + f"Results archive is missing '{name}/metadata.log'.", + "Identified benchmark workload 'unknown'.", + "Expected expiration date is 1972-01-01.", + "Indexing is disabled by 'archive only' setting.", ], }