From 4ad4313dd9680407d5fd5f80e895ea12aa6bf3c9 Mon Sep 17 00:00:00 2001 From: Brian Dubois Date: Tue, 17 Oct 2023 13:02:19 -0400 Subject: [PATCH 1/3] No longer allow usage of `tensorboard dev upload`. --- tensorboard/uploader/BUILD | 87 - tensorboard/uploader/dry_run_stubs.py | 53 - tensorboard/uploader/dry_run_stubs_test.py | 52 - tensorboard/uploader/logdir_loader.py | 108 - tensorboard/uploader/logdir_loader_test.py | 166 -- tensorboard/uploader/upload_tracker.py | 429 ---- tensorboard/uploader/upload_tracker_test.py | 395 ---- tensorboard/uploader/uploader.py | 1110 ---------- tensorboard/uploader/uploader_subcommand.py | 158 +- .../uploader/uploader_subcommand_test.py | 177 +- tensorboard/uploader/uploader_test.py | 1851 ----------------- tensorboard/uploader/util.py | 22 - tensorboard/uploader/util_test.py | 25 - 13 files changed, 38 insertions(+), 4595 deletions(-) delete mode 100644 tensorboard/uploader/dry_run_stubs.py delete mode 100644 tensorboard/uploader/dry_run_stubs_test.py delete mode 100644 tensorboard/uploader/logdir_loader.py delete mode 100644 tensorboard/uploader/logdir_loader_test.py delete mode 100644 tensorboard/uploader/upload_tracker.py delete mode 100644 tensorboard/uploader/upload_tracker_test.py diff --git a/tensorboard/uploader/BUILD b/tensorboard/uploader/BUILD index 12a365df150..2f65b5b3f7f 100644 --- a/tensorboard/uploader/BUILD +++ b/tensorboard/uploader/BUILD @@ -68,7 +68,6 @@ py_library( visibility = ["//tensorboard:internal"], deps = [ ":auth", - ":dry_run_stubs", ":exporter", ":flags_parser", ":formatters", @@ -79,7 +78,6 @@ py_library( "//tensorboard:expect_absl_logging_installed", "//tensorboard:expect_grpc_installed", "//tensorboard:program", - "//tensorboard/compat:tensorflow", "//tensorboard/plugins:base_plugin", "//tensorboard/uploader/proto:protos_all_py_pb2_grpc", ], @@ -90,7 +88,6 @@ py_test( srcs = ["uploader_subcommand_test.py"], srcs_version = "PY3", deps = [ - ":dry_run_stubs", ":server_info", ":uploader", ":uploader_subcommand", @@ -104,87 +101,28 @@ py_library( srcs = ["uploader.py"], srcs_version = "PY3", deps = [ - ":logdir_loader", - ":upload_tracker", ":util", "//tensorboard:expect_grpc_installed", - "//tensorboard:expect_protobuf_installed", - "//tensorboard/backend:process_graph", - "//tensorboard/backend/event_processing:directory_loader", - "//tensorboard/backend/event_processing:event_file_loader", - "//tensorboard/backend/event_processing:io_wrapper", - "//tensorboard/compat/proto:protos_all_py_pb2", - "//tensorboard/plugins/graph:metadata", "//tensorboard/uploader/proto:protos_all_py_pb2", "//tensorboard/util:grpc_util", "//tensorboard/util:tb_logging", - "//tensorboard/util:tensor_util", ], ) -py_library( - name = "upload_tracker", - srcs = ["upload_tracker.py"], - srcs_version = "PY3", -) - py_test( name = "uploader_test", srcs = ["uploader_test.py"], srcs_version = "PY3", deps = [ - ":dry_run_stubs", ":server_info", ":test_util", - ":upload_tracker", ":uploader", - ":util", - "//tensorboard:data_compat", - "//tensorboard:dataclass_compat", "//tensorboard:expect_grpc_installed", "//tensorboard:expect_grpc_testing_installed", - "//tensorboard:expect_protobuf_installed", "//tensorboard:expect_tensorflow_installed", "//tensorboard/compat:no_tensorflow", - "//tensorboard/compat/proto:protos_all_py_pb2", - "//tensorboard/plugins/graph:metadata", - "//tensorboard/plugins/histogram:summary_v2", - "//tensorboard/plugins/scalar:metadata", - "//tensorboard/plugins/scalar:summary_v2", - "//tensorboard/summary:summary_v1", 
"//tensorboard/uploader/proto:protos_all_py_pb2", "//tensorboard/uploader/proto:protos_all_py_pb2_grpc", - "//tensorboard/util:test_util", - ], -) - -py_test( - name = "upload_tracker_test", - srcs = ["upload_tracker_test.py"], - srcs_version = "PY3", - deps = [ - ":upload_tracker", - "//tensorboard:test", - ], -) - -py_library( - name = "dry_run_stubs", - srcs = ["dry_run_stubs.py"], - srcs_version = "PY3", - deps = [ - "//tensorboard/uploader/proto:protos_all_py_pb2", - ], -) - -py_test( - name = "dry_run_stubs_test", - srcs = ["dry_run_stubs_test.py"], - srcs_version = "PY3", - deps = [ - ":dry_run_stubs", - "//tensorboard:test", - "//tensorboard/uploader/proto:protos_all_py_pb2", ], ) @@ -211,30 +149,6 @@ py_test( ], ) -py_library( - name = "logdir_loader", - srcs = ["logdir_loader.py"], - srcs_version = "PY3", - deps = [ - "//tensorboard/backend/event_processing:directory_watcher", - "//tensorboard/backend/event_processing:io_wrapper", - "//tensorboard/util:tb_logging", - ], -) - -py_test( - name = "logdir_loader_test", - srcs = ["logdir_loader_test.py"], - deps = [ - ":logdir_loader", - "//tensorboard:test", - "//tensorboard/backend/event_processing:directory_loader", - "//tensorboard/backend/event_processing:event_file_loader", - "//tensorboard/backend/event_processing:io_wrapper", - "//tensorboard/util:test_util", - ], -) - py_library( name = "test_util", testonly = 1, @@ -257,7 +171,6 @@ py_test( name = "util_test", srcs = ["util_test.py"], deps = [ - ":test_util", ":util", "//tensorboard:expect_protobuf_installed", "//tensorboard:test", diff --git a/tensorboard/uploader/dry_run_stubs.py b/tensorboard/uploader/dry_run_stubs.py deleted file mode 100644 index 7303cfc34d3..00000000000 --- a/tensorboard/uploader/dry_run_stubs.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Dry-run stubs for various rpc services.""" - - -from tensorboard.uploader.proto import write_service_pb2 - - -class DryRunTensorBoardWriterStub: - """A dry-run TensorBoardWriter gRPC Server. - - Only the methods used by the `tensorboard dev upload` are - mocked out in this class. - - When additional methods start to be used by the command, - their mocks should be added to this class. - """ - - def CreateExperiment(self, request, **kwargs): - """Create a new experiment and remember it has been created.""" - del request, kwargs # Unused. - return write_service_pb2.CreateExperimentResponse() - - def WriteScalar(self, request, **kwargs): - del request, kwargs # Unused. - return write_service_pb2.WriteScalarResponse() - - def WriteTensor(self, request, **kwargs): - del request, kwargs # Unused. - return write_service_pb2.WriteTensorResponse() - - def GetOrCreateBlobSequence(self, request, **kwargs): - del request, kwargs # Unused. 
- return write_service_pb2.GetOrCreateBlobSequenceResponse( - blob_sequence_id="dummy_blob_sequence_id" - ) - - def WriteBlob(self, request, **kwargs): - del kwargs # Unused. - for item in request: - yield write_service_pb2.WriteBlobResponse() diff --git a/tensorboard/uploader/dry_run_stubs_test.py b/tensorboard/uploader/dry_run_stubs_test.py deleted file mode 100644 index 50e7f4d2c0b..00000000000 --- a/tensorboard/uploader/dry_run_stubs_test.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for dry-run rpc servicers.""" - - -from tensorboard import test as tb_test -from tensorboard.uploader import dry_run_stubs -from tensorboard.uploader.proto import write_service_pb2 - - -class DryRunTensorBoardWriterServicerTest(tb_test.TestCase): - def setUp(self): - super().setUp() - self._stub = dry_run_stubs.DryRunTensorBoardWriterStub() - - def testCreateExperiment(self): - self._stub.CreateExperiment(write_service_pb2.CreateExperimentRequest()) - - def testWriteScalar(self): - self._stub.WriteScalar(write_service_pb2.WriteScalarRequest()) - - def testWriteTensor(self): - self._stub.WriteTensor(write_service_pb2.WriteTensorRequest()) - - def testGetOrCreateBlobSequence(self): - self._stub.GetOrCreateBlobSequence( - write_service_pb2.GetOrCreateBlobSequenceRequest() - ) - - def testWriteBlob(self): - def dummy_iterator(): - yield write_service_pb2.WriteBlobRequest() - yield write_service_pb2.WriteBlobRequest() - - for response in self._stub.WriteBlob(dummy_iterator()): - self.assertTrue(response) - - -if __name__ == "__main__": - tb_test.main() diff --git a/tensorboard/uploader/logdir_loader.py b/tensorboard/uploader/logdir_loader.py deleted file mode 100644 index 5d22bfe57d5..00000000000 --- a/tensorboard/uploader/logdir_loader.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Loader for event file data for an entire TensorBoard log directory.""" - - -import collections -import os - -from tensorboard.backend.event_processing import directory_watcher -from tensorboard.backend.event_processing import io_wrapper -from tensorboard.util import tb_logging - - -logger = tb_logging.get_logger() - - -class LogdirLoader: - """Loader for a root log directory, maintaining multiple DirectoryLoaders. - - This class takes a root log directory and a factory for DirectoryLoaders, and - maintains one DirectoryLoader per "logdir subdirectory" of the root logdir. - - Note that this class is not thread-safe. - """ - - def __init__(self, logdir, directory_loader_factory): - """Constructs a new LogdirLoader. - - Args: - logdir: The root log directory to load from. - directory_loader_factory: A factory for creating DirectoryLoaders. The - factory should take a path and return a DirectoryLoader. - - Raises: - ValueError: If logdir or directory_loader_factory are None. - """ - if logdir is None: - raise ValueError("A logdir is required") - if directory_loader_factory is None: - raise ValueError("A directory loader factory is required") - self._logdir = logdir - self._directory_loader_factory = directory_loader_factory - # Maps run names to corresponding DirectoryLoader instances. - self._directory_loaders = {} - - def synchronize_runs(self): - """Finds new runs within `logdir` and makes `DirectoryLoaders` for - them. - - In addition, any existing `DirectoryLoader` whose run directory - no longer exists will be deleted. - """ - logger.info("Starting logdir traversal of %s", self._logdir) - runs_seen = set() - for subdir in io_wrapper.GetLogdirSubdirectories(self._logdir): - run = os.path.relpath(subdir, self._logdir) - runs_seen.add(run) - if run not in self._directory_loaders: - logger.info("- Adding run for relative directory %s", run) - self._directory_loaders[run] = self._directory_loader_factory( - subdir - ) - stale_runs = set(self._directory_loaders) - runs_seen - if stale_runs: - for run in stale_runs: - logger.info("- Removing run for relative directory %s", run) - del self._directory_loaders[run] - logger.info("Ending logdir traversal of %s", self._logdir) - - def get_run_events(self): - """Returns tf.Event generators for each run's `DirectoryLoader`. - - Warning: the generators are stateful and consuming them will affect the - results of any other existing generators for that run; calling code should - ensure it takes events from only a single generator per run at a time. - - Returns: - Dictionary containing an entry for each run, mapping the run name to a - generator yielding tf.Event protobuf objects loaded from that run. 
- """ - runs = list(self._directory_loaders) - logger.info("Creating event loading generators for %d runs", len(runs)) - run_to_loader = collections.OrderedDict() - for run_name in sorted(runs): - loader = self._directory_loaders[run_name] - run_to_loader[run_name] = self._wrap_loader_generator(loader.Load()) - return run_to_loader - - def _wrap_loader_generator(self, loader_generator): - """Wraps `DirectoryLoader` generator to swallow - `DirectoryDeletedError`.""" - try: - for item in loader_generator: - yield item - except directory_watcher.DirectoryDeletedError: - return diff --git a/tensorboard/uploader/logdir_loader_test.py b/tensorboard/uploader/logdir_loader_test.py deleted file mode 100644 index 9e0f7498d6c..00000000000 --- a/tensorboard/uploader/logdir_loader_test.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright 2019 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for tensorboard.uploader.logdir_loader.""" - - -import os.path -import shutil - -from tensorboard.uploader import logdir_loader -from tensorboard import test as tb_test -from tensorboard.backend.event_processing import directory_loader -from tensorboard.backend.event_processing import event_file_loader -from tensorboard.backend.event_processing import io_wrapper -from tensorboard.util import test_util - - -class LogdirLoaderTest(tb_test.TestCase): - def _create_logdir_loader(self, logdir): - def directory_loader_factory(path): - return directory_loader.DirectoryLoader( - path, - event_file_loader.TimestampedEventFileLoader, - path_filter=io_wrapper.IsTensorFlowEventsFile, - ) - - return logdir_loader.LogdirLoader(logdir, directory_loader_factory) - - def _extract_tags(self, event_generator): - """Converts a generator of tf.Events into a list of event tags.""" - return [ - event.summary.value[0].tag - for event in event_generator - if not event.file_version - ] - - def _extract_run_to_tags(self, run_to_events): - """Returns run-to-tags dict from run-to-event-generator dict.""" - run_to_tags = {} - for run_name, event_generator in run_to_events.items(): - # There should be no duplicate runs. - self.assertNotIn(run_name, run_to_tags) - run_to_tags[run_name] = self._extract_tags(event_generator) - return run_to_tags - - def test_empty_logdir(self): - logdir = self.get_temp_dir() - loader = self._create_logdir_loader(logdir) - # Default state is empty. - self.assertEmpty(list(loader.get_run_events())) - loader.synchronize_runs() - # Still empty, since there's no data. - self.assertEmpty(list(loader.get_run_events())) - - def test_single_event_logdir(self): - logdir = self.get_temp_dir() - with test_util.FileWriter(logdir) as writer: - writer.add_test_summary("foo") - loader = self._create_logdir_loader(logdir) - loader.synchronize_runs() - self.assertEqual( - self._extract_run_to_tags(loader.get_run_events()), {".": ["foo"]} - ) - # A second load should indicate no new data for the run. 
- self.assertEqual( - self._extract_run_to_tags(loader.get_run_events()), {".": []} - ) - - def test_multiple_writes_to_logdir(self): - logdir = self.get_temp_dir() - with test_util.FileWriter(os.path.join(logdir, "a")) as writer: - writer.add_test_summary("tag_a") - with test_util.FileWriter(os.path.join(logdir, "b")) as writer: - writer.add_test_summary("tag_b") - with test_util.FileWriter(os.path.join(logdir, "b", "x")) as writer: - writer.add_test_summary("tag_b_x") - writer_c = test_util.FileWriter(os.path.join(logdir, "c")) - writer_c.add_test_summary("tag_c") - writer_c.flush() - loader = self._create_logdir_loader(logdir) - loader.synchronize_runs() - self.assertEqual( - self._extract_run_to_tags(loader.get_run_events()), - { - "a": ["tag_a"], - "b": ["tag_b"], - "b/x": ["tag_b_x"], - "c": ["tag_c"], - }, - ) - # A second load should indicate no new data. - self.assertEqual( - self._extract_run_to_tags(loader.get_run_events()), - {"a": [], "b": [], "b/x": [], "c": []}, - ) - # Write some new data to both new and pre-existing event files. - with test_util.FileWriter( - os.path.join(logdir, "a"), filename_suffix=".other" - ) as writer: - writer.add_test_summary("tag_a_2") - writer.add_test_summary("tag_a_3") - writer.add_test_summary("tag_a_4") - with test_util.FileWriter( - os.path.join(logdir, "b", "x"), filename_suffix=".other" - ) as writer: - writer.add_test_summary("tag_b_x_2") - with writer_c as writer: - writer.add_test_summary("tag_c_2") - # New data should appear on the next load. - self.assertEqual( - self._extract_run_to_tags(loader.get_run_events()), - { - "a": ["tag_a_2", "tag_a_3", "tag_a_4"], - "b": [], - "b/x": ["tag_b_x_2"], - "c": ["tag_c_2"], - }, - ) - - def test_directory_deletion(self): - logdir = self.get_temp_dir() - with test_util.FileWriter(os.path.join(logdir, "a")) as writer: - writer.add_test_summary("tag_a") - with test_util.FileWriter(os.path.join(logdir, "b")) as writer: - writer.add_test_summary("tag_b") - with test_util.FileWriter(os.path.join(logdir, "c")) as writer: - writer.add_test_summary("tag_c") - loader = self._create_logdir_loader(logdir) - loader.synchronize_runs() - self.assertEqual(list(loader.get_run_events().keys()), ["a", "b", "c"]) - shutil.rmtree(os.path.join(logdir, "b")) - loader.synchronize_runs() - self.assertEqual(list(loader.get_run_events().keys()), ["a", "c"]) - shutil.rmtree(logdir) - loader.synchronize_runs() - self.assertEmpty(loader.get_run_events()) - - def test_directory_deletion_during_event_loading(self): - logdir = self.get_temp_dir() - with test_util.FileWriter(logdir) as writer: - writer.add_test_summary("foo") - loader = self._create_logdir_loader(logdir) - loader.synchronize_runs() - self.assertEqual( - self._extract_run_to_tags(loader.get_run_events()), {".": ["foo"]} - ) - shutil.rmtree(logdir) - runs_to_events = loader.get_run_events() - self.assertEqual(list(runs_to_events.keys()), ["."]) - events = runs_to_events["."] - self.assertEqual(self._extract_tags(events), []) - - -if __name__ == "__main__": - tb_test.main() diff --git a/tensorboard/uploader/upload_tracker.py b/tensorboard/uploader/upload_tracker.py deleted file mode 100644 index a72b7fbe14b..00000000000 --- a/tensorboard/uploader/upload_tracker.py +++ /dev/null @@ -1,429 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Progress tracker for uploader.""" - - -import contextlib -from datetime import datetime -import sys -import time - - -def readable_time_string(): - """Get a human-readable time string for the present.""" - return datetime.now().strftime("%Y-%m-%dT%H:%M:%S") - - -def readable_bytes_string(bytes): - """Get a human-readable string for number of bytes.""" - if bytes >= 2**20: - return "%.1f MB" % (float(bytes) / 2**20) - elif bytes >= 2**10: - return "%.1f kB" % (float(bytes) / 2**10) - else: - return "%d B" % bytes - - -class UploadStats: - """Statistics of uploading.""" - - def __init__(self): - self._last_summarized_timestamp = time.time() - self._last_data_added_timestamp = 0 - self._num_scalars = 0 - self._num_tensors = 0 - self._num_tensors_skipped = 0 - self._tensor_bytes = 0 - self._tensor_bytes_skipped = 0 - self._num_blobs = 0 - self._num_blobs_skipped = 0 - self._blob_bytes = 0 - self._blob_bytes_skipped = 0 - self._plugin_names = set() - - def add_scalars(self, num_scalars): - """Add a batch of scalars. - - Args: - num_scalars: Number of scalars uploaded in this batch. - """ - self._refresh_last_data_added_timestamp() - self._num_scalars += num_scalars - - def add_tensors( - self, - num_tensors, - num_tensors_skipped, - tensor_bytes, - tensor_bytes_skipped, - ): - """Add a batch of tensors. - - Args: - num_tensors: Number of tensors encountered in this batch, including - the ones skipped due to reasons such as large exceeding limit. - num_tensors: Number of tensors skipped. This describes a subset of - `num_tensors` and hence must be `<= num_tensors`. - tensor_bytes: Total byte size of tensors encountered in this batch, - including the skipped ones. - tensor_bytes_skipped: Total byte size of the tensors skipped due to - reasons such as size exceeding limit. - """ - assert num_tensors_skipped <= num_tensors - assert tensor_bytes_skipped <= tensor_bytes - self._refresh_last_data_added_timestamp() - self._num_tensors += num_tensors - self._num_tensors_skipped += num_tensors_skipped - self._tensor_bytes += tensor_bytes - self._tensor_bytes_skipped = tensor_bytes_skipped - - def add_blob(self, blob_bytes, is_skipped): - """Add a blob. - - Args: - blob_bytes: Byte size of the blob. - is_skipped: Whether the uploading of the blob is skipped due to - reasons such as size exceeding limit. - """ - self._refresh_last_data_added_timestamp() - self._num_blobs += 1 - self._blob_bytes += blob_bytes - if is_skipped: - self._num_blobs_skipped += 1 - self._blob_bytes_skipped += blob_bytes - - def add_plugin(self, plugin_name): - """Add a plugin. - - Args: - plugin_name: Name of the plugin. 
- """ - self._refresh_last_data_added_timestamp() - self._plugin_names.add(plugin_name) - - @property - def num_scalars(self): - return self._num_scalars - - @property - def num_tensors(self): - return self._num_tensors - - @property - def num_tensors_skipped(self): - return self._num_tensors_skipped - - @property - def tensor_bytes(self): - return self._tensor_bytes - - @property - def tensor_bytes_skipped(self): - return self._tensor_bytes_skipped - - @property - def num_blobs(self): - return self._num_blobs - - @property - def num_blobs_skipped(self): - return self._num_blobs_skipped - - @property - def blob_bytes(self): - return self._blob_bytes - - @property - def blob_bytes_skipped(self): - return self._blob_bytes_skipped - - @property - def plugin_names(self): - return self._plugin_names - - def has_data(self): - """Has any data been tracked by this instance. - - This counts the tensor and blob data that have been scanned - but skipped. - - Returns: - Whether this stats tracking object has tracked any data. - """ - return ( - self._num_scalars > 0 - or self._num_tensors > 0 - or self._num_blobs > 0 - ) - - def summarize(self): - """Get a summary string for actually-uploaded and skipped data. - - Calling this property also marks the "last_summarized" timestamp, so that - the has_new_data_since_last_summarize() will be able to report the correct value - later. - - Returns: - A tuple with two items: - - A string summarizing all data uploaded so far. - - If any data was skipped, a string for all skipped data. Else, `None`. - """ - self._last_summarized_timestamp = time.time() - string_pieces = [] - string_pieces.append("%d scalars" % self._num_scalars) - uploaded_tensor_count = self._num_tensors - self._num_tensors_skipped - uploaded_tensor_bytes = self._tensor_bytes - self._tensor_bytes_skipped - string_pieces.append( - "0 tensors" - if not uploaded_tensor_count - else ( - "%d tensors (%s)" - % ( - uploaded_tensor_count, - readable_bytes_string(uploaded_tensor_bytes), - ) - ) - ) - uploaded_blob_count = self._num_blobs - self._num_blobs_skipped - uploaded_blob_bytes = self._blob_bytes - self._blob_bytes_skipped - string_pieces.append( - "0 binary objects" - if not uploaded_blob_count - else ( - "%d binary objects (%s)" - % ( - uploaded_blob_count, - readable_bytes_string(uploaded_blob_bytes), - ) - ) - ) - skipped_string = ( - self._skipped_summary() if self._skipped_any() else None - ) - return ", ".join(string_pieces), skipped_string - - def _skipped_any(self): - """Whether any data was skipped.""" - return self._num_tensors_skipped or self._num_blobs_skipped - - def has_new_data_since_last_summarize(self): - return self._last_data_added_timestamp > self._last_summarized_timestamp - - def _skipped_summary(self): - """Get a summary string for skipped data.""" - string_pieces = [] - if self._num_tensors_skipped: - string_pieces.append( - "%d tensors (%s)" - % ( - self._num_tensors_skipped, - readable_bytes_string(self._tensor_bytes_skipped), - ) - ) - if self._num_blobs_skipped: - string_pieces.append( - "%d binary objects (%s)" - % ( - self._num_blobs_skipped, - readable_bytes_string(self._blob_bytes_skipped), - ) - ) - return ", ".join(string_pieces) - - def _refresh_last_data_added_timestamp(self): - self._last_data_added_timestamp = time.time() - - -_STYLE_RESET = "\033[0m" -_STYLE_BOLD = "\033[1m" -_STYLE_GREEN = "\033[32m" -_STYLE_YELLOW = "\033[33m" -_STYLE_DARKGRAY = "\033[90m" -_STYLE_ERASE_LINE = "\033[2K" - - -class UploadTracker: - """Tracker for uploader progress and 
status.""" - - _SUPPORTED_VERBISITY_VALUES = (0, 1) - - def __init__(self, verbosity, one_shot=False): - if verbosity not in self._SUPPORTED_VERBISITY_VALUES: - raise ValueError( - "Unsupported verbosity value %s (supported values: %s)" - % (verbosity, self._SUPPORTED_VERBISITY_VALUES) - ) - self._verbosity = verbosity - self._stats = UploadStats() - self._send_count = 0 - self._one_shot = one_shot - - def _dummy_generator(self): - while True: - # Yield an arbitrary value 0: The progress bar is indefinite. - yield 0 - - def _overwrite_line_message(self, message, color_code=_STYLE_GREEN): - """Overwrite the current line with a stylized message.""" - if not self._verbosity: - return - message += "." * 3 - sys.stdout.write( - _STYLE_ERASE_LINE + color_code + message + _STYLE_RESET + "\r" - ) - sys.stdout.flush() - - def _single_line_message(self, message): - """Write a timestamped single line, with newline, to stdout.""" - if not self._verbosity: - return - start_message = "%s[%s]%s %s\n" % ( - _STYLE_BOLD, - readable_time_string(), - _STYLE_RESET, - message, - ) - sys.stdout.write(start_message) - sys.stdout.flush() - - def has_data(self): - """Determine if any data has been uploaded under the tracker's watch.""" - return self._stats.has_data() - - def _update_cumulative_status(self): - """Write an update summarizing the data uploaded since the start.""" - if not self._verbosity: - return - if not self._stats.has_new_data_since_last_summarize(): - return - uploaded_str, skipped_str = self._stats.summarize() - uploaded_message = "%s[%s]%s Total uploaded: %s\n" % ( - _STYLE_BOLD, - readable_time_string(), - _STYLE_RESET, - uploaded_str, - ) - sys.stdout.write(uploaded_message) - if skipped_str: - sys.stdout.write( - "%sTotal skipped: %s\n%s" - % (_STYLE_DARKGRAY, skipped_str, _STYLE_RESET) - ) - sys.stdout.flush() - # TODO(cais): Add summary of what plugins have been involved, once it's - # clear how to get canonical plugin names. - - def add_plugin_name(self, plugin_name): - self._stats.add_plugin(plugin_name) - - @contextlib.contextmanager - def send_tracker(self): - """Create a context manager for a round of data sending.""" - self._send_count += 1 - if self._send_count == 1: - self._single_line_message("Started scanning logdir.") - try: - # self._reset_bars() - self._overwrite_line_message("Data upload starting") - yield - finally: - self._update_cumulative_status() - if self._one_shot: - self._single_line_message("Done scanning logdir.") - else: - self._overwrite_line_message( - "Listening for new data in logdir", - color_code=_STYLE_YELLOW, - ) - - @contextlib.contextmanager - def scalars_tracker(self, num_scalars): - """Create a context manager for tracking a scalar batch upload. - - Args: - num_scalars: Number of scalars in the batch. - """ - self._overwrite_line_message("Uploading %d scalars" % num_scalars) - try: - yield - finally: - self._stats.add_scalars(num_scalars) - - @contextlib.contextmanager - def tensors_tracker( - self, - num_tensors, - num_tensors_skipped, - tensor_bytes, - tensor_bytes_skipped, - ): - """Create a context manager for tracking a tensor batch upload. - - Args: - num_tensors: Total number of tensors in the batch. - num_tensors_skipped: Number of tensors skipped (a subset of - `num_tensors`). Hence this must be `<= num_tensors`. - tensor_bytes: Total byte size of the tensors in the batch. - tensor_bytes_skipped: Byte size of skipped tensors in the batch (a - subset of `tensor_bytes`). Must be `<= tensor_bytes`. 
- """ - if num_tensors_skipped: - message = "Uploading %d tensors (%s) (Skipping %d tensors, %s)" % ( - num_tensors - num_tensors_skipped, - readable_bytes_string(tensor_bytes - tensor_bytes_skipped), - num_tensors_skipped, - readable_bytes_string(tensor_bytes_skipped), - ) - else: - message = "Uploading %d tensors (%s)" % ( - num_tensors, - readable_bytes_string(tensor_bytes), - ) - self._overwrite_line_message(message) - try: - yield - finally: - self._stats.add_tensors( - num_tensors, - num_tensors_skipped, - tensor_bytes, - tensor_bytes_skipped, - ) - - @contextlib.contextmanager - def blob_tracker(self, blob_bytes): - """Creates context manager tracker for uploading a blob. - - Args: - blob_bytes: Total byte size of the blob being uploaded. - """ - self._overwrite_line_message( - "Uploading binary object (%s)" % readable_bytes_string(blob_bytes) - ) - try: - yield _BlobTracker(self._stats, blob_bytes) - finally: - pass - - -class _BlobTracker: - def __init__(self, upload_stats, blob_bytes): - self._upload_stats = upload_stats - self._blob_bytes = blob_bytes - - def mark_uploaded(self, is_uploaded): - self._upload_stats.add_blob( - self._blob_bytes, is_skipped=(not is_uploaded) - ) diff --git a/tensorboard/uploader/upload_tracker_test.py b/tensorboard/uploader/upload_tracker_test.py deleted file mode 100644 index 3b5788c7cf1..00000000000 --- a/tensorboard/uploader/upload_tracker_test.py +++ /dev/null @@ -1,395 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for tensorboard.uploader.upload_tracker.""" - - -import sys - -from unittest import mock - -from tensorboard import test as tb_test -from tensorboard.uploader import upload_tracker - - -class ReadableBytesStringTest(tb_test.TestCase): - def testZero(self): - self.assertEqual(upload_tracker.readable_bytes_string(0), "0 B") - - def testLessThan1K(self): - self.assertEqual(upload_tracker.readable_bytes_string(42), "42 B") - - def testBetween1KAnd1M(self): - self.assertEqual(upload_tracker.readable_bytes_string(1337), "1.3 kB") - - def testMoreThan1M(self): - self.assertEqual( - upload_tracker.readable_bytes_string(299792458), "285.9 MB" - ) - - -class UploadStatsTest(tb_test.TestCase): - """Unit tests for the UploadStats class.""" - - def testAddScalar(self): - stats = upload_tracker.UploadStats() - stats.add_scalars(1234) - self.assertEqual(stats.num_scalars, 1234) - stats.add_scalars(4321) - self.assertEqual(stats.num_scalars, 5555) - - def testAddTensor(self): - stats = upload_tracker.UploadStats() - stats.add_tensors( - num_tensors=10, - num_tensors_skipped=0, - tensor_bytes=1000, - tensor_bytes_skipped=0, - ) - self.assertEqual(stats.num_tensors, 10) - self.assertEqual(stats.num_tensors_skipped, 0) - self.assertEqual(stats.tensor_bytes, 1000) - self.assertEqual(stats.tensor_bytes_skipped, 0) - stats.add_tensors( - num_tensors=20, - num_tensors_skipped=5, - tensor_bytes=2000, - tensor_bytes_skipped=500, - ) - self.assertEqual(stats.num_tensors, 30) - self.assertEqual(stats.num_tensors_skipped, 5) - self.assertEqual(stats.tensor_bytes, 3000) - self.assertEqual(stats.tensor_bytes_skipped, 500) - - def testAddTensorsNumTensorsSkippedGreaterThanNumTenosrsErrors(self): - stats = upload_tracker.UploadStats() - with self.assertRaises(AssertionError): - stats.add_tensors( - num_tensors=10, - num_tensors_skipped=12, - tensor_bytes=1000, - tensor_bytes_skipped=0, - ) - - def testAddBlob(self): - stats = upload_tracker.UploadStats() - stats.add_blob(blob_bytes=1000, is_skipped=False) - self.assertEqual(stats.blob_bytes, 1000) - self.assertEqual(stats.blob_bytes_skipped, 0) - stats.add_blob(blob_bytes=2000, is_skipped=True) - self.assertEqual(stats.blob_bytes, 3000) - self.assertEqual(stats.blob_bytes_skipped, 2000) - - def testAddPlugin(self): - stats = upload_tracker.UploadStats() - stats.add_plugin("scalars") - self.assertEqual(stats.plugin_names, set(["scalars"])) - stats.add_plugin("scalars") - self.assertEqual(stats.plugin_names, set(["scalars"])) - stats.add_plugin("histograms") - self.assertEqual(stats.plugin_names, set(["histograms", "scalars"])) - - def testHasNewDataSinceLastSummarizeReturnsFalseInitially(self): - stats = upload_tracker.UploadStats() - self.assertEqual(stats.has_new_data_since_last_summarize(), False) - - def testUploadedSummaryWithTensorsAndBlobs(self): - stats = upload_tracker.UploadStats() - stats.add_scalars(1234) - stats.add_tensors( - num_tensors=50, - num_tensors_skipped=10, - tensor_bytes=2000, - tensor_bytes_skipped=1800, - ) - stats.add_blob(blob_bytes=1000, is_skipped=False) - stats.add_blob(blob_bytes=2000, is_skipped=True) - self.assertEqual(stats.has_new_data_since_last_summarize(), True) - uploaded_summary, skipped_summary = stats.summarize() - self.assertEqual( - uploaded_summary, - "1234 scalars, 40 tensors (200 B), 1 binary objects (1000 B)", - ) - self.assertEqual( - skipped_summary, - "10 tensors (1.8 kB), 1 binary objects (2.0 kB)", - ) - 
self.assertEqual(stats.has_new_data_since_last_summarize(), False) - - def testSummarizeeWithoutTensorsOrBlobs(self): - stats = upload_tracker.UploadStats() - stats.add_scalars(1234) - self.assertEqual(stats.has_new_data_since_last_summarize(), True) - (uploaded_summary, skipped_summary) = stats.summarize() - self.assertEqual( - uploaded_summary, - "1234 scalars, 0 tensors, 0 binary objects", - ) - self.assertIsNone(skipped_summary) - self.assertEqual(stats.has_new_data_since_last_summarize(), False) - - def testHasNewDataSinceLastSummarizeReturnsTrueAfterNewScalars(self): - stats = upload_tracker.UploadStats() - self.assertEqual(stats.has_new_data_since_last_summarize(), False) - stats.add_scalars(1234) - self.assertEqual(stats.has_new_data_since_last_summarize(), True) - stats.summarize() - self.assertEqual(stats.has_new_data_since_last_summarize(), False) - stats.add_scalars(4321) - self.assertEqual(stats.has_new_data_since_last_summarize(), True) - - def testHasNewDataSinceLastSummarizeReturnsTrueAfterNewTensors(self): - stats = upload_tracker.UploadStats() - self.assertEqual(stats.has_new_data_since_last_summarize(), False) - stats.add_scalars(1234) - self.assertEqual(stats.has_new_data_since_last_summarize(), True) - stats.summarize() - self.assertEqual(stats.has_new_data_since_last_summarize(), False) - stats.add_tensors( - num_tensors=10, - num_tensors_skipped=10, - tensor_bytes=1000, - tensor_bytes_skipped=1000, - ) - self.assertEqual(stats.has_new_data_since_last_summarize(), True) - - def testHasNewDataSinceLastSummarizeReturnsTrueAfterNewBlob(self): - stats = upload_tracker.UploadStats() - self.assertEqual(stats.has_new_data_since_last_summarize(), False) - stats.add_scalars(1234) - self.assertEqual(stats.has_new_data_since_last_summarize(), True) - stats.summarize() - self.assertEqual(stats.has_new_data_since_last_summarize(), False) - stats.add_blob(blob_bytes=2000, is_skipped=True) - self.assertEqual(stats.has_new_data_since_last_summarize(), True) - - def testHasDataInitiallyReturnsFalse(self): - stats = upload_tracker.UploadStats() - self.assertEqual(stats.has_data(), False) - - def testHasDataReturnsTrueWithScalars(self): - stats = upload_tracker.UploadStats() - stats.add_scalars(1) - self.assertEqual(stats.has_data(), True) - - def testHasDataReturnsTrueWithUnskippedTensors(self): - stats = upload_tracker.UploadStats() - stats.add_tensors( - num_tensors=10, - num_tensors_skipped=0, - tensor_bytes=1000, - tensor_bytes_skipped=0, - ) - self.assertEqual(stats.has_data(), True) - - def testHasDataReturnsTrueWithSkippedTensors(self): - stats = upload_tracker.UploadStats() - stats.add_tensors( - num_tensors=10, - num_tensors_skipped=10, - tensor_bytes=1000, - tensor_bytes_skipped=1000, - ) - self.assertEqual(stats.has_data(), True) - - def testHasDataReturnsTrueWithUnskippedBlob(self): - stats = upload_tracker.UploadStats() - stats.add_blob(blob_bytes=1000, is_skipped=False) - self.assertEqual(stats.has_data(), True) - - def testHasDataReturnsTrueWithSkippedBlob(self): - stats = upload_tracker.UploadStats() - stats.add_blob(blob_bytes=1000, is_skipped=True) - self.assertEqual(stats.has_data(), True) - - -class UploadTrackerTest(tb_test.TestCase): - """Test for the UploadTracker class.""" - - def setUp(self): - super().setUp() - self.cumulative_bar = mock.MagicMock() - self.skipped_bar = mock.MagicMock() - self.uploading_bar = mock.MagicMock() - self.mock_write = mock.MagicMock() - self.mock_stdout_write = mock.patch.object( - sys.stdout, "write", self.mock_write - ) - 
self.mock_stdout_write.start() - self.mock_flush = mock.MagicMock() - self.mock_stdout_flush = mock.patch.object( - sys.stdout, "flush", self.mock_flush - ) - self.mock_stdout_flush.start() - - def tearDown(self): - self.mock_stdout_write.stop() - self.mock_stdout_flush.stop() - super().tearDown() - - def testSendTracker(self): - tracker = upload_tracker.UploadTracker(verbosity=1) - with tracker.send_tracker(): - self.assertEqual(self.mock_write.call_count, 2) - self.assertEqual(self.mock_flush.call_count, 2) - self.assertIn( - "Data upload starting...", - self.mock_write.call_args[0][0], - ) - self.assertEqual(self.mock_write.call_count, 3) - self.assertEqual(self.mock_flush.call_count, 3) - self.assertIn( - "Listening for new data in logdir...", - self.mock_write.call_args[0][0], - ) - self.assertEqual(tracker.has_data(), False) - - def testSendTrackerWithVerbosity0(self): - tracker = upload_tracker.UploadTracker(verbosity=0) - with tracker.send_tracker(): - self.assertEqual(self.mock_write.call_count, 0) - self.assertEqual(self.mock_flush.call_count, 0) - self.assertEqual(self.mock_write.call_count, 0) - self.assertEqual(self.mock_flush.call_count, 0) - - def testScalarsTracker(self): - tracker = upload_tracker.UploadTracker(verbosity=1) - with tracker.scalars_tracker(123): - self.assertEqual(self.mock_write.call_count, 1) - self.assertEqual(self.mock_flush.call_count, 1) - self.assertIn( - "Uploading 123 scalars...", - self.mock_write.call_args[0][0], - ) - self.assertEqual(self.mock_write.call_count, 1) - self.assertEqual(self.mock_flush.call_count, 1) - self.assertEqual(tracker.has_data(), True) - - def testScalarsTrackerWithVerbosity0(self): - tracker = upload_tracker.UploadTracker(verbosity=0) - with tracker.scalars_tracker(123): - self.assertEqual(self.mock_write.call_count, 0) - self.assertEqual(self.mock_flush.call_count, 0) - self.assertEqual(self.mock_write.call_count, 0) - self.assertEqual(self.mock_flush.call_count, 0) - - def testTensorsTrackerWithSkippedTensors(self): - tracker = upload_tracker.UploadTracker(verbosity=1) - with tracker.tensors_tracker( - num_tensors=200, - num_tensors_skipped=50, - tensor_bytes=6000, - tensor_bytes_skipped=4000, - ): - self.assertEqual(self.mock_write.call_count, 1) - self.assertEqual(self.mock_flush.call_count, 1) - self.assertIn( - "Uploading 150 tensors (2.0 kB) (Skipping 50 tensors, 3.9 kB)", - self.mock_write.call_args[0][0], - ) - self.assertEqual(tracker.has_data(), True) - - def testTensorsTrackerWithVerbosity0(self): - tracker = upload_tracker.UploadTracker(verbosity=0) - with tracker.tensors_tracker( - num_tensors=200, - num_tensors_skipped=50, - tensor_bytes=6000, - tensor_bytes_skipped=4000, - ): - self.assertEqual(self.mock_write.call_count, 0) - self.assertEqual(self.mock_flush.call_count, 0) - self.assertEqual(self.mock_write.call_count, 0) - self.assertEqual(self.mock_flush.call_count, 0) - - def testTensorsTrackerWithoutSkippedTensors(self): - tracker = upload_tracker.UploadTracker(verbosity=1) - with tracker.tensors_tracker( - num_tensors=200, - num_tensors_skipped=0, - tensor_bytes=6000, - tensor_bytes_skipped=0, - ): - self.assertEqual(self.mock_write.call_count, 1) - self.assertEqual(self.mock_flush.call_count, 1) - self.assertIn( - "Uploading 200 tensors (5.9 kB)", - self.mock_write.call_args[0][0], - ) - self.assertEqual(tracker.has_data(), True) - - def testBlobTrackerUploaded(self): - tracker = upload_tracker.UploadTracker(verbosity=1) - with tracker.blob_tracker(blob_bytes=2048) as blob_tracker: - 
self.assertEqual(self.mock_write.call_count, 1) - self.assertEqual(self.mock_flush.call_count, 1) - self.assertIn( - "Uploading binary object (2.0 kB)", - self.mock_write.call_args[0][0], - ) - - def testBlobTrackerWithVerbosity0(self): - tracker = upload_tracker.UploadTracker(verbosity=0) - with tracker.blob_tracker(blob_bytes=2048): - self.assertEqual(self.mock_write.call_count, 0) - self.assertEqual(self.mock_flush.call_count, 0) - self.assertEqual(self.mock_write.call_count, 0) - self.assertEqual(self.mock_flush.call_count, 0) - - def testBlobTrackerNotUploaded(self): - tracker = upload_tracker.UploadTracker(verbosity=1) - with tracker.send_tracker(): - self.assertEqual(self.mock_write.call_count, 2) - self.assertEqual(self.mock_flush.call_count, 2) - self.assertIn( - "Started scanning", - self.mock_write.call_args_list[0][0][0], - ) - with tracker.blob_tracker( - blob_bytes=2048 * 1024 * 1024 - ) as blob_tracker: - self.assertEqual(self.mock_write.call_count, 3) - self.assertEqual(self.mock_flush.call_count, 3) - self.assertIn( - "Uploading binary object (2048.0 MB)", - self.mock_write.call_args[0][0], - ) - blob_tracker.mark_uploaded(is_uploaded=False) - self.assertEqual(self.mock_write.call_count, 6) - self.assertEqual(self.mock_flush.call_count, 5) - self.assertIn( - "Total uploaded: 0 scalars, 0 tensors, 0 binary objects\n", - self.mock_write.call_args_list[3][0][0], - ) - self.assertIn( - "Total skipped: 1 binary objects (2048.0 MB)\n", - self.mock_write.call_args_list[4][0][0], - ) - self.assertEqual(tracker.has_data(), True) - - def testInvalidVerbosityRaisesError(self): - with self.assertRaises(ValueError): - upload_tracker.UploadTracker(verbosity="1") - with self.assertRaises(ValueError): - upload_tracker.UploadTracker(verbosity=-1) - with self.assertRaises(ValueError): - upload_tracker.UploadTracker(verbosity=0.5) - with self.assertRaises(ValueError): - upload_tracker.UploadTracker(verbosity=100) - with self.assertRaises(ValueError): - upload_tracker.UploadTracker(verbosity=None) - - -if __name__ == "__main__": - tb_test.main() diff --git a/tensorboard/uploader/uploader.py b/tensorboard/uploader/uploader.py index 3654524a9e9..79a689e7d1e 100644 --- a/tensorboard/uploader/uploader.py +++ b/tensorboard/uploader/uploader.py @@ -15,28 +15,11 @@ """Uploads a TensorBoard logdir to TensorBoard.dev.""" -import contextlib -import functools -import time - import grpc -from google.protobuf import message -from tensorboard.compat.proto import graph_pb2 -from tensorboard.compat.proto import summary_pb2 -from tensorboard.compat.proto import types_pb2 from tensorboard.uploader.proto import write_service_pb2 -from tensorboard.uploader import logdir_loader -from tensorboard.uploader import upload_tracker -from tensorboard.uploader import util -from tensorboard.backend import process_graph -from tensorboard.backend.event_processing import directory_loader -from tensorboard.backend.event_processing import event_file_loader -from tensorboard.backend.event_processing import io_wrapper -from tensorboard.plugins.graph import metadata as graphs_metadata from tensorboard.util import grpc_util from tensorboard.util import tb_logging -from tensorboard.util import tensor_util # Minimum length of a logdir polling cycle in seconds. 
Shorter cycles will # sleep to avoid spinning over the logdir, which isn't great for disks and can @@ -55,179 +38,6 @@ logger = tb_logging.get_logger() -class TensorBoardUploader: - """Uploads a TensorBoard logdir to TensorBoard.dev.""" - - def __init__( - self, - writer_client, - logdir, - allowed_plugins, - upload_limits, - logdir_poll_rate_limiter=None, - rpc_rate_limiter=None, - tensor_rpc_rate_limiter=None, - blob_rpc_rate_limiter=None, - name=None, - description=None, - verbosity=None, - one_shot=None, - ): - """Constructs a TensorBoardUploader. - - Args: - writer_client: a TensorBoardWriterService stub instance - logdir: path of the log directory to upload - allowed_plugins: collection of string plugin names; events will only - be uploaded if their time series's metadata specifies one of these - plugin names - upload_limits: instance of tensorboard.service.UploadLimits proto. - logdir_poll_rate_limiter: a `RateLimiter` to use to limit logdir - polling frequency, to avoid thrashing disks, especially on networked - file systems - rpc_rate_limiter: a `RateLimiter` to use to limit write RPC frequency. - Note this limit applies at the level of single RPCs in the Scalar - and Tensor case, but at the level of an entire blob upload in the - Blob case-- which may require a few preparatory RPCs and a stream - of chunks. Note the chunk stream is internally rate-limited by - backpressure from the server, so it is not a concern that we do not - explicitly rate-limit within the stream here. - name: String name to assign to the experiment. - description: String description to assign to the experiment. - verbosity: Level of verbosity, an integer. Supported value: - 0 - No upload statistics is printed. - 1 - Print upload statistics while uploading data (default). - one_shot: Once uploading starts, upload only the existing data in - the logdir and then return immediately, instead of the default - behavior of continuing to listen for new data in the logdir and - upload them when it appears. 
- """ - self._api = writer_client - self._logdir = logdir - self._allowed_plugins = frozenset(allowed_plugins) - self._upload_limits = upload_limits - - self._name = name - self._description = description - self._verbosity = 1 if verbosity is None else verbosity - self._one_shot = False if one_shot is None else one_shot - self._request_sender = None - self._experiment_id = None - if logdir_poll_rate_limiter is None: - self._logdir_poll_rate_limiter = util.RateLimiter( - _MIN_LOGDIR_POLL_INTERVAL_SECS - ) - else: - self._logdir_poll_rate_limiter = logdir_poll_rate_limiter - - if rpc_rate_limiter is None: - self._rpc_rate_limiter = util.RateLimiter( - self._upload_limits.min_scalar_request_interval / 1000 - ) - else: - self._rpc_rate_limiter = rpc_rate_limiter - - if tensor_rpc_rate_limiter is None: - self._tensor_rpc_rate_limiter = util.RateLimiter( - self._upload_limits.min_tensor_request_interval / 1000 - ) - else: - self._tensor_rpc_rate_limiter = tensor_rpc_rate_limiter - - if blob_rpc_rate_limiter is None: - self._blob_rpc_rate_limiter = util.RateLimiter( - self._upload_limits.min_blob_request_interval / 1000 - ) - else: - self._blob_rpc_rate_limiter = blob_rpc_rate_limiter - - active_filter = ( - lambda secs: secs + _EVENT_FILE_INACTIVE_SECS >= time.time() - ) - directory_loader_factory = functools.partial( - directory_loader.DirectoryLoader, - loader_factory=event_file_loader.TimestampedEventFileLoader, - path_filter=io_wrapper.IsTensorFlowEventsFile, - active_filter=active_filter, - ) - self._logdir_loader = logdir_loader.LogdirLoader( - self._logdir, directory_loader_factory - ) - self._tracker = upload_tracker.UploadTracker( - verbosity=self._verbosity, one_shot=self._one_shot - ) - - def has_data(self) -> bool: - """Returns this object's upload tracker.""" - return self._tracker.has_data() - - @property - def experiment_id(self) -> str: - """Returns the experiment_id associated with this uploader. - - May be none if no experiment is set, for instance, if - `create_experiment` has not been called. - """ - return self._experiment_id - - def create_experiment(self): - """Creates an Experiment for this upload session and returns the ID.""" - logger.info("Creating experiment") - request = write_service_pb2.CreateExperimentRequest( - name=self._name, description=self._description - ) - response = grpc_util.call_with_retries( - self._api.CreateExperiment, request - ) - self._request_sender = _BatchedRequestSender( - response.experiment_id, - self._api, - allowed_plugins=self._allowed_plugins, - upload_limits=self._upload_limits, - rpc_rate_limiter=self._rpc_rate_limiter, - tensor_rpc_rate_limiter=self._tensor_rpc_rate_limiter, - blob_rpc_rate_limiter=self._blob_rpc_rate_limiter, - tracker=self._tracker, - ) - self._experiment_id = response.experiment_id - return response.experiment_id - - def start_uploading(self): - """Uploads data from the logdir. - - This will continuously scan the logdir, uploading as data is added - unless the uploader was built with the _one_shot option, in which - case it will terminate after the first scan. - - Raises: - RuntimeError: If `create_experiment` has not yet been called. - ExperimentNotFoundError: If the experiment is deleted during the - course of the upload. 
- """ - if self._request_sender is None: - raise RuntimeError( - "Must call create_experiment() before start_uploading()" - ) - while True: - self._logdir_poll_rate_limiter.tick() - self._upload_once() - if self._one_shot: - break - - def _upload_once(self): - """Runs one upload cycle, sending zero or more RPCs.""" - logger.info("Starting an upload cycle") - - sync_start_time = time.time() - self._logdir_loader.synchronize_runs() - sync_duration_secs = time.time() - sync_start_time - logger.info("Logdir sync took %.3f seconds", sync_duration_secs) - - run_to_events = self._logdir_loader.get_run_events() - with self._tracker.send_tracker(): - self._request_sender.send_requests(run_to_events) - - def update_experiment_metadata( writer_client, experiment_id, name=None, description=None ): @@ -308,923 +118,3 @@ class ExperimentNotFoundError(RuntimeError): class PermissionDeniedError(RuntimeError): pass - - -class _OutOfSpaceError(Exception): - """Action could not proceed without overflowing request budget. - - This is a signaling exception (like `StopIteration`) used internally - by `_*RequestSender`; it does not mean that anything has gone wrong. - """ - - pass - - -class _BatchedRequestSender: - """Helper class for building requests that fit under a size limit. - - This class maintains stateful request builders for each of the possible - request types (scalars, tensors, and blobs). These accumulate batches - independently, each maintaining its own byte budget and emitting a request - when the batch becomes full. As a consequence, events of different types - will likely be sent to the backend out of order. E.g., in the extreme case, - a single tensor-flavored request may be sent only when the event stream is - exhausted, even though many more recent scalar events were sent earlier. - - This class is not threadsafe. Use external synchronization if - calling its methods concurrently. - """ - - def __init__( - self, - experiment_id, - api, - allowed_plugins, - upload_limits, - rpc_rate_limiter, - tensor_rpc_rate_limiter, - blob_rpc_rate_limiter, - tracker, - ): - # Map from `(run_name, tag_name)` to `SummaryMetadata` if the time - # series is a scalar time series, else to `_NON_SCALAR_TIME_SERIES`. - self._tag_metadata = {} - self._allowed_plugins = frozenset(allowed_plugins) - self._tracker = tracker - self._scalar_request_sender = _ScalarBatchedRequestSender( - experiment_id, - api, - rpc_rate_limiter, - upload_limits.max_scalar_request_size, - tracker=self._tracker, - ) - self._tensor_request_sender = _TensorBatchedRequestSender( - experiment_id, - api, - tensor_rpc_rate_limiter, - upload_limits.max_tensor_request_size, - upload_limits.max_tensor_point_size, - tracker=self._tracker, - ) - self._blob_request_sender = _BlobRequestSender( - experiment_id, - api, - blob_rpc_rate_limiter, - upload_limits.max_blob_request_size, - upload_limits.max_blob_size, - tracker=self._tracker, - ) - self._tracker = tracker - - def send_requests(self, run_to_events): - """Accepts a stream of TF events and sends batched write RPCs. - - Each sent request will be batched, the size of each batch depending on - the type of data (Scalar vs Tensor vs Blob) being sent. - - Args: - run_to_events: Mapping from run name to generator of `tf.Event` - values, as returned by `LogdirLoader.get_run_events`. - - Raises: - RuntimeError: If no progress can be made because even a single - point is too large (say, due to a gigabyte-long tag name). 
- """ - - for (run_name, event, value) in self._run_values(run_to_events): - time_series_key = (run_name, value.tag) - - # The metadata for a time series is memorized on the first event. - # If later events arrive with a mismatching plugin_name, they are - # ignored with a warning. - metadata = self._tag_metadata.get(time_series_key) - first_in_time_series = False - if metadata is None: - first_in_time_series = True - metadata = value.metadata - self._tag_metadata[time_series_key] = metadata - - plugin_name = metadata.plugin_data.plugin_name - # TODO(cais): Call self._tracker.add_plugin_name() to track the - # data for what plugins have been uploaded. - if value.HasField("metadata") and ( - plugin_name != value.metadata.plugin_data.plugin_name - ): - logger.warning( - "Mismatching plugin names for %s. Expected %s, found %s.", - time_series_key, - metadata.plugin_data.plugin_name, - value.metadata.plugin_data.plugin_name, - ) - continue - if plugin_name not in self._allowed_plugins: - if first_in_time_series: - logger.info( - "Skipping time series %r with unsupported plugin name %r", - time_series_key, - plugin_name, - ) - continue - - if metadata.data_class == summary_pb2.DATA_CLASS_SCALAR: - self._scalar_request_sender.add_event( - run_name, event, value, metadata - ) - elif metadata.data_class == summary_pb2.DATA_CLASS_TENSOR: - self._tensor_request_sender.add_event( - run_name, event, value, metadata - ) - elif metadata.data_class == summary_pb2.DATA_CLASS_BLOB_SEQUENCE: - self._blob_request_sender.add_event( - run_name, event, value, metadata - ) - - self._scalar_request_sender.flush() - self._tensor_request_sender.flush() - self._blob_request_sender.flush() - - def _run_values(self, run_to_events): - """Helper generator to create a single stream of work items. - - Note that `dataclass_compat` may emit multiple variants of - the same event, for backwards compatibility. Thus this stream should - be filtered to obtain the desired version of each event. Here, we - ignore any event that does not have a `summary` field. - - Furthermore, the events emitted here could contain values that do not - have `metadata.data_class` set; these too should be ignored. In - `_send_summary_value(...)` above, we switch on `metadata.data_class` - and drop any values with an unknown (i.e., absent or unrecognized) - `data_class`. - """ - # Note that this join in principle has deletion anomalies: if the input - # stream contains runs with no events, or events with no values, we'll - # lose that information. This is not a problem: we would need to prune - # such data from the request anyway. - for (run_name, events) in run_to_events.items(): - for event in events: - _filter_graph_defs(event) - for value in event.summary.value: - yield (run_name, event, value) - - -class _ScalarBatchedRequestSender: - """Helper class for building requests that fit under a size limit. - - This class accumulates a current request. `add_event(...)` may or may not - send the request (and start a new one). After all `add_event(...)` calls - are complete, a final call to `flush()` is needed to send the final request. - - This class is not threadsafe. Use external synchronization if calling its - methods concurrently. 
- """ - - def __init__( - self, - experiment_id, - api, - rpc_rate_limiter, - max_request_size, - tracker, - ): - if experiment_id is None: - raise ValueError("experiment_id cannot be None") - self._experiment_id = experiment_id - self._api = api - self._rpc_rate_limiter = rpc_rate_limiter - self._byte_budget_manager = _ByteBudgetManager(max_request_size) - self._tracker = tracker - - self._runs = {} # cache: map from run name to `Run` proto in request - self._tags = ( - {} - ) # cache: map from `(run, tag)` to `Tag` proto in run in request - self._new_request() - - def _new_request(self): - """Allocates a new request and refreshes the budget.""" - self._request = write_service_pb2.WriteScalarRequest() - self._runs.clear() - self._tags.clear() - self._num_values = 0 - self._request.experiment_id = self._experiment_id - self._byte_budget_manager.reset(self._request) - - def add_event(self, run_name, event, value, metadata): - """Attempts to add the given event to the current request. - - If the event cannot be added to the current request because the byte - budget is exhausted, the request is flushed, and the event is added - to the next request. - """ - try: - self._add_event_internal(run_name, event, value, metadata) - except _OutOfSpaceError: - self.flush() - # Try again. This attempt should never produce OutOfSpaceError - # because we just flushed. - try: - self._add_event_internal(run_name, event, value, metadata) - except _OutOfSpaceError: - raise RuntimeError("add_event failed despite flush") - - def _add_event_internal(self, run_name, event, value, metadata): - run_proto = self._runs.get(run_name) - if run_proto is None: - run_proto = self._create_run(run_name) - self._runs[run_name] = run_proto - tag_proto = self._tags.get((run_name, value.tag)) - if tag_proto is None: - tag_proto = self._create_tag(run_proto, value.tag, metadata) - self._tags[(run_name, value.tag)] = tag_proto - self._create_point(tag_proto, event, value) - self._num_values += 1 - - def flush(self): - """Sends the active request after removing empty runs and tags. - - Starts a new, empty active request. - """ - request = self._request - _prune_empty_tags_and_runs(request) - if not request.runs: - return - - self._rpc_rate_limiter.tick() - - with _request_logger( - request, request.runs - ), self._tracker.scalars_tracker(self._num_values): - try: - # TODO(@nfelt): execute this RPC asynchronously. - grpc_util.call_with_retries(self._api.WriteScalar, request) - except grpc.RpcError as e: - if e.code() == grpc.StatusCode.NOT_FOUND: - raise ExperimentNotFoundError() - logger.error("Upload call failed with error %s", e) - - self._new_request() - - def _create_run(self, run_name): - """Adds a run to the live request, if there's space. - - Args: - run_name: String name of the run to add. - - Returns: - The `WriteScalarRequest.Run` that was added to `request.runs`. - - Raises: - _OutOfSpaceError: If adding the run would exceed the remaining - request budget. - """ - run_proto = self._request.runs.add(name=run_name) - self._byte_budget_manager.add_run(run_proto) - return run_proto - - def _create_tag(self, run_proto, tag_name, metadata): - """Adds a tag for the given value, if there's space. - - Args: - run_proto: `WriteScalarRequest.Run` proto to which to add a tag. - tag_name: String name of the tag to add (as `value.tag`). - metadata: TensorBoard `SummaryMetadata` proto from the first - occurrence of this time series. - - Returns: - The `WriteScalarRequest.Tag` that was added to `run_proto.tags`. 
- - Raises: - _OutOfSpaceError: If adding the tag would exceed the remaining - request budget. - """ - tag_proto = run_proto.tags.add(name=tag_name) - tag_proto.metadata.CopyFrom(metadata) - self._byte_budget_manager.add_tag(tag_proto) - return tag_proto - - def _create_point(self, tag_proto, event, value): - """Adds a scalar point to the given tag, if there's space. - - Args: - tag_proto: `WriteScalarRequest.Tag` proto to which to add a point. - event: Enclosing `Event` proto with the step and wall time data. - value: Scalar `Summary.Value` proto with the actual scalar data. - - Raises: - _OutOfSpaceError: If adding the point would exceed the remaining - request budget. - """ - point = tag_proto.points.add() - point.step = event.step - # TODO(@nfelt): skip tensor roundtrip for Value with simple_value set - point.value = tensor_util.make_ndarray(value.tensor).item() - util.set_timestamp(point.wall_time, event.wall_time) - try: - self._byte_budget_manager.add_point(point) - except _OutOfSpaceError: - tag_proto.points.pop() - raise - - -class _TensorBatchedRequestSender: - """Helper class for building WriteTensor() requests that fit under a size limit. - - This class accumulates a current request. `add_event(...)` may or may not - send the request (and start a new one). After all `add_event(...)` calls - are complete, a final call to `flush()` is needed to send the final request. - - This class is not threadsafe. Use external synchronization if calling its - methods concurrently. - """ - - def __init__( - self, - experiment_id, - api, - rpc_rate_limiter, - max_request_size, - max_tensor_point_size, - tracker, - ): - if experiment_id is None: - raise ValueError("experiment_id cannot be None") - self._experiment_id = experiment_id - self._api = api - self._rpc_rate_limiter = rpc_rate_limiter - self._byte_budget_manager = _ByteBudgetManager(max_request_size) - self._max_tensor_point_size = max_tensor_point_size - self._tracker = tracker - - self._runs = {} # cache: map from run name to `Run` proto in request - self._tags = ( - {} - ) # cache: map from `(run, tag)` to `Tag` proto in run in request - self._new_request() - - def _new_request(self): - """Allocates a new request and refreshes the budget.""" - - self._request = write_service_pb2.WriteTensorRequest() - self._runs.clear() - self._tags.clear() - self._request.experiment_id = self._experiment_id - self._byte_budget_manager.reset(self._request) - self._num_values = 0 - self._num_values_skipped = 0 - self._tensor_bytes = 0 - self._tensor_bytes_skipped = 0 - - def add_event(self, run_name, event, value, metadata): - """Attempts to add the given event to the current request. - - If the event cannot be added to the current request because the byte - budget is exhausted, the request is flushed, and the event is added - to the next request. - """ - try: - self._add_event_internal(run_name, event, value, metadata) - except _OutOfSpaceError: - self.flush() - # Try again. This attempt should never produce OutOfSpaceError - # because we just flushed. 
-            try:
-                self._add_event_internal(run_name, event, value, metadata)
-            except _OutOfSpaceError:
-                raise RuntimeError("add_event failed despite flush")
-
-    def _add_event_internal(self, run_name, event, value, metadata):
-        run_proto = self._runs.get(run_name)
-        if run_proto is None:
-            run_proto = self._create_run(run_name)
-            self._runs[run_name] = run_proto
-        tag_proto = self._tags.get((run_name, value.tag))
-        if tag_proto is None:
-            tag_proto = self._create_tag(run_proto, value.tag, metadata)
-            self._tags[(run_name, value.tag)] = tag_proto
-        self._create_point(tag_proto, event, value, run_name)
-        self._num_values += 1
-
-    def flush(self):
-        """Sends the active request after removing empty runs and tags.
-
-        Starts a new, empty active request.
-        """
-        request = self._request
-        _prune_empty_tags_and_runs(request)
-        if not request.runs:
-            return
-
-        self._rpc_rate_limiter.tick()
-
-        with _request_logger(request, request.runs):
-            with self._tracker.tensors_tracker(
-                self._num_values,
-                self._num_values_skipped,
-                self._tensor_bytes,
-                self._tensor_bytes_skipped,
-            ):
-                try:
-                    grpc_util.call_with_retries(self._api.WriteTensor, request)
-                except grpc.RpcError as e:
-                    if e.code() == grpc.StatusCode.NOT_FOUND:
-                        raise ExperimentNotFoundError()
-                    logger.error("Upload call failed with error %s", e)
-
-        self._new_request()
-
-    def _create_run(self, run_name):
-        """Adds a run to the live request, if there's space.
-
-        Args:
-          run_name: String name of the run to add.
-
-        Returns:
-          The `WriteTensorRequest.Run` that was added to `request.runs`.
-
-        Raises:
-          _OutOfSpaceError: If adding the run would exceed the remaining
-            request budget.
-        """
-        run_proto = self._request.runs.add(name=run_name)
-        self._byte_budget_manager.add_run(run_proto)
-        return run_proto
-
-    def _create_tag(self, run_proto, tag_name, metadata):
-        """Adds a tag for the given value, if there's space.
-
-        Args:
-          run_proto: `WriteTensorRequest.Run` proto to which to add a tag.
-          tag_name: String name of the tag to add (as `value.tag`).
-          metadata: TensorBoard `SummaryMetadata` proto from the first
-            occurrence of this time series.
-
-        Returns:
-          The `WriteTensorRequest.Tag` that was added to `run_proto.tags`.
-
-        Raises:
-          _OutOfSpaceError: If adding the tag would exceed the remaining
-            request budget.
-        """
-        tag_proto = run_proto.tags.add(name=tag_name)
-        tag_proto.metadata.CopyFrom(metadata)
-        self._byte_budget_manager.add_tag(tag_proto)
-        return tag_proto
-
-    def _create_point(self, tag_proto, event, value, run_name):
-        """Adds a tensor point to the given tag, if there's space.
-
-        Args:
-          tag_proto: `WriteTensorRequest.Tag` proto to which to add a point.
-          event: Enclosing `Event` proto with the step and wall time data.
-          value: Tensor `Summary.Value` proto with the actual tensor data.
-          run_name: Name of the run, only used for error reporting.
-
-        Raises:
-          _OutOfSpaceError: If adding the point would exceed the remaining
-            request budget.
-        """
-        point = tag_proto.points.add()
-        point.step = event.step
-        point.value.CopyFrom(value.tensor)
-        util.set_timestamp(point.wall_time, event.wall_time)
-
-        self._tensor_bytes += point.value.ByteSize()
-        if point.value.ByteSize() > self._max_tensor_point_size:
-            logger.warning(
-                "Tensor (run:%s, tag:%s, step: %d) too large; skipping. 
" - "Size %d exceeds limit of %d bytes.", - run_name, - tag_proto.name, - event.step, - point.value.ByteSize(), - self._max_tensor_point_size, - ) - tag_proto.points.pop() - self._num_values_skipped += 1 - self._tensor_bytes_skipped += point.value.ByteSize() - return - - self._validate_tensor_value( - value.tensor, value.tag, event.step, event.wall_time - ) - - try: - self._byte_budget_manager.add_point(point) - except _OutOfSpaceError: - tag_proto.points.pop() - raise - - def _validate_tensor_value(self, tensor_proto, tag, step, wall_time): - """Validate a TensorProto by attempting to parse it.""" - try: - tensor_util.make_ndarray(tensor_proto) - except ValueError as error: - raise ValueError( - "The uploader failed to upload a tensor. This seems to be " - "due to a malformation in the tensor, which may be caused by " - "a bug in the process that wrote the tensor.\n\n" - "The tensor has tag '%s' and is at step %d and wall_time %.6f.\n\n" - "Original error:\n%s" % (tag, step, wall_time, error) - ) - - -class _ByteBudgetManager: - """Helper class for managing the request byte budget for certain RPCs. - - This should be used for RPCs that organize data by Runs, Tags, and Points, - specifically WriteScalar and WriteTensor. - - Any call to add_run(), add_tag(), or add_point() may raise an - _OutOfSpaceError, which is non-fatal. It signals to the caller that they - should flush the current request and begin a new one. - - For more information on the protocol buffer encoding and how byte cost - can be calculated, visit: - - https://developers.google.com/protocol-buffers/docs/encoding - """ - - def __init__(self, max_bytes): - # The remaining number of bytes that we may yet add to the request. - self._byte_budget = None # type: int - self._max_bytes = max_bytes - - def reset(self, base_request): - """Resets the byte budget and calculates the cost of the base request. - - Args: - base_request: Base request. - - Raises: - _OutOfSpaceError: If the size of the request exceeds the entire - request byte budget. - """ - self._byte_budget = self._max_bytes - self._byte_budget -= base_request.ByteSize() - if self._byte_budget < 0: - raise RuntimeError("Byte budget too small for base request") - - def add_run(self, run_proto): - """Integrates the cost of a run proto into the byte budget. - - Args: - run_proto: The proto representing a run. - - Raises: - _OutOfSpaceError: If adding the run would exceed the remaining request - budget. - """ - cost = ( - # The size of the run proto without any tag fields set. - run_proto.ByteSize() - # The size of the varint that describes the length of the run - # proto. We can't yet know the final size of the run proto -- we - # haven't yet set any tag or point values -- so we can't know the - # final size of this length varint. We conservatively assume it is - # maximum size. - + _MAX_VARINT64_LENGTH_BYTES - # The size of the proto key. - + 1 - ) - if cost > self._byte_budget: - raise _OutOfSpaceError() - self._byte_budget -= cost - - def add_tag(self, tag_proto): - """Integrates the cost of a tag proto into the byte budget. - - Args: - tag_proto: The proto representing a tag. - - Raises: - _OutOfSpaceError: If adding the tag would exceed the remaining request - budget. - """ - cost = ( - # The size of the tag proto without any tag fields set. - tag_proto.ByteSize() - # The size of the varint that describes the length of the tag - # proto. 
We can't yet know the final size of the tag proto -- we - # haven't yet set any point values -- so we can't know the final - # size of this length varint. We conservatively assume it is maximum - # size. - + _MAX_VARINT64_LENGTH_BYTES - # The size of the proto key. - + 1 - ) - if cost > self._byte_budget: - raise _OutOfSpaceError() - self._byte_budget -= cost - - def add_point(self, point_proto): - """Integrates the cost of a point proto into the byte budget. - - Args: - point_proto: The proto representing a point. - - Raises: - _OutOfSpaceError: If adding the point would exceed the remaining request - budget. - """ - submessage_cost = point_proto.ByteSize() - cost = ( - # The size of the point proto. - submessage_cost - # The size of the varint that describes the length of the point - # proto. - + _varint_cost(submessage_cost) - # The size of the proto key. - + 1 - ) - if cost > self._byte_budget: - raise _OutOfSpaceError() - self._byte_budget -= cost - - -class _BlobRequestSender: - """Uploader for blob-type event data. - - Unlike the other types, this class does not accumulate events in batches; - every blob is sent individually and immediately. Nonetheless we retain - the `add_event()`/`flush()` structure for symmetry. - - This class is not threadsafe. Use external synchronization if calling its - methods concurrently. - """ - - def __init__( - self, - experiment_id, - api, - rpc_rate_limiter, - max_blob_request_size, - max_blob_size, - tracker, - ): - if experiment_id is None: - raise ValueError("experiment_id cannot be None") - self._experiment_id = experiment_id - self._api = api - self._rpc_rate_limiter = rpc_rate_limiter - self._max_blob_request_size = max_blob_request_size - self._max_blob_size = max_blob_size - self._tracker = tracker - - # Start in the empty state, just like self._new_request(). - self._run_name = None - self._event = None - self._value = None - self._metadata = None - - def _new_request(self): - """Declares the previous event complete.""" - self._run_name = None - self._event = None - self._value = None - self._metadata = None - - def add_event( - self, - run_name, - event, - value, - metadata, - ): - """Attempts to add the given event to the current request. - - If the event cannot be added to the current request because the byte - budget is exhausted, the request is flushed, and the event is added - to the next request. - """ - if self._value: - raise RuntimeError("Tried to send blob while another is pending") - self._run_name = run_name - self._event = event # provides step and possibly plugin_name - self._value = value - # TODO(soergel): should we really unpack the tensor here, or ship - # it wholesale and unpack server side, or something else? - # TODO(soergel): can we extract the proto fields directly instead? - self._blobs = tensor_util.make_ndarray(self._value.tensor) - if self._blobs.ndim == 1: - self._metadata = metadata - self.flush() - else: - logger.warning( - "A blob sequence must be represented as a rank-1 Tensor. " - "Provided data has rank %d, for run %s, tag %s, step %s ('%s' plugin) .", - self._blobs.ndim, - run_name, - self._value.tag, - self._event.step, - metadata.plugin_data.plugin_name, - ) - # Skip this upload. 
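To make the budget arithmetic in `_ByteBudgetManager` above concrete, here is a minimal sketch (an editorial illustration, not part of the patched files) of the cost formula used by `add_point`; the 300-byte submessage size is a made-up example, and the helper mirrors the `_varint_cost` function defined later in this module:

    # Cost charged against the budget for one point proto, per add_point():
    # payload bytes + the length-prefix varint + one byte for the field key.
    def varint_cost(n):
        result = 1
        while n >= 128:
            result += 1
            n >>= 7
        return result

    submessage_cost = 300  # hypothetical point_proto.ByteSize()
    total_cost = submessage_cost + varint_cost(submessage_cost) + 1  # == 303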
- self._new_request() - - def flush(self): - """Sends the current blob sequence fully, and clears it to make way for the next.""" - if self._value: - blob_sequence_id = self._get_or_create_blob_sequence() - logger.info( - "Sending %d blobs for sequence id: %s", - len(self._blobs), - blob_sequence_id, - ) - - sent_blobs = 0 - for seq_index, blob in enumerate(self._blobs): - # Note the _send_blob() stream is internally flow-controlled. - # This rate limit applies to *starting* the stream. - self._rpc_rate_limiter.tick() - with self._tracker.blob_tracker(len(blob)) as blob_tracker: - sent_blobs += self._send_blob( - blob_sequence_id, seq_index, blob - ) - blob_tracker.mark_uploaded(bool(sent_blobs)) - - logger.info( - "Sent %d of %d blobs for sequence id: %s", - sent_blobs, - len(self._blobs), - blob_sequence_id, - ) - - self._new_request() - - def _get_or_create_blob_sequence(self): - request = write_service_pb2.GetOrCreateBlobSequenceRequest( - experiment_id=self._experiment_id, - run=self._run_name, - tag=self._value.tag, - step=self._event.step, - final_sequence_length=len(self._blobs), - metadata=self._metadata, - ) - util.set_timestamp(request.wall_time, self._event.wall_time) - with _request_logger(request): - try: - # TODO(@nfelt): execute this RPC asynchronously. - response = grpc_util.call_with_retries( - self._api.GetOrCreateBlobSequence, request - ) - blob_sequence_id = response.blob_sequence_id - except grpc.RpcError as e: - if e.code() == grpc.StatusCode.NOT_FOUND: - raise ExperimentNotFoundError() - logger.error("Upload call failed with error %s", e) - # TODO(soergel): clean up - raise - - return blob_sequence_id - - def _send_blob(self, blob_sequence_id, seq_index, blob): - """Tries to send a single blob for a given index within a blob sequence. - - The blob will not be sent if it was sent already, or if it is too large. - - Returns: - The number of blobs successfully sent (i.e., 1 or 0). - """ - # TODO(soergel): retry and resume logic - - if len(blob) > self._max_blob_size: - logger.warning( - "Blob too large; skipping. Size %d exceeds limit of %d bytes.", - len(blob), - self._max_blob_size, - ) - return 0 - - request_iterator = self._write_blob_request_iterator( - blob_sequence_id, seq_index, blob - ) - upload_start_time = time.time() - count = 0 - # TODO(soergel): don't wait for responses for greater throughput - # See https://stackoverflow.com/questions/55029342/handling-async-streaming-request-in-grpc-python - try: - for response in self._api.WriteBlob(request_iterator): - count += 1 - # TODO(soergel): validate responses? probably not. - pass - upload_duration_secs = time.time() - upload_start_time - logger.info( - "Upload for %d chunks totaling %d bytes took %.3f seconds (%.3f MB/sec)", - count, - len(blob), - upload_duration_secs, - len(blob) / upload_duration_secs / (1024 * 1024), - ) - return 1 - except grpc.RpcError as e: - if e.code() == grpc.StatusCode.ALREADY_EXISTS: - logger.error("Attempted to re-upload existing blob. Skipping.") - return 0 - else: - logger.info("WriteBlob RPC call got error %s", e) - raise - - def _write_blob_request_iterator(self, blob_sequence_id, seq_index, blob): - # For now all use cases have the blob in memory already. - # In the future we may want to stream from disk; that will require - # refactoring here. - # TODO(soergel): compute crc32c's to allow server-side data validation. 
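As a minimal sketch of the chunking loop that follows (an editorial illustration with made-up sizes, not part of the patched files): with a hypothetical 250-byte blob and `max_blob_request_size=100`, requests are emitted at offsets 0, 100, and 200, and only the final one sets `finalize_object=True`.

    blob, max_request_size = b"x" * 250, 100  # hypothetical sizes
    for offset in range(0, len(blob), max_request_size):
        chunk = blob[offset : offset + max_request_size]
        finalize = offset + max_request_size >= len(blob)
        # offsets 0 and 100 yield 100-byte chunks with finalize False;
        # offset 200 yields the remaining 50 bytes with finalize True.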
- for offset in range(0, len(blob), self._max_blob_request_size): - chunk = blob[offset : offset + self._max_blob_request_size] - finalize_object = offset + self._max_blob_request_size >= len(blob) - request = write_service_pb2.WriteBlobRequest( - blob_sequence_id=blob_sequence_id, - index=seq_index, - data=chunk, - offset=offset, - crc32c=None, - finalize_object=finalize_object, - final_crc32c=None, - blob_bytes=len(blob), - ) - yield request - - -@contextlib.contextmanager -def _request_logger(request, runs=None): - upload_start_time = time.time() - request_bytes = request.ByteSize() - logger.info("Trying request of %d bytes", request_bytes) - yield - upload_duration_secs = time.time() - upload_start_time - if runs: - logger.info( - "Upload for %d runs (%d bytes) took %.3f seconds", - len(runs), - request_bytes, - upload_duration_secs, - ) - else: - logger.info( - "Upload of (%d bytes) took %.3f seconds", - request_bytes, - upload_duration_secs, - ) - - -def _varint_cost(n): - """Computes the size of `n` encoded as an unsigned base-128 varint. - - This should be consistent with the proto wire format: - - - Args: - n: A non-negative integer. - - Returns: - An integer number of bytes. - """ - result = 1 - while n >= 128: - result += 1 - n >>= 7 - return result - - -def _prune_empty_tags_and_runs(request): - for (run_idx, run) in reversed(list(enumerate(request.runs))): - for (tag_idx, tag) in reversed(list(enumerate(run.tags))): - if not tag.points: - del run.tags[tag_idx] - if not run.tags: - del request.runs[run_idx] - - -def _filter_graph_defs(event): - for v in event.summary.value: - if v.metadata.plugin_data.plugin_name != graphs_metadata.PLUGIN_NAME: - continue - if v.tag == graphs_metadata.RUN_GRAPH_NAME: - data = list(v.tensor.string_val) - filtered_data = [_filtered_graph_bytes(x) for x in data] - filtered_data = [x for x in filtered_data if x is not None] - if filtered_data != data: - new_tensor = tensor_util.make_tensor_proto( - filtered_data, dtype=types_pb2.DT_STRING - ) - v.tensor.CopyFrom(new_tensor) - - -def _filtered_graph_bytes(graph_bytes): - try: - graph_def = graph_pb2.GraphDef().FromString(graph_bytes) - # The reason for the RuntimeWarning catch here is b/27494216, whereby - # some proto parsers incorrectly raise that instead of DecodeError - # on certain kinds of malformed input. Triggering this seems to require - # a combination of mysterious circumstances. - except (message.DecodeError, RuntimeWarning): - logger.warning( - "Could not parse GraphDef of size %d. 
Skipping.", - len(graph_bytes), - ) - return None - # Use the default filter parameters: - # limit_attr_size=1024, large_attrs_key="_too_large_attrs" - process_graph.prepare_graph_for_ui(graph_def) - return graph_def.SerializeToString() diff --git a/tensorboard/uploader/uploader_subcommand.py b/tensorboard/uploader/uploader_subcommand.py index 64ec292bfe7..baa36dbbf84 100644 --- a/tensorboard/uploader/uploader_subcommand.py +++ b/tensorboard/uploader/uploader_subcommand.py @@ -16,19 +16,16 @@ import abc -import os import sys import textwrap from absl import logging import grpc -from tensorboard.compat import tf from tensorboard.uploader.proto import experiment_pb2 from tensorboard.uploader.proto import export_service_pb2_grpc from tensorboard.uploader.proto import write_service_pb2_grpc from tensorboard.uploader import auth -from tensorboard.uploader import dry_run_stubs from tensorboard.uploader import exporter as exporter_lib from tensorboard.uploader import flags_parser from tensorboard.uploader import formatters @@ -59,7 +56,7 @@ def _prompt_for_user_ack(intent): """Prompts for user consent, exiting the program if they decline.""" body = intent.get_ack_message_body() - header = "\n***** TensorBoard Uploader *****\n" + header = "\n***** TensorBoard.dev Uploader *****\n" user_ack_message = "\n".join((header, body, _MESSAGE_TOS)) sys.stderr.write(user_ack_message) sys.stderr.write("\n") @@ -87,6 +84,31 @@ def _run(flags, experiment_url_callback=None): sys.stderr.write("Logged out of uploader.\n") sys.stderr.flush() return + if isinstance(intent, UploadIntent): + sys.stderr.write( + textwrap.dedent( + """\ + **************************************************************** + **************************************************************** + **************************************************************** + + Uploading TensorBoard logs to https://tensorboard.dev/ is no longer + supported. + + TensorBoard.dev is shutting down. + + Please export your experiments by Dec 31, 2023. + + See the FAQ at https://tensorboard.dev. + + **************************************************************** + **************************************************************** + **************************************************************** + """ + ) + ) + sys.stderr.flush() + return # TODO(b/141723268): maybe reconfirm Google Account prior to reuse. credentials = store.read_credentials() if not credentials: @@ -399,113 +421,19 @@ def _die_if_bad_experiment_description(description): class UploadIntent(_Intent): """The user intends to upload an experiment from the given logdir.""" - _MESSAGE_TEMPLATE = textwrap.dedent( - """\ - This will upload your TensorBoard logs to https://tensorboard.dev/ from - the following directory: - - {logdir} + def get_ack_message_body(self): + """Does nothing. - This TensorBoard will be visible to everyone. Do not upload sensitive - data. + Uploading is no longer supported and is handled specially by main. 
""" - ) - - def __init__( - self, - logdir, - name=None, - description=None, - verbosity=None, - dry_run=None, - one_shot=None, - experiment_url_callback=None, - ): - self.logdir = logdir - self.name = name - self.description = description - self.verbosity = verbosity - self.dry_run = False if dry_run is None else dry_run - self.one_shot = False if one_shot is None else one_shot - self.experiment_url_callback = experiment_url_callback - - def get_ack_message_body(self): - return self._MESSAGE_TEMPLATE.format(logdir=self.logdir) + return "" def execute(self, server_info, channel): - if self.dry_run: - api_client = dry_run_stubs.DryRunTensorBoardWriterStub() - else: - api_client = write_service_pb2_grpc.TensorBoardWriterServiceStub( - channel - ) - _die_if_bad_experiment_name(self.name) - _die_if_bad_experiment_description(self.description) - uploader = uploader_lib.TensorBoardUploader( - api_client, - self.logdir, - allowed_plugins=server_info_lib.allowed_plugins(server_info), - upload_limits=server_info_lib.upload_limits(server_info), - name=self.name, - description=self.description, - verbosity=self.verbosity, - one_shot=self.one_shot, - ) - if self.one_shot and not tf.io.gfile.isdir(self.logdir): - print("%s: No such directory." % self.logdir) - print( - "User specified `one_shot` mode with an unavailable " - "logdir. Exiting without creating an experiment." - ) - return - experiment_id = uploader.create_experiment() - url = server_info_lib.experiment_url(server_info, experiment_id) - if self.experiment_url_callback is not None: - self.experiment_url_callback(url) - if not self.one_shot: - print( - "Upload started and will continue reading any new data as it's " - "added to the logdir.\n\nTo stop uploading, press Ctrl-C." - ) - if self.dry_run: - print( - "\n** This is a dry run. " - "No data will be sent to tensorboard.dev. **\n" - ) - else: - print( - "\nNew experiment created. View your TensorBoard at: %s\n" % url - ) - interrupted = False - try: - uploader.start_uploading() - except uploader_lib.ExperimentNotFoundError: - print("Experiment was deleted; uploading has been cancelled") - return - except KeyboardInterrupt: - interrupted = True - finally: - if self.one_shot and not uploader.has_data(): - print( - "TensorBoard was run in `one_shot` mode, but did not find " - "any uploadable data in the specified logdir: %s\n" - "An empty experiment was created. " - "To delete the empty experiment you can execute the " - "following\n\n" - " tensorboard dev delete --experiment_id=%s" - % (self.logdir, uploader.experiment_id) - ) - end_message = "\n\n" - if interrupted: - end_message += "Interrupted." - else: - end_message += "Done." - # Only Add the "View your TensorBoard" message if there was any - # data added at all. - if not self.dry_run and uploader.has_data(): - end_message += " View your TensorBoard at %s" % url - sys.stdout.write(end_message + "\n") - sys.stdout.flush() + """Does nothing. + + Uploading is no longer supported and is handled specially by main. 
+        """
+        pass


 class _ExportIntent(_Intent):
@@ -575,20 +503,8 @@ def _get_intent(flags, experiment_url_callback=None):
     if cmd is None:
         raise base_plugin.FlagsError("Must specify subcommand (try --help).")
     if cmd == flags_parser.SUBCOMMAND_KEY_UPLOAD:
-        if flags.logdir:
-            return UploadIntent(
-                os.path.expanduser(flags.logdir),
-                name=flags.name,
-                description=flags.description,
-                verbosity=flags.verbose,
-                dry_run=flags.dry_run,
-                one_shot=flags.one_shot,
-                experiment_url_callback=experiment_url_callback,
-            )
-        else:
-            raise base_plugin.FlagsError(
-                "Must specify directory to upload via `--logdir`."
-            )
+        return UploadIntent()
+
     if cmd == flags_parser.SUBCOMMAND_KEY_UPDATE_METADATA:
         if flags.experiment_id:
             if flags.name is not None or flags.description is not None:
diff --git a/tensorboard/uploader/uploader_subcommand_test.py b/tensorboard/uploader/uploader_subcommand_test.py
index 347b807b102..1e4a065e088 100644
--- a/tensorboard/uploader/uploader_subcommand_test.py
+++ b/tensorboard/uploader/uploader_subcommand_test.py
@@ -23,189 +23,14 @@

 from tensorboard.uploader.proto import experiment_pb2
 from tensorboard.uploader.proto import server_info_pb2
-from tensorboard.uploader.proto import write_service_pb2
 from tensorboard.uploader.proto import write_service_pb2_grpc
-from tensorboard.uploader import dry_run_stubs
 from tensorboard.uploader import exporter as exporter_lib
 from tensorboard.uploader import uploader as uploader_lib
 from tensorboard.uploader import uploader_subcommand
-from tensorboard.plugins.histogram import metadata as histograms_metadata
-from tensorboard.plugins.graph import metadata as graphs_metadata
-from tensorboard.plugins.scalar import metadata as scalars_metadata
 from tensorboard.plugins import base_plugin


-# By default allow at least one plugin for each upload type: Scalar, Tensor, and
-# Blobs.
-_SCALARS_HISTOGRAMS_AND_GRAPHS = frozenset(
-    (
-        scalars_metadata.PLUGIN_NAME,
-        histograms_metadata.PLUGIN_NAME,
-        graphs_metadata.PLUGIN_NAME,
-    )
-)
-
-
-class UploadIntentTest(tf.test.TestCase):
-    def testUploadIntentOneShotEmptyDirectoryFails(self):
-        """Test the upload intent under the one-shot mode with missing dir.
-
-        In the case of a non-existent directory, uploading should not
-        create an experiment.
-        """
-        # Mock three places:
-        # 1. The uploader itself, we will inspect invocations of its methods but
-        #    do not want to actually upload anything.
-        # 2. Writing to stdout, so we can inspect messages to the user.
-        # 3. The creation of the grpc WriteServiceChannel, which happens in the
-        #    non dry_run execution, but we don't want to actually open a network
-        #    communication.
-        mock_uploader = mock.MagicMock()
-        mock_stdout_write = mock.MagicMock()
-        with mock.patch.object(
-            uploader_lib,
-            "TensorBoardUploader",
-            return_value=mock_uploader,
-        ), mock.patch.object(
-            sys.stdout, "write", mock_stdout_write
-        ), mock.patch.object(
-            write_service_pb2_grpc, "TensorBoardWriterServiceStub"
-        ):
-            # Set up an UploadIntent configured with one_shot and a
-            # non-existent directory.
-            intent = uploader_subcommand.UploadIntent(
-                "/dev/null/non/existent/directory", one_shot=True
-            )
-            # Execute the intent.execute method.
-            intent.execute(server_info_pb2.ServerInfoResponse(), None)
-        # Expect that there is no call to create an experiment.
-        self.assertEqual(mock_uploader.create_experiment.call_count, 0)
-        # Expect a message to the user indicating no experiment was created.
-        stdout_writes = [x[0][0] for x in mock_stdout_write.call_args_list]
-        self.assertRegex(
-            ",".join(stdout_writes),
-            ".*Exiting without creating an experiment.*",
-        )
-
-    def testUploadIntentOneShot(self):
-        """Test the upload intent under the one-shot mode."""
-        # Mock three places:
-        # 1. The uploader itself, we will inspect invocations of its methods but
-        #    do not want to actually upload anything.
-        # 2. Writing to stdout, so we can inspect messages to the user.
-        # 3. The creation of the grpc WriteServiceChannel, which happens in the
-        #    non dry_run execution, but we don't want to actually open a network
-        #    communication.
-        mock_uploader = mock.MagicMock()
-        mock_uploader.create_experiment = mock.MagicMock(
-            return_value="fake_experiment_id"
-        )
-        mock_stdout_write = mock.MagicMock()
-        with mock.patch.object(
-            sys.stdout, "write", mock_stdout_write
-        ), mock.patch.object(
-            uploader_lib, "TensorBoardUploader", return_value=mock_uploader
-        ), mock.patch.object(
-            write_service_pb2_grpc, "TensorBoardWriterServiceStub"
-        ):
-            # Set up an UploadIntent configured with one_shot and an empty temp
-            # directory.
-            intent = uploader_subcommand.UploadIntent(
-                self.get_temp_dir(), one_shot=True
-            )
-            # Execute the intent.execute method.
-            intent.execute(server_info_pb2.ServerInfoResponse(), None)
-        # Expect that there is one call to create_experiment.
-        self.assertEqual(mock_uploader.create_experiment.call_count, 1)
-        # Expect that there is one call to start_uploading.
-        self.assertEqual(mock_uploader.start_uploading.call_count, 1)
-        # Expect that ".*Done scanning logdir.*" is among the things printed.
-        stdout_writes = [x[0][0] for x in mock_stdout_write.call_args_list]
-        self.assertRegex(
-            ",".join(stdout_writes),
-            ".*experiment created.*",
-        )
-        # Expect that the last thing written is the string "Done" and the
-        # experiment_id.
- self.assertRegex(stdout_writes[-1], ".*Done.*") - self.assertRegex(stdout_writes[-1], ".*fake_experiment_id.*") - - def testUploadIntentWithExperimentUrlCallback(self): - """Test the upload intent with a callback.""" - server_info = server_info_pb2.ServerInfoResponse() - server_info.url_format.template = "https://tensorboard.dev/x/{}" - server_info.url_format.id_placeholder = "{}" - - stub = dry_run_stubs.DryRunTensorBoardWriterStub() - stub.CreateExperiment = ( - lambda req, **__: write_service_pb2.CreateExperimentResponse( - experiment_id="test_experiment_id", url="this URL is ignored" - ) - ) - - expected_url = "https://tensorboard.dev/x/test_experiment_id" - - with mock.patch.object( - dry_run_stubs, - "DryRunTensorBoardWriterStub", - wraps=lambda: stub, - ), mock.patch.object(sys.stdout, "write"): - mock_channel = mock.Mock() - mock_experiment_url_callback = mock.Mock() - intent = uploader_subcommand.UploadIntent( - self.get_temp_dir(), - dry_run=True, - one_shot=True, - experiment_url_callback=mock_experiment_url_callback, - ) - intent.execute(server_info, mock_channel) - mock_experiment_url_callback.assert_called_once_with(expected_url) - - def testUploadIntentDryRunNonOneShotInterrupted(self): - mock_server_info = mock.MagicMock() - mock_channel = mock.MagicMock() - mock_stdout_write = mock.MagicMock() - mock_uploader = mock.MagicMock() - with mock.patch.object( - mock_uploader, - "start_uploading", - side_effect=KeyboardInterrupt(), - ), mock.patch.object( - uploader_lib, "TensorBoardUploader", return_value=mock_uploader - ), mock.patch.object( - sys.stdout, "write", mock_stdout_write - ): - intent = uploader_subcommand.UploadIntent( - self.get_temp_dir(), dry_run=True, one_shot=False - ) - intent.execute(mock_server_info, mock_channel) - self.assertRegex( - mock_stdout_write.call_args_list[-1][0][0], ".*Interrupted.*" - ) - - def testUploadIntentNonDryRunNonOneShotInterrupted(self): - mock_server_info = mock.MagicMock() - mock_channel = mock.MagicMock() - mock_stdout_write = mock.MagicMock() - mock_uploader = mock.MagicMock() - with mock.patch.object( - mock_uploader, - "start_uploading", - side_effect=KeyboardInterrupt(), - ), mock.patch.object( - uploader_lib, "TensorBoardUploader", return_value=mock_uploader - ), mock.patch.object( - sys.stdout, "write", mock_stdout_write - ): - intent = uploader_subcommand.UploadIntent( - self.get_temp_dir(), dry_run=False, one_shot=False - ) - intent.execute(mock_server_info, mock_channel) - self.assertIn( - "\nInterrupted. 
View your TensorBoard at ", - mock_stdout_write.call_args_list[-1][0][0], - ) - +class IntentTest(tf.test.TestCase): def testListIntentSetsExperimentMask(self): mock_server_info = mock.MagicMock() mock_channel = mock.MagicMock() diff --git a/tensorboard/uploader/uploader_test.py b/tensorboard/uploader/uploader_test.py index db9d7d48f4c..eb9de9d0096 100644 --- a/tensorboard/uploader/uploader_test.py +++ b/tensorboard/uploader/uploader_test.py @@ -16,8 +16,6 @@ import itertools -import os -import re from unittest import mock import grpc @@ -25,50 +23,11 @@ import tensorflow as tf -from google.protobuf import message -from tensorboard import data_compat -from tensorboard import dataclass_compat -from tensorboard.compat.proto import tensor_shape_pb2 from tensorboard.uploader.proto import experiment_pb2 -from tensorboard.uploader.proto import scalar_pb2 -from tensorboard.uploader.proto import server_info_pb2 from tensorboard.uploader.proto import write_service_pb2 from tensorboard.uploader.proto import write_service_pb2_grpc from tensorboard.uploader import test_util -from tensorboard.uploader import upload_tracker from tensorboard.uploader import uploader as uploader_lib -from tensorboard.uploader import logdir_loader -from tensorboard.uploader import util -from tensorboard.compat.proto import event_pb2 -from tensorboard.compat.proto import graph_pb2 -from tensorboard.compat.proto import summary_pb2 -from tensorboard.compat.proto import tensor_pb2 -from tensorboard.compat.proto import types_pb2 -from tensorboard.plugins.histogram import metadata as histograms_metadata -from tensorboard.plugins.histogram import summary_v2 as histogram_v2 -from tensorboard.plugins.graph import metadata as graphs_metadata -from tensorboard.plugins.scalar import metadata as scalars_metadata -from tensorboard.plugins.scalar import summary_v2 as scalar_v2 -from tensorboard.summary import v1 as summary_v1 -from tensorboard.util import test_util as tb_test_util -from tensorboard.util import tensor_util - - -def _create_example_graph_bytes(large_attr_size): - graph_def = graph_pb2.GraphDef() - graph_def.node.add(name="alice", op="Person") - graph_def.node.add(name="bob", op="Person") - - graph_def.node[1].attr["small"].s = b"small_attr_value" - graph_def.node[1].attr["large"].s = b"l" * large_attr_size - graph_def.node.add( - name="friendship", op="Friendship", input=["alice", "bob"] - ) - return graph_def.SerializeToString() - - -class AbortUploadError(Exception): - """Exception used in testing to abort the upload process.""" def _create_mock_client(): @@ -93,1782 +52,6 @@ def _create_mock_client(): return mock_client -# By default allow at least one plugin for each upload type: Scalar, Tensor, and -# Blobs. -_SCALARS_HISTOGRAMS_AND_GRAPHS = frozenset( - ( - scalars_metadata.PLUGIN_NAME, - histograms_metadata.PLUGIN_NAME, - graphs_metadata.PLUGIN_NAME, - ) -) - -# Sentinel for `_create_*` helpers, for arguments for which we want to -# supply a default other than the `None` used by the code under test. -_USE_DEFAULT = object() - - -def _create_uploader( - writer_client=_USE_DEFAULT, - logdir=None, - max_scalar_request_size=_USE_DEFAULT, - max_blob_request_size=_USE_DEFAULT, - max_blob_size=_USE_DEFAULT, - logdir_poll_rate_limiter=_USE_DEFAULT, - rpc_rate_limiter=_USE_DEFAULT, - tensor_rpc_rate_limiter=_USE_DEFAULT, - blob_rpc_rate_limiter=_USE_DEFAULT, - name=None, - description=None, - verbosity=0, # Use 0 to minimize littering the test output. 
- one_shot=None, -): - if writer_client is _USE_DEFAULT: - writer_client = _create_mock_client() - if max_scalar_request_size is _USE_DEFAULT: - max_scalar_request_size = 128000 - if max_blob_request_size is _USE_DEFAULT: - max_blob_request_size = 128000 - if max_blob_size is _USE_DEFAULT: - max_blob_size = 12345 - if logdir_poll_rate_limiter is _USE_DEFAULT: - logdir_poll_rate_limiter = util.RateLimiter(0) - if rpc_rate_limiter is _USE_DEFAULT: - rpc_rate_limiter = util.RateLimiter(0) - if tensor_rpc_rate_limiter is _USE_DEFAULT: - tensor_rpc_rate_limiter = util.RateLimiter(0) - if blob_rpc_rate_limiter is _USE_DEFAULT: - blob_rpc_rate_limiter = util.RateLimiter(0) - - upload_limits = server_info_pb2.UploadLimits( - max_scalar_request_size=max_scalar_request_size, - max_tensor_request_size=128000, - max_tensor_point_size=11111, - max_blob_request_size=max_blob_request_size, - max_blob_size=max_blob_size, - ) - - return uploader_lib.TensorBoardUploader( - writer_client, - logdir, - allowed_plugins=_SCALARS_HISTOGRAMS_AND_GRAPHS, - upload_limits=upload_limits, - logdir_poll_rate_limiter=logdir_poll_rate_limiter, - rpc_rate_limiter=rpc_rate_limiter, - tensor_rpc_rate_limiter=tensor_rpc_rate_limiter, - blob_rpc_rate_limiter=blob_rpc_rate_limiter, - name=name, - description=description, - verbosity=verbosity, - one_shot=one_shot, - ) - - -def _create_request_sender( - experiment_id=None, - api=None, - allowed_plugins=_USE_DEFAULT, -): - if api is _USE_DEFAULT: - api = _create_mock_client() - if allowed_plugins is _USE_DEFAULT: - allowed_plugins = _SCALARS_HISTOGRAMS_AND_GRAPHS - - upload_limits = server_info_pb2.UploadLimits( - max_scalar_request_size=128000, - max_tensor_request_size=128000, - max_tensor_point_size=11111, - max_blob_size=12345, - ) - - rpc_rate_limiter = util.RateLimiter(0) - tensor_rpc_rate_limiter = util.RateLimiter(0) - blob_rpc_rate_limiter = util.RateLimiter(0) - - return uploader_lib._BatchedRequestSender( - experiment_id=experiment_id, - api=api, - allowed_plugins=allowed_plugins, - upload_limits=upload_limits, - rpc_rate_limiter=rpc_rate_limiter, - tensor_rpc_rate_limiter=tensor_rpc_rate_limiter, - blob_rpc_rate_limiter=blob_rpc_rate_limiter, - tracker=upload_tracker.UploadTracker(verbosity=0), - ) - - -def _create_scalar_request_sender( - experiment_id=None, - api=_USE_DEFAULT, - max_request_size=_USE_DEFAULT, - tracker=None, -): - if api is _USE_DEFAULT: - api = _create_mock_client() - if max_request_size is _USE_DEFAULT: - max_request_size = 128000 - return uploader_lib._ScalarBatchedRequestSender( - experiment_id=experiment_id, - api=api, - rpc_rate_limiter=util.RateLimiter(0), - max_request_size=max_request_size, - tracker=tracker or upload_tracker.UploadTracker(verbosity=0), - ) - - -def _create_tensor_request_sender( - experiment_id=None, - api=_USE_DEFAULT, - max_request_size=_USE_DEFAULT, - max_tensor_point_size=_USE_DEFAULT, - tracker=None, -): - if api is _USE_DEFAULT: - api = _create_mock_client() - if max_request_size is _USE_DEFAULT: - max_request_size = 128000 - if max_tensor_point_size is _USE_DEFAULT: - max_tensor_point_size = 11111 - return uploader_lib._TensorBatchedRequestSender( - experiment_id=experiment_id, - api=api, - rpc_rate_limiter=util.RateLimiter(0), - max_request_size=max_request_size, - max_tensor_point_size=max_tensor_point_size, - tracker=tracker or upload_tracker.UploadTracker(verbosity=0), - ) - - -class TensorboardUploaderTest(tf.test.TestCase): - def test_create_experiment(self): - logdir = "/logs/foo" - uploader = 
_create_uploader(_create_mock_client(), logdir) - eid = uploader.create_experiment() - self.assertEqual(eid, "123") - - def test_create_experiment_with_name(self): - logdir = "/logs/foo" - mock_client = _create_mock_client() - new_name = "This is the new name" - uploader = _create_uploader(mock_client, logdir, name=new_name) - eid = uploader.create_experiment() - self.assertEqual(eid, "123") - mock_client.CreateExperiment.assert_called_once() - (args, _) = mock_client.CreateExperiment.call_args - - expected_request = write_service_pb2.CreateExperimentRequest( - name=new_name, - ) - self.assertEqual(args[0], expected_request) - - def test_create_experiment_with_description(self): - logdir = "/logs/foo" - mock_client = _create_mock_client() - new_description = """ - **description**" - may have "strange" unicode chars 🌴 \\/<> - """ - uploader = _create_uploader( - mock_client, logdir, description=new_description - ) - eid = uploader.create_experiment() - self.assertEqual(eid, "123") - mock_client.CreateExperiment.assert_called_once() - (args, _) = mock_client.CreateExperiment.call_args - - expected_request = write_service_pb2.CreateExperimentRequest( - description=new_description, - ) - self.assertEqual(args[0], expected_request) - - def test_create_experiment_with_all_metadata(self): - logdir = "/logs/foo" - mock_client = _create_mock_client() - new_description = """ - **description**" - may have "strange" unicode chars 🌴 \\/<> - """ - new_name = "This is a cool name." - uploader = _create_uploader( - mock_client, logdir, name=new_name, description=new_description - ) - eid = uploader.create_experiment() - self.assertEqual(eid, "123") - mock_client.CreateExperiment.assert_called_once() - (args, _) = mock_client.CreateExperiment.call_args - - expected_request = write_service_pb2.CreateExperimentRequest( - name=new_name, - description=new_description, - ) - self.assertEqual(args[0], expected_request) - - def test_start_uploading_without_create_experiment_fails(self): - mock_client = _create_mock_client() - uploader = _create_uploader(mock_client, "/logs/foo") - with self.assertRaisesRegex(RuntimeError, "call create_experiment()"): - uploader.start_uploading() - - def test_start_uploading_scalars(self): - mock_client = _create_mock_client() - mock_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_tensor_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_blob_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_tracker = mock.MagicMock() - with mock.patch.object( - upload_tracker, "UploadTracker", return_value=mock_tracker - ): - uploader = _create_uploader( - mock_client, - "/logs/foo", - # Send each Event below in a separate WriteScalarRequest - max_scalar_request_size=100, - rpc_rate_limiter=mock_rate_limiter, - tensor_rpc_rate_limiter=mock_tensor_rate_limiter, - blob_rpc_rate_limiter=mock_blob_rate_limiter, - verbosity=1, # In order to test the upload tracker. 
- ) - uploader.create_experiment() - - def scalar_event(tag, value): - return event_pb2.Event(summary=scalar_v2.scalar_pb(tag, value)) - - mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) - mock_logdir_loader.get_run_events.side_effect = [ - { - "run 1": _apply_compat( - [scalar_event("1.1", 5.0), scalar_event("1.2", 5.0)] - ), - "run 2": _apply_compat( - [scalar_event("2.1", 5.0), scalar_event("2.2", 5.0)] - ), - }, - { - "run 3": _apply_compat( - [scalar_event("3.1", 5.0), scalar_event("3.2", 5.0)] - ), - "run 4": _apply_compat( - [scalar_event("4.1", 5.0), scalar_event("4.2", 5.0)] - ), - "run 5": _apply_compat( - [scalar_event("5.1", 5.0), scalar_event("5.2", 5.0)] - ), - }, - AbortUploadError, - ] - - with mock.patch.object( - uploader, "_logdir_loader", mock_logdir_loader - ), self.assertRaises(AbortUploadError): - uploader.start_uploading() - self.assertEqual(4 + 6, mock_client.WriteScalar.call_count) - self.assertEqual(4 + 6, mock_rate_limiter.tick.call_count) - self.assertEqual(0, mock_tensor_rate_limiter.tick.call_count) - self.assertEqual(0, mock_blob_rate_limiter.tick.call_count) - - # Check upload tracker calls. - self.assertEqual(mock_tracker.send_tracker.call_count, 2) - self.assertEqual(mock_tracker.scalars_tracker.call_count, 10) - self.assertLen(mock_tracker.scalars_tracker.call_args[0], 1) - self.assertEqual(mock_tracker.tensors_tracker.call_count, 0) - self.assertEqual(mock_tracker.blob_tracker.call_count, 0) - - def test_start_uploading_scalars_one_shot(self): - """Check that one-shot uploading stops without AbortUploadError.""" - mock_client = _create_mock_client() - mock_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_tensor_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_blob_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_tracker = mock.MagicMock() - with mock.patch.object( - upload_tracker, "UploadTracker", return_value=mock_tracker - ): - uploader = _create_uploader( - mock_client, - "/logs/foo", - # Send each Event below in a separate WriteScalarRequest - max_scalar_request_size=100, - rpc_rate_limiter=mock_rate_limiter, - tensor_rpc_rate_limiter=mock_tensor_rate_limiter, - blob_rpc_rate_limiter=mock_blob_rate_limiter, - verbosity=1, # In order to test the upload tracker. - one_shot=True, - ) - uploader.create_experiment() - - def scalar_event(tag, value): - return event_pb2.Event(summary=scalar_v2.scalar_pb(tag, value)) - - mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) - mock_logdir_loader.get_run_events.side_effect = [ - { - "run 1": _apply_compat( - [scalar_event("1.1", 5.0), scalar_event("1.2", 5.0)] - ), - "run 2": _apply_compat( - [scalar_event("2.1", 5.0), scalar_event("2.2", 5.0)] - ), - }, - # Note the lack of AbortUploadError here. - ] - - with mock.patch.object(uploader, "_logdir_loader", mock_logdir_loader): - uploader.start_uploading() - - self.assertEqual(4, mock_client.WriteScalar.call_count) - self.assertEqual(4, mock_rate_limiter.tick.call_count) - self.assertEqual(0, mock_tensor_rate_limiter.tick.call_count) - self.assertEqual(0, mock_blob_rate_limiter.tick.call_count) - - # Check upload tracker calls. 
-        self.assertEqual(mock_tracker.send_tracker.call_count, 1)
-        self.assertEqual(mock_tracker.scalars_tracker.call_count, 4)
-        self.assertLen(mock_tracker.scalars_tracker.call_args[0], 1)
-        self.assertEqual(mock_tracker.tensors_tracker.call_count, 0)
-        self.assertEqual(mock_tracker.blob_tracker.call_count, 0)
-
-    def test_start_uploading_tensors(self):
-        mock_client = _create_mock_client()
-        mock_rate_limiter = mock.create_autospec(util.RateLimiter)
-        mock_tensor_rate_limiter = mock.create_autospec(util.RateLimiter)
-        mock_blob_rate_limiter = mock.create_autospec(util.RateLimiter)
-        mock_tracker = mock.MagicMock()
-        with mock.patch.object(
-            upload_tracker, "UploadTracker", return_value=mock_tracker
-        ):
-            uploader = _create_uploader(
-                mock_client,
-                "/logs/foo",
-                rpc_rate_limiter=mock_rate_limiter,
-                tensor_rpc_rate_limiter=mock_tensor_rate_limiter,
-                blob_rpc_rate_limiter=mock_blob_rate_limiter,
-                verbosity=1,  # In order to test the upload tracker.
-            )
-        uploader.create_experiment()
-
-        def tensor_event(tag, value):
-            return event_pb2.Event(
-                summary=histogram_v2.histogram_pb(tag, value)
-            )
-
-        mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader)
-        mock_logdir_loader.get_run_events.side_effect = [
-            {
-                "run 1": _apply_compat(
-                    [tensor_event("1.1", [5.0]), tensor_event("1.2", [5.0])]
-                ),
-            },
-            AbortUploadError,
-        ]
-
-        with mock.patch.object(
-            uploader, "_logdir_loader", mock_logdir_loader
-        ), self.assertRaises(AbortUploadError):
-            uploader.start_uploading()
-        self.assertEqual(1, mock_client.WriteTensor.call_count)
-        self.assertEqual(0, mock_rate_limiter.tick.call_count)
-        self.assertEqual(1, mock_tensor_rate_limiter.tick.call_count)
-        self.assertEqual(0, mock_blob_rate_limiter.tick.call_count)
-
-        # Check upload tracker calls.
-        self.assertEqual(mock_tracker.send_tracker.call_count, 1)
-        self.assertEqual(mock_tracker.scalars_tracker.call_count, 0)
-        tensors_tracker = mock_tracker.tensors_tracker
-        self.assertEqual(tensors_tracker.call_count, 1)
-        self.assertLen(tensors_tracker.call_args[0], 4)
-        self.assertEqual(tensors_tracker.call_args[0][0], 2)  # num_tensors
-        self.assertEqual(
-            tensors_tracker.call_args[0][1], 0
-        )  # num_tensors_skipped
-        # tensor_bytes: avoid asserting the exact value as it's hard to reason about.
-        self.assertGreater(tensors_tracker.call_args[0][2], 0)
-        self.assertEqual(
-            tensors_tracker.call_args[0][3], 0
-        )  # tensor_bytes_skipped
-        self.assertEqual(mock_tracker.blob_tracker.call_count, 0)
-
-    def test_start_uploading_graphs(self):
-        mock_client = _create_mock_client()
-        mock_rate_limiter = mock.create_autospec(util.RateLimiter)
-        mock_tensor_rate_limiter = mock.create_autospec(util.RateLimiter)
-        mock_blob_rate_limiter = mock.create_autospec(util.RateLimiter)
-        mock_tracker = mock.MagicMock()
-        with mock.patch.object(
-            upload_tracker, "UploadTracker", return_value=mock_tracker
-        ):
-            uploader = _create_uploader(
-                mock_client,
-                "/logs/foo",
-                # Verify behavior with lots of small chunks
-                max_blob_request_size=100,
-                rpc_rate_limiter=mock_rate_limiter,
-                tensor_rpc_rate_limiter=mock_tensor_rate_limiter,
-                blob_rpc_rate_limiter=mock_blob_rate_limiter,
-                verbosity=1,  # In order to test tracker.
-            )
-        uploader.create_experiment()
-
-        # Of course a real Event stream will never produce the same Event twice,
-        # but in this test context it's fine to reuse this one.
- graph_event = event_pb2.Event( - graph_def=_create_example_graph_bytes(950) - ) - expected_graph_def = graph_pb2.GraphDef.FromString( - graph_event.graph_def - ) - mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) - mock_logdir_loader.get_run_events.side_effect = [ - { - "run 1": _apply_compat([graph_event, graph_event]), - "run 2": _apply_compat([graph_event, graph_event]), - }, - { - "run 3": _apply_compat([graph_event, graph_event]), - "run 4": _apply_compat([graph_event, graph_event]), - "run 5": _apply_compat([graph_event, graph_event]), - }, - AbortUploadError, - ] - - with mock.patch.object( - uploader, "_logdir_loader", mock_logdir_loader - ), self.assertRaises(AbortUploadError): - uploader.start_uploading() - self.assertEqual(1, mock_client.CreateExperiment.call_count) - self.assertEqual(10, mock_client.WriteBlob.call_count) - for (i, call) in enumerate(mock_client.WriteBlob.call_args_list): - requests = list(call[0][0]) - data = b"".join(r.data for r in requests) - actual_graph_def = graph_pb2.GraphDef.FromString(data) - self.assertProtoEquals(expected_graph_def, actual_graph_def) - self.assertEqual( - set(r.blob_sequence_id for r in requests), - {"blob%d" % i}, - ) - self.assertEqual(0, mock_rate_limiter.tick.call_count) - self.assertEqual(0, mock_tensor_rate_limiter.tick.call_count) - self.assertEqual(10, mock_blob_rate_limiter.tick.call_count) - - # Check upload tracker calls. - self.assertEqual(mock_tracker.send_tracker.call_count, 2) - self.assertEqual(mock_tracker.scalars_tracker.call_count, 0) - self.assertEqual(mock_tracker.tensors_tracker.call_count, 0) - self.assertEqual(mock_tracker.blob_tracker.call_count, 10) - self.assertLen(mock_tracker.blob_tracker.call_args[0], 1) - self.assertGreater(mock_tracker.blob_tracker.call_args[0][0], 0) - - def test_upload_skip_large_blob(self): - mock_client = _create_mock_client() - mock_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_blob_rate_limiter = mock.create_autospec(util.RateLimiter) - uploader = _create_uploader( - mock_client, - "/logs/foo", - # Verify behavior with lots of small chunks - max_blob_request_size=100, - max_blob_size=100, - rpc_rate_limiter=mock_rate_limiter, - blob_rpc_rate_limiter=mock_blob_rate_limiter, - ) - uploader.create_experiment() - - graph_event = event_pb2.Event( - graph_def=_create_example_graph_bytes(950) - ) - - mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) - mock_logdir_loader.get_run_events.side_effect = [ - {"run 1": _apply_compat([graph_event])}, - AbortUploadError, - ] - - with mock.patch.object( - uploader, "_logdir_loader", mock_logdir_loader - ), self.assertRaises(AbortUploadError): - uploader.start_uploading() - self.assertEqual(1, mock_client.CreateExperiment.call_count) - self.assertEqual(0, mock_client.WriteBlob.call_count) - self.assertEqual(0, mock_rate_limiter.tick.call_count) - self.assertEqual(1, mock_blob_rate_limiter.tick.call_count) - - def test_filter_graphs(self): - # Three graphs: one short, one long, one corrupt. 
-        bytes_0 = _create_example_graph_bytes(123)
-        bytes_1 = _create_example_graph_bytes(9999)
-        # invalid (truncated) proto: length-delimited field 1 (0x0a) of
-        # length 0x7f specified, but only len("bogus") = 5 bytes given
-        #
-        bytes_2 = b"\x0a\x7fbogus"
-
-        logdir = self.get_temp_dir()
-        for (i, b) in enumerate([bytes_0, bytes_1, bytes_2]):
-            run_dir = os.path.join(logdir, "run_%04d" % i)
-            event = event_pb2.Event(step=0, wall_time=123 * i, graph_def=b)
-            with tb_test_util.FileWriter(run_dir) as writer:
-                writer.add_event(event)
-
-        limiter = mock.create_autospec(util.RateLimiter)
-        limiter.tick.side_effect = [None, AbortUploadError]
-        mock_client = _create_mock_client()
-        uploader = _create_uploader(
-            mock_client,
-            logdir,
-            logdir_poll_rate_limiter=limiter,
-        )
-        uploader.create_experiment()
-
-        with self.assertRaises(AbortUploadError):
-            uploader.start_uploading()
-
-        actual_blobs = []
-        for call in mock_client.WriteBlob.call_args_list:
-            requests = call[0][0]
-            actual_blobs.append(b"".join(r.data for r in requests))
-
-        actual_graph_defs = []
-        for blob in actual_blobs:
-            try:
-                actual_graph_defs.append(graph_pb2.GraphDef.FromString(blob))
-            except message.DecodeError:
-                actual_graph_defs.append(None)
-
-        with self.subTest("graphs with small attr values should be unchanged"):
-            expected_graph_def_0 = graph_pb2.GraphDef.FromString(bytes_0)
-            self.assertEqual(actual_graph_defs[0], expected_graph_def_0)
-
-        with self.subTest("large attr values should be filtered out"):
-            expected_graph_def_1 = graph_pb2.GraphDef.FromString(bytes_1)
-            del expected_graph_def_1.node[1].attr["large"]
-            expected_graph_def_1.node[1].attr["_too_large_attrs"].list.s.append(
-                b"large"
-            )
-            requests = list(mock_client.WriteBlob.call_args[0][0])
-            self.assertEqual(actual_graph_defs[1], expected_graph_def_1)
-
-        with self.subTest("corrupt graphs should be skipped"):
-            self.assertLen(actual_blobs, 2)
-
-    def test_upload_server_error(self):
-        mock_client = _create_mock_client()
-        mock_rate_limiter = mock.create_autospec(util.RateLimiter)
-        mock_blob_rate_limiter = mock.create_autospec(util.RateLimiter)
-        uploader = _create_uploader(
-            mock_client,
-            "/logs/foo",
-            rpc_rate_limiter=mock_rate_limiter,
-            blob_rpc_rate_limiter=mock_blob_rate_limiter,
-        )
-        uploader.create_experiment()
-
-        # Of course a real Event stream will never produce the same Event twice,
-        # but in this test context it's fine to reuse this one.
-        graph_event = event_pb2.Event(
-            graph_def=_create_example_graph_bytes(950)
-        )
-
-        mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader)
-        mock_logdir_loader.get_run_events.side_effect = [
-            {"run 1": _apply_compat([graph_event])},
-            {"run 1": _apply_compat([graph_event])},
-            AbortUploadError,
-        ]
-
-        mock_client.WriteBlob.side_effect = [
-            [write_service_pb2.WriteBlobResponse()],
-            test_util.grpc_error(grpc.StatusCode.INTERNAL, "nope"),
-        ]
-
-        # This demonstrates that the INTERNAL error is NOT handled, so the
-        # uploader will die if this happens.
- with mock.patch.object( - uploader, "_logdir_loader", mock_logdir_loader - ), self.assertRaises(grpc.RpcError): - uploader.start_uploading() - self.assertEqual(1, mock_client.CreateExperiment.call_count) - self.assertEqual(2, mock_client.WriteBlob.call_count) - self.assertEqual(0, mock_rate_limiter.tick.call_count) - self.assertEqual(2, mock_blob_rate_limiter.tick.call_count) - - def test_upload_same_graph_twice(self): - mock_client = _create_mock_client() - mock_rate_limiter = mock.create_autospec(util.RateLimiter) - mock_blob_rate_limiter = mock.create_autospec(util.RateLimiter) - uploader = _create_uploader( - mock_client, - "/logs/foo", - rpc_rate_limiter=mock_rate_limiter, - blob_rpc_rate_limiter=mock_blob_rate_limiter, - ) - uploader.create_experiment() - - graph_event = event_pb2.Event( - graph_def=_create_example_graph_bytes(950) - ) - - mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) - mock_logdir_loader.get_run_events.side_effect = [ - {"run 1": _apply_compat([graph_event])}, - {"run 1": _apply_compat([graph_event])}, - AbortUploadError, - ] - - mock_client.WriteBlob.side_effect = [ - [write_service_pb2.WriteBlobResponse()], - test_util.grpc_error(grpc.StatusCode.ALREADY_EXISTS, "nope"), - ] - - # This demonstrates that the ALREADY_EXISTS error is handled gracefully. - with mock.patch.object( - uploader, "_logdir_loader", mock_logdir_loader - ), self.assertRaises(AbortUploadError): - uploader.start_uploading() - self.assertEqual(1, mock_client.CreateExperiment.call_count) - self.assertEqual(2, mock_client.WriteBlob.call_count) - self.assertEqual(0, mock_rate_limiter.tick.call_count) - self.assertEqual(2, mock_blob_rate_limiter.tick.call_count) - - def test_upload_empty_logdir(self): - logdir = self.get_temp_dir() - mock_client = _create_mock_client() - uploader = _create_uploader(mock_client, logdir) - uploader.create_experiment() - uploader._upload_once() - mock_client.WriteScalar.assert_not_called() - - def test_upload_polls_slowly_once_done(self): - class Success(Exception): - pass - - mock_rate_limiter = mock.create_autospec(util.RateLimiter) - upload_call_count = 0 - - def mock_upload_once(): - nonlocal upload_call_count - upload_call_count += 1 - tick_count = mock_rate_limiter.tick.call_count - self.assertEqual(tick_count, upload_call_count) - if tick_count >= 3: - raise Success() - - uploader = _create_uploader( - logdir=self.get_temp_dir(), - logdir_poll_rate_limiter=mock_rate_limiter, - ) - uploader._upload_once = mock_upload_once - - uploader.create_experiment() - with self.assertRaises(Success): - uploader.start_uploading() - - def test_upload_swallows_rpc_failure(self): - logdir = self.get_temp_dir() - with tb_test_util.FileWriter(logdir) as writer: - writer.add_test_summary("foo") - mock_client = _create_mock_client() - uploader = _create_uploader(mock_client, logdir) - uploader.create_experiment() - error = test_util.grpc_error(grpc.StatusCode.INTERNAL, "Failure") - mock_client.WriteScalar.side_effect = error - uploader._upload_once() - mock_client.WriteScalar.assert_called_once() - - def test_upload_full_logdir(self): - logdir = self.get_temp_dir() - mock_client = _create_mock_client() - uploader = _create_uploader(mock_client, logdir) - uploader.create_experiment() - - # Convenience helpers for constructing expected requests. 
- run = write_service_pb2.WriteScalarRequest.Run - tag = write_service_pb2.WriteScalarRequest.Tag - point = scalar_pb2.ScalarPoint - - # First round - writer = tb_test_util.FileWriter(logdir) - writer.add_test_summary("foo", simple_value=5.0, step=1) - writer.add_test_summary("foo", simple_value=6.0, step=2) - writer.add_test_summary("foo", simple_value=7.0, step=3) - writer.add_test_summary("bar", simple_value=8.0, step=3) - writer.flush() - writer_a = tb_test_util.FileWriter(os.path.join(logdir, "a")) - writer_a.add_test_summary("qux", simple_value=9.0, step=2) - writer_a.flush() - uploader._upload_once() - self.assertEqual(1, mock_client.WriteScalar.call_count) - request1 = mock_client.WriteScalar.call_args[0][0] - _clear_wall_times(request1) - expected_request1 = write_service_pb2.WriteScalarRequest( - experiment_id="123", - runs=[ - run( - name=".", - tags=[ - tag( - name="foo", - metadata=test_util.scalar_metadata("foo"), - points=[ - point(step=1, value=5.0), - point(step=2, value=6.0), - point(step=3, value=7.0), - ], - ), - tag( - name="bar", - metadata=test_util.scalar_metadata("bar"), - points=[point(step=3, value=8.0)], - ), - ], - ), - run( - name="a", - tags=[ - tag( - name="qux", - metadata=test_util.scalar_metadata("qux"), - points=[point(step=2, value=9.0)], - ) - ], - ), - ], - ) - self.assertProtoEquals(expected_request1, request1) - mock_client.WriteScalar.reset_mock() - - # Second round - writer.add_test_summary("foo", simple_value=10.0, step=5) - writer.add_test_summary("baz", simple_value=11.0, step=1) - writer.flush() - writer_b = tb_test_util.FileWriter(os.path.join(logdir, "b")) - writer_b.add_test_summary("xyz", simple_value=12.0, step=1) - writer_b.flush() - uploader._upload_once() - self.assertEqual(1, mock_client.WriteScalar.call_count) - request2 = mock_client.WriteScalar.call_args[0][0] - _clear_wall_times(request2) - expected_request2 = write_service_pb2.WriteScalarRequest( - experiment_id="123", - runs=[ - run( - name=".", - tags=[ - tag( - name="foo", - metadata=test_util.scalar_metadata("foo"), - points=[point(step=5, value=10.0)], - ), - tag( - name="baz", - metadata=test_util.scalar_metadata("baz"), - points=[point(step=1, value=11.0)], - ), - ], - ), - run( - name="b", - tags=[ - tag( - name="xyz", - metadata=test_util.scalar_metadata("xyz"), - points=[point(step=1, value=12.0)], - ) - ], - ), - ], - ) - self.assertProtoEquals(expected_request2, request2) - mock_client.WriteScalar.reset_mock() - - # Empty third round - uploader._upload_once() - mock_client.WriteScalar.assert_not_called() - - def test_verbosity_zero_creates_upload_tracker_with_verbosity_zero(self): - mock_client = _create_mock_client() - mock_tracker = mock.MagicMock() - with mock.patch.object( - upload_tracker, "UploadTracker", return_value=mock_tracker - ) as mock_constructor: - uploader = _create_uploader( - mock_client, - "/logs/foo", - verbosity=0, # Explicitly set verbosity to 0. 
- ) - uploader.create_experiment() - - def scalar_event(tag, value): - return event_pb2.Event(summary=scalar_v2.scalar_pb(tag, value)) - - mock_logdir_loader = mock.create_autospec(logdir_loader.LogdirLoader) - mock_logdir_loader.get_run_events.side_effect = [ - { - "run 1": _apply_compat( - [scalar_event("1.1", 5.0), scalar_event("1.2", 5.0)] - ), - }, - AbortUploadError, - ] - - with mock.patch.object( - uploader, "_logdir_loader", mock_logdir_loader - ), self.assertRaises(AbortUploadError): - uploader.start_uploading() - - self.assertEqual(mock_constructor.call_count, 1) - self.assertEqual( - mock_constructor.call_args[1], {"verbosity": 0, "one_shot": False} - ) - self.assertEqual(mock_tracker.scalars_tracker.call_count, 1) - - -class BatchedRequestSenderTest(tf.test.TestCase): - def _populate_run_from_events( - self, scalar_run, tensor_run, events, allowed_plugins=_USE_DEFAULT - ): - mock_client = _create_mock_client() - builder = _create_request_sender( - experiment_id="123", - api=mock_client, - allowed_plugins=allowed_plugins, - ) - builder.send_requests({"": _apply_compat(events)}) - scalar_requests = [ - c[0][0] for c in mock_client.WriteScalar.call_args_list - ] - if scalar_requests: - self.assertLen(scalar_requests, 1) - self.assertLen(scalar_requests[0].runs, 1) - scalar_run.MergeFrom(scalar_requests[0].runs[0]) - tensor_requests = [ - c[0][0] for c in mock_client.WriteTensor.call_args_list - ] - if tensor_requests: - self.assertLen(tensor_requests, 1) - self.assertLen(tensor_requests[0].runs, 1) - tensor_run.MergeFrom(tensor_requests[0].runs[0]) - - def test_empty_events(self): - scalar_run = write_service_pb2.WriteScalarRequest.Run() - tensor_run = write_service_pb2.WriteTensorRequest.Run() - self._populate_run_from_events(scalar_run, tensor_run, []) - self.assertProtoEquals( - scalar_run, write_service_pb2.WriteScalarRequest.Run() - ) - self.assertProtoEquals( - tensor_run, write_service_pb2.WriteTensorRequest.Run() - ) - - def test_scalar_and_tensor_events(self): - events = [ - event_pb2.Event(summary=scalar_v2.scalar_pb("scalar1", 5.0)), - event_pb2.Event(summary=scalar_v2.scalar_pb("scalar2", 5.0)), - event_pb2.Event( - summary=histogram_v2.histogram_pb("histogram", [5.0]) - ), - event_pb2.Event( - summary=histogram_v2.histogram_pb("histogram", [6.0]) - ), - ] - scalar_run = write_service_pb2.WriteScalarRequest.Run() - tensor_run = write_service_pb2.WriteTensorRequest.Run() - self._populate_run_from_events(scalar_run, tensor_run, events) - scalar_tag_counts = _extract_tag_counts(scalar_run) - self.assertEqual(scalar_tag_counts, {"scalar1": 1, "scalar2": 1}) - tensor_tag_counts = _extract_tag_counts(tensor_run) - self.assertEqual(tensor_tag_counts, {"histogram": 2}) - - def test_skips_non_scalar_and_non_tensor_events(self): - events = [ - event_pb2.Event(summary=scalar_v2.scalar_pb("scalar1", 5.0)), - event_pb2.Event(file_version="brain.Event:2"), - event_pb2.Event( - summary=histogram_v2.histogram_pb("histogram", [5.0]) - ), - ] - scalar_run = write_service_pb2.WriteScalarRequest.Run() - tensor_run = write_service_pb2.WriteTensorRequest.Run() - self._populate_run_from_events(scalar_run, tensor_run, events) - scalar_tag_counts = _extract_tag_counts(scalar_run) - self.assertEqual(scalar_tag_counts, {"scalar1": 1}) - tensor_tag_counts = _extract_tag_counts(tensor_run) - self.assertEqual(tensor_tag_counts, {"histogram": 1}) - - def test_skips_non_scalar_events_in_scalar_time_series(self): - events = [ - event_pb2.Event(file_version="brain.Event:2"), - 
event_pb2.Event(summary=scalar_v2.scalar_pb("scalar1", 5.0)), - event_pb2.Event(summary=scalar_v2.scalar_pb("scalar2", 5.0)), - event_pb2.Event( - summary=histogram_v2.histogram_pb("scalar2", [5.0]) - ), - ] - scalar_run = write_service_pb2.WriteScalarRequest.Run() - tensor_run = write_service_pb2.WriteTensorRequest.Run() - self._populate_run_from_events(scalar_run, tensor_run, events) - scalar_tag_counts = _extract_tag_counts(scalar_run) - self.assertEqual(scalar_tag_counts, {"scalar1": 1, "scalar2": 1}) - tensor_tag_counts = _extract_tag_counts(tensor_run) - self.assertEqual(tensor_tag_counts, {}) - - def test_skips_events_from_disallowed_plugins(self): - event = event_pb2.Event( - step=1, wall_time=123.456, summary=scalar_v2.scalar_pb("foo", 5.0) - ) - scalar_run = write_service_pb2.WriteScalarRequest.Run() - tensor_run = write_service_pb2.WriteTensorRequest.Run() - self._populate_run_from_events( - scalar_run, - tensor_run, - [event], - allowed_plugins=frozenset("not-scalars"), - ) - expected_scalar_run = write_service_pb2.WriteScalarRequest.Run() - self.assertProtoEquals(scalar_run, expected_scalar_run) - expected_tensor_run = write_service_pb2.WriteTensorRequest.Run() - self.assertProtoEquals(tensor_run, expected_tensor_run) - - def test_remembers_first_metadata_in_time_series(self): - scalar_1 = event_pb2.Event(summary=scalar_v2.scalar_pb("loss", 4.0)) - scalar_2 = event_pb2.Event(summary=scalar_v2.scalar_pb("loss", 3.0)) - scalar_2.summary.value[0].ClearField("metadata") - events = [ - event_pb2.Event(file_version="brain.Event:2"), - scalar_1, - scalar_2, - ] - scalar_run = write_service_pb2.WriteScalarRequest.Run() - tensor_run = write_service_pb2.WriteTensorRequest.Run() - self._populate_run_from_events(scalar_run, tensor_run, events) - scalar_tag_counts = _extract_tag_counts(scalar_run) - self.assertEqual(scalar_tag_counts, {"loss": 2}) - - def test_expands_multiple_values_in_event(self): - event = event_pb2.Event(step=1, wall_time=123.456) - event.summary.value.add(tag="foo", simple_value=1.0) - event.summary.value.add(tag="foo", simple_value=2.0) - event.summary.value.add(tag="foo", simple_value=3.0) - scalar_run = write_service_pb2.WriteScalarRequest.Run() - tensor_run = write_service_pb2.WriteTensorRequest.Run() - self._populate_run_from_events(scalar_run, tensor_run, [event]) - expected_scalar_run = write_service_pb2.WriteScalarRequest.Run() - foo_tag = expected_scalar_run.tags.add() - foo_tag.name = "foo" - foo_tag.metadata.display_name = "foo" - foo_tag.metadata.plugin_data.plugin_name = "scalars" - foo_tag.metadata.data_class = summary_pb2.DATA_CLASS_SCALAR - foo_tag.points.add( - step=1, wall_time=test_util.timestamp_pb(123456000000), value=1.0 - ) - foo_tag.points.add( - step=1, wall_time=test_util.timestamp_pb(123456000000), value=2.0 - ) - foo_tag.points.add( - step=1, wall_time=test_util.timestamp_pb(123456000000), value=3.0 - ) - self.assertProtoEquals(scalar_run, expected_scalar_run) - - -class ScalarBatchedRequestSenderTest(tf.test.TestCase): - def _add_events(self, sender, run_name, events): - for event in events: - for value in event.summary.value: - sender.add_event(run_name, event, value, value.metadata) - - def _add_events_and_flush(self, events): - mock_client = _create_mock_client() - sender = _create_scalar_request_sender( - experiment_id="123", - api=mock_client, - ) - self._add_events(sender, "", events) - sender.flush() - - requests = [c[0][0] for c in mock_client.WriteScalar.call_args_list] - self.assertLen(requests, 1) - 
self.assertLen(requests[0].runs, 1) - return requests[0].runs[0] - - def test_aggregation_by_tag(self): - def make_event(step, wall_time, tag, value): - return event_pb2.Event( - step=step, - wall_time=wall_time, - summary=scalar_v2.scalar_pb(tag, value), - ) - - events = [ - make_event(1, 1.0, "one", 11.0), - make_event(1, 2.0, "two", 22.0), - make_event(2, 3.0, "one", 33.0), - make_event(2, 4.0, "two", 44.0), - make_event( - 1, 5.0, "one", 55.0 - ), # Should preserve duplicate step=1. - make_event(1, 6.0, "three", 66.0), - ] - run_proto = self._add_events_and_flush(events) - tag_data = { - tag.name: [ - (p.step, p.wall_time.ToSeconds(), p.value) for p in tag.points - ] - for tag in run_proto.tags - } - self.assertEqual( - tag_data, - { - "one": [(1, 1.0, 11.0), (2, 3.0, 33.0), (1, 5.0, 55.0)], - "two": [(1, 2.0, 22.0), (2, 4.0, 44.0)], - "three": [(1, 6.0, 66.0)], - }, - ) - - def test_v1_summary(self): - event = event_pb2.Event(step=1, wall_time=123.456) - event.summary.value.add(tag="foo", simple_value=5.0) - run_proto = self._add_events_and_flush(_apply_compat([event])) - expected_run_proto = write_service_pb2.WriteScalarRequest.Run() - foo_tag = expected_run_proto.tags.add() - foo_tag.name = "foo" - foo_tag.metadata.display_name = "foo" - foo_tag.metadata.plugin_data.plugin_name = "scalars" - foo_tag.metadata.data_class = summary_pb2.DATA_CLASS_SCALAR - foo_tag.points.add( - step=1, wall_time=test_util.timestamp_pb(123456000000), value=5.0 - ) - self.assertProtoEquals(run_proto, expected_run_proto) - - def test_v1_summary_tb_summary(self): - tf_summary = summary_v1.scalar_pb("foo", 5.0) - tb_summary = summary_pb2.Summary.FromString( - tf_summary.SerializeToString() - ) - event = event_pb2.Event(step=1, wall_time=123.456, summary=tb_summary) - run_proto = self._add_events_and_flush(_apply_compat([event])) - expected_run_proto = write_service_pb2.WriteScalarRequest.Run() - foo_tag = expected_run_proto.tags.add() - foo_tag.name = "foo/scalar_summary" - foo_tag.metadata.display_name = "foo" - foo_tag.metadata.plugin_data.plugin_name = "scalars" - foo_tag.metadata.data_class = summary_pb2.DATA_CLASS_SCALAR - foo_tag.points.add( - step=1, wall_time=test_util.timestamp_pb(123456000000), value=5.0 - ) - self.assertProtoEquals(run_proto, expected_run_proto) - - def test_v2_summary(self): - event = event_pb2.Event( - step=1, wall_time=123.456, summary=scalar_v2.scalar_pb("foo", 5.0) - ) - run_proto = self._add_events_and_flush(_apply_compat([event])) - expected_run_proto = write_service_pb2.WriteScalarRequest.Run() - foo_tag = expected_run_proto.tags.add() - foo_tag.name = "foo" - foo_tag.metadata.plugin_data.plugin_name = "scalars" - foo_tag.metadata.data_class = summary_pb2.DATA_CLASS_SCALAR - foo_tag.points.add( - step=1, wall_time=test_util.timestamp_pb(123456000000), value=5.0 - ) - self.assertProtoEquals(run_proto, expected_run_proto) - - def test_propagates_experiment_deletion(self): - event = event_pb2.Event(step=1) - event.summary.value.add(tag="foo", simple_value=1.0) - - mock_client = _create_mock_client() - sender = _create_scalar_request_sender("123", mock_client) - self._add_events(sender, "run", _apply_compat([event])) - - error = test_util.grpc_error(grpc.StatusCode.NOT_FOUND, "nope") - mock_client.WriteScalar.side_effect = error - with self.assertRaises(uploader_lib.ExperimentNotFoundError): - sender.flush() - - def test_no_budget_for_base_request(self): - mock_client = _create_mock_client() - long_experiment_id = "A" * 12 - with self.assertRaises(RuntimeError) as cm: - 
_create_scalar_request_sender( - experiment_id=long_experiment_id, - api=mock_client, - max_request_size=12, - ) - self.assertEqual( - str(cm.exception), "Byte budget too small for base request" - ) - - def test_no_room_for_single_point(self): - mock_client = _create_mock_client() - event = event_pb2.Event(step=1, wall_time=123.456) - event.summary.value.add(tag="foo", simple_value=1.0) - long_run_name = "A" * 12 - sender = _create_scalar_request_sender( - "123", mock_client, max_request_size=12 - ) - with self.assertRaises(RuntimeError) as cm: - self._add_events(sender, long_run_name, [event]) - self.assertEqual(str(cm.exception), "add_event failed despite flush") - - def test_break_at_run_boundary(self): - mock_client = _create_mock_client() - # Choose run name sizes such that one run fits in a 1024 byte request, - # but not two. - long_run_1 = "A" * 768 - long_run_2 = "B" * 768 - event_1 = event_pb2.Event(step=1) - event_1.summary.value.add(tag="foo", simple_value=1.0) - event_2 = event_pb2.Event(step=2) - event_2.summary.value.add(tag="bar", simple_value=-2.0) - - sender = _create_scalar_request_sender( - "123", - mock_client, - # Set a limit to request size - max_request_size=1024, - ) - self._add_events(sender, long_run_1, _apply_compat([event_1])) - self._add_events(sender, long_run_2, _apply_compat([event_2])) - sender.flush() - requests = [c[0][0] for c in mock_client.WriteScalar.call_args_list] - - for request in requests: - _clear_wall_times(request) - - # Expect two RPC calls despite a single explicit call to flush(). - expected = [ - write_service_pb2.WriteScalarRequest(experiment_id="123"), - write_service_pb2.WriteScalarRequest(experiment_id="123"), - ] - ( - expected[0] - .runs.add(name=long_run_1) - .tags.add(name="foo", metadata=test_util.scalar_metadata("foo")) - .points.add(step=1, value=1.0) - ) - ( - expected[1] - .runs.add(name=long_run_2) - .tags.add(name="bar", metadata=test_util.scalar_metadata("bar")) - .points.add(step=2, value=-2.0) - ) - self.assertEqual(requests, expected) - - def test_break_at_tag_boundary(self): - mock_client = _create_mock_client() - # Choose tag name sizes such that one tag fits in a 1024 byte requst, - # but not two. Note that tag names appear in both `Tag.name` and the - # summary metadata. - long_tag_1 = "a" * 384 - long_tag_2 = "b" * 384 - event = event_pb2.Event(step=1) - event.summary.value.add(tag=long_tag_1, simple_value=1.0) - event.summary.value.add(tag=long_tag_2, simple_value=2.0) - - sender = _create_scalar_request_sender( - "123", - mock_client, - # Set a limit to request size - max_request_size=1024, - ) - self._add_events(sender, "train", _apply_compat([event])) - sender.flush() - requests = [c[0][0] for c in mock_client.WriteScalar.call_args_list] - for request in requests: - _clear_wall_times(request) - - # Expect two RPC calls despite a single explicit call to flush(). 
- expected = [ - write_service_pb2.WriteScalarRequest(experiment_id="123"), - write_service_pb2.WriteScalarRequest(experiment_id="123"), - ] - ( - expected[0] - .runs.add(name="train") - .tags.add( - name=long_tag_1, metadata=test_util.scalar_metadata(long_tag_1) - ) - .points.add(step=1, value=1.0) - ) - ( - expected[1] - .runs.add(name="train") - .tags.add( - name=long_tag_2, metadata=test_util.scalar_metadata(long_tag_2) - ) - .points.add(step=1, value=2.0) - ) - self.assertEqual(requests, expected) - - def test_break_at_scalar_point_boundary(self): - mock_client = _create_mock_client() - point_count = 2000 # comfortably saturates a single 1024-byte request - events = [] - for step in range(point_count): - summary = scalar_v2.scalar_pb("loss", -2.0 * step) - if step > 0: - summary.value[0].ClearField("metadata") - events.append(event_pb2.Event(summary=summary, step=step)) - tracker = upload_tracker.UploadTracker(verbosity=0) - sender = _create_scalar_request_sender( - "123", - mock_client, - # Set a limit to request size - max_request_size=1024, - tracker=tracker, - ) - self._add_events(sender, "train", _apply_compat(events)) - sender.flush() - requests = [c[0][0] for c in mock_client.WriteScalar.call_args_list] - for request in requests: - _clear_wall_times(request) - - self.assertGreater(len(requests), 1) - self.assertLess(len(requests), point_count) - # This is the observed number of requests when running the test. There - # is no reasonable way to derive this value from just reading the code. - # The number of requests does not have to be 33 to be correct but if it - # changes it probably warrants some investigation or thought. - self.assertEqual(33, len(requests)) - - total_points_in_result = 0 - for request in requests: - self.assertLen(request.runs, 1) - run = request.runs[0] - self.assertEqual(run.name, "train") - self.assertLen(run.tags, 1) - tag = run.tags[0] - self.assertEqual(tag.name, "loss") - for point in tag.points: - self.assertEqual(point.step, total_points_in_result) - self.assertEqual(point.value, -2.0 * point.step) - total_points_in_result += 1 - self.assertLessEqual(request.ByteSize(), 1024) - self.assertEqual(total_points_in_result, point_count) - with self.subTest("Scalar report count correct."): - self.assertEqual(tracker._stats.num_scalars, point_count) - - def test_prunes_tags_and_runs(self): - mock_client = _create_mock_client() - event_1 = event_pb2.Event(step=1) - event_1.summary.value.add(tag="foo", simple_value=1.0) - event_2 = event_pb2.Event(step=2) - event_2.summary.value.add(tag="bar", simple_value=-2.0) - - add_point_call_count = 0 - - def mock_add_point(byte_budget_manager_self, point): - # Simulate out-of-space error the first time that we try to store - # the second point. 
- nonlocal add_point_call_count - add_point_call_count += 1 - if add_point_call_count == 2: - raise uploader_lib._OutOfSpaceError() - - with mock.patch.object( - uploader_lib._ByteBudgetManager, - "add_point", - mock_add_point, - ): - sender = _create_scalar_request_sender("123", mock_client) - self._add_events(sender, "train", _apply_compat([event_1])) - self._add_events(sender, "test", _apply_compat([event_2])) - sender.flush() - requests = [c[0][0] for c in mock_client.WriteScalar.call_args_list] - for request in requests: - _clear_wall_times(request) - - expected = [ - write_service_pb2.WriteScalarRequest(experiment_id="123"), - write_service_pb2.WriteScalarRequest(experiment_id="123"), - ] - ( - expected[0] - .runs.add(name="train") - .tags.add(name="foo", metadata=test_util.scalar_metadata("foo")) - .points.add(step=1, value=1.0) - ) - ( - expected[1] - .runs.add(name="test") - .tags.add(name="bar", metadata=test_util.scalar_metadata("bar")) - .points.add(step=2, value=-2.0) - ) - self.assertEqual(expected, requests) - - def test_wall_time_precision(self): - # Test a wall time that is exactly representable in float64 but has enough - # digits to incur error if converted to nanoseconds the naive way (* 1e9). - event1 = event_pb2.Event(step=1, wall_time=1567808404.765432119) - event1.summary.value.add(tag="foo", simple_value=1.0) - # Test a wall time where as a float64, the fractional part on its own will - # introduce error if truncated to 9 decimal places instead of rounded. - event2 = event_pb2.Event(step=2, wall_time=1.000000002) - event2.summary.value.add(tag="foo", simple_value=2.0) - run_proto = self._add_events_and_flush(_apply_compat([event1, event2])) - self.assertEqual( - test_util.timestamp_pb(1567808404765432119), - run_proto.tags[0].points[0].wall_time, - ) - self.assertEqual( - test_util.timestamp_pb(1000000002), - run_proto.tags[0].points[1].wall_time, - ) - - -class TensorBatchedRequestSenderTest(tf.test.TestCase): - def _add_events(self, sender, run_name, events): - for event in events: - for value in event.summary.value: - sender.add_event(run_name, event, value, value.metadata) - - def _add_events_and_flush(self, events, max_tensor_point_size=_USE_DEFAULT): - mock_client = _create_mock_client() - sender = _create_tensor_request_sender( - experiment_id="123", - api=mock_client, - max_tensor_point_size=max_tensor_point_size, - ) - self._add_events(sender, "", events) - sender.flush() - - requests = [c[0][0] for c in mock_client.WriteTensor.call_args_list] - self.assertLen(requests, 1) - self.assertLen(requests[0].runs, 1) - return requests[0].runs[0] - - def test_histogram_event(self): - event = event_pb2.Event( - step=1, - wall_time=123.456, - summary=histogram_v2.histogram_pb("foo", [1.0]), - ) - - run_proto = self._add_events_and_flush(_apply_compat([event])) - expected_run_proto = write_service_pb2.WriteTensorRequest.Run() - foo_tag = expected_run_proto.tags.add() - foo_tag.name = "foo" - foo_tag.metadata.plugin_data.plugin_name = "histograms" - foo_tag.metadata.data_class = summary_pb2.DATA_CLASS_TENSOR - foo_tag.points.add( - step=1, - wall_time=test_util.timestamp_pb(123456000000), - value=tensor_pb2.TensorProto(dtype=types_pb2.DT_DOUBLE), - ) - # Simplify the tensor value a bit before making assertions on it. - # We care that it is copied to the request but we don't need it to be - # an extensive test. 
- run_proto.tags[0].points[0].value.ClearField("tensor_shape") - run_proto.tags[0].points[0].value.ClearField("tensor_content") - self.assertProtoEquals(run_proto, expected_run_proto) - - def test_histogram_event_with_empty_tensor_content_errors_out(self): - event = event_pb2.Event(step=42) - event.summary.value.add( - tag="one", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, - # Use empty tensor content to elicit an error. - tensor_content=b"", - ), - ) - - mock_client = _create_mock_client() - sender = _create_tensor_request_sender("123", mock_client) - with self.assertRaisesRegex( - ValueError, - re.compile( - r"failed to upload a tensor.*malformation.*tag.*\'one\'.*step.*42", - re.DOTALL, - ), - ): - self._add_events(sender, "run", _apply_compat([event])) - - def test_histogram_event_with_incorrect_tensor_shape_errors_out(self): - event = event_pb2.Event(step=1337) - tensor_proto = tensor_util.make_tensor_proto([1.0, 2.0]) - # Add an extraneous dimension to the tensor shape in order to - # elicit an error. - tensor_proto.tensor_shape.dim.append( - tensor_shape_pb2.TensorShapeProto.Dim(size=2) - ) - event.summary.value.add(tag="two", tensor=tensor_proto) - - mock_client = _create_mock_client() - sender = _create_tensor_request_sender("123", mock_client) - with self.assertRaisesRegex( - ValueError, - re.compile( - r"failed to upload a tensor.*malformation.*tag.*\'two\'.*step.*1337." - r"*shape", - re.DOTALL, - ), - ): - self._add_events(sender, "run", _apply_compat([event])) - - def test_aggregation_by_tag(self): - def make_event(step, wall_time, tag): - event = event_pb2.Event(step=step, wall_time=wall_time) - event.summary.value.add( - tag=tag, - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[1.0] - ), - ) - return event - - events = [ - make_event(1, 1.0, "one"), - make_event(1, 2.0, "two"), - make_event(2, 3.0, "one"), - make_event(2, 4.0, "two"), - make_event(1, 5.0, "one"), # Should preserve duplicate step=1. 
- make_event(1, 6.0, "three"), - ] - run_proto = self._add_events_and_flush(events) - tag_data = { - tag.name: [(p.step, p.wall_time.ToSeconds()) for p in tag.points] - for tag in run_proto.tags - } - self.assertEqual( - tag_data, - { - "one": [(1, 1.0), (2, 3.0), (1, 5.0)], - "two": [(1, 2.0), (2, 4.0)], - "three": [(1, 6.0)], - }, - ) - - def test_propagates_experiment_deletion(self): - event = event_pb2.Event(step=1) - event.summary.value.add( - tag="one", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[1.0] - ), - ) - - mock_client = _create_mock_client() - sender = _create_tensor_request_sender("123", mock_client) - self._add_events(sender, "run", _apply_compat([event])) - - error = test_util.grpc_error(grpc.StatusCode.NOT_FOUND, "nope") - mock_client.WriteTensor.side_effect = error - with self.assertRaises(uploader_lib.ExperimentNotFoundError): - sender.flush() - - def test_no_budget_for_base_request(self): - mock_client = _create_mock_client() - long_experiment_id = "A" * 12 - with self.assertRaises(RuntimeError) as cm: - _create_tensor_request_sender( - experiment_id=long_experiment_id, - api=mock_client, - max_request_size=12, - ) - self.assertEqual( - str(cm.exception), "Byte budget too small for base request" - ) - - def test_no_room_for_single_point(self): - mock_client = _create_mock_client() - event = event_pb2.Event(step=1) - event.summary.value.add( - tag="one", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[1.0] - ), - ) - long_run_name = "A" * 12 - sender = _create_tensor_request_sender( - "123", mock_client, max_request_size=12 - ) - with self.assertRaises(RuntimeError) as cm: - self._add_events(sender, long_run_name, [event]) - self.assertEqual(str(cm.exception), "add_event failed despite flush") - - def test_break_at_run_boundary(self): - mock_client = _create_mock_client() - # Choose run name sizes such that one run fits in a 1024 byte request, - # but not two. - long_run_1 = "A" * 768 - long_run_2 = "B" * 768 - event_1 = event_pb2.Event(step=1) - event_1.summary.value.add( - tag="one", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[1.0] - ), - ) - event_2 = event_pb2.Event(step=2) - event_2.summary.value.add( - tag="two", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[2.0] - ), - ) - - sender = _create_tensor_request_sender( - "123", - mock_client, - # Set a limit to request size - max_request_size=1024, - ) - self._add_events(sender, long_run_1, _apply_compat([event_1])) - self._add_events(sender, long_run_2, _apply_compat([event_2])) - sender.flush() - requests = [c[0][0] for c in mock_client.WriteTensor.call_args_list] - - # Expect two RPC calls despite a single explicit call to flush(). - self.assertEqual(2, len(requests)) - self.assertEqual(1, len(requests[0].runs)) - self.assertEqual(long_run_1, requests[0].runs[0].name) - self.assertEqual(1, len(requests[1].runs)) - self.assertEqual(long_run_2, requests[1].runs[0].name) - - def test_break_at_tag_boundary(self): - mock_client = _create_mock_client() - # Choose tag name sizes such that one tag fits in a 1024 byte request, - # but not two. 
- long_tag_1 = "a" * 600 - long_tag_2 = "b" * 600 - event = event_pb2.Event(step=1, wall_time=1) - event.summary.value.add( - tag=long_tag_1, - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[1.0] - ), - ) - event.summary.value.add( - tag=long_tag_2, - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[2.0] - ), - ) - - sender = _create_tensor_request_sender( - "123", - mock_client, - # Set a limit to request size - max_request_size=1024, - ) - self._add_events(sender, "train", _apply_compat([event])) - sender.flush() - requests = [c[0][0] for c in mock_client.WriteTensor.call_args_list] - - # Expect two RPC calls despite a single explicit call to flush(). - self.assertEqual(2, len(requests)) - # First RPC contains one tag. - self.assertEqual(1, len(requests[0].runs)) - self.assertEqual("train", requests[0].runs[0].name) - self.assertEqual(1, len(requests[0].runs[0].tags)) - self.assertEqual(long_tag_1, requests[0].runs[0].tags[0].name) - # Second RPC contains the other tag. - self.assertEqual(1, len(requests[1].runs)) - self.assertEqual("train", requests[1].runs[0].name) - self.assertEqual(1, len(requests[1].runs[0].tags)) - self.assertEqual(long_tag_2, requests[1].runs[0].tags[0].name) - - def test_break_at_tensor_point_boundary(self): - mock_client = _create_mock_client() - point_count = 2000 # comfortably saturates a single 1024-byte request - events = [] - for step in range(point_count): - event = event_pb2.Event(step=step) - tensor_proto = tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[1.0 * step, -1.0 * step] - ) - tensor_proto.tensor_shape.dim.append( - tensor_shape_pb2.TensorShapeProto.Dim(size=2) - ) - event.summary.value.add(tag="histo", tensor=tensor_proto) - events.append(event) - - tracker = upload_tracker.UploadTracker(verbosity=0) - sender = _create_tensor_request_sender( - "123", - mock_client, - # Set a limit to request size - max_request_size=1024, - tracker=tracker, - ) - self._add_events(sender, "train", _apply_compat(events)) - sender.flush() - requests = [c[0][0] for c in mock_client.WriteTensor.call_args_list] - - self.assertGreater(len(requests), 1) - self.assertLess(len(requests), point_count) - self.assertEqual(72, len(requests)) - - total_points_in_result = 0 - for request in requests: - self.assertLen(request.runs, 1) - run = request.runs[0] - self.assertEqual(run.name, "train") - self.assertLen(run.tags, 1) - tag = run.tags[0] - self.assertEqual(tag.name, "histo") - for point in tag.points: - self.assertEqual(point.step, total_points_in_result) - self.assertEqual( - point.value.double_val, - [1.0 * point.step, -1.0 * point.step], - ) - total_points_in_result += 1 - self.assertLessEqual(request.ByteSize(), 1024) - self.assertEqual(total_points_in_result, point_count) - with self.subTest("Tensor report count correct."): - self.assertEqual(tracker._stats.num_tensors, point_count) - - def test_strip_large_tensors(self): - # Generate test data with varying tensor point sizes. Use raw bytes. - event_1 = event_pb2.Event(step=1) - event_1.summary.value.add( - tag="one", - # This TensorProto has a byte size of 18. - tensor=tensor_util.make_tensor_proto([1.0, 2.0]), - ) - event_1.summary.value.add( - tag="two", - # This TensorProto has a byte size of 22. - tensor=tensor_util.make_tensor_proto([1.0, 2.0, 3.0]), - ) - # This TensorProto has a 12-byte tensor_content. - event_2 = event_pb2.Event(step=2) - event_2.summary.value.add( - tag="one", - # This TensorProto has a byte size of 18. 
- tensor=tensor_util.make_tensor_proto([2.0, 4.0]), - ) - event_2.summary.value.add( - tag="two", - # This TensorProto has a byte size of 26. - tensor=tensor_util.make_tensor_proto([1.0, 2.0, 3.0, 4.0]), - ) - - run_proto = self._add_events_and_flush( - _apply_compat([event_1, event_2]), - # Set threshold that will filter out the tensor point with 26 bytes - # of data and above. The additional byte is for proto overhead. - max_tensor_point_size=24, - ) - tag_data = { - tag.name: [(p.step, p.value.tensor_content) for p in tag.points] - for tag in run_proto.tags - } - # A single tensor point is filtered out. - self.assertEqual( - tag_data, - { - "one": [ - (1, b"\x00\x00\x80?\x00\x00\x00@"), - (2, b"\x00\x00\x00@\x00\x00\x80@"), - ], - "two": [(1, b"\x00\x00\x80?\x00\x00\x00@\x00\x00@@")], - }, - ) - - run_proto_2 = self._add_events_and_flush( - _apply_compat([event_1, event_2]), - # Set threshold that will filter out the tensor points with 22 and 26 - # bytes of data and above. The additional byte is for proto overhead. - max_tensor_point_size=20, - ) - tag_data_2 = { - tag.name: [(p.step, p.value.tensor_content) for p in tag.points] - for tag in run_proto_2.tags - } - # All tensor points from the same tag are filtered out, and the tag is pruned. - self.assertEqual( - tag_data_2, - { - "one": [ - (1, b"\x00\x00\x80?\x00\x00\x00@"), - (2, b"\x00\x00\x00@\x00\x00\x80@"), - ], - }, - ) - - def test_prunes_tags_and_runs(self): - mock_client = _create_mock_client() - event_1 = event_pb2.Event(step=1) - event_1.summary.value.add( - tag="one", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[1.0] - ), - ) - event_2 = event_pb2.Event(step=2) - event_2.summary.value.add( - tag="two", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[2.0] - ), - ) - - add_point_call_count = 0 - - def mock_add_point(byte_budget_manager_self, point): - # Simulate out-of-space error the first time that we try to store - # the second point. - nonlocal add_point_call_count - add_point_call_count += 1 - if add_point_call_count == 2: - raise uploader_lib._OutOfSpaceError() - - with mock.patch.object( - uploader_lib._ByteBudgetManager, - "add_point", - mock_add_point, - ): - sender = _create_tensor_request_sender("123", mock_client) - self._add_events(sender, "train", _apply_compat([event_1])) - self._add_events(sender, "test", _apply_compat([event_2])) - sender.flush() - requests = [c[0][0] for c in mock_client.WriteTensor.call_args_list] - - # Expect two RPC calls despite a single explicit call to flush(). - self.assertEqual(2, len(requests)) - # First RPC contains one tag. - self.assertEqual(1, len(requests[0].runs)) - self.assertEqual("train", requests[0].runs[0].name) - self.assertEqual(1, len(requests[0].runs[0].tags)) - self.assertEqual("one", requests[0].runs[0].tags[0].name) - # Second RPC contains the other tag. - self.assertEqual(1, len(requests[1].runs)) - self.assertEqual("test", requests[1].runs[0].name) - self.assertEqual(1, len(requests[1].runs[0].tags)) - self.assertEqual("two", requests[1].runs[0].tags[0].name) - - def test_wall_time_precision(self): - # Test a wall time that is exactly representable in float64 but has enough - # digits to incur error if converted to nanoseconds the naive way (* 1e9). 
- event_1 = event_pb2.Event(step=1, wall_time=1567808404.765432119) - event_1.summary.value.add( - tag="tag", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[1.0] - ), - ) - # Test a wall time where as a float64, the fractional part on its own will - # introduce error if truncated to 9 decimal places instead of rounded. - event_2 = event_pb2.Event(step=2, wall_time=1.000000002) - event_2.summary.value.add( - tag="tag", - tensor=tensor_pb2.TensorProto( - dtype=types_pb2.DT_DOUBLE, double_val=[2.0] - ), - ) - run_proto = self._add_events_and_flush( - _apply_compat([event_1, event_2]) - ) - self.assertEqual( - test_util.timestamp_pb(1567808404765432119), - run_proto.tags[0].points[0].wall_time, - ) - self.assertEqual( - test_util.timestamp_pb(1000000002), - run_proto.tags[0].points[1].wall_time, - ) - - class DeleteExperimentTest(tf.test.TestCase): def _create_mock_client(self): # Create a stub instance (using a test channel) in order to derive a mock @@ -1992,39 +175,5 @@ def test_internal_error(self): self.assertIn("travesty", msg) -class VarintCostTest(tf.test.TestCase): - def test_varint_cost(self): - self.assertEqual(uploader_lib._varint_cost(0), 1) - self.assertEqual(uploader_lib._varint_cost(7), 1) - self.assertEqual(uploader_lib._varint_cost(127), 1) - self.assertEqual(uploader_lib._varint_cost(128), 2) - self.assertEqual(uploader_lib._varint_cost(128 * 128 - 1), 2) - self.assertEqual(uploader_lib._varint_cost(128 * 128), 3) - - -def _clear_wall_times(request): - """Clears the wall_time fields in a WriteScalarRequest to be - deterministic.""" - for run in request.runs: - for tag in run.tags: - for point in tag.points: - point.ClearField("wall_time") - - -def _apply_compat(events): - initial_metadata = {} - for event in events: - event = data_compat.migrate_event(event) - events = dataclass_compat.migrate_event( - event, initial_metadata=initial_metadata - ) - for event in events: - yield event - - -def _extract_tag_counts(run_proto): - return {tag.name: len(tag.points) for tag in run_proto.tags} - - if __name__ == "__main__": tf.test.main() diff --git a/tensorboard/uploader/util.py b/tensorboard/uploader/util.py index f8917e86f6d..3b90fdbbaed 100644 --- a/tensorboard/uploader/util.py +++ b/tensorboard/uploader/util.py @@ -19,28 +19,6 @@ import errno import os import os.path -import time - - -class RateLimiter: - """Helper class for rate-limiting using a fixed minimum interval.""" - - def __init__(self, interval_secs): - """Constructs a RateLimiter that permits a tick() every - `interval_secs`.""" - self._time = time # Use property for ease of testing. 
- self._interval_secs = interval_secs - self._last_called_secs = 0 - - def tick(self): - """Blocks until it has been at least `interval_secs` since last - tick().""" - wait_secs = ( - self._last_called_secs + self._interval_secs - self._time.time() - ) - if wait_secs > 0: - self._time.sleep(wait_secs) - self._last_called_secs = self._time.time() def get_user_config_directory(): diff --git a/tensorboard/uploader/util_test.py b/tensorboard/uploader/util_test.py index 49d12694d57..09cc7101ebd 100644 --- a/tensorboard/uploader/util_test.py +++ b/tensorboard/uploader/util_test.py @@ -22,35 +22,10 @@ from unittest import mock from google.protobuf import timestamp_pb2 -from tensorboard.uploader import test_util from tensorboard.uploader import util from tensorboard import test as tb_test -class RateLimiterTest(tb_test.TestCase): - def test_rate_limiting(self): - rate_limiter = util.RateLimiter(10) - fake_time = test_util.FakeTime(current=1000) - with mock.patch.object(rate_limiter, "_time", fake_time): - self.assertEqual(1000, fake_time.time()) - # No sleeping for initial tick. - rate_limiter.tick() - self.assertEqual(1000, fake_time.time()) - # Second tick requires a full sleep. - rate_limiter.tick() - self.assertEqual(1010, fake_time.time()) - # Third tick requires a sleep just to make up the remaining second. - fake_time.sleep(9) - self.assertEqual(1019, fake_time.time()) - rate_limiter.tick() - self.assertEqual(1020, fake_time.time()) - # Fourth tick requires no sleep since we have no remaining seconds. - fake_time.sleep(11) - self.assertEqual(1031, fake_time.time()) - rate_limiter.tick() - self.assertEqual(1031, fake_time.time()) - - class GetUserConfigDirectoryTest(tb_test.TestCase): def test_windows(self): with mock.patch.object(os, "name", "nt"): From cab2bc0b7e48968eb1c3af32b65f0685374e6de4 Mon Sep 17 00:00:00 2001 From: Brian Dubois Date: Wed, 18 Oct 2023 12:10:03 -0400 Subject: [PATCH 2/3] Bring back upload_tracker and logdir_loader. They are used internally. 
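
A minimal sketch of how an internal caller might wire the restored modules
together (the logdir path, the `_loader_factory` name, and the consuming loop
below are illustrative only, not part of this change; the factory mirrors the
one used in logdir_loader_test.py):

    # Hypothetical wiring; assumes TensorBoard's standard event-processing
    # loaders, as exercised by logdir_loader_test.py.
    from tensorboard.backend.event_processing import directory_loader
    from tensorboard.backend.event_processing import event_file_loader
    from tensorboard.backend.event_processing import io_wrapper
    from tensorboard.uploader import logdir_loader
    from tensorboard.uploader import upload_tracker

    def _loader_factory(path):
        # One DirectoryLoader per run subdirectory, reading TF event files.
        return directory_loader.DirectoryLoader(
            path,
            event_file_loader.TimestampedEventFileLoader,
            path_filter=io_wrapper.IsTensorFlowEventsFile,
        )

    loader = logdir_loader.LogdirLoader("/tmp/logs", _loader_factory)
    tracker = upload_tracker.UploadTracker(verbosity=1, one_shot=True)
    with tracker.send_tracker():
        loader.synchronize_runs()
        for run_name, events in loader.get_run_events().items():
            for event in events:
                # A real caller would batch these events into upload
                # requests; here we only drain the per-run generators.
                pass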
--- tensorboard/uploader/BUILD | 41 ++ tensorboard/uploader/logdir_loader.py | 108 +++++ tensorboard/uploader/logdir_loader_test.py | 166 ++++++++ tensorboard/uploader/upload_tracker.py | 429 ++++++++++++++++++++ tensorboard/uploader/upload_tracker_test.py | 395 ++++++++++++++++++ 5 files changed, 1139 insertions(+) create mode 100644 tensorboard/uploader/logdir_loader.py create mode 100644 tensorboard/uploader/logdir_loader_test.py create mode 100644 tensorboard/uploader/upload_tracker.py create mode 100644 tensorboard/uploader/upload_tracker_test.py diff --git a/tensorboard/uploader/BUILD b/tensorboard/uploader/BUILD index 2f65b5b3f7f..a038c6a814f 100644 --- a/tensorboard/uploader/BUILD +++ b/tensorboard/uploader/BUILD @@ -109,6 +109,12 @@ py_library( ], ) +py_library( + name = "upload_tracker", + srcs = ["upload_tracker.py"], + srcs_version = "PY3", +) + py_test( name = "uploader_test", srcs = ["uploader_test.py"], @@ -126,6 +132,16 @@ py_test( ], ) +py_test( + name = "upload_tracker_test", + srcs = ["upload_tracker_test.py"], + srcs_version = "PY3", + deps = [ + ":upload_tracker", + "//tensorboard:test", + ], +) + py_library( name = "auth", srcs = ["auth.py"], @@ -149,6 +165,30 @@ py_test( ], ) +py_library( + name = "logdir_loader", + srcs = ["logdir_loader.py"], + srcs_version = "PY3", + deps = [ + "//tensorboard/backend/event_processing:directory_watcher", + "//tensorboard/backend/event_processing:io_wrapper", + "//tensorboard/util:tb_logging", + ], +) + +py_test( + name = "logdir_loader_test", + srcs = ["logdir_loader_test.py"], + deps = [ + ":logdir_loader", + "//tensorboard:test", + "//tensorboard/backend/event_processing:directory_loader", + "//tensorboard/backend/event_processing:event_file_loader", + "//tensorboard/backend/event_processing:io_wrapper", + "//tensorboard/util:test_util", + ], +) + py_library( name = "test_util", testonly = 1, @@ -172,6 +212,7 @@ py_test( srcs = ["util_test.py"], deps = [ ":util", + "//tensorboard:expect_grpc_installed", "//tensorboard:expect_protobuf_installed", "//tensorboard:test", ], diff --git a/tensorboard/uploader/logdir_loader.py b/tensorboard/uploader/logdir_loader.py new file mode 100644 index 00000000000..5d22bfe57d5 --- /dev/null +++ b/tensorboard/uploader/logdir_loader.py @@ -0,0 +1,108 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Loader for event file data for an entire TensorBoard log directory.""" + + +import collections +import os + +from tensorboard.backend.event_processing import directory_watcher +from tensorboard.backend.event_processing import io_wrapper +from tensorboard.util import tb_logging + + +logger = tb_logging.get_logger() + + +class LogdirLoader: + """Loader for a root log directory, maintaining multiple DirectoryLoaders. 
+ + This class takes a root log directory and a factory for DirectoryLoaders, and + maintains one DirectoryLoader per "logdir subdirectory" of the root logdir. + + Note that this class is not thread-safe. + """ + + def __init__(self, logdir, directory_loader_factory): + """Constructs a new LogdirLoader. + + Args: + logdir: The root log directory to load from. + directory_loader_factory: A factory for creating DirectoryLoaders. The + factory should take a path and return a DirectoryLoader. + + Raises: + ValueError: If logdir or directory_loader_factory are None. + """ + if logdir is None: + raise ValueError("A logdir is required") + if directory_loader_factory is None: + raise ValueError("A directory loader factory is required") + self._logdir = logdir + self._directory_loader_factory = directory_loader_factory + # Maps run names to corresponding DirectoryLoader instances. + self._directory_loaders = {} + + def synchronize_runs(self): + """Finds new runs within `logdir` and makes `DirectoryLoaders` for + them. + + In addition, any existing `DirectoryLoader` whose run directory + no longer exists will be deleted. + """ + logger.info("Starting logdir traversal of %s", self._logdir) + runs_seen = set() + for subdir in io_wrapper.GetLogdirSubdirectories(self._logdir): + run = os.path.relpath(subdir, self._logdir) + runs_seen.add(run) + if run not in self._directory_loaders: + logger.info("- Adding run for relative directory %s", run) + self._directory_loaders[run] = self._directory_loader_factory( + subdir + ) + stale_runs = set(self._directory_loaders) - runs_seen + if stale_runs: + for run in stale_runs: + logger.info("- Removing run for relative directory %s", run) + del self._directory_loaders[run] + logger.info("Ending logdir traversal of %s", self._logdir) + + def get_run_events(self): + """Returns tf.Event generators for each run's `DirectoryLoader`. + + Warning: the generators are stateful and consuming them will affect the + results of any other existing generators for that run; calling code should + ensure it takes events from only a single generator per run at a time. + + Returns: + Dictionary containing an entry for each run, mapping the run name to a + generator yielding tf.Event protobuf objects loaded from that run. + """ + runs = list(self._directory_loaders) + logger.info("Creating event loading generators for %d runs", len(runs)) + run_to_loader = collections.OrderedDict() + for run_name in sorted(runs): + loader = self._directory_loaders[run_name] + run_to_loader[run_name] = self._wrap_loader_generator(loader.Load()) + return run_to_loader + + def _wrap_loader_generator(self, loader_generator): + """Wraps `DirectoryLoader` generator to swallow + `DirectoryDeletedError`.""" + try: + for item in loader_generator: + yield item + except directory_watcher.DirectoryDeletedError: + return diff --git a/tensorboard/uploader/logdir_loader_test.py b/tensorboard/uploader/logdir_loader_test.py new file mode 100644 index 00000000000..9e0f7498d6c --- /dev/null +++ b/tensorboard/uploader/logdir_loader_test.py @@ -0,0 +1,166 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for tensorboard.uploader.logdir_loader.""" + + +import os.path +import shutil + +from tensorboard.uploader import logdir_loader +from tensorboard import test as tb_test +from tensorboard.backend.event_processing import directory_loader +from tensorboard.backend.event_processing import event_file_loader +from tensorboard.backend.event_processing import io_wrapper +from tensorboard.util import test_util + + +class LogdirLoaderTest(tb_test.TestCase): + def _create_logdir_loader(self, logdir): + def directory_loader_factory(path): + return directory_loader.DirectoryLoader( + path, + event_file_loader.TimestampedEventFileLoader, + path_filter=io_wrapper.IsTensorFlowEventsFile, + ) + + return logdir_loader.LogdirLoader(logdir, directory_loader_factory) + + def _extract_tags(self, event_generator): + """Converts a generator of tf.Events into a list of event tags.""" + return [ + event.summary.value[0].tag + for event in event_generator + if not event.file_version + ] + + def _extract_run_to_tags(self, run_to_events): + """Returns run-to-tags dict from run-to-event-generator dict.""" + run_to_tags = {} + for run_name, event_generator in run_to_events.items(): + # There should be no duplicate runs. + self.assertNotIn(run_name, run_to_tags) + run_to_tags[run_name] = self._extract_tags(event_generator) + return run_to_tags + + def test_empty_logdir(self): + logdir = self.get_temp_dir() + loader = self._create_logdir_loader(logdir) + # Default state is empty. + self.assertEmpty(list(loader.get_run_events())) + loader.synchronize_runs() + # Still empty, since there's no data. + self.assertEmpty(list(loader.get_run_events())) + + def test_single_event_logdir(self): + logdir = self.get_temp_dir() + with test_util.FileWriter(logdir) as writer: + writer.add_test_summary("foo") + loader = self._create_logdir_loader(logdir) + loader.synchronize_runs() + self.assertEqual( + self._extract_run_to_tags(loader.get_run_events()), {".": ["foo"]} + ) + # A second load should indicate no new data for the run. + self.assertEqual( + self._extract_run_to_tags(loader.get_run_events()), {".": []} + ) + + def test_multiple_writes_to_logdir(self): + logdir = self.get_temp_dir() + with test_util.FileWriter(os.path.join(logdir, "a")) as writer: + writer.add_test_summary("tag_a") + with test_util.FileWriter(os.path.join(logdir, "b")) as writer: + writer.add_test_summary("tag_b") + with test_util.FileWriter(os.path.join(logdir, "b", "x")) as writer: + writer.add_test_summary("tag_b_x") + writer_c = test_util.FileWriter(os.path.join(logdir, "c")) + writer_c.add_test_summary("tag_c") + writer_c.flush() + loader = self._create_logdir_loader(logdir) + loader.synchronize_runs() + self.assertEqual( + self._extract_run_to_tags(loader.get_run_events()), + { + "a": ["tag_a"], + "b": ["tag_b"], + "b/x": ["tag_b_x"], + "c": ["tag_c"], + }, + ) + # A second load should indicate no new data. 
+ self.assertEqual( + self._extract_run_to_tags(loader.get_run_events()), + {"a": [], "b": [], "b/x": [], "c": []}, + ) + # Write some new data to both new and pre-existing event files. + with test_util.FileWriter( + os.path.join(logdir, "a"), filename_suffix=".other" + ) as writer: + writer.add_test_summary("tag_a_2") + writer.add_test_summary("tag_a_3") + writer.add_test_summary("tag_a_4") + with test_util.FileWriter( + os.path.join(logdir, "b", "x"), filename_suffix=".other" + ) as writer: + writer.add_test_summary("tag_b_x_2") + with writer_c as writer: + writer.add_test_summary("tag_c_2") + # New data should appear on the next load. + self.assertEqual( + self._extract_run_to_tags(loader.get_run_events()), + { + "a": ["tag_a_2", "tag_a_3", "tag_a_4"], + "b": [], + "b/x": ["tag_b_x_2"], + "c": ["tag_c_2"], + }, + ) + + def test_directory_deletion(self): + logdir = self.get_temp_dir() + with test_util.FileWriter(os.path.join(logdir, "a")) as writer: + writer.add_test_summary("tag_a") + with test_util.FileWriter(os.path.join(logdir, "b")) as writer: + writer.add_test_summary("tag_b") + with test_util.FileWriter(os.path.join(logdir, "c")) as writer: + writer.add_test_summary("tag_c") + loader = self._create_logdir_loader(logdir) + loader.synchronize_runs() + self.assertEqual(list(loader.get_run_events().keys()), ["a", "b", "c"]) + shutil.rmtree(os.path.join(logdir, "b")) + loader.synchronize_runs() + self.assertEqual(list(loader.get_run_events().keys()), ["a", "c"]) + shutil.rmtree(logdir) + loader.synchronize_runs() + self.assertEmpty(loader.get_run_events()) + + def test_directory_deletion_during_event_loading(self): + logdir = self.get_temp_dir() + with test_util.FileWriter(logdir) as writer: + writer.add_test_summary("foo") + loader = self._create_logdir_loader(logdir) + loader.synchronize_runs() + self.assertEqual( + self._extract_run_to_tags(loader.get_run_events()), {".": ["foo"]} + ) + shutil.rmtree(logdir) + runs_to_events = loader.get_run_events() + self.assertEqual(list(runs_to_events.keys()), ["."]) + events = runs_to_events["."] + self.assertEqual(self._extract_tags(events), []) + + +if __name__ == "__main__": + tb_test.main() diff --git a/tensorboard/uploader/upload_tracker.py b/tensorboard/uploader/upload_tracker.py new file mode 100644 index 00000000000..a72b7fbe14b --- /dev/null +++ b/tensorboard/uploader/upload_tracker.py @@ -0,0 +1,429 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ==============================================================================
+"""Progress tracker for uploader."""
+
+
+import contextlib
+from datetime import datetime
+import sys
+import time
+
+
+def readable_time_string():
+    """Get a human-readable time string for the present."""
+    return datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
+
+
+def readable_bytes_string(bytes):
+    """Get a human-readable string for number of bytes."""
+    if bytes >= 2**20:
+        return "%.1f MB" % (float(bytes) / 2**20)
+    elif bytes >= 2**10:
+        return "%.1f kB" % (float(bytes) / 2**10)
+    else:
+        return "%d B" % bytes
+
+
+class UploadStats:
+    """Statistics of uploading."""
+
+    def __init__(self):
+        self._last_summarized_timestamp = time.time()
+        self._last_data_added_timestamp = 0
+        self._num_scalars = 0
+        self._num_tensors = 0
+        self._num_tensors_skipped = 0
+        self._tensor_bytes = 0
+        self._tensor_bytes_skipped = 0
+        self._num_blobs = 0
+        self._num_blobs_skipped = 0
+        self._blob_bytes = 0
+        self._blob_bytes_skipped = 0
+        self._plugin_names = set()
+
+    def add_scalars(self, num_scalars):
+        """Add a batch of scalars.
+
+        Args:
+          num_scalars: Number of scalars uploaded in this batch.
+        """
+        self._refresh_last_data_added_timestamp()
+        self._num_scalars += num_scalars
+
+    def add_tensors(
+        self,
+        num_tensors,
+        num_tensors_skipped,
+        tensor_bytes,
+        tensor_bytes_skipped,
+    ):
+        """Add a batch of tensors.
+
+        Args:
+          num_tensors: Number of tensors encountered in this batch, including
+            the ones skipped for reasons such as exceeding the size limit.
+          num_tensors_skipped: Number of tensors skipped. This describes a
+            subset of `num_tensors` and hence must be `<= num_tensors`.
+          tensor_bytes: Total byte size of tensors encountered in this batch,
+            including the skipped ones.
+          tensor_bytes_skipped: Total byte size of the tensors skipped for
+            reasons such as exceeding the size limit.
+        """
+        assert num_tensors_skipped <= num_tensors
+        assert tensor_bytes_skipped <= tensor_bytes
+        self._refresh_last_data_added_timestamp()
+        self._num_tensors += num_tensors
+        self._num_tensors_skipped += num_tensors_skipped
+        self._tensor_bytes += tensor_bytes
+        self._tensor_bytes_skipped += tensor_bytes_skipped
+
+    def add_blob(self, blob_bytes, is_skipped):
+        """Add a blob.
+
+        Args:
+          blob_bytes: Byte size of the blob.
+          is_skipped: Whether the uploading of the blob is skipped for
+            reasons such as exceeding the size limit.
+        """
+        self._refresh_last_data_added_timestamp()
+        self._num_blobs += 1
+        self._blob_bytes += blob_bytes
+        if is_skipped:
+            self._num_blobs_skipped += 1
+            self._blob_bytes_skipped += blob_bytes
+
+    def add_plugin(self, plugin_name):
+        """Add a plugin.
+
+        Args:
+          plugin_name: Name of the plugin.
+ """ + self._refresh_last_data_added_timestamp() + self._plugin_names.add(plugin_name) + + @property + def num_scalars(self): + return self._num_scalars + + @property + def num_tensors(self): + return self._num_tensors + + @property + def num_tensors_skipped(self): + return self._num_tensors_skipped + + @property + def tensor_bytes(self): + return self._tensor_bytes + + @property + def tensor_bytes_skipped(self): + return self._tensor_bytes_skipped + + @property + def num_blobs(self): + return self._num_blobs + + @property + def num_blobs_skipped(self): + return self._num_blobs_skipped + + @property + def blob_bytes(self): + return self._blob_bytes + + @property + def blob_bytes_skipped(self): + return self._blob_bytes_skipped + + @property + def plugin_names(self): + return self._plugin_names + + def has_data(self): + """Has any data been tracked by this instance. + + This counts the tensor and blob data that have been scanned + but skipped. + + Returns: + Whether this stats tracking object has tracked any data. + """ + return ( + self._num_scalars > 0 + or self._num_tensors > 0 + or self._num_blobs > 0 + ) + + def summarize(self): + """Get a summary string for actually-uploaded and skipped data. + + Calling this property also marks the "last_summarized" timestamp, so that + the has_new_data_since_last_summarize() will be able to report the correct value + later. + + Returns: + A tuple with two items: + - A string summarizing all data uploaded so far. + - If any data was skipped, a string for all skipped data. Else, `None`. + """ + self._last_summarized_timestamp = time.time() + string_pieces = [] + string_pieces.append("%d scalars" % self._num_scalars) + uploaded_tensor_count = self._num_tensors - self._num_tensors_skipped + uploaded_tensor_bytes = self._tensor_bytes - self._tensor_bytes_skipped + string_pieces.append( + "0 tensors" + if not uploaded_tensor_count + else ( + "%d tensors (%s)" + % ( + uploaded_tensor_count, + readable_bytes_string(uploaded_tensor_bytes), + ) + ) + ) + uploaded_blob_count = self._num_blobs - self._num_blobs_skipped + uploaded_blob_bytes = self._blob_bytes - self._blob_bytes_skipped + string_pieces.append( + "0 binary objects" + if not uploaded_blob_count + else ( + "%d binary objects (%s)" + % ( + uploaded_blob_count, + readable_bytes_string(uploaded_blob_bytes), + ) + ) + ) + skipped_string = ( + self._skipped_summary() if self._skipped_any() else None + ) + return ", ".join(string_pieces), skipped_string + + def _skipped_any(self): + """Whether any data was skipped.""" + return self._num_tensors_skipped or self._num_blobs_skipped + + def has_new_data_since_last_summarize(self): + return self._last_data_added_timestamp > self._last_summarized_timestamp + + def _skipped_summary(self): + """Get a summary string for skipped data.""" + string_pieces = [] + if self._num_tensors_skipped: + string_pieces.append( + "%d tensors (%s)" + % ( + self._num_tensors_skipped, + readable_bytes_string(self._tensor_bytes_skipped), + ) + ) + if self._num_blobs_skipped: + string_pieces.append( + "%d binary objects (%s)" + % ( + self._num_blobs_skipped, + readable_bytes_string(self._blob_bytes_skipped), + ) + ) + return ", ".join(string_pieces) + + def _refresh_last_data_added_timestamp(self): + self._last_data_added_timestamp = time.time() + + +_STYLE_RESET = "\033[0m" +_STYLE_BOLD = "\033[1m" +_STYLE_GREEN = "\033[32m" +_STYLE_YELLOW = "\033[33m" +_STYLE_DARKGRAY = "\033[90m" +_STYLE_ERASE_LINE = "\033[2K" + + +class UploadTracker: + """Tracker for uploader progress and 
status.""" + + _SUPPORTED_VERBISITY_VALUES = (0, 1) + + def __init__(self, verbosity, one_shot=False): + if verbosity not in self._SUPPORTED_VERBISITY_VALUES: + raise ValueError( + "Unsupported verbosity value %s (supported values: %s)" + % (verbosity, self._SUPPORTED_VERBISITY_VALUES) + ) + self._verbosity = verbosity + self._stats = UploadStats() + self._send_count = 0 + self._one_shot = one_shot + + def _dummy_generator(self): + while True: + # Yield an arbitrary value 0: The progress bar is indefinite. + yield 0 + + def _overwrite_line_message(self, message, color_code=_STYLE_GREEN): + """Overwrite the current line with a stylized message.""" + if not self._verbosity: + return + message += "." * 3 + sys.stdout.write( + _STYLE_ERASE_LINE + color_code + message + _STYLE_RESET + "\r" + ) + sys.stdout.flush() + + def _single_line_message(self, message): + """Write a timestamped single line, with newline, to stdout.""" + if not self._verbosity: + return + start_message = "%s[%s]%s %s\n" % ( + _STYLE_BOLD, + readable_time_string(), + _STYLE_RESET, + message, + ) + sys.stdout.write(start_message) + sys.stdout.flush() + + def has_data(self): + """Determine if any data has been uploaded under the tracker's watch.""" + return self._stats.has_data() + + def _update_cumulative_status(self): + """Write an update summarizing the data uploaded since the start.""" + if not self._verbosity: + return + if not self._stats.has_new_data_since_last_summarize(): + return + uploaded_str, skipped_str = self._stats.summarize() + uploaded_message = "%s[%s]%s Total uploaded: %s\n" % ( + _STYLE_BOLD, + readable_time_string(), + _STYLE_RESET, + uploaded_str, + ) + sys.stdout.write(uploaded_message) + if skipped_str: + sys.stdout.write( + "%sTotal skipped: %s\n%s" + % (_STYLE_DARKGRAY, skipped_str, _STYLE_RESET) + ) + sys.stdout.flush() + # TODO(cais): Add summary of what plugins have been involved, once it's + # clear how to get canonical plugin names. + + def add_plugin_name(self, plugin_name): + self._stats.add_plugin(plugin_name) + + @contextlib.contextmanager + def send_tracker(self): + """Create a context manager for a round of data sending.""" + self._send_count += 1 + if self._send_count == 1: + self._single_line_message("Started scanning logdir.") + try: + # self._reset_bars() + self._overwrite_line_message("Data upload starting") + yield + finally: + self._update_cumulative_status() + if self._one_shot: + self._single_line_message("Done scanning logdir.") + else: + self._overwrite_line_message( + "Listening for new data in logdir", + color_code=_STYLE_YELLOW, + ) + + @contextlib.contextmanager + def scalars_tracker(self, num_scalars): + """Create a context manager for tracking a scalar batch upload. + + Args: + num_scalars: Number of scalars in the batch. + """ + self._overwrite_line_message("Uploading %d scalars" % num_scalars) + try: + yield + finally: + self._stats.add_scalars(num_scalars) + + @contextlib.contextmanager + def tensors_tracker( + self, + num_tensors, + num_tensors_skipped, + tensor_bytes, + tensor_bytes_skipped, + ): + """Create a context manager for tracking a tensor batch upload. + + Args: + num_tensors: Total number of tensors in the batch. + num_tensors_skipped: Number of tensors skipped (a subset of + `num_tensors`). Hence this must be `<= num_tensors`. + tensor_bytes: Total byte size of the tensors in the batch. + tensor_bytes_skipped: Byte size of skipped tensors in the batch (a + subset of `tensor_bytes`). Must be `<= tensor_bytes`. 
+ """ + if num_tensors_skipped: + message = "Uploading %d tensors (%s) (Skipping %d tensors, %s)" % ( + num_tensors - num_tensors_skipped, + readable_bytes_string(tensor_bytes - tensor_bytes_skipped), + num_tensors_skipped, + readable_bytes_string(tensor_bytes_skipped), + ) + else: + message = "Uploading %d tensors (%s)" % ( + num_tensors, + readable_bytes_string(tensor_bytes), + ) + self._overwrite_line_message(message) + try: + yield + finally: + self._stats.add_tensors( + num_tensors, + num_tensors_skipped, + tensor_bytes, + tensor_bytes_skipped, + ) + + @contextlib.contextmanager + def blob_tracker(self, blob_bytes): + """Creates context manager tracker for uploading a blob. + + Args: + blob_bytes: Total byte size of the blob being uploaded. + """ + self._overwrite_line_message( + "Uploading binary object (%s)" % readable_bytes_string(blob_bytes) + ) + try: + yield _BlobTracker(self._stats, blob_bytes) + finally: + pass + + +class _BlobTracker: + def __init__(self, upload_stats, blob_bytes): + self._upload_stats = upload_stats + self._blob_bytes = blob_bytes + + def mark_uploaded(self, is_uploaded): + self._upload_stats.add_blob( + self._blob_bytes, is_skipped=(not is_uploaded) + ) diff --git a/tensorboard/uploader/upload_tracker_test.py b/tensorboard/uploader/upload_tracker_test.py new file mode 100644 index 00000000000..3b5788c7cf1 --- /dev/null +++ b/tensorboard/uploader/upload_tracker_test.py @@ -0,0 +1,395 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for tensorboard.uploader.upload_tracker.""" + + +import sys + +from unittest import mock + +from tensorboard import test as tb_test +from tensorboard.uploader import upload_tracker + + +class ReadableBytesStringTest(tb_test.TestCase): + def testZero(self): + self.assertEqual(upload_tracker.readable_bytes_string(0), "0 B") + + def testLessThan1K(self): + self.assertEqual(upload_tracker.readable_bytes_string(42), "42 B") + + def testBetween1KAnd1M(self): + self.assertEqual(upload_tracker.readable_bytes_string(1337), "1.3 kB") + + def testMoreThan1M(self): + self.assertEqual( + upload_tracker.readable_bytes_string(299792458), "285.9 MB" + ) + + +class UploadStatsTest(tb_test.TestCase): + """Unit tests for the UploadStats class.""" + + def testAddScalar(self): + stats = upload_tracker.UploadStats() + stats.add_scalars(1234) + self.assertEqual(stats.num_scalars, 1234) + stats.add_scalars(4321) + self.assertEqual(stats.num_scalars, 5555) + + def testAddTensor(self): + stats = upload_tracker.UploadStats() + stats.add_tensors( + num_tensors=10, + num_tensors_skipped=0, + tensor_bytes=1000, + tensor_bytes_skipped=0, + ) + self.assertEqual(stats.num_tensors, 10) + self.assertEqual(stats.num_tensors_skipped, 0) + self.assertEqual(stats.tensor_bytes, 1000) + self.assertEqual(stats.tensor_bytes_skipped, 0) + stats.add_tensors( + num_tensors=20, + num_tensors_skipped=5, + tensor_bytes=2000, + tensor_bytes_skipped=500, + ) + self.assertEqual(stats.num_tensors, 30) + self.assertEqual(stats.num_tensors_skipped, 5) + self.assertEqual(stats.tensor_bytes, 3000) + self.assertEqual(stats.tensor_bytes_skipped, 500) + + def testAddTensorsNumTensorsSkippedGreaterThanNumTenosrsErrors(self): + stats = upload_tracker.UploadStats() + with self.assertRaises(AssertionError): + stats.add_tensors( + num_tensors=10, + num_tensors_skipped=12, + tensor_bytes=1000, + tensor_bytes_skipped=0, + ) + + def testAddBlob(self): + stats = upload_tracker.UploadStats() + stats.add_blob(blob_bytes=1000, is_skipped=False) + self.assertEqual(stats.blob_bytes, 1000) + self.assertEqual(stats.blob_bytes_skipped, 0) + stats.add_blob(blob_bytes=2000, is_skipped=True) + self.assertEqual(stats.blob_bytes, 3000) + self.assertEqual(stats.blob_bytes_skipped, 2000) + + def testAddPlugin(self): + stats = upload_tracker.UploadStats() + stats.add_plugin("scalars") + self.assertEqual(stats.plugin_names, set(["scalars"])) + stats.add_plugin("scalars") + self.assertEqual(stats.plugin_names, set(["scalars"])) + stats.add_plugin("histograms") + self.assertEqual(stats.plugin_names, set(["histograms", "scalars"])) + + def testHasNewDataSinceLastSummarizeReturnsFalseInitially(self): + stats = upload_tracker.UploadStats() + self.assertEqual(stats.has_new_data_since_last_summarize(), False) + + def testUploadedSummaryWithTensorsAndBlobs(self): + stats = upload_tracker.UploadStats() + stats.add_scalars(1234) + stats.add_tensors( + num_tensors=50, + num_tensors_skipped=10, + tensor_bytes=2000, + tensor_bytes_skipped=1800, + ) + stats.add_blob(blob_bytes=1000, is_skipped=False) + stats.add_blob(blob_bytes=2000, is_skipped=True) + self.assertEqual(stats.has_new_data_since_last_summarize(), True) + uploaded_summary, skipped_summary = stats.summarize() + self.assertEqual( + uploaded_summary, + "1234 scalars, 40 tensors (200 B), 1 binary objects (1000 B)", + ) + self.assertEqual( + skipped_summary, + "10 tensors (1.8 kB), 1 binary objects (2.0 kB)", + ) + 
self.assertEqual(stats.has_new_data_since_last_summarize(), False) + + def testSummarizeeWithoutTensorsOrBlobs(self): + stats = upload_tracker.UploadStats() + stats.add_scalars(1234) + self.assertEqual(stats.has_new_data_since_last_summarize(), True) + (uploaded_summary, skipped_summary) = stats.summarize() + self.assertEqual( + uploaded_summary, + "1234 scalars, 0 tensors, 0 binary objects", + ) + self.assertIsNone(skipped_summary) + self.assertEqual(stats.has_new_data_since_last_summarize(), False) + + def testHasNewDataSinceLastSummarizeReturnsTrueAfterNewScalars(self): + stats = upload_tracker.UploadStats() + self.assertEqual(stats.has_new_data_since_last_summarize(), False) + stats.add_scalars(1234) + self.assertEqual(stats.has_new_data_since_last_summarize(), True) + stats.summarize() + self.assertEqual(stats.has_new_data_since_last_summarize(), False) + stats.add_scalars(4321) + self.assertEqual(stats.has_new_data_since_last_summarize(), True) + + def testHasNewDataSinceLastSummarizeReturnsTrueAfterNewTensors(self): + stats = upload_tracker.UploadStats() + self.assertEqual(stats.has_new_data_since_last_summarize(), False) + stats.add_scalars(1234) + self.assertEqual(stats.has_new_data_since_last_summarize(), True) + stats.summarize() + self.assertEqual(stats.has_new_data_since_last_summarize(), False) + stats.add_tensors( + num_tensors=10, + num_tensors_skipped=10, + tensor_bytes=1000, + tensor_bytes_skipped=1000, + ) + self.assertEqual(stats.has_new_data_since_last_summarize(), True) + + def testHasNewDataSinceLastSummarizeReturnsTrueAfterNewBlob(self): + stats = upload_tracker.UploadStats() + self.assertEqual(stats.has_new_data_since_last_summarize(), False) + stats.add_scalars(1234) + self.assertEqual(stats.has_new_data_since_last_summarize(), True) + stats.summarize() + self.assertEqual(stats.has_new_data_since_last_summarize(), False) + stats.add_blob(blob_bytes=2000, is_skipped=True) + self.assertEqual(stats.has_new_data_since_last_summarize(), True) + + def testHasDataInitiallyReturnsFalse(self): + stats = upload_tracker.UploadStats() + self.assertEqual(stats.has_data(), False) + + def testHasDataReturnsTrueWithScalars(self): + stats = upload_tracker.UploadStats() + stats.add_scalars(1) + self.assertEqual(stats.has_data(), True) + + def testHasDataReturnsTrueWithUnskippedTensors(self): + stats = upload_tracker.UploadStats() + stats.add_tensors( + num_tensors=10, + num_tensors_skipped=0, + tensor_bytes=1000, + tensor_bytes_skipped=0, + ) + self.assertEqual(stats.has_data(), True) + + def testHasDataReturnsTrueWithSkippedTensors(self): + stats = upload_tracker.UploadStats() + stats.add_tensors( + num_tensors=10, + num_tensors_skipped=10, + tensor_bytes=1000, + tensor_bytes_skipped=1000, + ) + self.assertEqual(stats.has_data(), True) + + def testHasDataReturnsTrueWithUnskippedBlob(self): + stats = upload_tracker.UploadStats() + stats.add_blob(blob_bytes=1000, is_skipped=False) + self.assertEqual(stats.has_data(), True) + + def testHasDataReturnsTrueWithSkippedBlob(self): + stats = upload_tracker.UploadStats() + stats.add_blob(blob_bytes=1000, is_skipped=True) + self.assertEqual(stats.has_data(), True) + + +class UploadTrackerTest(tb_test.TestCase): + """Test for the UploadTracker class.""" + + def setUp(self): + super().setUp() + self.cumulative_bar = mock.MagicMock() + self.skipped_bar = mock.MagicMock() + self.uploading_bar = mock.MagicMock() + self.mock_write = mock.MagicMock() + self.mock_stdout_write = mock.patch.object( + sys.stdout, "write", self.mock_write + ) + 
self.mock_stdout_write.start() + self.mock_flush = mock.MagicMock() + self.mock_stdout_flush = mock.patch.object( + sys.stdout, "flush", self.mock_flush + ) + self.mock_stdout_flush.start() + + def tearDown(self): + self.mock_stdout_write.stop() + self.mock_stdout_flush.stop() + super().tearDown() + + def testSendTracker(self): + tracker = upload_tracker.UploadTracker(verbosity=1) + with tracker.send_tracker(): + self.assertEqual(self.mock_write.call_count, 2) + self.assertEqual(self.mock_flush.call_count, 2) + self.assertIn( + "Data upload starting...", + self.mock_write.call_args[0][0], + ) + self.assertEqual(self.mock_write.call_count, 3) + self.assertEqual(self.mock_flush.call_count, 3) + self.assertIn( + "Listening for new data in logdir...", + self.mock_write.call_args[0][0], + ) + self.assertEqual(tracker.has_data(), False) + + def testSendTrackerWithVerbosity0(self): + tracker = upload_tracker.UploadTracker(verbosity=0) + with tracker.send_tracker(): + self.assertEqual(self.mock_write.call_count, 0) + self.assertEqual(self.mock_flush.call_count, 0) + self.assertEqual(self.mock_write.call_count, 0) + self.assertEqual(self.mock_flush.call_count, 0) + + def testScalarsTracker(self): + tracker = upload_tracker.UploadTracker(verbosity=1) + with tracker.scalars_tracker(123): + self.assertEqual(self.mock_write.call_count, 1) + self.assertEqual(self.mock_flush.call_count, 1) + self.assertIn( + "Uploading 123 scalars...", + self.mock_write.call_args[0][0], + ) + self.assertEqual(self.mock_write.call_count, 1) + self.assertEqual(self.mock_flush.call_count, 1) + self.assertEqual(tracker.has_data(), True) + + def testScalarsTrackerWithVerbosity0(self): + tracker = upload_tracker.UploadTracker(verbosity=0) + with tracker.scalars_tracker(123): + self.assertEqual(self.mock_write.call_count, 0) + self.assertEqual(self.mock_flush.call_count, 0) + self.assertEqual(self.mock_write.call_count, 0) + self.assertEqual(self.mock_flush.call_count, 0) + + def testTensorsTrackerWithSkippedTensors(self): + tracker = upload_tracker.UploadTracker(verbosity=1) + with tracker.tensors_tracker( + num_tensors=200, + num_tensors_skipped=50, + tensor_bytes=6000, + tensor_bytes_skipped=4000, + ): + self.assertEqual(self.mock_write.call_count, 1) + self.assertEqual(self.mock_flush.call_count, 1) + self.assertIn( + "Uploading 150 tensors (2.0 kB) (Skipping 50 tensors, 3.9 kB)", + self.mock_write.call_args[0][0], + ) + self.assertEqual(tracker.has_data(), True) + + def testTensorsTrackerWithVerbosity0(self): + tracker = upload_tracker.UploadTracker(verbosity=0) + with tracker.tensors_tracker( + num_tensors=200, + num_tensors_skipped=50, + tensor_bytes=6000, + tensor_bytes_skipped=4000, + ): + self.assertEqual(self.mock_write.call_count, 0) + self.assertEqual(self.mock_flush.call_count, 0) + self.assertEqual(self.mock_write.call_count, 0) + self.assertEqual(self.mock_flush.call_count, 0) + + def testTensorsTrackerWithoutSkippedTensors(self): + tracker = upload_tracker.UploadTracker(verbosity=1) + with tracker.tensors_tracker( + num_tensors=200, + num_tensors_skipped=0, + tensor_bytes=6000, + tensor_bytes_skipped=0, + ): + self.assertEqual(self.mock_write.call_count, 1) + self.assertEqual(self.mock_flush.call_count, 1) + self.assertIn( + "Uploading 200 tensors (5.9 kB)", + self.mock_write.call_args[0][0], + ) + self.assertEqual(tracker.has_data(), True) + + def testBlobTrackerUploaded(self): + tracker = upload_tracker.UploadTracker(verbosity=1) + with tracker.blob_tracker(blob_bytes=2048) as blob_tracker: + 
self.assertEqual(self.mock_write.call_count, 1) + self.assertEqual(self.mock_flush.call_count, 1) + self.assertIn( + "Uploading binary object (2.0 kB)", + self.mock_write.call_args[0][0], + ) + + def testBlobTrackerWithVerbosity0(self): + tracker = upload_tracker.UploadTracker(verbosity=0) + with tracker.blob_tracker(blob_bytes=2048): + self.assertEqual(self.mock_write.call_count, 0) + self.assertEqual(self.mock_flush.call_count, 0) + self.assertEqual(self.mock_write.call_count, 0) + self.assertEqual(self.mock_flush.call_count, 0) + + def testBlobTrackerNotUploaded(self): + tracker = upload_tracker.UploadTracker(verbosity=1) + with tracker.send_tracker(): + self.assertEqual(self.mock_write.call_count, 2) + self.assertEqual(self.mock_flush.call_count, 2) + self.assertIn( + "Started scanning", + self.mock_write.call_args_list[0][0][0], + ) + with tracker.blob_tracker( + blob_bytes=2048 * 1024 * 1024 + ) as blob_tracker: + self.assertEqual(self.mock_write.call_count, 3) + self.assertEqual(self.mock_flush.call_count, 3) + self.assertIn( + "Uploading binary object (2048.0 MB)", + self.mock_write.call_args[0][0], + ) + blob_tracker.mark_uploaded(is_uploaded=False) + self.assertEqual(self.mock_write.call_count, 6) + self.assertEqual(self.mock_flush.call_count, 5) + self.assertIn( + "Total uploaded: 0 scalars, 0 tensors, 0 binary objects\n", + self.mock_write.call_args_list[3][0][0], + ) + self.assertIn( + "Total skipped: 1 binary objects (2048.0 MB)\n", + self.mock_write.call_args_list[4][0][0], + ) + self.assertEqual(tracker.has_data(), True) + + def testInvalidVerbosityRaisesError(self): + with self.assertRaises(ValueError): + upload_tracker.UploadTracker(verbosity="1") + with self.assertRaises(ValueError): + upload_tracker.UploadTracker(verbosity=-1) + with self.assertRaises(ValueError): + upload_tracker.UploadTracker(verbosity=0.5) + with self.assertRaises(ValueError): + upload_tracker.UploadTracker(verbosity=100) + with self.assertRaises(ValueError): + upload_tracker.UploadTracker(verbosity=None) + + +if __name__ == "__main__": + tb_test.main() From eddf795a7a6d58ee5884e901d68ba31c0c50b606 Mon Sep 17 00:00:00 2001 From: Brian Dubois Date: Tue, 24 Oct 2023 07:22:10 -0400 Subject: [PATCH 3/3] Add comment to UploadIntent. --- tensorboard/uploader/uploader_subcommand.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorboard/uploader/uploader_subcommand.py b/tensorboard/uploader/uploader_subcommand.py index baa36dbbf84..bd4bcaf4a01 100644 --- a/tensorboard/uploader/uploader_subcommand.py +++ b/tensorboard/uploader/uploader_subcommand.py @@ -419,7 +419,11 @@ def _die_if_bad_experiment_description(description): class UploadIntent(_Intent): - """The user intends to upload an experiment from the given logdir.""" + """The user intends to upload an experiment from the given logdir. + + However, TensorBoard.dev is being turned down and we no longer allow + upload. + """ def get_ack_message_body(self): """Does nothing.
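Editor's note (illustrative sketch, not part of the patch series): the tracker API restored above is driven by the uploader roughly as follows. The batch sizes are made-up values and the RPC calls are placeholders; only the UploadTracker and _BlobTracker calls mirror the API defined in upload_tracker.py, and the import assumes that module is on the path.

    from tensorboard.uploader import upload_tracker

    # One tracker per upload session; verbosity 1 prints progress to stdout.
    tracker = upload_tracker.UploadTracker(verbosity=1, one_shot=True)
    with tracker.send_tracker():  # one round of scanning and sending
        # Scalars: report how many points this batch contains.
        with tracker.scalars_tracker(num_scalars=128):
            pass  # placeholder for the WriteScalar RPC

        # Tensors: report batch totals plus how many/much were skipped.
        with tracker.tensors_tracker(
            num_tensors=10,
            num_tensors_skipped=2,
            tensor_bytes=4096,
            tensor_bytes_skipped=1024,
        ):
            pass  # placeholder for the WriteTensor RPC

        # Blobs: record after the fact whether the blob was uploaded or skipped.
        with tracker.blob_tracker(blob_bytes=2 * 2**20) as blob:
            blob.mark_uploaded(is_uploaded=True)  # placeholder decision

On exiting send_tracker(), the tracker prints the cumulative "Total uploaded" / "Total skipped" summary exercised by the tests above.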