From 9c776255b971bcae5228d883e3737f4ad9317de1 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 1 Jan 2018 09:03:21 +0800 Subject: [PATCH] Add S3 logging to TensorFlow's logging system (#15493) * Add S3 logging to TensorFlow's logging system This fix is an attempt to help with the issue raised in 15159 where there is no logging in the S3 file system and it is not easy to debug or diagnose. This fix adds S3 logging to TensorFlow's logging with the following level mapping: ``` LogLevel::Info -> INFO LogLevel::Warn -> WARNING LogLevel::Error -> ERROR LogLevel::Fatal -> FATAL ``` This fix is related to 15159 (not a complete fix). Signed-off-by: Yong Tang * Sanitize with clang-format Signed-off-by: Yong Tang * Enable S3 logging to TensorFlow logging Signed-off-by: Yong Tang * Sanitize with clang-format Signed-off-by: Yong Tang * Update Bazel BUILD file Signed-off-by: Yong Tang * Address review feedback. Signed-off-by: Yong Tang * const local variable * Make local variable const and avoid std::string * Add punctuation & remove redundant comment * Expose InitializeAWSLogging and ShutdownAWSLogging in S3Logging Signed-off-by: Yong Tang * Fix style. * move static functions first * move member functions before member fields * see if DISALLOW_COPY_AND_ASSIGN will work. 
* Update `S3Log` => `AWSLog` for review feedback Signed-off-by: Yong Tang * Set TF_CPP_MIN_LOG_LEVEL=1 to filter out LOG(INFO) Signed-off-by: Yong Tang --- tensorflow/core/platform/default/logging.cc | 4 +- tensorflow/core/platform/default/logging.h | 4 + tensorflow/core/platform/s3/BUILD | 19 +++ tensorflow/core/platform/s3/aws_logging.cc | 121 ++++++++++++++++++ tensorflow/core/platform/s3/aws_logging.h | 68 ++++++++++ tensorflow/core/platform/s3/s3_file_system.cc | 10 ++ .../python/debug/examples/examples_test.sh | 3 + 7 files changed, 227 insertions(+), 2 deletions(-) create mode 100644 tensorflow/core/platform/s3/aws_logging.cc create mode 100644 tensorflow/core/platform/s3/aws_logging.h diff --git a/tensorflow/core/platform/default/logging.cc b/tensorflow/core/platform/default/logging.cc index ebdd4b624aa423..82bd69f9ca46eb 100644 --- a/tensorflow/core/platform/default/logging.cc +++ b/tensorflow/core/platform/default/logging.cc @@ -114,6 +114,8 @@ int64 LogLevelStrToInt(const char* tf_env_var_val) { return level; } +} // namespace + int64 MinLogLevelFromEnv() { const char* tf_env_var_val = getenv("TF_CPP_MIN_LOG_LEVEL"); return LogLevelStrToInt(tf_env_var_val); @@ -124,8 +126,6 @@ int64 MinVLogLevelFromEnv() { return LogLevelStrToInt(tf_env_var_val); } -} // namespace - LogMessage::~LogMessage() { // Read the min log level once during the first call to logging. 
static int64 min_log_level = MinLogLevelFromEnv(); diff --git a/tensorflow/core/platform/default/logging.h b/tensorflow/core/platform/default/logging.h index d5f7350cdd805e..40c260f236613e 100644 --- a/tensorflow/core/platform/default/logging.h +++ b/tensorflow/core/platform/default/logging.h @@ -305,6 +305,10 @@ T&& CheckNotNull(const char* file, int line, const char* exprtext, T&& t) { return std::forward(t); } +int64 MinLogLevelFromEnv(); + +int64 MinVLogLevelFromEnv(); + } // namespace internal } // namespace tensorflow diff --git a/tensorflow/core/platform/s3/BUILD b/tensorflow/core/platform/s3/BUILD index b7bc1a11d65837..2cd5f877c9fcc9 100644 --- a/tensorflow/core/platform/s3/BUILD +++ b/tensorflow/core/platform/s3/BUILD @@ -28,6 +28,8 @@ filegroup( tf_cc_binary( name = "s3_file_system.so", srcs = [ + "aws_logging.cc", + "aws_logging.h", "s3_crypto.cc", "s3_crypto.h", "s3_file_system.cc", @@ -66,6 +68,22 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "aws_logging", + srcs = [ + "aws_logging.cc", + ], + hdrs = [ + "aws_logging.h", + ], + deps = [ + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "@aws//:aws", + ], + alwayslink = 1, +) + cc_library( name = "s3_file_system", srcs = [ @@ -75,6 +93,7 @@ cc_library( "s3_file_system.h", ], deps = [ + ":aws_logging", ":s3_crypto", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", diff --git a/tensorflow/core/platform/s3/aws_logging.cc b/tensorflow/core/platform/s3/aws_logging.cc new file mode 100644 index 00000000000000..41b854d6343b2a --- /dev/null +++ b/tensorflow/core/platform/s3/aws_logging.cc @@ -0,0 +1,121 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/platform/s3/aws_logging.h" +#include "tensorflow/core/lib/strings/stringprintf.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/mutex.h" + +#include +#include +#include + +#include + +namespace tensorflow { + +AWSLogSystem::AWSLogSystem(Aws::Utils::Logging::LogLevel log_level) + : log_level_(log_level) {} + +void AWSLogSystem::Log(Aws::Utils::Logging::LogLevel log_level, const char* tag, + const char* format, ...) { + std::va_list args; + va_start(args, format); + + const string s = strings::Printf(format, args); + + va_end(args); + + LogMessage(log_level, s); +} + +void AWSLogSystem::LogStream(Aws::Utils::Logging::LogLevel log_level, + const char* tag, + const Aws::OStringStream& message_stream) { + LogMessage(log_level, message_stream.rdbuf()->str().c_str()); +} + +void AWSLogSystem::LogMessage(Aws::Utils::Logging::LogLevel log_level, + const std::string& message) { + switch (log_level) { + case Aws::Utils::Logging::LogLevel::Info: + LOG(INFO) << message; + break; + case Aws::Utils::Logging::LogLevel::Warn: + LOG(WARNING) << message; + break; + case Aws::Utils::Logging::LogLevel::Error: + LOG(ERROR) << message; + break; + case Aws::Utils::Logging::LogLevel::Fatal: + LOG(FATAL) << message; + break; + default: + LOG(ERROR) << message; + break; + } +} + +namespace { +static const char* kAWSLoggingTag = "AWSLogging"; + +Aws::Utils::Logging::LogLevel ParseLogLevelFromEnv() { + Aws::Utils::Logging::LogLevel log_level = 
Aws::Utils::Logging::LogLevel::Info; + + const int64_t level = tensorflow::internal::MinLogLevelFromEnv(); + + switch (level) { + case INFO: + log_level = Aws::Utils::Logging::LogLevel::Info; + break; + case WARNING: + log_level = Aws::Utils::Logging::LogLevel::Warn; + break; + case ERROR: + log_level = Aws::Utils::Logging::LogLevel::Error; + break; + case FATAL: + log_level = Aws::Utils::Logging::LogLevel::Fatal; + break; + default: + log_level = Aws::Utils::Logging::LogLevel::Info; + break; + } + + return log_level; +} +} + +static bool initialized = false; +static mutex s3_logging_mutex(LINKER_INITIALIZED); +void AWSLogSystem::InitializeAWSLogging() { + std::lock_guard s3_logging_lock(s3_logging_mutex); + if (!initialized) { + Aws::Utils::Logging::InitializeAWSLogging( + Aws::MakeShared(kAWSLoggingTag, ParseLogLevelFromEnv())); + initialized = true; + return; + } +} + +void AWSLogSystem::ShutdownAWSLogging() { + std::lock_guard s3_logging_lock(s3_logging_mutex); + if (initialized) { + Aws::Utils::Logging::ShutdownAWSLogging(); + initialized = false; + return; + } +} + +} // namespace tensorflow diff --git a/tensorflow/core/platform/s3/aws_logging.h b/tensorflow/core/platform/s3/aws_logging.h new file mode 100644 index 00000000000000..b0da8f3c83524d --- /dev/null +++ b/tensorflow/core/platform/s3/aws_logging.h @@ -0,0 +1,68 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_S3_S3_LOGGING_H_ +#define TENSORFLOW_CONTRIB_S3_S3_LOGGING_H_ + +#include +#include + +#include +#include +#include "tensorflow/core/platform/default/logging.h" + +namespace tensorflow { + +class AWSLogSystem : public Aws::Utils::Logging::LogSystemInterface { + public: + static void InitializeAWSLogging(); + static void ShutdownAWSLogging(); + + explicit AWSLogSystem(Aws::Utils::Logging::LogLevel log_level); + virtual ~AWSLogSystem() = default; + + // Gets the currently configured log level. + virtual Aws::Utils::Logging::LogLevel GetLogLevel(void) const override { + return log_level_; + } + + // Set a new log level. This has the immediate effect of changing the log. + void SetLogLevel(Aws::Utils::Logging::LogLevel log_level) { + log_level_.store(log_level); + } + + // Does a printf style output to ProcessFormattedStatement. Don't use this, + // it's unsafe. See LogStream. + // Since non-static C++ methods have an implicit this argument, + // TF_PRINTF_ATTRIBUTE should be counted from two (vs. one). + virtual void Log(Aws::Utils::Logging::LogLevel log_level, const char* tag, + const char* format, ...) override TF_PRINTF_ATTRIBUTE(4, 5); + + // Writes the stream to ProcessFormattedStatement. 
+ virtual void LogStream(Aws::Utils::Logging::LogLevel log_level, + const char* tag, + const Aws::OStringStream& messageStream) override; + + private: + void LogMessage(Aws::Utils::Logging::LogLevel log_level, + const string& message); + std::atomic log_level_; + + TF_DISALLOW_COPY_AND_ASSIGN(AWSLogSystem); +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CONTRIB_S3_S3_LOGGING_H_ diff --git a/tensorflow/core/platform/s3/s3_file_system.cc b/tensorflow/core/platform/s3/s3_file_system.cc index 682ad97eec3b3f..397f26ec0bf6c8 100644 --- a/tensorflow/core/platform/s3/s3_file_system.cc +++ b/tensorflow/core/platform/s3/s3_file_system.cc @@ -15,10 +15,13 @@ limitations under the License. #include "tensorflow/core/platform/s3/s3_file_system.h" #include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/s3/aws_logging.h" #include "tensorflow/core/platform/s3/s3_crypto.h" #include #include +#include +#include #include #include #include @@ -33,6 +36,7 @@ limitations under the License. 
namespace tensorflow { +namespace { static const char* kS3FileSystemAllocationTag = "S3FileSystemAllocation"; static const size_t kS3ReadAppendableFileBufferSize = 1024 * 1024; static const int kS3GetChildrenMaxKeys = 100; @@ -226,7 +230,11 @@ class S3ReadOnlyMemoryRegion : public ReadOnlyMemoryRegion { uint64 length_; }; +} // namespace + S3FileSystem::S3FileSystem() { + AWSLogSystem::InitializeAWSLogging(); + Aws::SDKOptions options; options.cryptoOptions.sha256Factory_create_fn = []() { return Aws::MakeShared(S3CryptoAllocationTag); @@ -240,6 +248,8 @@ S3FileSystem::S3FileSystem() { S3FileSystem::~S3FileSystem() { Aws::SDKOptions options; Aws::ShutdownAPI(options); + + AWSLogSystem::ShutdownAWSLogging(); } Status S3FileSystem::NewRandomAccessFile( diff --git a/tensorflow/python/debug/examples/examples_test.sh b/tensorflow/python/debug/examples/examples_test.sh index 25916f1903cd41..2df6c0b6a27010 100755 --- a/tensorflow/python/debug/examples/examples_test.sh +++ b/tensorflow/python/debug/examples/examples_test.sh @@ -23,6 +23,9 @@ set -e +# Filter out LOG(INFO) +export TF_CPP_MIN_LOG_LEVEL=1 + IS_VIRTUALENV=0 PYTHON_BIN_PATH="" while true; do