Changing logging levels #222

Open · wants to merge 10 commits into main
11 changes: 10 additions & 1 deletion dlio_benchmark/common/enumerations.py
@@ -50,6 +50,16 @@ class StorageType(Enum):

def __str__(self):
return self.value

class LogLevel(Enum):
"""
Different levels of logging.
"""
DEBUG = "debug"
INFO = "info"
WARNING = "warning"
ERROR = "error"

def __str__(self):
return self.value

class MetadataType(Enum):
"""
@@ -107,7 +117,6 @@ class ComputationType(Enum):
SYNC = 'sync'
ASYNC = 'async'


class FormatType(Enum):
"""
Format Type supported by the benchmark.
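To illustrate the new enum's behavior, a quick standalone sketch (the asserts are illustrative, not part of the patch):

from dlio_benchmark.common.enumerations import LogLevel

assert LogLevel("warning") is LogLevel.WARNING  # by-value lookup, as used in LoadConfig
assert str(LogLevel.DEBUG) == "debug"           # __str__ returns the YAML-friendly string
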
2 changes: 0 additions & 2 deletions dlio_benchmark/main.py
@@ -146,8 +146,6 @@ def initialize(self):
- Start profiling session for Darshan and Tensorboard.
"""
self.comm.barrier()
if self.args.debug and self.args.my_rank == 0:
input("Debug mode: Press enter to start\n")

if self.args.generate_data:
if self.args.my_rank == 0:
26 changes: 18 additions & 8 deletions dlio_benchmark/utils/config.py
@@ -26,7 +26,7 @@

from dlio_benchmark.common.constants import MODULE_CONFIG
from dlio_benchmark.common.enumerations import StorageType, FormatType, Shuffle, ReadType, FileAccess, Compression, \
FrameworkType, \
FrameworkType, LogLevel, \
DataLoaderType, Profiler, DatasetType, DataLoaderSampler, CheckpointLocationType, CheckpointMechanismType
from dlio_benchmark.utils.utility import DLIOMPI, get_trace_name, utcnow
from dataclasses import dataclass
@@ -91,7 +91,7 @@ class ConfigArguments:
chunk_size: int = 0
compression: Compression = Compression.NONE
compression_level: int = 4
debug: bool = False
log_level: LogLevel = LogLevel.INFO
total_training_steps: int = -1
do_eval: bool = False
batch_size_eval: int = 1
@@ -167,18 +167,29 @@ def configure_dlio_logging(self, is_child=False):
if is_child and self.multiprocessing_context == "fork":
return
# Configure the logging library
log_level = logging.DEBUG if self.debug else logging.INFO
log_format_verbose = '[%(levelname)s] %(message)s [%(pathname)s:%(lineno)d]'
log_format_simple = '[%(levelname)s] %(message)s'
# Use the simple format unless log_level is DEBUG, which selects the verbose format
log_format = log_format_simple
if self.log_level == LogLevel.DEBUG:
log_level = logging.DEBUG
log_format = log_format_verbose
elif self.log_level == LogLevel.WARNING:
log_level = logging.WARNING
elif self.log_level == LogLevel.ERROR:
log_level = logging.ERROR
else:
log_level = logging.INFO
logging.basicConfig(
level=log_level,
force=True,
handlers=[
logging.FileHandler(self.logfile_path, mode="a", encoding='utf-8'),
logging.StreamHandler()
],
format='[%(levelname)s] %(message)s [%(pathname)s:%(lineno)d]'
format=log_format
# logging's max timestamp resolution is msecs; usecs are passed in the message itself
)

def configure_dftracer(self, is_child=False, use_pid=False):
# with "multiprocessing_context=fork" the profiler file remains open in the child process
if is_child and self.multiprocessing_context == "fork":
@@ -542,7 +553,8 @@ def LoadConfig(args, config):
args.output_folder = config['output']['folder']
if 'log_file' in config['output']:
args.log_file = config['output']['log_file']

if 'log_level' in config['output']:
args.log_level = LogLevel(config['output']['log_level'])
if args.output_folder is None:
try:
hydra_cfg = hydra.core.hydra_config.HydraConfig.get()
@@ -558,8 +570,6 @@
args.generate_only = True
else:
args.generate_only = False
if 'debug' in config['workflow']:
args.debug = config['workflow']['debug']
if 'evaluation' in config['workflow']:
args.do_eval = config['workflow']['evaluation']
if 'checkpoint' in config['workflow']:
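The if/elif chain in configure_dlio_logging maps the config enum onto the standard library's numeric levels and picks the verbose format only for DEBUG. A minimal standalone sketch of that selection logic as a lookup table (the helper name resolve_logging is hypothetical, not part of the patch):

import logging

from dlio_benchmark.common.enumerations import LogLevel

_LEVELS = {
    LogLevel.DEBUG: logging.DEBUG,
    LogLevel.INFO: logging.INFO,
    LogLevel.WARNING: logging.WARNING,
    LogLevel.ERROR: logging.ERROR,
}

def resolve_logging(level):
    # Only DEBUG gets the verbose format with file/line information.
    if level == LogLevel.DEBUG:
        fmt = '[%(levelname)s] %(message)s [%(pathname)s:%(lineno)d]'
    else:
        fmt = '[%(levelname)s] %(message)s'
    return _LEVELS.get(level, logging.INFO), fmt

Both values would then be fed into logging.basicConfig, e.g. level, fmt = resolve_logging(LogLevel.WARNING).
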
4 changes: 2 additions & 2 deletions dlio_benchmark/utils/statscounter.py
@@ -322,7 +322,7 @@ def batch_processed(self, epoch, step, block, t0, computation_time):
else:
self.output[epoch]['proc'] = [duration]
self.output[epoch]['compute']=[computation_time]
logging.info(f"{utcnow()} Rank {self.my_rank} step {step} processed {self.batch_size} samples in {duration} s")
logging.debug(f"{utcnow()} Rank {self.my_rank} step {step} processed {self.batch_size} samples in {duration} s")

def compute_metrics_train(self, epoch, block):
key = f"block{block}"
@@ -358,7 +358,7 @@ def eval_batch_processed(self, epoch, step, t0, computation_time):
duration = time() - t0
self.output[epoch]['proc']['eval'].append(duration)
self.output[epoch]['compute']['eval'].append(computation_time)
logging.info(f"{utcnow()} Rank {self.my_rank} step {step} processed {self.batch_size_eval} samples in {duration} s")
logging.debug(f"{utcnow()} Rank {self.my_rank} step {step} processed {self.batch_size_eval} samples in {duration} s")

def finalize(self):
self.summary['end'] = utcnow()

def save_data(self):
26 changes: 13 additions & 13 deletions dlio_benchmark/utils/utility.py
@@ -37,38 +37,38 @@
from dftracer.logger import dftracer as PerfTrace, dft_fn as Profile, DFTRACER_ENABLE as DFTRACER_ENABLE
except ImportError:
class Profile(object):
def __init__(self, **kwargs):
return
def log(self, **kwargs):
return
def log_init(self, **kwargs):
return
def iter(self, **kwargs):
def __init__(self, cat, name=None, epoch=None, step=None, image_idx=None, image_size=None):
return
def log(self, func):
return func
def log_init(self, func):
return func
def iter(self, func, iter_name="step"):
return func
def __enter__(self):
return
def __exit__(self, **kwargs):
def __exit__(self, type, value, traceback):
return
def update(self, **kwargs):
def update(self, epoch=None, step=None, image_idx=None, image_size=None, args={}):
return
def flush(self):
return
def reset(self):
return
def log_static(self, **kwargs):
return
def log_static(self, func):
return func
class dftracer(object):
def __init__(self,):
self.type = None
def initialize_log(self, **kwargs):
def initialize_log(self, logfile=None, data_dir=None, process_id=-1):
return
def get_time(self):
return
def enter_event(self):
return
def exit_event(self):
return
def log_event(self, **kwargs):
def log_event(self, name, cat, start_time, duration, string_args=None):
return
def finalize(self):
return
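These stubs must be drop-in replacements for the real dftracer API, so the explicit signatures matter: the previous **kwargs versions would fail when called positionally (e.g. __exit__ from a with statement) or used as decorators, since they returned None instead of the wrapped function. A short sketch of how the no-op stubs are exercised when dftracer is unavailable (read_sample is a hypothetical function):

dlp = Profile(cat="reader")

@dlp.log                      # no-op decorator: returns read_sample unchanged
def read_sample(idx):
    return idx

with Profile(cat="block"):    # __enter__/__exit__ are harmless no-ops
    read_sample(0)
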
8 changes: 8 additions & 0 deletions docs/source/config.rst
@@ -357,11 +357,19 @@ output
* - log_file
- dlio.log
- log file name
* - log_level
- "info"
- logging level to use [error|warning|info|debug]

.. note::

If ``folder`` is not set (None), the output folder will be ``hydra_log/unet3d/$DATE-$TIME``.

.. note::

``log_level=debug`` outputs detailed logging information at every step, whereas ``log_level=info`` only outputs logs at the end of each epoch.
For performance runs, we suggest ``log_level=error`` to suppress unnecessary logs.
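
For example, a minimal ``output`` section selecting a quieter level (folder and file names here are illustrative):

.. code-block:: yaml

   output:
     folder: hydra_log/unet3d
     log_file: dlio.log
     log_level: warning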

profiling
------------------
.. list-table::