Skip to content

Commit

Permalink
Merge pull request #1647 from vlomonaco/master
Browse files Browse the repository at this point in the history
Improved Efficiency of the DiskUsage Metric
  • Loading branch information
AntonioCarta authored May 28, 2024
2 parents 8f0e61f + 2995a27 commit 9df4621
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 20 deletions.
59 changes: 39 additions & 20 deletions avalanche/evaluation/metrics/disk_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
################################################################################

import os
import time
from sys import platform
import subprocess
from pathlib import Path
from typing import Union, Sequence, List, Optional

Expand Down Expand Up @@ -45,6 +48,8 @@ def __init__(
paths_to_monitor = [paths_to_monitor]

self._paths_to_monitor: List[str] = [str(p) for p in paths_to_monitor]
# this is used to avoid sending multiple warnings
self._warning_sent = False

self.total_usage: float = 0.0

Expand All @@ -57,7 +62,7 @@ def update(self):

dirs_size = 0.0
for directory in self._paths_to_monitor:
dirs_size += DiskUsage.get_dir_size(directory)
dirs_size += self.get_dir_size(directory)

self.total_usage = dirs_size

Expand All @@ -81,24 +86,44 @@ def reset(self) -> None:
"""
self.total_usage = 0

def get_dir_size(self, path) -> float:
    """
    Obtains the size of the given directory, in KiB.

    On Linux the size is obtained with a single ``du -sb`` call. On other
    platforms, or when ``du`` is missing, fails, or prints something that
    cannot be parsed, the directory tree is walked from Python and the
    sizes of all non-symlink files are summed.

    The first time a measurement takes longer than 0.5 seconds a warning
    is printed; ``self._warning_sent`` is then set so that the warning is
    not repeated for this instance.

    :param path: The path of an existing directory.
    :return: A float value describing the size (in KiB)
        of the directory as the sum of all its elements.
    """
    # A monotonic clock is used so that wall-clock adjustments cannot
    # distort the elapsed-time measurement.
    start = time.monotonic()
    total_size = None

    # "linux2" was only reported by Python 2; startswith covers both.
    if platform.startswith("linux"):
        try:
            # "--" stops option parsing, so a path beginning with "-"
            # is not mistaken for a du flag.
            output = subprocess.check_output(
                ["du", "-sb", "--", path], stderr=subprocess.DEVNULL
            )
            total_size = float(output.split()[0].decode("utf-8")) / 1024
        except (
            OSError,
            subprocess.CalledProcessError,
            IndexError,
            ValueError,
        ):
            # du is not installed, does not support -b, exited with an
            # error, or produced unexpected output: use the portable
            # walk below instead of crashing the metric.
            total_size = None

    if total_size is None:
        total_size = 0.0
        for dirpath, _dirnames, filenames in os.walk(path):
            for f in filenames:
                fp = os.path.join(dirpath, f)
                # skip if it is symbolic link
                if os.path.islink(fp):
                    continue
                try:
                    # in KiB
                    total_size += os.path.getsize(fp) / 1024
                except OSError:
                    # The file vanished or became unreadable while the
                    # tree was being walked; ignore it.
                    continue

    elapsed_t = time.monotonic() - start
    # Warn (only once) if a single measurement took more than 0.5 sec.
    if elapsed_t > 0.5 and not self._warning_sent:
        print(f"\n\nWARNING: Time to get size of {path}: {elapsed_t}")
        print("Are you sure you want to monitor this directory?\n")
        self._warning_sent = True

    return total_size

Expand Down Expand Up @@ -132,10 +157,7 @@ def __init__(self, paths_to_monitor):
Creates an instance of the minibatch Disk usage metric.
"""
super(MinibatchDiskUsage, self).__init__(
paths_to_monitor,
reset_at="iteration",
emit_at="iteration",
mode="train",
paths_to_monitor, reset_at="iteration", emit_at="iteration", mode="train"
)

def __str__(self):
Expand Down Expand Up @@ -177,10 +199,7 @@ def __init__(self, paths_to_monitor):
Creates an instance of the experience Disk usage metric.
"""
super(ExperienceDiskUsage, self).__init__(
paths_to_monitor,
reset_at="experience",
emit_at="experience",
mode="eval",
paths_to_monitor, reset_at="experience", emit_at="experience", mode="eval"
)

def __str__(self):
Expand Down Expand Up @@ -214,7 +233,7 @@ def disk_usage_metrics(
minibatch=False,
epoch=False,
experience=False,
stream=False
stream=False,
) -> List[DiskPluginMetric]:
"""
Helper method that can be used to obtain the desired set of
Expand Down
17 changes: 17 additions & 0 deletions tests/evaluation/test_disk_usage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
""" Disk Usage Metric Test"""

import unittest

from avalanche.evaluation.metrics import DiskUsage


class DiskUsageTests(unittest.TestCase):
    """Smoke tests for the DiskUsage metric."""

    def test_basic(self):
        """Check that a directory size is computed without errors."""
        disk = DiskUsage()
        size = disk.get_dir_size(".")

        # get_dir_size reports a size in KiB as a float; whatever the
        # platform-specific code path, it can never be negative.
        self.assertIsInstance(size, float)
        self.assertGreaterEqual(size, 0.0)


# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

0 comments on commit 9df4621

Please sign in to comment.