Skip to content

Commit

Permalink
feat: return the correct size for custom container objects (#547)
Browse files Browse the repository at this point in the history
Summary:
- feat: return the correct size for custom container objects

This is needed to correctly evaluate whether an object is worth
memoizing or keeping in the cache.

See for context: Safe-DS/Runner#51 and
Safe-DS/Runner#44

For future container classes (e.g. an image set), this would also need
to be added in order to be compatible with the memoizing implementation
in the runner.
  • Loading branch information
WinPlay02 authored Feb 5, 2024
1 parent 32b4d8f commit f44c34d
Show file tree
Hide file tree
Showing 14 changed files with 245 additions and 0 deletions.
11 changes: 11 additions & 0 deletions src/safeds/data/image/containers/_image.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import sys
import io
import warnings
from pathlib import Path
Expand Down Expand Up @@ -108,6 +109,16 @@ def __eq__(self, other: object) -> bool:
and torch.all(torch.eq(self._image_tensor, other._set_device(self.device)._image_tensor)).item()
)

def __sizeof__(self) -> int:
    """
    Compute how many bytes this image occupies.

    The result adds the size reported by `sys.getsizeof` for the tensor object to the size of the
    tensor's elements (bytes per element multiplied by the number of elements).

    Returns
    -------
    size:
        Size of this object in bytes.
    """
    tensor = self._image_tensor
    tensor_object_size = sys.getsizeof(tensor)
    element_data_size = tensor.element_size() * tensor.nelement()
    return tensor_object_size + element_data_size

def _repr_jpeg_(self) -> bytes | None:
"""
Return a JPEG image as bytes.
Expand Down
11 changes: 11 additions & 0 deletions src/safeds/data/tabular/containers/_column.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import sys
import io
from collections.abc import Sequence
from numbers import Number
Expand Down Expand Up @@ -247,6 +248,16 @@ def __repr__(self) -> str:
"""
return f"Column({self._name!r}, {list(self._data)!r})"

def __sizeof__(self) -> int:
    """
    Compute how many bytes this column occupies.

    The result is the sum of the sizes reported by `sys.getsizeof` for the column's data, name, and type.

    Returns
    -------
    size:
        Size of this object in bytes.
    """
    parts = (self._data, self._name, self._type)
    return sum(sys.getsizeof(part) for part in parts)

def __str__(self) -> str:
"""
Return a user-friendly string representation of this column.
Expand Down
11 changes: 11 additions & 0 deletions src/safeds/data/tabular/containers/_row.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import sys
import functools
from collections.abc import Callable, Mapping
from typing import TYPE_CHECKING, Any
Expand Down Expand Up @@ -269,6 +270,16 @@ def __repr__(self) -> str:
"""
return f"Row({self!s})"

def __sizeof__(self) -> int:
    """
    Compute how many bytes this row occupies.

    The result is the size reported by `sys.getsizeof` for the row's data plus the size reported for its schema.

    Returns
    -------
    size:
        Size of this object in bytes.
    """
    data_size = sys.getsizeof(self._data)
    schema_size = sys.getsizeof(self._schema)
    return data_size + schema_size

def __str__(self) -> str:
"""
Return a user-friendly string representation of this row.
Expand Down
11 changes: 11 additions & 0 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import sys
import functools
import io
import warnings
Expand Down Expand Up @@ -475,6 +476,16 @@ def __repr__(self) -> str:
tmp.columns = self.column_names
return tmp.__repr__()

def __sizeof__(self) -> int:
    """
    Compute how many bytes this table occupies.

    The result is the sum of the sizes reported by `sys.getsizeof` for the table's data and its schema.

    Returns
    -------
    size:
        Size of this object in bytes.
    """
    return sum(map(sys.getsizeof, (self._data, self._schema)))

def __str__(self) -> str:
tmp = self._data.reset_index(drop=True)
tmp.columns = self.column_names
Expand Down
11 changes: 11 additions & 0 deletions src/safeds/data/tabular/containers/_tagged_table.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import sys
from typing import TYPE_CHECKING

from safeds.data.tabular.containers import Column, Row, Table
Expand Down Expand Up @@ -164,6 +165,16 @@ def __init__(
self._features: Table = _data.keep_only_columns(feature_names)
self._target: Column = _data.get_column(target_name)

def __sizeof__(self) -> int:
    """
    Compute how many bytes this tagged table occupies.

    The result is the size computed by `Table.__sizeof__` plus the sizes reported by `sys.getsizeof` for the
    feature table and the target column.

    Returns
    -------
    size:
        Size of this object in bytes.
    """
    table_size = Table.__sizeof__(self)
    features_size = sys.getsizeof(self._features)
    target_size = sys.getsizeof(self._target)
    return table_size + features_size + target_size

# ------------------------------------------------------------------------------------------------------------------
# Properties
# ------------------------------------------------------------------------------------------------------------------
Expand Down
11 changes: 11 additions & 0 deletions src/safeds/data/tabular/containers/_time_series.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import sys
from typing import TYPE_CHECKING

from safeds.data.tabular.containers import Column, Row, Table, TaggedTable
Expand Down Expand Up @@ -186,6 +187,16 @@ def __init__(
raise UnknownColumnNameError([time_name])
self._time: Column = _data.get_column(time_name)

def __sizeof__(self) -> int:
    """
    Compute how many bytes this time series occupies.

    The result is the size computed by `TaggedTable.__sizeof__` plus the size reported by `sys.getsizeof` for the
    time column.

    Returns
    -------
    size:
        Size of this object in bytes.
    """
    tagged_table_size = TaggedTable.__sizeof__(self)
    time_size = sys.getsizeof(self._time)
    return tagged_table_size + time_size

# ------------------------------------------------------------------------------------------------------------------
# Properties
# ------------------------------------------------------------------------------------------------------------------
Expand Down
11 changes: 11 additions & 0 deletions src/safeds/data/tabular/typing/_schema.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import sys
from dataclasses import dataclass
from typing import TYPE_CHECKING

Expand Down Expand Up @@ -100,6 +101,16 @@ def __repr__(self) -> str:
"""
return f"Schema({self!s})"

def __sizeof__(self) -> int:
    """
    Compute how many bytes this schema occupies.

    The result is the size reported by `sys.getsizeof` for the underlying mapping plus the sizes reported for each
    of its keys and values.

    Returns
    -------
    size:
        Size of this object in bytes.
    """
    mapping = self._schema
    entries_size = sum(sys.getsizeof(key) + sys.getsizeof(value) for key, value in mapping.items())
    return entries_size + sys.getsizeof(mapping)

def __str__(self) -> str:
"""
Return a user-friendly string representation of the schema.
Expand Down
14 changes: 14 additions & 0 deletions tests/safeds/data/image/containers/test_image.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import sys
import typing
from pathlib import Path
from tempfile import NamedTemporaryFile
Expand Down Expand Up @@ -900,3 +901,16 @@ def test_should_return_edges_of_image(
image_edges = image.find_edges()
assert image_edges == snapshot_png
_assert_width_height_channel(image, image_edges)


@pytest.mark.parametrize("device", _test_devices(), ids=_test_devices_ids())
class TestSizeof:
    """Checks the size that `sys.getsizeof` reports for images."""

    @pytest.mark.parametrize(
        "resource_path",
        _test_images_all(),
        ids=_test_images_all_ids(),
    )
    def test_should_size_be_greater_than_normal_object(self, resource_path: str | Path, device: Device) -> None:
        """The reported size must be at least width * height * channel."""
        _skip_if_device_not_available(device)
        image_path = resolve_resource_path(resource_path)
        image = Image.from_file(image_path, device)
        element_count = image.width * image.height * image.channel
        assert sys.getsizeof(image) >= element_count
21 changes: 21 additions & 0 deletions tests/safeds/data/tabular/containers/_column/test_sizeof.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import sys

import pytest
from safeds.data.tabular.containers import Column


@pytest.mark.parametrize(
    "column",
    [
        pytest.param(Column("a", []), id="empty"),
        pytest.param(Column("a", [0]), id="one row"),
        pytest.param(Column("a", [0, "1"]), id="multiple rows"),
    ],
)
def test_should_size_be_greater_than_normal_object(column: Column) -> None:
    """The size reported for a column must exceed the size of a plain `object` instance."""
    plain_object_size = sys.getsizeof(object())
    assert sys.getsizeof(column) > plain_object_size
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import sys

import pytest
from safeds.data.tabular.containers import Table, TimeSeries


@pytest.mark.parametrize(
    "time_series",
    [
        pytest.param(
            TimeSeries(
                {
                    "time": [0, 1, 2],
                    "feature_1": [3, 9, 6],
                    "feature_2": [6, 12, 9],
                    "target": [1, 3, 2],
                },
                "target",
                "time",
                ["feature_1", "feature_2"],
            ),
            id="normal",
        ),
        pytest.param(
            TimeSeries(
                {
                    "time": [0, 1, 2],
                    "feature_1": [3, 9, 6],
                    "feature_2": [6, 12, 9],
                    "other": [3, 9, 12],
                    "target": [1, 3, 2],
                },
                "target",
                "time",
                ["feature_1", "feature_2"],
            ),
            id="table_with_column_as_non_feature",
        ),
    ],
)
def test_should_size_be_greater_than_normal_object(time_series: TimeSeries) -> None:
    """The size reported for a time series must exceed the size of a plain `object` instance."""
    plain_object_size = sys.getsizeof(object())
    assert sys.getsizeof(time_series) > plain_object_size
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import sys

import pytest
from safeds.data.tabular.containers import Table, TaggedTable


@pytest.mark.parametrize(
    "tagged_table",
    [
        pytest.param(
            TaggedTable(
                {
                    "feature_1": [3, 9, 6],
                    "feature_2": [6, 12, 9],
                    "target": [1, 3, 2],
                },
                "target",
                ["feature_1", "feature_2"],
            ),
            id="normal",
        ),
        pytest.param(
            TaggedTable(
                {
                    "feature_1": [3, 9, 6],
                    "feature_2": [6, 12, 9],
                    "other": [3, 9, 12],
                    "target": [1, 3, 2],
                },
                "target",
                ["feature_1", "feature_2"],
            ),
            id="table_with_column_as_non_feature",
        ),
    ],
)
def test_should_size_be_greater_than_normal_object(tagged_table: TaggedTable) -> None:
    """The size reported for a tagged table must exceed the size of a plain `object` instance."""
    plain_object_size = sys.getsizeof(object())
    assert sys.getsizeof(tagged_table) > plain_object_size
21 changes: 21 additions & 0 deletions tests/safeds/data/tabular/containers/_table/test_sizeof.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import sys

import pytest
from safeds.data.tabular.containers import Table


@pytest.mark.parametrize(
    "table",
    [
        pytest.param(Table(), id="empty table"),
        pytest.param(Table({"col1": [0]}), id="table with one row"),
        pytest.param(Table({"col1": [0, "1"], "col2": ["a", "b"]}), id="table with multiple rows"),
    ],
)
def test_should_size_be_greater_than_normal_object(table: Table) -> None:
    """The size reported for a table must exceed the size of a plain `object` instance."""
    plain_object_size = sys.getsizeof(object())
    assert sys.getsizeof(table) > plain_object_size
19 changes: 19 additions & 0 deletions tests/safeds/data/tabular/containers/test_row.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import re
import sys
from collections.abc import Callable
from typing import Any

Expand Down Expand Up @@ -552,3 +553,21 @@ def test_should_sort_columns(self, row: Row, comparator: Callable[[tuple, tuple]
def test_should_sort_table_out_of_place(self, row: Row) -> None:
sorted_row = row.sort_columns()
assert sorted_row != row


class TestSizeof:
    """Checks the size that `sys.getsizeof` reports for rows."""

    @pytest.mark.parametrize(
        "row",
        [
            pytest.param(Row(), id="empty"),
            pytest.param(Row({"col1": 0}), id="single column"),
            pytest.param(Row({"col1": 0, "col2": "a"}), id="multiple columns"),
        ],
    )
    def test_should_size_be_greater_than_normal_object(self, row: Row) -> None:
        """The size reported for a row must exceed the size of a plain `object` instance."""
        plain_object_size = sys.getsizeof(object())
        assert sys.getsizeof(row) > plain_object_size
19 changes: 19 additions & 0 deletions tests/safeds/data/tabular/typing/test_schema.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import sys
from typing import TYPE_CHECKING

import pandas as pd
Expand Down Expand Up @@ -495,3 +496,21 @@ class TestReprMarkdown:
)
def test_should_create_a_string_representation(self, schema: Schema, expected: str) -> None:
assert schema._repr_markdown_() == expected


class TestSizeof:
    """Checks the size that `sys.getsizeof` reports for schemas."""

    @pytest.mark.parametrize(
        "schema",
        [
            pytest.param(Schema({}), id="empty"),
            pytest.param(Schema({"A": Integer()}), id="single column"),
            pytest.param(Schema({"A": Integer(), "B": String()}), id="multiple columns"),
        ],
    )
    def test_should_size_be_greater_than_normal_object(self, schema: Schema) -> None:
        """The size reported for a schema must exceed the size of a plain `object` instance."""
        plain_object_size = sys.getsizeof(object())
        assert sys.getsizeof(schema) > plain_object_size

0 comments on commit f44c34d

Please sign in to comment.