From 188487249b135d9220d6aacad330d8fdafa507b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erlend=20H=C3=A5rstad?= Date: Thu, 19 Mar 2020 11:55:16 +0100 Subject: [PATCH 1/2] Force channel.curves() to return copy when slicing Because all channel.curves() does is to call frame.curves() and slice the result, we should return a copy of the sliced array rather than a view into the original array. The reason being that a view keeps the original array alive as long as the view lives, meaning we lay claim to way more memory than needed. Copying is slower than returning a view, but it can potentially save so much memory that the copy is worth it. Further down the line, the core library will support reading sub-chunks of frames, which will make the extra copy obsolete. --- python/dlisio/plumbing/channel.py | 2 +- python/tests/test_curves.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/python/dlisio/plumbing/channel.py b/python/dlisio/plumbing/channel.py index 44b4ff7c0..27dc325f0 100644 --- a/python/dlisio/plumbing/channel.py +++ b/python/dlisio/plumbing/channel.py @@ -203,7 +203,7 @@ def curves(self): ------- curves : np.ndarray """ - return self.frame.curves()[self.name] + return np.copy(self.frame.curves()[self.name]) def describe_attr(self, buf, width, indent, exclude): describe_description(buf, self.long_name, width, indent, exclude) diff --git a/python/tests/test_curves.py b/python/tests/test_curves.py index 6d35e389a..75a09c1a4 100644 --- a/python/tests/test_curves.py +++ b/python/tests/test_curves.py @@ -73,6 +73,16 @@ def makeframe(): frame.link() return frame +def test_curves_are_copy(f): + # All channel.curves() really does is to slice the full frame array + # returned by frame.curves(). Make sure the returned slice is a copy not a + # view. Returning a view makes it impossible to free up any memory from + # the original array, hence holding on to way more memory than needed.
+ + channel = f.object('CHANNEL', 'CHANN1') + curves = channel.curves() + assert curves.flags['OWNDATA'] + def test_curves_values(f): frame = f.object('FRAME', 'FRAME1', 10, 0) curves = frame.curves() From a79e6f26494bfe629c2bffe6534fb0e66967727e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erlend=20H=C3=A5rstad?= Date: Thu, 19 Mar 2020 12:55:56 +0100 Subject: [PATCH 2/2] Encourage use of frame.curves over channel.curves Make it more clear to the user that reading curves in a frame on a one-by-one basis with channel.curves() is way slower than reading it all in one go with frame.curves() --- python/dlisio/plumbing/channel.py | 12 ++++++++++++ python/docs/examples.rst | 5 +++++ 2 files changed, 17 insertions(+) diff --git a/python/dlisio/plumbing/channel.py b/python/dlisio/plumbing/channel.py index 27dc325f0..03c1f8bfd 100644 --- a/python/dlisio/plumbing/channel.py +++ b/python/dlisio/plumbing/channel.py @@ -165,6 +165,18 @@ def curves(self): """ Returns a numpy ndarray with the curves-values. + Notes + ----- + + This method should only be used if there is only *one* channel of + interest in a particular frame. + + Due to the memory-layout of dlis-files, reading a single channel from + disk and reading the entire frame is almost equally fast. That means + reading channels from the same frame one-by-one with this method is + *way* slower than reading the entire frame with :func:`Frame.curves()` + and then indexing on the channels-of-interest. + Examples -------- diff --git a/python/docs/examples.rst b/python/docs/examples.rst index c1f880199..df4de66b7 100644 --- a/python/docs/examples.rst +++ b/python/docs/examples.rst @@ -179,6 +179,11 @@ which returns a structured numpy array that support common slicing operations: >>> curve[0:5] array([852606., 852606., 852606., 852606., 852606.], dtype=float32) +Note that it's almost always considerably faster to read curves-data with +:py:func:`dlisio.plumbing.Frame.curves()`.
Please refer to +:py:func:`dlisio.plumbing.Channel.curves()` for further elaboration on why this +is. + Access all curves in a frame with :py:func:`dlisio.plumbing.Frame.curves()`. The returned structured numpy array can be indexed by Channel mnemonics and/or sliced by samples: