Skip to content

Commit

Permalink
Merge pull request #99 from jokva/use-dot-separator-dtype
Browse files Browse the repository at this point in the history
Use . for mnemonic-origin-copy separator in dtype
  • Loading branch information
jokva authored May 2, 2019
2 parents 211e89d + 30adff0 commit 31c743e
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 4 deletions.
40 changes: 38 additions & 2 deletions python/dlisio/plumbing/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@ class Frame(BasicObject):
Frame, along with the type- and range of the index of the channels.
Note that the index itself is also a channel object.
Attributes
----------
dtype_format : str
The basic format string for duplicated mnemonics - this string is the
default formatting for creating unique labels from the
mnemonic-origin-copynumber triple in dtype.
Notes
-----
Expand All @@ -38,6 +45,8 @@ class Frame(BasicObject):
'INDEX-MAX' : scalar('index_max')
}

dtype_format = '{:s}.{:d}.{:d}'

def __init__(self, obj = None, name = None):
super().__init__(obj, name = name, type = 'FRAME')

Expand Down Expand Up @@ -75,6 +84,10 @@ def __init__(self, obj = None, name = None):
#: arrays from all channels.
self._dtype = None

#: Instance-specific dtype label formatter on duplicated mnemonics.
#: Defaults to Frame.dtype_format
self.dtype_fmt = self.dtype_format

@property
def dtype(self):
"""dtype
Expand All @@ -89,7 +102,16 @@ def dtype(self):
Consider a frame with the channels mnemonics [('TIME', 0, 0), ('TDEP',
0, 0), ('TIME', 1, 0)]. The dtype names for this frame would be
('TIME:0:0', 'TDEP', 'TIME:1:0').
('TIME.0.0', 'TDEP', 'TIME.1.0').
Duplicated mnemonics are formatted by the dtype_fmt attribute. To use a
custom format for a specific frame instance, set dtype_fmt for the
Frame object. If you want to have some other formatting for *all*
dtypes, set the dtype_format class attribute. It has to be a 3-element
format-string taking a string and two ints. Custom formatting is
particularly useful for peculiar files where the full stop (.) appears
in the mnemonic itself, and a consistent way of parsing origin and
copynumber is needed.
See also
--------
Expand All @@ -101,7 +123,21 @@ def dtype(self):
-------
dtype : np.dtype
Examples
--------
A frame with two TIME channels:
>>> frame.dtype
dtype([('TIME.0.0', '<f4'), ('TDEP', '<i2'), ('TIME.1.0', '<i2')])
Override instance-specific mnemonic formatting
>>> frame.dtype
dtype([('TIME.0.0', '<f4'), ('TDEP', '<i2'), ('TIME.1.0', '<i2')])
>>> frame.dtype_fmt = '{:s}-{:d}-{:d}'
>>> frame.dtype
dtype([('TIME-0-0', '<f4'), ('TDEP', '<i2'), ('TIME-1-0', '<i2')])
"""

if self._dtype: return self._dtype

seen = {}
Expand All @@ -112,7 +148,7 @@ def dtype(self):
msg = ', '.join((source, problem))
info = 'name = {}, origin = {}, copynumber = {}'.format

fmtlabel = '{:s}:{:d}:{:d}'.format
fmtlabel = self.dtype_fmt.format
for i, ch in enumerate(self.channels):
# current has to be a list (or something mutable at least), because
# it has to be updated on multiple labels
Expand Down
58 changes: 56 additions & 2 deletions python/tests/test_frames.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import numpy as np

import dlisio

from . import DWL206
Expand All @@ -16,7 +18,7 @@ def test_frame_getitem(DWL206):
assert curves['TDEP'][0] == 852606.0
assert curves[0]['TDEP'] == 852606.0

def test_duplicated_mnemonics_gets_unique_labels():
def makeframe():
time0 = dlisio.plumbing.Channel()
time0.name = 'TIME'
time0.origin = 0
Expand All @@ -41,5 +43,57 @@ def test_duplicated_mnemonics_gets_unique_labels():
frame = dlisio.plumbing.Frame()
frame.channels = [time0, tdep, time1]

return frame

def test_duplicated_mnemonics_gets_unique_labels():
    """Duplicated mnemonics get qualified dtype labels, unique ones do not."""
    frm = makeframe()
    expected_names = ('TIME.0.0', 'TDEP', 'TIME.1.0')

    assert 'fDDD' == frm.fmtstr()
    # Only the repeated TIME mnemonic is expanded with origin and copynumber;
    # TDEP occurs once and keeps its plain name.
    assert expected_names == frm.dtype.names

def test_duplicated_mnemonics_dtype_supports_buffer_protocol():
    """The default dtype labels must survive export through a memoryview."""
    # When a buffer is requested from a numpy array, a :name: field is added
    # after each label name, which means the label itself must not contain a
    # colon (:). Unfortunately, the RP66 IDENT type permits the full visible
    # (non-whitespace) ascii set, so in theory a mnemonic that looks like a
    # generated label can legally be present.
    #
    # In practice, this is unlikely to be a problem. dlisio separates
    # mnemonic, origin and copynumber with the full stop (.) by default, but
    # for particularly nasty files that could collide with a different channel
    # mnemonic in the same frame. A possible fix could be to separate with a
    # blank character, or with lowercase letters (which are not supposed to be
    # a part of the IDENT type, but dlisio imposes no such restriction).
    #
    # https://github.com/equinor/dlisio/pull/97
    frm = makeframe()
    array = np.zeros(1, dtype = frm.dtype)

    # Constructing the memoryview is the test: it raises if a label is not
    # accepted by the buffer protocol.
    _ = memoryview(array)

def test_instance_dtype_fmt():
    """A per-instance dtype_fmt controls the labels of duplicated mnemonics."""
    frame = makeframe()
    frame.dtype_fmt = 'x-{:s} {:d}~{:d}'

    # fmtstr is unchanged
    assert 'fDDD' == frame.fmtstr()
    # Duplicated mnemonics follow the custom format; the unique TDEP mnemonic
    # is not run through the formatter at all.
    assert ('x-TIME 0~0', 'TDEP', 'x-TIME 1~0') == frame.dtype.names

def test_class_dtype_fmt():
    """A class-level dtype_format applies to frames constructed afterwards."""
    frame_cls = dlisio.plumbing.Frame
    saved_format = frame_cls.dtype_format

    try:
        # change the class-wide format before the object itself is
        # constructed, so that the new instance picks it up - the instance's
        # dtype_fmt is copied from the class attribute in __init__
        frame_cls.dtype_format = 'x-{:s} {:d}~{:d}'
        frm = makeframe()
        assert 'fDDD' == frm.fmtstr()
        assert ('x-TIME 0~0', 'TDEP', 'x-TIME 1~0') == frm.dtype.names

    finally:
        # restore the default format string even when the test fails, so the
        # override does not interfere with other tests
        frame_cls.dtype_format = saved_format

0 comments on commit 31c743e

Please sign in to comment.