Merge pull request #327 from MIT-LCP/more-signal-fmts

Fix reading formats 8, 310, and 311
MIT-LCP · Oct 14, 2021 · 8fc10ab · 8fc10ab
2 parents 34ea7a2 + 14048dd
commit 8fc10ab
Show file tree

Hide file tree

Showing 16 changed files with 157 additions and 106 deletions.
diff --git a/.gitattributes b/.gitattributes
@@ -0,0 +1,14 @@
+*.py        diff=python
+
+*.anI       binary
+*.atr       binary
+*.d[0-9]    binary
+*.dat       binary
+*.edf       binary
+*.gz        binary
+*.mat       binary
+*.qrs       binary
+*.wabp      binary
+*.wav       binary
+*.wqrs      binary
+*.xyz       binary
diff --git a/sample-data/binformats.d0 b/sample-data/binformats.d0
diff --git a/sample-data/binformats.d1 b/sample-data/binformats.d1
diff --git a/sample-data/binformats.d2 b/sample-data/binformats.d2
diff --git a/sample-data/binformats.d3 b/sample-data/binformats.d3
@@ -0,0 +1,3 @@
+ .<JXft���������+9GUcq���������(6DR`n|���������	%3AO]ky���������"0>LZhv���������-;IWes����������*8FTbp~���������'5CQ_m{���������$2@N\jx���������!/=KYgu���������,:HVdr����������)7ESao}���������
+&4BP^lz���������#1?M[iw��������� .<JXft���������+9GUcq���������(6DR`n|���������	%3AO]ky���������"0>LZhv���������-;IWes����������*8FTbp~���������'5CQ_m{���������$2@N\jx���������!/=KYgu���������,:HVdr����������)7ESao}���������
+&4BP^lz���������#1?M[

diff --git a/sample-data/binformats.d4 b/sample-data/binformats.d4
diff --git a/sample-data/binformats.d5 b/sample-data/binformats.d5
diff --git a/sample-data/binformats.d6 b/sample-data/binformats.d6
diff --git a/sample-data/binformats.d7 b/sample-data/binformats.d7
diff --git a/sample-data/binformats.d8 b/sample-data/binformats.d8
diff --git a/sample-data/binformats.d9 b/sample-data/binformats.d9
diff --git a/sample-data/binformats.hea b/sample-data/binformats.hea
@@ -0,0 +1,11 @@
+binformats 10 200 499
+binformats.d0 8 200/mV 12 0 -2047 -31143 0 sig 0, fmt 8
+binformats.d1 16 200/mV 16 0 -32766 -750 0 sig 1, fmt 16
+binformats.d2 61 200/mV 16 0 -32765 -251 0 sig 2, fmt 61
+binformats.d3 80 200/mV 8 0 -124 -517 0 sig 3, fmt 80
+binformats.d4 160 200/mV 16 0 -32763 747 0 sig 4, fmt 160
+binformats.d5 212 200/mV 12 0 -2042 -6824 0 sig 5, fmt 212
+binformats.d6 310 200/mV 10 0 -505 -1621 0 sig 6, fmt 310
+binformats.d7 311 200/mV 10 0 -504 -2145 0 sig 7, fmt 311
+binformats.d8 24 200/mV 24 0 -8388599 11715 0 sig 8, fmt 24
+binformats.d9 32 200/mV 32 0 -2147483638 19035 0 sig 9, fmt 32
diff --git a/tests/target-output/record-1f.gz b/tests/target-output/record-1f.gz
diff --git a/tests/test_record.py b/tests/test_record.py
@@ -146,6 +146,35 @@ def test_1e(self):
         assert record.__eq__(record_pn)
         assert record_2.__eq__(record_write)
 
+    def test_1f(self):
+        """
+        All binary formats, multiple signal files in one record.
+
+        Target file created with:
+            rdsamp -r sample-data/binformats | cut -f 2- |
+            gzip -9 -n > record-1f.gz
+        """
+        record = wfdb.rdrecord('sample-data/binformats', physical=False)  # digital samples, checked via d_signal
+        sig_target = np.genfromtxt('tests/target-output/record-1f.gz')  # one column per signal
+
+        for n, name in enumerate(record.sig_name):  # full-length read: each channel must equal its target column
+            np.testing.assert_array_equal(
+                record.d_signal[:, n],
+                sig_target[:, n],
+                "Mismatch in %s" % name)
+
+        for sampfrom in range(0, 3):  # partial reads: vary the start by a few samples...
+            for sampto in range(record.sig_len - 3, record.sig_len):  # ...and the end by a few samples
+                record_2 = wfdb.rdrecord('sample-data/binformats',
+                                         physical=False,
+                                         sampfrom=sampfrom, sampto=sampto)
+                for n, name in enumerate(record.sig_name):
+                    if record.fmt[n] != '8':  # skip fmt 8: it stores sample differences, so reads with sampfrom != 0 are not expected to match
+                        np.testing.assert_array_equal(
+                            record_2.d_signal[:, n],
+                            sig_target[sampfrom:sampto, n],
+                            "Mismatch in %s" % name)
+
     # ------------------ 2. Special format records ------------------ #
 
     def test_2a(self):

diff --git a/wfdb/io/_signal.py b/wfdb/io/_signal.py
@@ -865,7 +865,7 @@ def smooth_frames(self, sigtype='physical'):
 
 
 def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,
-                samps_per_frame, skew, sampfrom, sampto, channels,
+                samps_per_frame, skew, init_value, sampfrom, sampto, channels,
                 smooth_frames, ignore_skew, no_file=False, sig_data=None, return_res=64):
     """
     Read the digital samples from a single segment record's associated
@@ -893,6 +893,8 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,
         The samples/frame for each signal of the dat file.
     skew : list
         The skew for the signals of the dat file.
+    init_value : list
+        The initial value for each signal of the dat file.
     sampfrom : int
         The starting sample number to be read from the signals.
     sampto : int
@@ -939,6 +941,7 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,
     byte_offset = byte_offset[:]
     samps_per_frame = samps_per_frame[:]
     skew = skew[:]
+    init_value = init_value[:]
 
     # Set defaults for empty fields
     for i in range(n_sig):
@@ -948,6 +951,8 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,
             samps_per_frame[i] = 1
         if skew[i] == None:
             skew[i] = 0
+        if init_value[i] == None:
+            init_value[i] = 0
 
     # If skew is to be ignored, set all to 0
     if ignore_skew:
@@ -964,6 +969,7 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,
     w_byte_offset = {} # one scalar per dat file
     w_samps_per_frame = {} # one list per dat file
     w_skew = {} # one list per dat file
+    w_init_value = {} # one list per dat file
     w_channel = {} # one list per dat file
 
     for fn in file_name:
@@ -977,6 +983,7 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,
             w_byte_offset[fn] = byte_offset[datchannel[fn][0]]
             w_samps_per_frame[fn] = [samps_per_frame[c] for c in datchannel[fn]]
             w_skew[fn] = [skew[c] for c in datchannel[fn]]
+            w_init_value[fn] = [init_value[c] for c in datchannel[fn]]
             w_channel[fn] = idc
 
     # Wanted dat channels, relative to the dat file itself
@@ -997,17 +1004,23 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,
 
         # Read each wanted dat file and store signals
         for fn in w_file_name:
-            if no_file:
-                signals[:, out_dat_channel[fn]] = _rd_dat_signals(fn, dir_name,
-                    pn_dir, w_fmt[fn], len(datchannel[fn]), sig_len,
-                    w_byte_offset[fn], w_samps_per_frame[fn], w_skew[fn],
-                    sampfrom, sampto, smooth_frames, no_file=True,
-                    sig_data=sig_data)[:, r_w_channel[fn]]
-            else:
-                signals[:, out_dat_channel[fn]] = _rd_dat_signals(fn, dir_name,
-                    pn_dir, w_fmt[fn], len(datchannel[fn]), sig_len,
-                    w_byte_offset[fn], w_samps_per_frame[fn], w_skew[fn],
-                    sampfrom, sampto, smooth_frames)[:, r_w_channel[fn]]
+            datsignals = _rd_dat_signals(
+                file_name=fn,
+                dir_name=dir_name,
+                pn_dir=pn_dir,
+                fmt=w_fmt[fn],
+                n_sig=len(datchannel[fn]),
+                sig_len=sig_len,
+                byte_offset=w_byte_offset[fn],
+                samps_per_frame=w_samps_per_frame[fn],
+                skew=w_skew[fn],
+                init_value=w_init_value[fn],
+                sampfrom=sampfrom,
+                sampto=sampto,
+                smooth_frames=smooth_frames,
+                no_file=no_file,
+                sig_data=sig_data)
+            signals[:, out_dat_channel[fn]] = datsignals[:, r_w_channel[fn]]
 
     # Return each sample in signals with multiple samples/frame, without smoothing.
     # Return a list of numpy arrays for each signal.
@@ -1016,16 +1029,22 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,
 
         for fn in w_file_name:
             # Get the list of all signals contained in the dat file
-            if no_file:
-                datsignals = _rd_dat_signals(fn, dir_name, pn_dir, w_fmt[fn],
-                    len(datchannel[fn]), sig_len, w_byte_offset[fn],
-                    w_samps_per_frame[fn], w_skew[fn], sampfrom, sampto,
-                    smooth_frames, no_file=True, sig_data=sig_data)
-            else:
-                datsignals = _rd_dat_signals(fn, dir_name, pn_dir, w_fmt[fn],
-                    len(datchannel[fn]), sig_len, w_byte_offset[fn],
-                    w_samps_per_frame[fn], w_skew[fn], sampfrom, sampto,
-                    smooth_frames)
+            datsignals = _rd_dat_signals(
+                file_name=fn,
+                dir_name=dir_name,
+                pn_dir=pn_dir,
+                fmt=w_fmt[fn],
+                n_sig=len(datchannel[fn]),
+                sig_len=sig_len,
+                byte_offset=w_byte_offset[fn],
+                samps_per_frame=w_samps_per_frame[fn],
+                skew=w_skew[fn],
+                init_value=w_init_value[fn],
+                sampfrom=sampfrom,
+                sampto=sampto,
+                smooth_frames=smooth_frames,
+                no_file=no_file,
+                sig_data=sig_data)
 
             # Copy over the wanted signals
             for cn in range(len(out_dat_channel[fn])):
@@ -1035,8 +1054,9 @@ def _rd_segment(file_name, dir_name, pn_dir, fmt, n_sig, sig_len, byte_offset,
 
 
 def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len,
-                   byte_offset, samps_per_frame, skew, sampfrom, sampto,
-                   smooth_frames, no_file=False, sig_data=None):
+                    byte_offset, samps_per_frame, skew, init_value,
+                    sampfrom, sampto, smooth_frames,
+                    no_file=False, sig_data=None):
     """
     Read all signals from a WFDB dat file.
 
@@ -1062,6 +1082,8 @@ def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len,
         The samples/frame for each signal of the dat file.
     skew : list
         The skew for the signals of the dat file.
+    init_value : list
+        The initial value for each signal of the dat file.
     sampfrom : int
         The starting sample number to be read from the signals.
     sampto : int
@@ -1160,6 +1182,32 @@ def _rd_dat_signals(file_name, dir_name, pn_dir, fmt, n_sig, sig_len,
         elif fmt == '160':
             sig_data = (sig_data.astype('int32') - 32768).astype('int16')
 
+    # For format 8, convert sample differences to absolute samples.  Note
+    # that if sampfrom is not 0, the results will be wrong, since we can't
+    # know the starting value without reading the entire record from the
+    # beginning - an inherent limitation of the format, and the use of
+    # format 8 is discouraged for this reason!  However, the following is
+    # consistent with the behavior of the WFDB library: the initial value
+    # specified by the header file is used as the starting sample value,
+    # regardless of where in the record we begin reading.  Therefore, the
+    # following should give the same results as rdsamp.
+    if fmt == '8':
+        dif_frames = sig_data.reshape(-1, tsamps_per_frame)
+        abs_frames = np.empty(dif_frames.shape, dtype='int32')
+        ch_start = 0
+        for ch in range(n_sig):
+            ch_end = ch_start + samps_per_frame[ch]
+            # Extract sample differences as a 2D array
+            ch_dif_signal = dif_frames[:, ch_start:ch_end]
+            # Convert to a 1D array of absolute samples
+            ch_abs_signal = ch_dif_signal.cumsum(dtype=abs_frames.dtype)
+            ch_abs_signal += init_value[ch]
+            # Transfer to the output array
+            ch_abs_signal = ch_abs_signal.reshape(ch_dif_signal.shape)
+            abs_frames[:, ch_start:ch_end] = ch_abs_signal
+            ch_start = ch_end
+        sig_data = abs_frames.reshape(-1)
+
     # At this point, dtype of sig_data is the minimum integer format
     # required for storing the final digital samples.
 
@@ -1472,14 +1520,6 @@ def _blocks_to_samples(sig_data, n_samp, fmt):
         sig[sig > 2047] -= 4096
 
     elif fmt == '310':
-        # Easier to process when dealing with whole blocks
-        if n_samp % 3:
-            n_samp = upround(n_samp,3)
-            added_samps = n_samp % 3
-            sig_data = np.append(sig_data, np.zeros(added_samps, dtype='uint8'))
-        else:
-            added_samps = 0
-
         sig_data = sig_data.astype('int16')
         sig = np.zeros(n_samp, dtype='int16')
 
@@ -1491,24 +1531,11 @@ def _blocks_to_samples(sig_data, n_samp, fmt):
         # Third signal is 5 msb of second byte and 5 msb of fourth byte
         sig[2::3] = np.bitwise_and((sig_data[1::4] >> 3), 0x1f)[0:len(sig[2::3])] + 32 * np.bitwise_and(sig_data[3::4] >> 3, 0x1f)[0:len(sig[2::3])]
 
-        # Remove trailing samples read within the byte block if
-        # originally not 3n sampled
-        if added_samps:
-            sig = sig[:-added_samps]
-
         # Values were loaded as unsigned. Convert to 2's complement form:
         # values > 2^9-1 are negative.
         sig[sig > 511] -= 1024
 
     elif fmt == '311':
-        # Easier to process when dealing with whole blocks
-        if n_samp % 3:
-            n_samp = upround(n_samp,3)
-            added_samps = n_samp % 3
-            sig_data = np.append(sig_data, np.zeros(added_samps, dtype='uint8'))
-        else:
-            added_samps = 0
-
         sig_data = sig_data.astype('int16')
         sig = np.zeros(n_samp, dtype='int16')
 
@@ -1520,11 +1547,6 @@ def _blocks_to_samples(sig_data, n_samp, fmt):
         # Third sample is 4 msb of third byte and 6 msb of fourth byte
         sig[2::3] = (sig_data[2::4] >> 4)[0:len(sig[2::3])] + 16 * np.bitwise_and(sig_data[3::4], 0x7f)[0:len(sig[2::3])]
 
-        # Remove trailing samples read within the byte block if
-        # originally not 3n sampled
-        if added_samps:
-            sig = sig[:-added_samps]
-
         # Values were loaded as unsigned. Convert to 2's complement form:
         # values > 2^9-1 are negative.
         sig[sig > 511] -= 1024

diff --git a/wfdb/io/record.py b/wfdb/io/record.py
@@ -3507,39 +3507,38 @@ def rdrecord(record_name, sampfrom=0, sampto=None, channels=None,
 
     # A single segment record
     elif isinstance(record, Record):
+        if record_name.endswith('.edf') or record_name.endswith('.wav'):
+            no_file = True
+            sig_data = record.d_signal
+        else:
+            no_file = False
+            sig_data = None
+
+        signals = _signal._rd_segment(
+            file_name=record.file_name,
+            dir_name=dir_name,
+            pn_dir=pn_dir,
+            fmt=record.fmt,
+            n_sig=record.n_sig,
+            sig_len=record.sig_len,
+            byte_offset=record.byte_offset,
+            samps_per_frame=record.samps_per_frame,
+            skew=record.skew,
+            init_value=record.init_value,
+            sampfrom=sampfrom,
+            sampto=sampto,
+            channels=channels,
+            smooth_frames=smooth_frames,
+            ignore_skew=ignore_skew,
+            no_file=no_file,
+            sig_data=sig_data,
+            return_res=return_res)
 
         # Only 1 sample/frame, or frames are smoothed. Return uniform numpy array
         if smooth_frames or max([record.samps_per_frame[c] for c in channels]) == 1:
             # Read signals from the associated dat files that contain
             # wanted channels
-            if record_name.endswith('.edf') or record_name.endswith('.wav'):
-                record.d_signal = _signal._rd_segment(record.file_name,
-                                                      dir_name, pn_dir,
-                                                      record.fmt,
-                                                      record.n_sig,
-                                                      record.sig_len,
-                                                      record.byte_offset,
-                                                      record.samps_per_frame,
-                                                      record.skew, sampfrom,
-                                                      sampto, channels,
-                                                      smooth_frames,
-                                                      ignore_skew,
-                                                      no_file=True,
-                                                      sig_data=record.d_signal,
-                                                      return_res=return_res)
-            else:
-                record.d_signal = _signal._rd_segment(record.file_name,
-                                                      dir_name, pn_dir,
-                                                      record.fmt,
-                                                      record.n_sig,
-                                                      record.sig_len,
-                                                      record.byte_offset,
-                                                      record.samps_per_frame,
-                                                      record.skew, sampfrom,
-                                                      sampto, channels,
-                                                      smooth_frames,
-                                                      ignore_skew,
-                                                      return_res=return_res)
+            record.d_signal = signals
 
             # Arrange/edit the object fields to reflect user channel
             # and/or signal range input
@@ -3552,34 +3551,7 @@ def rdrecord(record_name, sampfrom=0, sampto=None, channels=None,
 
         # Return each sample of the signals with multiple samples per frame
         else:
-            if record_name.endswith('.edf') or record_name.endswith('.wav'):
-                record.e_d_signal = _signal._rd_segment(record.file_name,
-                                                      dir_name, pn_dir,
-                                                      record.fmt,
-                                                      record.n_sig,
-                                                      record.sig_len,
-                                                      record.byte_offset,
-                                                      record.samps_per_frame,
-                                                      record.skew, sampfrom,
-                                                      sampto, channels,
-                                                      smooth_frames,
-                                                      ignore_skew,
-                                                      no_file=True,
-                                                      sig_data=record.d_signal,
-                                                      return_res=return_res)
-            else:
-                record.e_d_signal = _signal._rd_segment(record.file_name,
-                                                        dir_name, pn_dir,
-                                                        record.fmt,
-                                                        record.n_sig,
-                                                        record.sig_len,
-                                                        record.byte_offset,
-                                                        record.samps_per_frame,
-                                                        record.skew, sampfrom,
-                                                        sampto, channels,
-                                                        smooth_frames,
-                                                        ignore_skew,
-                                                        return_res=return_res)
+            record.e_d_signal = signals
 
             # Arrange/edit the object fields to reflect user channel
             # and/or signal range input