Merge pull request #50 from kamo-naoyuki/extended
Extended ark format
kamo-naoyuki authored Sep 10, 2020
2 parents 6f1a668 + 806c401 commit 0e0af26
Showing 16 changed files with 1,234 additions and 778 deletions.
2 changes: 1 addition & 1 deletion kaldiio/__init__.py
@@ -14,7 +14,7 @@
from kaldiio.utils import parse_specifier

try:
__version__ = pkg_resources.get_distribution('kaldiio').version
__version__ = pkg_resources.get_distribution("kaldiio").version
except Exception:
__version__ = None
del pkg_resources
146 changes: 82 additions & 64 deletions kaldiio/compression_header.py
@@ -16,13 +16,14 @@

class GlobalHeader(object):
"""This is a imitation class of the structure "GlobalHeader" """
def __init__(self, type, min_value, range, rows, cols, endian='<'):
if type in ('CM', 'CM2'):
c = 65535.
elif type == 'CM3':
c = 255.

def __init__(self, type, min_value, range, rows, cols, endian="<"):
if type in ("CM", "CM2"):
c = 65535.0
elif type == "CM3":
c = 255.0
else:
raise RuntimeError('Not supported type={}'.format(type))
raise RuntimeError("Not supported type={}".format(type))
self.type = type
self.c = c
self.min_value = min_value
@@ -36,76 +37,84 @@ def size(self):
return 17 + len(self.type)

@staticmethod
def read(fd, type='CM', endian='<'):
min_value = struct.unpack(endian + 'f', fd.read(4))[0]
range = struct.unpack(endian + 'f', fd.read(4))[0]
rows = struct.unpack(endian + 'i', fd.read(4))[0]
cols = struct.unpack(endian + 'i', fd.read(4))[0]
def read(fd, type="CM", endian="<"):
min_value = struct.unpack(endian + "f", fd.read(4))[0]
range = struct.unpack(endian + "f", fd.read(4))[0]
rows = struct.unpack(endian + "i", fd.read(4))[0]
cols = struct.unpack(endian + "i", fd.read(4))[0]
return GlobalHeader(type, min_value, range, rows, cols, endian)

def write(self, fd, endian=None):
if endian is None:
endian = self.endian
fd.write(self.type.encode() + b' ')
fd.write(struct.pack(endian + 'f', self.min_value))
fd.write(struct.pack(endian + 'f', self.range))
fd.write(struct.pack(endian + 'i', self.rows))
fd.write(struct.pack(endian + 'i', self.cols))
fd.write(self.type.encode() + b" ")
fd.write(struct.pack(endian + "f", self.min_value))
fd.write(struct.pack(endian + "f", self.range))
fd.write(struct.pack(endian + "i", self.rows))
fd.write(struct.pack(endian + "i", self.cols))
return self.size

@staticmethod
def compute(array, compression_method, endian='<'):
def compute(array, compression_method, endian="<"):
if compression_method == kAutomaticMethod:
if array.shape[0] > 8:
compression_method = kSpeechFeature
else:
compression_method = kTwoByteAuto

if compression_method == kSpeechFeature:
matrix_type = 'CM'
elif compression_method == kTwoByteAuto or \
compression_method == kTwoByteSignedInteger:
matrix_type = 'CM2'
elif compression_method == kOneByteAuto or \
compression_method == kOneByteUnsignedInteger or \
compression_method == kOneByteZeroOne:
matrix_type = 'CM3'
matrix_type = "CM"
elif (
compression_method == kTwoByteAuto
or compression_method == kTwoByteSignedInteger
):
matrix_type = "CM2"
elif (
compression_method == kOneByteAuto
or compression_method == kOneByteUnsignedInteger
or compression_method == kOneByteZeroOne
):
matrix_type = "CM3"
else:
raise ValueError(
'Unknown compression_method: {}'.format(compression_method))

if compression_method == kSpeechFeature or \
compression_method == kTwoByteAuto or \
compression_method == kOneByteAuto:
"Unknown compression_method: {}".format(compression_method)
)

if (
compression_method == kSpeechFeature
or compression_method == kTwoByteAuto
or compression_method == kOneByteAuto
):
min_value = array.min()
max_value = array.max()
if min_value == max_value:
max_value = min_value + (1. + abs(min_value))
max_value = min_value + (1.0 + abs(min_value))
range_ = max_value - min_value
elif compression_method == kTwoByteSignedInteger:
min_value = -32768.
range_ = 65535.
min_value = -32768.0
range_ = 65535.0
elif compression_method == kOneByteUnsignedInteger:
min_value = 0.
range_ = 255.
min_value = 0.0
range_ = 255.0
elif compression_method == kOneByteZeroOne:
min_value = 0.
range_ = 1.
min_value = 0.0
range_ = 1.0
else:
raise ValueError(
'Unknown compression_method: {}'.format(compression_method))
"Unknown compression_method: {}".format(compression_method)
)

return GlobalHeader(
matrix_type, min_value, range_, array.shape[0], array.shape[1],
endian)
matrix_type, min_value, range_, array.shape[0], array.shape[1], endian
)

def float_to_uint(self, array):
if self.c == 65535.:
dtype = np.dtype(self.endian + 'u2')
if self.c == 65535.0:
dtype = np.dtype(self.endian + "u2")
else:
dtype = np.dtype(self.endian + 'u1')
dtype = np.dtype(self.endian + "u1")
# + 0.499 is to round to closest int
array = ((array - self.min_value) / self.range * self.c + 0.499)
array = (array - self.min_value) / self.range * self.c + 0.499
return array.astype(np.dtype(dtype))

def uint_to_float(self, array):
@@ -115,7 +124,8 @@ def uint_to_float(self, array):

class PerColHeader(object):
"""This is a imitation class of the structure "PerColHeader" """
def __init__(self, p0, p25, p75, p100, endian='<'):

def __init__(self, p0, p25, p75, p100, endian="<"):
# p means percentile
self.p0 = p0
self.p25 = p25
@@ -133,22 +143,25 @@ def read(fd, global_header):
# Read PerColHeader
size_of_percolheader = 8
buf = fd.read(size_of_percolheader * global_header.cols)
header_array = np.frombuffer(buf, dtype=np.dtype(endian + 'u2'))
header_array = np.frombuffer(buf, dtype=np.dtype(endian + "u2"))
header_array = np.asarray(header_array, np.float32)
# Decompress header
header_array = global_header.uint_to_float(header_array)
header_array = header_array.reshape(-1, 4, 1)
return PerColHeader(header_array[:, 0], header_array[:, 1],
header_array[:, 2], header_array[:, 3],
endian)
return PerColHeader(
header_array[:, 0],
header_array[:, 1],
header_array[:, 2],
header_array[:, 3],
endian,
)

def write(self, fd, global_header, endian=None):
if endian is None:
endian = self.endian
header_array = np.concatenate(
[self.p0, self.p25, self.p75, self.p100], axis=1)
header_array = np.concatenate([self.p0, self.p25, self.p75, self.p100], axis=1)
header_array = global_header.float_to_uint(header_array)
header_array = header_array.astype(np.dtype(endian + 'u2'))
header_array = header_array.astype(np.dtype(endian + "u2"))
byte_str = header_array.tobytes()
fd.write(byte_str)
return len(byte_str)
@@ -158,8 +171,8 @@ def compute(array, global_header):
quarter_nr = array.shape[0] // 4
if array.shape[0] >= 5:
srows = np.partition(
array,
[0, quarter_nr, 3 * quarter_nr, array.shape[0] - 1], axis=0)
array, [0, quarter_nr, 3 * quarter_nr, array.shape[0] - 1], axis=0
)
p0 = srows[0]
p25 = srows[quarter_nr]
p75 = srows[3 * quarter_nr]
@@ -208,16 +221,16 @@ def float_to_char(self, array):
ma2 = ~ma1 * ~ma3

# +0.5 round to the closest int
tmp = (array - p0) / (p25 - p0) * 64. + 0.5
tmp = np.where(tmp < 0., 0., np.where(tmp > 64., 64., tmp))
tmp = (array - p0) / (p25 - p0) * 64.0 + 0.5
tmp = np.where(tmp < 0.0, 0.0, np.where(tmp > 64.0, 64.0, tmp))

tmp2 = ((array - p25) / (p75 - p25) * 128. + 64.5)
tmp2 = np.where(tmp2 < 64., 64., np.where(tmp2 > 192., 192., tmp2))
tmp2 = (array - p25) / (p75 - p25) * 128.0 + 64.5
tmp2 = np.where(tmp2 < 64.0, 64.0, np.where(tmp2 > 192.0, 192.0, tmp2))

tmp3 = ((array - p75) / (p100 - p75) * 63. + 192.5)
tmp3 = np.where(tmp3 < 192., 192., np.where(tmp3 > 255., 255., tmp3))
tmp3 = (array - p75) / (p100 - p75) * 63.0 + 192.5
tmp3 = np.where(tmp3 < 192.0, 192.0, np.where(tmp3 > 255.0, 255.0, tmp3))
array = np.where(ma1, tmp, np.where(ma2, tmp2, tmp3))
return array.astype(np.dtype(self.endian + 'u1'))
return array.astype(np.dtype(self.endian + "u1"))

def char_to_float(self, array):
array = array.astype(np.float32)
@@ -228,6 +241,11 @@ def char_to_float(self, array):
ma2 = ~ma1 * ~ma3 # 192 >= array > 64

return np.where(
ma1, p0 + (p25 - p0) * array * (1 / 64.),
np.where(ma2, p25 + (p75 - p25) * (array - 64.) * (1 / 128.),
p75 + (p100 - p75) * (array - 192.) * (1 / 63.)))
ma1,
p0 + (p25 - p0) * array * (1 / 64.0),
np.where(
ma2,
p25 + (p75 - p25) * (array - 64.0) * (1 / 128.0),
p75 + (p100 - p75) * (array - 192.0) * (1 / 63.0),
),
)
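The GlobalHeader logic reformatted above is Kaldi's compressed-matrix quantization: a value x is stored as the unsigned integer (x - min_value) / range * c + 0.499, with c = 65535.0 for the two-byte types ("CM", "CM2") and 255.0 for the one-byte type ("CM3"). Below is a minimal, self-contained sketch of that round trip; it is not the library code, and since the body of uint_to_float falls outside this excerpt, the inverse shown here is an assumption obtained by inverting float_to_uint.

```python
import numpy as np

def float_to_uint(array, min_value, range_, c=65535.0):
    # + 0.499 rounds to the closest integer, as in GlobalHeader.float_to_uint.
    quantized = (array - min_value) / range_ * c + 0.499
    return quantized.astype(np.uint16 if c == 65535.0 else np.uint8)

def uint_to_float(array, min_value, range_, c=65535.0):
    # Assumed inverse of float_to_uint (the real implementation is elided above).
    return min_value + array.astype(np.float32) * (range_ / c)

x = np.random.randn(8, 3).astype(np.float32)
min_value, range_ = x.min(), x.max() - x.min()
u = float_to_uint(x, min_value, range_)
x_hat = uint_to_float(u, min_value, range_)
# Reconstruction error stays within one quantization step (range_ / 65535).
assert np.abs(x - x_hat).max() <= range_ / 65535.0
```

PerColHeader adds a per-column stage on top of this: each column stores four quantized percentiles (p0, p25, p75, p100), float_to_char maps values into the 0–64, 64–192, and 192–255 byte ranges relative to those percentiles, and char_to_float inverts that mapping.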
68 changes: 38 additions & 30 deletions kaldiio/highlevel.py
@@ -16,37 +16,46 @@ class WriteHelper(object):
>>> helper('uttid', array)
"""
def __init__(self, wspecifier, compression_method=None):

def __init__(self, wspecifier, compression_method=None, write_function=None):
self.initialized = False
self.closed = False

self.compression_method = compression_method
self.write_function = write_function
spec_dict = parse_specifier(wspecifier)
if spec_dict['scp'] is not None and spec_dict['ark'] is None:
if spec_dict["scp"] is not None and spec_dict["ark"] is None:
raise ValueError(
'Writing only in a scp file is not supported. '
'Please specify a ark file with a scp file.')
"Writing only in a scp file is not supported. "
"Please specify a ark file with a scp file."
)
for k in spec_dict:
if spec_dict[k] and k not in ('scp', 'ark', 't', 'f'):
if spec_dict[k] and k not in ("scp", "ark", "t", "f"):
warnings.warn(
'{} option is given, but currently it never affects'
.format(k))

self.text = spec_dict['t']
self.flush = spec_dict['f']
ark_file = spec_dict['ark']
self.fark = open_like_kaldi(ark_file, 'wb')
if spec_dict['scp'] is not None:
self.fscp = open_like_kaldi(spec_dict['scp'], 'w')
"{} option is given, but currently it never affects".format(k)
)

self.text = spec_dict["t"]
self.flush = spec_dict["f"]
ark_file = spec_dict["ark"]
self.fark = open_like_kaldi(ark_file, "wb")
if spec_dict["scp"] is not None:
self.fscp = open_like_kaldi(spec_dict["scp"], "w")
else:
self.fscp = None
self.initialized = True

def __call__(self, key, array):
if self.closed:
raise RuntimeError('WriteHelper has been already closed')
save_ark(self.fark, {key: array}, scp=self.fscp, text=self.text,
compression_method=self.compression_method)
raise RuntimeError("WriteHelper has been already closed")
save_ark(
self.fark,
{key: array},
scp=self.fscp,
text=self.text,
compression_method=self.compression_method,
write_function=self.write_function,
)

if self.flush:
if self.fark is not None:
@@ -87,6 +96,7 @@ class ReadHelper(object):
... numpy.testing.assert_array_equal(array_in, array_out)
"""

def __init__(self, wspecifier, segments=None):
self.initialized = False
self.scp = None
@@ -95,30 +105,28 @@ def __init__(self, wspecifier, segments=None):
self.segments = segments

spec_dict = parse_specifier(wspecifier)
if spec_dict['scp'] is not None and spec_dict['ark'] is not None:
raise RuntimeError('Specify one of scp or ark in rspecifier')
if spec_dict["scp"] is not None and spec_dict["ark"] is not None:
raise RuntimeError("Specify one of scp or ark in rspecifier")
for k in spec_dict:
if spec_dict[k] and k not in ('scp', 'ark', 'p'):
if spec_dict[k] and k not in ("scp", "ark", "p"):
warnings.warn(
'{} option is given, but currently it never affects'
.format(k))
self.permissive = spec_dict['p']
"{} option is given, but currently it never affects".format(k)
)
self.permissive = spec_dict["p"]

if spec_dict['scp'] is not None:
self.scp = spec_dict['scp']
if spec_dict["scp"] is not None:
self.scp = spec_dict["scp"]
else:
self.scp = False

if self.scp:
self.generator = load_scp_sequential(
spec_dict['scp'], segments=segments)
self.generator = load_scp_sequential(spec_dict["scp"], segments=segments)

self.file = None
else:
if segments is not None:
raise ValueError(
'Not supporting "segments" argument with ark file')
self.file = open_like_kaldi(spec_dict['ark'], 'rb')
raise ValueError('Not supporting "segments" argument with ark file')
self.file = open_like_kaldi(spec_dict["ark"], "rb")
self.initialized = True

def __iter__(self):

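For context on the highlevel.py changes above: WriteHelper gains a write_function keyword that is only forwarded to save_ark, and the values save_ark accepts for it are outside this excerpt, so the sketch below does not assert them. A minimal usage example of the two helpers, following their docstrings (the file names are hypothetical, and the with blocks assume the context-manager protocol the helpers expose in the released package):

```python
import numpy as np
from kaldiio import ReadHelper, WriteHelper

# "ark,scp:..." writes matrices to feats.ark plus an index in feats.scp
# (hypothetical names); writing an scp alone is not supported.
with WriteHelper("ark,scp:feats.ark,feats.scp") as writer:
    writer("uttid1", np.random.randn(10, 4).astype(np.float32))
    writer("uttid2", np.random.randn(12, 4).astype(np.float32))

# An rspecifier takes either scp or ark, not both.
with ReadHelper("scp:feats.scp") as reader:
    for key, array in reader:
        print(key, array.shape)
```

Passing compression_method to WriteHelper selects one of the compressed matrix formats handled by compression_header.py above.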