Merge pull request #50 from kamo-naoyuki/extended
Extended ark format
kamo-naoyuki authored Sep 10, 2020
2 parents 6f1a668 + 806c401 commit 0e0af26
Showing 16 changed files with 1,234 additions and 778 deletions.
2 changes: 1 addition & 1 deletion kaldiio/__init__.py
@@ -14,7 +14,7 @@
from kaldiio.utils import parse_specifier

try:
__version__ = pkg_resources.get_distribution('kaldiio').version
__version__ = pkg_resources.get_distribution("kaldiio").version
except Exception:
__version__ = None
del pkg_resources
146 changes: 82 additions & 64 deletions kaldiio/compression_header.py
@@ -16,13 +16,14 @@

class GlobalHeader(object):
"""This is a imitation class of the structure "GlobalHeader" """
def __init__(self, type, min_value, range, rows, cols, endian='<'):
if type in ('CM', 'CM2'):
c = 65535.
elif type == 'CM3':
c = 255.

def __init__(self, type, min_value, range, rows, cols, endian="<"):
if type in ("CM", "CM2"):
c = 65535.0
elif type == "CM3":
c = 255.0
else:
raise RuntimeError('Not supported type={}'.format(type))
raise RuntimeError("Not supported type={}".format(type))
self.type = type
self.c = c
self.min_value = min_value
@@ -36,76 +37,84 @@ def size(self):
return 17 + len(self.type)

@staticmethod
def read(fd, type='CM', endian='<'):
min_value = struct.unpack(endian + 'f', fd.read(4))[0]
range = struct.unpack(endian + 'f', fd.read(4))[0]
rows = struct.unpack(endian + 'i', fd.read(4))[0]
cols = struct.unpack(endian + 'i', fd.read(4))[0]
def read(fd, type="CM", endian="<"):
min_value = struct.unpack(endian + "f", fd.read(4))[0]
range = struct.unpack(endian + "f", fd.read(4))[0]
rows = struct.unpack(endian + "i", fd.read(4))[0]
cols = struct.unpack(endian + "i", fd.read(4))[0]
return GlobalHeader(type, min_value, range, rows, cols, endian)

def write(self, fd, endian=None):
if endian is None:
endian = self.endian
fd.write(self.type.encode() + b' ')
fd.write(struct.pack(endian + 'f', self.min_value))
fd.write(struct.pack(endian + 'f', self.range))
fd.write(struct.pack(endian + 'i', self.rows))
fd.write(struct.pack(endian + 'i', self.cols))
fd.write(self.type.encode() + b" ")
fd.write(struct.pack(endian + "f", self.min_value))
fd.write(struct.pack(endian + "f", self.range))
fd.write(struct.pack(endian + "i", self.rows))
fd.write(struct.pack(endian + "i", self.cols))
return self.size

@staticmethod
def compute(array, compression_method, endian='<'):
def compute(array, compression_method, endian="<"):
if compression_method == kAutomaticMethod:
if array.shape[0] > 8:
compression_method = kSpeechFeature
else:
compression_method = kTwoByteAuto

if compression_method == kSpeechFeature:
matrix_type = 'CM'
elif compression_method == kTwoByteAuto or \
compression_method == kTwoByteSignedInteger:
matrix_type = 'CM2'
elif compression_method == kOneByteAuto or \
compression_method == kOneByteUnsignedInteger or \
compression_method == kOneByteZeroOne:
matrix_type = 'CM3'
matrix_type = "CM"
elif (
compression_method == kTwoByteAuto
or compression_method == kTwoByteSignedInteger
):
matrix_type = "CM2"
elif (
compression_method == kOneByteAuto
or compression_method == kOneByteUnsignedInteger
or compression_method == kOneByteZeroOne
):
matrix_type = "CM3"
else:
raise ValueError(
'Unknown compression_method: {}'.format(compression_method))

if compression_method == kSpeechFeature or \
compression_method == kTwoByteAuto or \
compression_method == kOneByteAuto:
"Unknown compression_method: {}".format(compression_method)
)

if (
compression_method == kSpeechFeature
or compression_method == kTwoByteAuto
or compression_method == kOneByteAuto
):
min_value = array.min()
max_value = array.max()
if min_value == max_value:
max_value = min_value + (1. + abs(min_value))
max_value = min_value + (1.0 + abs(min_value))
range_ = max_value - min_value
elif compression_method == kTwoByteSignedInteger:
min_value = -32768.
range_ = 65535.
min_value = -32768.0
range_ = 65535.0
elif compression_method == kOneByteUnsignedInteger:
min_value = 0.
range_ = 255.
min_value = 0.0
range_ = 255.0
elif compression_method == kOneByteZeroOne:
min_value = 0.
range_ = 1.
min_value = 0.0
range_ = 1.0
else:
raise ValueError(
'Unknown compression_method: {}'.format(compression_method))
"Unknown compression_method: {}".format(compression_method)
)

return GlobalHeader(
matrix_type, min_value, range_, array.shape[0], array.shape[1],
endian)
matrix_type, min_value, range_, array.shape[0], array.shape[1], endian
)

def float_to_uint(self, array):
if self.c == 65535.:
dtype = np.dtype(self.endian + 'u2')
if self.c == 65535.0:
dtype = np.dtype(self.endian + "u2")
else:
dtype = np.dtype(self.endian + 'u1')
dtype = np.dtype(self.endian + "u1")
# + 0.499 is to round to closest int
array = ((array - self.min_value) / self.range * self.c + 0.499)
array = (array - self.min_value) / self.range * self.c + 0.499
return array.astype(np.dtype(dtype))

def uint_to_float(self, array):
@@ -115,7 +124,8 @@ def uint_to_float(self, array):

class PerColHeader(object):
"""This is a imitation class of the structure "PerColHeader" """
def __init__(self, p0, p25, p75, p100, endian='<'):

def __init__(self, p0, p25, p75, p100, endian="<"):
# p means percentile
self.p0 = p0
self.p25 = p25
@@ -133,22 +143,25 @@ def read(fd, global_header):
# Read PerColHeader
size_of_percolheader = 8
buf = fd.read(size_of_percolheader * global_header.cols)
header_array = np.frombuffer(buf, dtype=np.dtype(endian + 'u2'))
header_array = np.frombuffer(buf, dtype=np.dtype(endian + "u2"))
header_array = np.asarray(header_array, np.float32)
# Decompress header
header_array = global_header.uint_to_float(header_array)
header_array = header_array.reshape(-1, 4, 1)
return PerColHeader(header_array[:, 0], header_array[:, 1],
header_array[:, 2], header_array[:, 3],
endian)
return PerColHeader(
header_array[:, 0],
header_array[:, 1],
header_array[:, 2],
header_array[:, 3],
endian,
)

def write(self, fd, global_header, endian=None):
if endian is None:
endian = self.endian
header_array = np.concatenate(
[self.p0, self.p25, self.p75, self.p100], axis=1)
header_array = np.concatenate([self.p0, self.p25, self.p75, self.p100], axis=1)
header_array = global_header.float_to_uint(header_array)
header_array = header_array.astype(np.dtype(endian + 'u2'))
header_array = header_array.astype(np.dtype(endian + "u2"))
byte_str = header_array.tobytes()
fd.write(byte_str)
return len(byte_str)
@@ -158,8 +171,8 @@ def compute(array, global_header):
quarter_nr = array.shape[0] // 4
if array.shape[0] >= 5:
srows = np.partition(
array,
[0, quarter_nr, 3 * quarter_nr, array.shape[0] - 1], axis=0)
array, [0, quarter_nr, 3 * quarter_nr, array.shape[0] - 1], axis=0
)
p0 = srows[0]
p25 = srows[quarter_nr]
p75 = srows[3 * quarter_nr]
@@ -208,16 +221,16 @@ def float_to_char(self, array):
ma2 = ~ma1 * ~ma3

# +0.5 round to the closest int
tmp = (array - p0) / (p25 - p0) * 64. + 0.5
tmp = np.where(tmp < 0., 0., np.where(tmp > 64., 64., tmp))
tmp = (array - p0) / (p25 - p0) * 64.0 + 0.5
tmp = np.where(tmp < 0.0, 0.0, np.where(tmp > 64.0, 64.0, tmp))

tmp2 = ((array - p25) / (p75 - p25) * 128. + 64.5)
tmp2 = np.where(tmp2 < 64., 64., np.where(tmp2 > 192., 192., tmp2))
tmp2 = (array - p25) / (p75 - p25) * 128.0 + 64.5
tmp2 = np.where(tmp2 < 64.0, 64.0, np.where(tmp2 > 192.0, 192.0, tmp2))

tmp3 = ((array - p75) / (p100 - p75) * 63. + 192.5)
tmp3 = np.where(tmp3 < 192., 192., np.where(tmp3 > 255., 255., tmp3))
tmp3 = (array - p75) / (p100 - p75) * 63.0 + 192.5
tmp3 = np.where(tmp3 < 192.0, 192.0, np.where(tmp3 > 255.0, 255.0, tmp3))
array = np.where(ma1, tmp, np.where(ma2, tmp2, tmp3))
return array.astype(np.dtype(self.endian + 'u1'))
return array.astype(np.dtype(self.endian + "u1"))

def char_to_float(self, array):
array = array.astype(np.float32)
@@ -228,6 +241,11 @@ def char_to_float(self, array):
ma2 = ~ma1 * ~ma3 # 192 >= array > 64

return np.where(
ma1, p0 + (p25 - p0) * array * (1 / 64.),
np.where(ma2, p25 + (p75 - p25) * (array - 64.) * (1 / 128.),
p75 + (p100 - p75) * (array - 192.) * (1 / 63.)))
ma1,
p0 + (p25 - p0) * array * (1 / 64.0),
np.where(
ma2,
p25 + (p75 - p25) * (array - 64.0) * (1 / 128.0),
p75 + (p100 - p75) * (array - 192.0) * (1 / 63.0),
),
)
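The GlobalHeader logic reformatted above is Kaldi's compressed-matrix quantization: a value x is stored as the unsigned integer (x - min_value) / range * c + 0.499, with c = 65535.0 for the two-byte types ("CM", "CM2") and 255.0 for the one-byte type ("CM3"). Below is a minimal, self-contained sketch of that round trip; it is not the library code, and since the body of uint_to_float falls outside this excerpt, the inverse shown here is an assumption obtained by inverting float_to_uint.

```python
import numpy as np

def float_to_uint(array, min_value, range_, c=65535.0):
    # + 0.499 rounds to the closest integer, as in GlobalHeader.float_to_uint.
    quantized = (array - min_value) / range_ * c + 0.499
    return quantized.astype(np.uint16 if c == 65535.0 else np.uint8)

def uint_to_float(array, min_value, range_, c=65535.0):
    # Assumed inverse of float_to_uint (the real implementation is elided above).
    return min_value + array.astype(np.float32) * (range_ / c)

x = np.random.randn(8, 3).astype(np.float32)
min_value, range_ = x.min(), x.max() - x.min()
u = float_to_uint(x, min_value, range_)
x_hat = uint_to_float(u, min_value, range_)
# Reconstruction error stays within one quantization step (range_ / 65535).
assert np.abs(x - x_hat).max() <= range_ / 65535.0
```

PerColHeader adds a per-column stage on top of this: each column stores four quantized percentiles (p0, p25, p75, p100), float_to_char maps values into the 0–64, 64–192, and 192–255 byte ranges relative to those percentiles, and char_to_float inverts that mapping.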
68 changes: 38 additions & 30 deletions kaldiio/highlevel.py
@@ -16,37 +16,46 @@ class WriteHelper(object):
>>> helper('uttid', array)
"""
def __init__(self, wspecifier, compression_method=None):

def __init__(self, wspecifier, compression_method=None, write_function=None):
self.initialized = False
self.closed = False

self.compression_method = compression_method
self.write_function = write_function
spec_dict = parse_specifier(wspecifier)
if spec_dict['scp'] is not None and spec_dict['ark'] is None:
if spec_dict["scp"] is not None and spec_dict["ark"] is None:
raise ValueError(
'Writing only in a scp file is not supported. '
'Please specify a ark file with a scp file.')
"Writing only in a scp file is not supported. "
"Please specify a ark file with a scp file."
)
for k in spec_dict:
if spec_dict[k] and k not in ('scp', 'ark', 't', 'f'):
if spec_dict[k] and k not in ("scp", "ark", "t", "f"):
warnings.warn(
'{} option is given, but currently it never affects'
.format(k))

self.text = spec_dict['t']
self.flush = spec_dict['f']
ark_file = spec_dict['ark']
self.fark = open_like_kaldi(ark_file, 'wb')
if spec_dict['scp'] is not None:
self.fscp = open_like_kaldi(spec_dict['scp'], 'w')
"{} option is given, but currently it never affects".format(k)
)

self.text = spec_dict["t"]
self.flush = spec_dict["f"]
ark_file = spec_dict["ark"]
self.fark = open_like_kaldi(ark_file, "wb")
if spec_dict["scp"] is not None:
self.fscp = open_like_kaldi(spec_dict["scp"], "w")
else:
self.fscp = None
self.initialized = True

def __call__(self, key, array):
if self.closed:
raise RuntimeError('WriteHelper has been already closed')
save_ark(self.fark, {key: array}, scp=self.fscp, text=self.text,
compression_method=self.compression_method)
raise RuntimeError("WriteHelper has been already closed")
save_ark(
self.fark,
{key: array},
scp=self.fscp,
text=self.text,
compression_method=self.compression_method,
write_function=self.write_function,
)

if self.flush:
if self.fark is not None:
@@ -87,6 +96,7 @@ class ReadHelper(object):
... numpy.testing.assert_array_equal(array_in, array_out)
"""

def __init__(self, wspecifier, segments=None):
self.initialized = False
self.scp = None
@@ -95,30 +105,28 @@ def __init__(self, wspecifier, segments=None):
self.segments = segments

spec_dict = parse_specifier(wspecifier)
if spec_dict['scp'] is not None and spec_dict['ark'] is not None:
raise RuntimeError('Specify one of scp or ark in rspecifier')
if spec_dict["scp"] is not None and spec_dict["ark"] is not None:
raise RuntimeError("Specify one of scp or ark in rspecifier")
for k in spec_dict:
if spec_dict[k] and k not in ('scp', 'ark', 'p'):
if spec_dict[k] and k not in ("scp", "ark", "p"):
warnings.warn(
'{} option is given, but currently it never affects'
.format(k))
self.permissive = spec_dict['p']
"{} option is given, but currently it never affects".format(k)
)
self.permissive = spec_dict["p"]

if spec_dict['scp'] is not None:
self.scp = spec_dict['scp']
if spec_dict["scp"] is not None:
self.scp = spec_dict["scp"]
else:
self.scp = False

if self.scp:
self.generator = load_scp_sequential(
spec_dict['scp'], segments=segments)
self.generator = load_scp_sequential(spec_dict["scp"], segments=segments)

self.file = None
else:
if segments is not None:
raise ValueError(
'Not supporting "segments" argument with ark file')
self.file = open_like_kaldi(spec_dict['ark'], 'rb')
raise ValueError('Not supporting "segments" argument with ark file')
self.file = open_like_kaldi(spec_dict["ark"], "rb")
self.initialized = True

def __iter__(self):

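For context on the highlevel.py changes above: WriteHelper gains a write_function keyword that is only forwarded to save_ark, and the values save_ark accepts for it are outside this excerpt, so the sketch below does not assert them. A minimal usage example of the two helpers, following their docstrings (the file names are hypothetical, and the with blocks assume the context-manager protocol the helpers expose in the released package):

```python
import numpy as np
from kaldiio import ReadHelper, WriteHelper

# "ark,scp:..." writes matrices to feats.ark plus an index in feats.scp
# (hypothetical names); writing an scp alone is not supported.
with WriteHelper("ark,scp:feats.ark,feats.scp") as writer:
    writer("uttid1", np.random.randn(10, 4).astype(np.float32))
    writer("uttid2", np.random.randn(12, 4).astype(np.float32))

# An rspecifier takes either scp or ark, not both.
with ReadHelper("scp:feats.scp") as reader:
    for key, array in reader:
        print(key, array.shape)
```

Passing compression_method to WriteHelper selects one of the compressed matrix formats handled by compression_header.py above.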