-
-
Notifications
You must be signed in to change notification settings - Fork 18k
/
pytables.py
5289 lines (4440 loc) · 168 KB
/
pytables.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
High level interface to PyTables for reading and writing pandas data structures
to disk
"""
from __future__ import annotations
from contextlib import suppress
import copy
from datetime import (
date,
tzinfo,
)
import itertools
import os
import re
from textwrap import dedent
from types import TracebackType
from typing import (
TYPE_CHECKING,
Any,
Callable,
Final,
Hashable,
Iterator,
Literal,
Sequence,
cast,
overload,
)
import warnings
import numpy as np
from pandas._config import (
config,
get_option,
)
from pandas._libs import (
lib,
writers as libwriters,
)
from pandas._libs.tslibs import timezones
from pandas._typing import (
AnyArrayLike,
ArrayLike,
AxisInt,
DtypeArg,
FilePath,
Shape,
npt,
)
from pandas.compat._optional import import_optional_dependency
from pandas.compat.pickle_compat import patch_pickle
from pandas.errors import (
AttributeConflictWarning,
ClosedFileError,
IncompatibilityWarning,
PerformanceWarning,
PossibleDataLossError,
)
from pandas.util._decorators import cache_readonly
from pandas.util._exceptions import find_stack_level
from pandas.core.dtypes.common import (
ensure_object,
is_bool_dtype,
is_categorical_dtype,
is_complex_dtype,
is_datetime64_dtype,
is_datetime64tz_dtype,
is_extension_array_dtype,
is_integer_dtype,
is_list_like,
is_object_dtype,
is_string_dtype,
is_timedelta64_dtype,
needs_i8_conversion,
)
from pandas.core.dtypes.missing import array_equivalent
from pandas import (
DataFrame,
DatetimeIndex,
Index,
MultiIndex,
PeriodIndex,
RangeIndex,
Series,
TimedeltaIndex,
concat,
isna,
)
from pandas.core.arrays import (
Categorical,
DatetimeArray,
PeriodArray,
)
import pandas.core.common as com
from pandas.core.computation.pytables import (
PyTablesExpr,
maybe_expression,
)
from pandas.core.construction import extract_array
from pandas.core.indexes.api import ensure_index
from pandas.core.internals import (
ArrayManager,
BlockManager,
)
from pandas.io.common import stringify_path
from pandas.io.formats.printing import (
adjoin,
pprint_thing,
)
if TYPE_CHECKING:
from tables import (
Col,
File,
Node,
)
from pandas.core.internals import Block
# versioning attribute
_version = "0.15.2"
# encoding
_default_encoding = "UTF-8"
def _ensure_decoded(s):
"""if we have bytes, decode them to unicode"""
if isinstance(s, np.bytes_):
s = s.decode("UTF-8")
return s
def _ensure_encoding(encoding: str | None) -> str:
# set the encoding if we need
if encoding is None:
encoding = _default_encoding
return encoding
def _ensure_str(name):
"""
Ensure that an index / column name is a str (python 3); otherwise they
may be np.string dtype. Non-string dtypes are passed through unchanged.
https://github.com/pandas-dev/pandas/issues/13492
"""
if isinstance(name, str):
name = str(name)
return name
Term = PyTablesExpr
def _ensure_term(where, scope_level: int):
"""
Ensure that the where is a Term or a list of Term.
This makes sure that we are capturing the scope of variables that are
passed create the terms here with a frame_level=2 (we are 2 levels down)
"""
# only consider list/tuple here as an ndarray is automatically a coordinate
# list
level = scope_level + 1
if isinstance(where, (list, tuple)):
where = [
Term(term, scope_level=level + 1) if maybe_expression(term) else term
for term in where
if term is not None
]
elif maybe_expression(where):
where = Term(where, scope_level=level)
return where if where is None or len(where) else None
incompatibility_doc: Final = """
where criteria is being ignored as this version [%s] is too old (or
not-defined), read the file in and write it out to a new file to upgrade (with
the copy_to method)
"""
attribute_conflict_doc: Final = """
the [%s] attribute of the existing index is [%s] which conflicts with the new
[%s], resetting the attribute to None
"""
performance_doc: Final = """
your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->%s,key->%s] [items->%s]
"""
# formats
_FORMAT_MAP = {"f": "fixed", "fixed": "fixed", "t": "table", "table": "table"}
# axes map
_AXES_MAP = {DataFrame: [0]}
# register our configuration options
dropna_doc: Final = """
: boolean
drop ALL nan rows when appending to a table
"""
format_doc: Final = """
: format
default format writing format, if None, then
put will default to 'fixed' and append will default to 'table'
"""
with config.config_prefix("io.hdf"):
config.register_option("dropna_table", False, dropna_doc, validator=config.is_bool)
config.register_option(
"default_format",
None,
format_doc,
validator=config.is_one_of_factory(["fixed", "table", None]),
)
# oh the troubles to reduce import time
_table_mod = None
_table_file_open_policy_is_strict = False
def _tables():
global _table_mod
global _table_file_open_policy_is_strict
if _table_mod is None:
import tables
_table_mod = tables
# set the file open policy
# return the file open policy; this changes as of pytables 3.1
# depending on the HDF5 version
with suppress(AttributeError):
_table_file_open_policy_is_strict = (
tables.file._FILE_OPEN_POLICY == "strict"
)
return _table_mod
# interface to/from ###
def to_hdf(
path_or_buf: FilePath | HDFStore,
key: str,
value: DataFrame | Series,
mode: str = "a",
complevel: int | None = None,
complib: str | None = None,
append: bool = False,
format: str | None = None,
index: bool = True,
min_itemsize: int | dict[str, int] | None = None,
nan_rep=None,
dropna: bool | None = None,
data_columns: Literal[True] | list[str] | None = None,
errors: str = "strict",
encoding: str = "UTF-8",
) -> None:
"""store this object, close it if we opened it"""
if append:
f = lambda store: store.append(
key,
value,
format=format,
index=index,
min_itemsize=min_itemsize,
nan_rep=nan_rep,
dropna=dropna,
data_columns=data_columns,
errors=errors,
encoding=encoding,
)
else:
# NB: dropna is not passed to `put`
f = lambda store: store.put(
key,
value,
format=format,
index=index,
min_itemsize=min_itemsize,
nan_rep=nan_rep,
data_columns=data_columns,
errors=errors,
encoding=encoding,
dropna=dropna,
)
path_or_buf = stringify_path(path_or_buf)
if isinstance(path_or_buf, str):
with HDFStore(
path_or_buf, mode=mode, complevel=complevel, complib=complib
) as store:
f(store)
else:
f(path_or_buf)
def read_hdf(
path_or_buf: FilePath | HDFStore,
key=None,
mode: str = "r",
errors: str = "strict",
where: str | list | None = None,
start: int | None = None,
stop: int | None = None,
columns: list[str] | None = None,
iterator: bool = False,
chunksize: int | None = None,
**kwargs,
):
"""
Read from the store, close it if we opened it.
Retrieve pandas object stored in file, optionally based on where
criteria.
.. warning::
Pandas uses PyTables for reading and writing HDF5 files, which allows
serializing object-dtype data with pickle when using the "fixed" format.
Loading pickled data received from untrusted sources can be unsafe.
See: https://docs.python.org/3/library/pickle.html for more.
Parameters
----------
path_or_buf : str, path object, pandas.HDFStore
Any valid string path is acceptable. Only supports the local file system,
remote URLs and file-like objects are not supported.
If you want to pass in a path object, pandas accepts any
``os.PathLike``.
Alternatively, pandas accepts an open :class:`pandas.HDFStore` object.
key : object, optional
The group identifier in the store. Can be omitted if the HDF file
contains a single pandas object.
mode : {'r', 'r+', 'a'}, default 'r'
Mode to use when opening the file. Ignored if path_or_buf is a
:class:`pandas.HDFStore`. Default is 'r'.
errors : str, default 'strict'
Specifies how encoding and decoding errors are to be handled.
See the errors argument for :func:`open` for a full list
of options.
where : list, optional
A list of Term (or convertible) objects.
start : int, optional
Row number to start selection.
stop : int, optional
Row number to stop selection.
columns : list, optional
A list of columns names to return.
iterator : bool, optional
Return an iterator object.
chunksize : int, optional
Number of rows to include in an iteration when using an iterator.
**kwargs
Additional keyword arguments passed to HDFStore.
Returns
-------
object
The selected object. Return type depends on the object stored.
See Also
--------
DataFrame.to_hdf : Write a HDF file from a DataFrame.
HDFStore : Low-level access to HDF files.
Examples
--------
>>> df = pd.DataFrame([[1, 1.0, 'a']], columns=['x', 'y', 'z']) # doctest: +SKIP
>>> df.to_hdf('./store.h5', 'data') # doctest: +SKIP
>>> reread = pd.read_hdf('./store.h5') # doctest: +SKIP
"""
if mode not in ["r", "r+", "a"]:
raise ValueError(
f"mode {mode} is not allowed while performing a read. "
f"Allowed modes are r, r+ and a."
)
# grab the scope
if where is not None:
where = _ensure_term(where, scope_level=1)
if isinstance(path_or_buf, HDFStore):
if not path_or_buf.is_open:
raise OSError("The HDFStore must be open for reading.")
store = path_or_buf
auto_close = False
else:
path_or_buf = stringify_path(path_or_buf)
if not isinstance(path_or_buf, str):
raise NotImplementedError(
"Support for generic buffers has not been implemented."
)
try:
exists = os.path.exists(path_or_buf)
# if filepath is too long
except (TypeError, ValueError):
exists = False
if not exists:
raise FileNotFoundError(f"File {path_or_buf} does not exist")
store = HDFStore(path_or_buf, mode=mode, errors=errors, **kwargs)
# can't auto open/close if we are using an iterator
# so delegate to the iterator
auto_close = True
try:
if key is None:
groups = store.groups()
if len(groups) == 0:
raise ValueError(
"Dataset(s) incompatible with Pandas data types, "
"not table, or no datasets found in HDF5 file."
)
candidate_only_group = groups[0]
# For the HDF file to have only one dataset, all other groups
# should then be metadata groups for that candidate group. (This
# assumes that the groups() method enumerates parent groups
# before their children.)
for group_to_check in groups[1:]:
if not _is_metadata_of(group_to_check, candidate_only_group):
raise ValueError(
"key must be provided when HDF5 "
"file contains multiple datasets."
)
key = candidate_only_group._v_pathname
return store.select(
key,
where=where,
start=start,
stop=stop,
columns=columns,
iterator=iterator,
chunksize=chunksize,
auto_close=auto_close,
)
except (ValueError, TypeError, KeyError):
if not isinstance(path_or_buf, HDFStore):
# if there is an error, close the store if we opened it.
with suppress(AttributeError):
store.close()
raise
def _is_metadata_of(group: Node, parent_group: Node) -> bool:
"""Check if a given group is a metadata group for a given parent_group."""
if group._v_depth <= parent_group._v_depth:
return False
current = group
while current._v_depth > 1:
parent = current._v_parent
if parent == parent_group and current._v_name == "meta":
return True
current = current._v_parent
return False
class HDFStore:
"""
Dict-like IO interface for storing pandas objects in PyTables.
Either Fixed or Table format.
.. warning::
Pandas uses PyTables for reading and writing HDF5 files, which allows
serializing object-dtype data with pickle when using the "fixed" format.
Loading pickled data received from untrusted sources can be unsafe.
See: https://docs.python.org/3/library/pickle.html for more.
Parameters
----------
path : str
File path to HDF5 file.
mode : {'a', 'w', 'r', 'r+'}, default 'a'
``'r'``
Read-only; no data can be modified.
``'w'``
Write; a new file is created (an existing file with the same
name would be deleted).
``'a'``
Append; an existing file is opened for reading and writing,
and if the file does not exist it is created.
``'r+'``
It is similar to ``'a'``, but the file must already exist.
complevel : int, 0-9, default None
Specifies a compression level for data.
A value of 0 or None disables compression.
complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib'
Specifies the compression library to be used.
As of v0.20.2 these additional compressors for Blosc are supported
(default if no compressor specified: 'blosc:blosclz'):
{'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy',
'blosc:zlib', 'blosc:zstd'}.
Specifying a compression library which is not available issues
a ValueError.
fletcher32 : bool, default False
If applying compression use the fletcher32 checksum.
**kwargs
These parameters will be passed to the PyTables open_file method.
Examples
--------
>>> bar = pd.DataFrame(np.random.randn(10, 4))
>>> store = pd.HDFStore('test.h5')
>>> store['foo'] = bar # write to HDF5
>>> bar = store['foo'] # retrieve
>>> store.close()
**Create or load HDF5 file in-memory**
When passing the `driver` option to the PyTables open_file method through
**kwargs, the HDF5 file is loaded or created in-memory and will only be
written when closed:
>>> bar = pd.DataFrame(np.random.randn(10, 4))
>>> store = pd.HDFStore('test.h5', driver='H5FD_CORE')
>>> store['foo'] = bar
>>> store.close() # only now, data is written to disk
"""
_handle: File | None
_mode: str
def __init__(
self,
path,
mode: str = "a",
complevel: int | None = None,
complib=None,
fletcher32: bool = False,
**kwargs,
) -> None:
if "format" in kwargs:
raise ValueError("format is not a defined argument for HDFStore")
tables = import_optional_dependency("tables")
if complib is not None and complib not in tables.filters.all_complibs:
raise ValueError(
f"complib only supports {tables.filters.all_complibs} compression."
)
if complib is None and complevel is not None:
complib = tables.filters.default_complib
self._path = stringify_path(path)
if mode is None:
mode = "a"
self._mode = mode
self._handle = None
self._complevel = complevel if complevel else 0
self._complib = complib
self._fletcher32 = fletcher32
self._filters = None
self.open(mode=mode, **kwargs)
def __fspath__(self) -> str:
return self._path
@property
def root(self):
"""return the root node"""
self._check_if_open()
assert self._handle is not None # for mypy
return self._handle.root
@property
def filename(self) -> str:
return self._path
def __getitem__(self, key: str):
return self.get(key)
def __setitem__(self, key: str, value) -> None:
self.put(key, value)
def __delitem__(self, key: str) -> None:
return self.remove(key)
def __getattr__(self, name: str):
"""allow attribute access to get stores"""
try:
return self.get(name)
except (KeyError, ClosedFileError):
pass
raise AttributeError(
f"'{type(self).__name__}' object has no attribute '{name}'"
)
def __contains__(self, key: str) -> bool:
"""
check for existence of this key
can match the exact pathname or the pathnm w/o the leading '/'
"""
node = self.get_node(key)
if node is not None:
name = node._v_pathname
if key in (name, name[1:]):
return True
return False
def __len__(self) -> int:
return len(self.groups())
def __repr__(self) -> str:
pstr = pprint_thing(self._path)
return f"{type(self)}\nFile path: {pstr}\n"
def __enter__(self) -> HDFStore:
return self
def __exit__(
self,
exc_type: type[BaseException] | None,
exc_value: BaseException | None,
traceback: TracebackType | None,
) -> None:
self.close()
def keys(self, include: str = "pandas") -> list[str]:
"""
Return a list of keys corresponding to objects stored in HDFStore.
Parameters
----------
include : str, default 'pandas'
When kind equals 'pandas' return pandas objects.
When kind equals 'native' return native HDF5 Table objects.
.. versionadded:: 1.1.0
Returns
-------
list
List of ABSOLUTE path-names (e.g. have the leading '/').
Raises
------
raises ValueError if kind has an illegal value
"""
if include == "pandas":
return [n._v_pathname for n in self.groups()]
elif include == "native":
assert self._handle is not None # mypy
return [
n._v_pathname for n in self._handle.walk_nodes("/", classname="Table")
]
raise ValueError(
f"`include` should be either 'pandas' or 'native' but is '{include}'"
)
def __iter__(self) -> Iterator[str]:
return iter(self.keys())
def items(self) -> Iterator[tuple[str, list]]:
"""
iterate on key->group
"""
for g in self.groups():
yield g._v_pathname, g
def open(self, mode: str = "a", **kwargs) -> None:
"""
Open the file in the specified mode
Parameters
----------
mode : {'a', 'w', 'r', 'r+'}, default 'a'
See HDFStore docstring or tables.open_file for info about modes
**kwargs
These parameters will be passed to the PyTables open_file method.
"""
tables = _tables()
if self._mode != mode:
# if we are changing a write mode to read, ok
if self._mode in ["a", "w"] and mode in ["r", "r+"]:
pass
elif mode in ["w"]:
# this would truncate, raise here
if self.is_open:
raise PossibleDataLossError(
f"Re-opening the file [{self._path}] with mode [{self._mode}] "
"will delete the current file!"
)
self._mode = mode
# close and reopen the handle
if self.is_open:
self.close()
if self._complevel and self._complevel > 0:
self._filters = _tables().Filters(
self._complevel, self._complib, fletcher32=self._fletcher32
)
if _table_file_open_policy_is_strict and self.is_open:
msg = (
"Cannot open HDF5 file, which is already opened, "
"even in read-only mode."
)
raise ValueError(msg)
self._handle = tables.open_file(self._path, self._mode, **kwargs)
def close(self) -> None:
"""
Close the PyTables file handle
"""
if self._handle is not None:
self._handle.close()
self._handle = None
@property
def is_open(self) -> bool:
"""
return a boolean indicating whether the file is open
"""
if self._handle is None:
return False
return bool(self._handle.isopen)
def flush(self, fsync: bool = False) -> None:
"""
Force all buffered modifications to be written to disk.
Parameters
----------
fsync : bool (default False)
call ``os.fsync()`` on the file handle to force writing to disk.
Notes
-----
Without ``fsync=True``, flushing may not guarantee that the OS writes
to disk. With fsync, the operation will block until the OS claims the
file has been written; however, other caching layers may still
interfere.
"""
if self._handle is not None:
self._handle.flush()
if fsync:
with suppress(OSError):
os.fsync(self._handle.fileno())
def get(self, key: str):
"""
Retrieve pandas object stored in file.
Parameters
----------
key : str
Returns
-------
object
Same type as object stored in file.
"""
with patch_pickle():
# GH#31167 Without this patch, pickle doesn't know how to unpickle
# old DateOffset objects now that they are cdef classes.
group = self.get_node(key)
if group is None:
raise KeyError(f"No object named {key} in the file")
return self._read_group(group)
def select(
self,
key: str,
where=None,
start=None,
stop=None,
columns=None,
iterator: bool = False,
chunksize=None,
auto_close: bool = False,
):
"""
Retrieve pandas object stored in file, optionally based on where criteria.
.. warning::
Pandas uses PyTables for reading and writing HDF5 files, which allows
serializing object-dtype data with pickle when using the "fixed" format.
Loading pickled data received from untrusted sources can be unsafe.
See: https://docs.python.org/3/library/pickle.html for more.
Parameters
----------
key : str
Object being retrieved from file.
where : list or None
List of Term (or convertible) objects, optional.
start : int or None
Row number to start selection.
stop : int, default None
Row number to stop selection.
columns : list or None
A list of columns that if not None, will limit the return columns.
iterator : bool or False
Returns an iterator.
chunksize : int or None
Number or rows to include in iteration, return an iterator.
auto_close : bool or False
Should automatically close the store when finished.
Returns
-------
object
Retrieved object from file.
"""
group = self.get_node(key)
if group is None:
raise KeyError(f"No object named {key} in the file")
# create the storer and axes
where = _ensure_term(where, scope_level=1)
s = self._create_storer(group)
s.infer_axes()
# function to call on iteration
def func(_start, _stop, _where):
return s.read(start=_start, stop=_stop, where=_where, columns=columns)
# create the iterator
it = TableIterator(
self,
s,
func,
where=where,
nrows=s.nrows,
start=start,
stop=stop,
iterator=iterator,
chunksize=chunksize,
auto_close=auto_close,
)
return it.get_result()
def select_as_coordinates(
self,
key: str,
where=None,
start: int | None = None,
stop: int | None = None,
):
"""
return the selection as an Index
.. warning::
Pandas uses PyTables for reading and writing HDF5 files, which allows
serializing object-dtype data with pickle when using the "fixed" format.
Loading pickled data received from untrusted sources can be unsafe.
See: https://docs.python.org/3/library/pickle.html for more.
Parameters
----------
key : str
where : list of Term (or convertible) objects, optional
start : integer (defaults to None), row number to start selection
stop : integer (defaults to None), row number to stop selection
"""
where = _ensure_term(where, scope_level=1)
tbl = self.get_storer(key)
if not isinstance(tbl, Table):
raise TypeError("can only read_coordinates with a table")
return tbl.read_coordinates(where=where, start=start, stop=stop)
def select_column(
self,
key: str,
column: str,
start: int | None = None,
stop: int | None = None,
):
"""
return a single column from the table. This is generally only useful to
select an indexable
.. warning::
Pandas uses PyTables for reading and writing HDF5 files, which allows
serializing object-dtype data with pickle when using the "fixed" format.
Loading pickled data received from untrusted sources can be unsafe.
See: https://docs.python.org/3/library/pickle.html for more.
Parameters
----------
key : str
column : str
The column of interest.
start : int or None, default None
stop : int or None, default None
Raises
------
raises KeyError if the column is not found (or key is not a valid
store)
raises ValueError if the column can not be extracted individually (it
is part of a data block)
"""
tbl = self.get_storer(key)
if not isinstance(tbl, Table):
raise TypeError("can only read_column with a table")
return tbl.read_column(column=column, start=start, stop=stop)
def select_as_multiple(
self,
keys,
where=None,
selector=None,
columns=None,
start=None,
stop=None,
iterator: bool = False,
chunksize=None,
auto_close: bool = False,
):
"""
Retrieve pandas objects from multiple tables.
.. warning::
Pandas uses PyTables for reading and writing HDF5 files, which allows
serializing object-dtype data with pickle when using the "fixed" format.
Loading pickled data received from untrusted sources can be unsafe.
See: https://docs.python.org/3/library/pickle.html for more.
Parameters
----------
keys : a list of the tables
selector : the table to apply the where criteria (defaults to keys[0]
if not supplied)
columns : the columns I want back
start : integer (defaults to None), row number to start selection
stop : integer (defaults to None), row number to stop selection
iterator : bool, return an iterator, default False
chunksize : nrows to include in iteration, return an iterator
auto_close : bool, default False
Should automatically close the store when finished.
Raises
------
raises KeyError if keys or selector is not found or keys is empty
raises TypeError if keys is not a list or tuple
raises ValueError if the tables are not ALL THE SAME DIMENSIONS
"""
# default to single select
where = _ensure_term(where, scope_level=1)
if isinstance(keys, (list, tuple)) and len(keys) == 1:
keys = keys[0]
if isinstance(keys, str):
return self.select(
key=keys,
where=where,
columns=columns,
start=start,
stop=stop,
iterator=iterator,
chunksize=chunksize,
auto_close=auto_close,
)
if not isinstance(keys, (list, tuple)):
raise TypeError("keys must be a list/tuple")