From 8da167102d1c9da68cebbc77a960c3590e116212 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Tue, 30 Aug 2022 13:31:04 -0500 Subject: [PATCH 1/5] fix: Ensure that ak._v2.to_json raises errors when appropriate. (#1649) --- src/awkward/_v2/contents/content.py | 9 +++- ...est_1449-v2-to_json-from_json-functions.py | 52 +++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/src/awkward/_v2/contents/content.py b/src/awkward/_v2/contents/content.py index 657783e5e7..c93f5c0391 100644 --- a/src/awkward/_v2/contents/content.py +++ b/src/awkward/_v2/contents/content.py @@ -1378,12 +1378,14 @@ def to_json( complex_real_string = None complex_imag_string = None elif ( - isinstance(complex_record_fields, tuple) + isinstance(complex_record_fields, (tuple, list)) and len(complex_record_fields) == 2 and isinstance(complex_record_fields[0], str) and isinstance(complex_record_fields[1], str) ): complex_real_string, complex_imag_string = complex_record_fields + else: + complex_real_string, complex_imag_string = None, None return self.packed()._to_list( behavior, @@ -1411,6 +1413,11 @@ def _to_list_custom(self, behavior, json_conversions): for i in range(self.length): out[i] = array[i] + # These json_conversions are applied in NumpyArray (for numbers) + # and ListArray/ListOffsetArray/RegularArray (for bytestrings), + # but they're also applied here because __getitem__ might return + # something convertible (the overloaded __getitem__ might be + # trivial, as it is in Vector). 
if json_conversions is not None: convert_bytes = json_conversions["convert_bytes"] if convert_bytes is not None: diff --git a/tests/v2/test_1449-v2-to_json-from_json-functions.py b/tests/v2/test_1449-v2-to_json-from_json-functions.py index e6ab379e10..9f39a502dc 100644 --- a/tests/v2/test_1449-v2-to_json-from_json-functions.py +++ b/tests/v2/test_1449-v2-to_json-from_json-functions.py @@ -8,6 +8,58 @@ import awkward as ak # noqa: F401 +def test_without_control(): + array = ak._v2.Array( + [ + {"ok": 1, "x": 1.1, "y": 1 + 1j, "z": b"one"}, + {"ok": 2, "x": 2.2, "y": 2 + 2j, "z": b"two"}, + {"ok": 3, "x": 3.3, "y": 3 + 3j, "z": b"three"}, + {"ok": 4, "x": float("nan"), "y": float("nan"), "z": b"four"}, + {"ok": 5, "x": float("inf"), "y": float("inf") + 5j, "z": b"five"}, + {"ok": 6, "x": float("-inf"), "y": 6 + float("-inf") * 1j, "z": b"six"}, + {"ok": 7, "x": 7.7, "y": 7 + 7j, "z": b"seven"}, + {"ok": 8, "x": None, "y": 8 + 8j, "z": b"eight"}, + {"ok": 9, "x": 9.9, "y": 9 + 9j, "z": b"nine"}, + ] + ) + + assert ak._v2.to_json(array.ok) == "[1,2,3,4,5,6,7,8,9]" + + with pytest.raises(ValueError): + ak._v2.to_json(array.x) + + assert ak._v2.to_json(array.x[:3]) == "[1.1,2.2,3.3]" + + with pytest.raises(ValueError): + ak._v2.to_json(array.x, nan_string="NAN") + + with pytest.raises(ValueError): + ak._v2.to_json(array.x, nan_string="NAN", posinf_string="INF") + + assert ( + ak._v2.to_json( + array.x, nan_string="NAN", posinf_string="INF", neginf_string="-INF" + ) + == '[1.1,2.2,3.3,"NAN","INF","-INF",7.7,null,9.9]' + ) + + with pytest.raises(TypeError): + ak._v2.to_json(array.y[:3]) + + assert ( + ak._v2.to_json(array.y[:3], complex_record_fields=["R", "I"]) + == '[{"R":1.0,"I":1.0},{"R":2.0,"I":2.0},{"R":3.0,"I":3.0}]' + ) + + with pytest.raises(TypeError): + ak._v2.to_json(array.z) + + assert ( + ak._v2.to_json(array.z, convert_bytes=lambda x: x.decode()) + == '["one","two","three","four","five","six","seven","eight","nine"]' + ) + + def 
test_to_json_options(tmp_path): filename = os.path.join(tmp_path, "whatever.json") From dd2a3f400e29fc9ea908fc7d8267f592091457bb Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Tue, 30 Aug 2022 17:07:29 -0500 Subject: [PATCH 2/5] feat: Drop ak.behavior['.', 'Name'] = Class, which isn't working/isn't tested. (#1651) --- src/awkward/_v2/_util.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/awkward/_v2/_util.py b/src/awkward/_v2/_util.py index b69f96204a..e3c8fcb320 100644 --- a/src/awkward/_v2/_util.py +++ b/src/awkward/_v2/_util.py @@ -483,11 +483,6 @@ def arrayclass(layout, behavior): cls = behavior[arr] if isinstance(cls, type) and issubclass(cls, ak._v2.highlevel.Array): return cls - rec = layout.parameter("__record__") - if isstr(rec): - cls = behavior[".", rec] - if isinstance(cls, type) and issubclass(cls, ak._v2.highlevel.Array): - return cls deeprec = layout.purelist_parameter("__record__") if isstr(deeprec): cls = behavior["*", deeprec] @@ -554,11 +549,6 @@ def numba_array_typer(layouttype, behavior): typer = behavior["__numba_typer__", arr] if callable(typer): return typer - rec = layouttype.parameters.get("__record__") - if isstr(rec): - typer = behavior["__numba_typer__", ".", rec] - if callable(typer): - return typer deeprec = layouttype.parameters.get("__record__") if isstr(deeprec): typer = behavior["__numba_typer__", "*", deeprec] @@ -574,11 +564,6 @@ def numba_array_lower(layouttype, behavior): lower = behavior["__numba_lower__", arr] if callable(lower): return lower - rec = layouttype.parameters.get("__record__") - if isstr(rec): - lower = behavior["__numba_lower__", ".", rec] - if callable(lower): - return lower deeprec = layouttype.parameters.get("__record__") if isstr(deeprec): lower = behavior["__numba_lower__", "*", deeprec] From 1a0858e20cc2af56b2fe01541721f88ac61e2f20 Mon Sep 17 00:00:00 2001 From: "allcontributors[bot]" <46447321+allcontributors[bot]@users.noreply.github.com> Date: Wed, 31 Aug 2022 11:30:48 -0500 
Subject: [PATCH 3/5] docs: add Saransh-cpp as a contributor for code (#1653) * docs: update README.md [skip ci] * docs: update .all-contributorsrc [skip ci] Co-authored-by: allcontributors[bot] <46447321+allcontributors[bot]@users.noreply.github.com> --- .all-contributorsrc | 9 +++++++++ README.md | 1 + 2 files changed, 10 insertions(+) diff --git a/.all-contributorsrc b/.all-contributorsrc index ed10f7894d..bb16ac769a 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -320,6 +320,15 @@ "contributions": [ "code" ] + }, + { + "login": "Saransh-cpp", + "name": "Saransh", + "avatar_url": "https://avatars.githubusercontent.com/u/74055102?v=4", + "profile": "https://saransh-cpp.github.io/", + "contributions": [ + "code" + ] } ], "contributorsPerLine": 7, diff --git a/README.md b/README.md index 146186d2c3..591302ffc5 100644 --- a/README.md +++ b/README.md @@ -256,6 +256,7 @@ Thanks especially to the gracious help of Awkward Array contributors (including
Ahmad-AlSubaie

💻
Manasvi Goyal

💻
Aryan Roy

💻 +
Saransh

💻 From d2853d28d71d635d97c537039a8b2299103ce167 Mon Sep 17 00:00:00 2001 From: Ianna Osborne Date: Wed, 31 Aug 2022 19:21:52 +0200 Subject: [PATCH 4/5] feat: retrieve multiple columns from RDataFrame in a single event loop (#1625) Currently from_rdataframe function converts a selected column to a native Awkward Array. This function pulls one column at a time: this simplifies the interface and Awkward Arrays can be inexpensively joined with awkward zip. This PR is extending this API as the users requested not to trigger RDF event loop multiple times. * Proposed change to ak.from_rdataframe: from: out = ak.from_rdataframe( rdf, column="x", ) to: out = ak.from_rdataframe( rdf, columns=("x", "y",), ) * Implement handling multiple columns in jagged_builders, generalise for any depth using a recursive function * Implement an index-based filter for Awkward types * Add tests --- .../_connect/rdataframe/from_rdataframe.py | 314 +++++++++++------- .../_v2/_connect/rdataframe/to_rdataframe.py | 2 + src/awkward/_v2/cpp-headers/awkward/utils.h | 14 +- .../cpp-headers/rdataframe/jagged_builders.h | 73 +--- .../_v2/operations/ak_from_rdataframe.py | 12 +- tests/v2/test_1374-to-rdataframe.py | 19 +- tests/v2/test_1473-from-rdataframe.py | 42 +-- tests/v2/test_1508-awkward-from-rdataframe.py | 4 +- .../test_1613-generator-tolayout-records.py | 2 +- tests/v2/test_1620-layout-builders.py | 16 +- ...t_1625-multiple-columns-from-rdataframe.py | 266 +++++++++++++++ 11 files changed, 522 insertions(+), 242 deletions(-) create mode 100644 tests/v2/test_1625-multiple-columns-from-rdataframe.py diff --git a/src/awkward/_v2/_connect/rdataframe/from_rdataframe.py b/src/awkward/_v2/_connect/rdataframe/from_rdataframe.py index abad41db2f..4bd8a91067 100644 --- a/src/awkward/_v2/_connect/rdataframe/from_rdataframe.py +++ b/src/awkward/_v2/_connect/rdataframe/from_rdataframe.py @@ -12,10 +12,21 @@ from awkward._v2.types.numpytype import primitive_to_dtype cpp_type_of = { - "float64": "double", + 
"bool": "bool", + "int8": "int8_t", + "uint8": "uint8_t", + "int16": "int16_t", + "uint16": "uint16_t", + "int32": "int32_t", + "uint32": "uint32_t", "int64": "int64_t", + "uint64": "uint64_t", + "float32": "float", + "float64": "double", + "complex64": "std::complex", "complex128": "std::complex", - "uint8": "uint8_t", + "datetime64": "std::time_t", + "timedelta64": "std::difftime", } np = ak.nplike.NumpyMetadata.instance() @@ -43,140 +54,193 @@ assert done is True -def from_rdataframe(data_frame, column): - def _wrap_as_record_array(array): - layout = array.layout if isinstance(array, ak._v2.highlevel.Array) else array - return ak._v2._util.wrap( - ak._v2.contents.RecordArray( - fields=[column], - contents=[layout], - ), - highlevel=True, - ) +def from_rdataframe(data_frame, columns): + def form_dtype(form): + if isinstance(form, ak._v2.forms.NumpyForm) and form.inner_shape == (): + return primitive_to_dtype(form.primitive) + elif isinstance(form, ak._v2.forms.ListOffsetForm): + return form_dtype(form.content) + + def empty_buffers(cpp_buffers_self, names_nbytes): + buffers = {} + for item in names_nbytes: + buffers[item.first] = ak.nplike.numpy.empty(item.second) + cpp_buffers_self.append( + item.first, + buffers[item.first].ctypes.data_as(ctypes.POINTER(ctypes.c_ubyte)), + ) + return buffers - # Cast input node to base RNode type - data_frame_rnode = cppyy.gbl.ROOT.RDF.AsRNode(data_frame) + def cpp_builder_type(depth, data_type): + if depth == 1: + return f"awkward::LayoutBuilder::Numpy<{data_type}>>" + else: + return ( + "awkward::LayoutBuilder::ListOffset" + ) - column_type = data_frame_rnode.GetColumnType(column) - form_str = ROOT.awkward.type_to_form[column_type](0) + def cpp_fill_offsets_and_flatten(depth): + if depth == 1: + return "\nfor (auto it : vec1) {\n" + " builder1.append(it);\n" + "}\n" + else: + return ( + f"for (auto const& vec{depth - 1} : vec{depth}) " + + "{\n" + + f" auto& builder{depth - 1} = builder{depth}.begin_list();\n" + + " " + + 
cpp_fill_offsets_and_flatten(depth - 1) + + "\n" + + f" builder{depth}.end_list();\n" + + "}\n" + ) - # 'Take' is a lazy action: - result_ptrs = data_frame_rnode.Take[column_type](column) - - if form_str.startswith("{"): - form = ak._v2.forms.from_json(form_str) - list_depth = form.purelist_depth - if list_depth > 4: - raise ak._v2._util.error( - NotImplementedError( - "Retrieving arbitrary depth nested containers is not implemented yet." - ) + def cpp_fill_function(depth): + if depth == 1: + return ( + "template\n" + + "void\n" + + "fill_from(BUILDER& builder, ROOT::RDF::RResultPtr>& result) {" + + " for (auto it : result) {\n" + + " builder.append(it);\n" + + " }\n" + + "}\n" + ) + else: + return ( + "template\n" + + "void\n" + + f"fill_offsets_and_flatten{depth}(BUILDER& builder{depth}, ROOT::RDF::RResultPtr>& result) " + + "{\n" + + f" for (auto const& vec{depth - 1} : result) " + + "{\n" + + f" auto& builder{depth - 1} = builder{depth}.begin_list();\n" + + " " + + cpp_fill_offsets_and_flatten(depth - 1) + + "\n" + + f" builder{depth}.end_list();\n" + + "}\n" + + "}\n" ) - def supported(form): - if form.purelist_depth == 1: - # special case for a list of strings form - return isinstance( - form, (ak._v2.forms.ListOffsetForm, ak._v2.forms.NumpyForm) - ) - else: - return isinstance(form, ak._v2.forms.ListOffsetForm) and supported( - form.content - ) + is_indexed = True if "awkward_index_" in data_frame.GetColumnNames() else False - if not supported(form): - raise ak._v2._util.error(NotImplementedError) + # Register Take action for each column + # 'Take' is a lazy action: + result_ptrs = {} + column_types = {} + contents_index = None + columns = ( + columns + ("awkward_index_",) + if (is_indexed and "awkward_index_" not in columns) + else columns + ) + for col in columns: + column_types[col] = data_frame.GetColumnType(col) + result_ptrs[col] = data_frame.Take[column_types[col]](col) + + contents = {} + awkward_contents = {} + contents_index = {} + for col in 
columns: + col_type = column_types[col] + if ROOT.awkward.is_awkward_type[col_type](): # Retrieve Awkward arrays + + # ROOT::RDF::RResultPtr::begin Returns an iterator to the beginning of + # the contained object if this makes sense, throw a compilation error otherwise. + # + # Does not trigger event loop and execution of all actions booked in + # the associated RLoopManager. + lookup = result_ptrs[col].begin().lookup() + generator = lookup[col].generator + layout = generator.tolayout(lookup[col], 0, ()) + awkward_contents[col] = layout + + else: # Convert the C++ vectors to Awkward arrays + form_str = ROOT.awkward.type_to_form[col_type](0) + form = ak._v2.forms.from_json(form_str) + + list_depth = form.purelist_depth + form_dtype_name = form_dtype(form).name + data_type = cpp_type_of[form_dtype_name] + + # pull in the CppBuffers (after which we can import from it) + CppBuffers = cppyy.gbl.awkward.CppBuffers[col_type] + cpp_buffers_self = CppBuffers(result_ptrs[col]) + + if isinstance(form, ak._v2.forms.NumpyForm): + + NumpyBuilder = cppyy.gbl.awkward.LayoutBuilder.Numpy[data_type] + builder = NumpyBuilder() + builder_type = type(builder).__cpp_name__ + + cpp_buffers_self.fill_from[builder_type, col_type]( + builder, result_ptrs[col] + ) - def form_dtype(form): - if form.purelist_depth == 1: - # special case for a list of strings form - return ( - primitive_to_dtype(form.content.primitive) - if isinstance(form, ak._v2.forms.ListOffsetForm) - else primitive_to_dtype(form.primitive) + names_nbytes = cpp_buffers_self.names_nbytes[builder_type](builder) + buffers = empty_buffers(cpp_buffers_self, names_nbytes) + cpp_buffers_self.to_char_buffers[builder_type](builder) + + elif isinstance(form, ak._v2.forms.ListOffsetForm): + if isinstance(form.content, ak._v2.forms.NumpyForm): + # NOTE: list_depth == 2 or 1 if its the list of strings + list_depth = 2 + + ListOffsetBuilder = cppyy.gbl.awkward.LayoutBuilder.ListOffset[ + "int64_t", + cpp_builder_type(list_depth - 1, 
data_type), + ] + builder = ListOffsetBuilder() + builder_type = type(builder).__cpp_name__ + + if not hasattr( + cppyy.gbl.awkward, f"fill_offsets_and_flatten{list_depth}" + ): + done = cppyy.cppdef( + "namespace awkward {" + cpp_fill_function(list_depth) + "}" + ) + assert done is True + + fill_from_func = getattr( + cppyy.gbl.awkward, f"fill_offsets_and_flatten{list_depth}" ) + fill_from_func[builder_type, col_type](builder, result_ptrs[col]) else: - return form_dtype(form.content) - - def empty_buffers(cpp_buffers_self, names_nbytes): - buffers = {} - for item in names_nbytes: - buffers[item.first] = ak.nplike.numpy.empty(item.second) - cpp_buffers_self.append( - item.first, - buffers[item.first].ctypes.data_as(ctypes.POINTER(ctypes.c_ubyte)), + raise ak._v2._util.error( + AssertionError(f"unrecognized Form: {type(form)}") ) - return buffers - - data_type = cpp_type_of[form_dtype(form).name] - - # pull in the CppBuffers (after which we can import from it) - CppBuffers = cppyy.gbl.awkward.CppBuffers[column_type] - cpp_buffers_self = CppBuffers(result_ptrs) - - if isinstance(form, ak._v2.forms.NumpyForm): - - NumpyBuilder = cppyy.gbl.awkward.LayoutBuilder.Numpy[data_type] - builder = NumpyBuilder() - builder_type = type(builder).__cpp_name__ - - cpp_buffers_self.fill_from[builder_type](builder) - - elif isinstance(form, ak._v2.forms.ListOffsetForm) and isinstance( - form.content, ak._v2.forms.NumpyForm - ): - # NOTE: list_depth == 2 or 1 if its the list of strings - ListOffsetBuilder = cppyy.gbl.awkward.LayoutBuilder.ListOffset[ - "int64_t", - f"awkward::LayoutBuilder::Numpy<{data_type}", - ] - builder = ListOffsetBuilder() - builder_type = type(builder).__cpp_name__ - cpp_buffers_self.fill_offsets_and_flatten_2[builder_type](builder) + names_nbytes = cpp_buffers_self.names_nbytes[builder_type](builder) + buffers = empty_buffers(cpp_buffers_self, names_nbytes) + cpp_buffers_self.to_char_buffers[builder_type](builder) - elif list_depth == 3: - ListOffsetBuilder = 
cppyy.gbl.awkward.LayoutBuilder.ListOffset[ - "int64_t", - f"awkward::LayoutBuilder::ListOffset", - ] - builder = ListOffsetBuilder() - builder_type = type(builder).__cpp_name__ - - cpp_buffers_self.fill_offsets_and_flatten_3[builder_type](builder) + array = ak._v2.from_buffers( + form, + builder.length(), + buffers, + ) + if col == "awkward_index_": + contents_index = ak._v2.index.Index64( + array.layout.to_numpy(allow_missing=True) + ) + else: + contents[col] = array.layout + + for col, content in awkward_contents.items(): + # wrap Awkward array in IndexedArray only if needed + if contents_index is not None and len(contents_index) < len(content): + array = ak._v2._util.wrap( + ak._v2.contents.IndexedArray(contents_index, content), + highlevel=True, + ) + contents[col] = array.layout else: - ListOffsetBuilder = cppyy.gbl.awkward.LayoutBuilder.ListOffset[ - "int64_t", - f"awkward::LayoutBuilder::ListOffset>", - ] - builder = ListOffsetBuilder() - builder_type = type(builder).__cpp_name__ - - cpp_buffers_self.fill_offsets_and_flatten_4[builder_type](builder) - - names_nbytes = cpp_buffers_self.names_nbytes[builder_type](builder) - buffers = empty_buffers(cpp_buffers_self, names_nbytes) - cpp_buffers_self.to_char_buffers[builder_type, data_type](builder) - - array = ak._v2.from_buffers( - form, - builder.length(), - buffers, - ) - return _wrap_as_record_array(array) - - elif form_str == "awkward type": - - # ROOT::RDF::RResultPtr::begin Returns an iterator to the beginning of - # the contained object if this makes sense, throw a compilation error otherwise. - # - # Does not trigger event loop and execution of all actions booked in - # the associated RLoopManager. 
- lookup = result_ptrs.begin().lookup() - generator = lookup[column].generator - layout = generator.tolayout(lookup[column], 0, ()) - - return _wrap_as_record_array(layout) - else: - raise ak._v2._util.error(NotImplementedError) + contents[col] = content + + return ak._v2._util.wrap( + ak._v2.contents.RecordArray(list(contents.values()), list(contents.keys())), + highlevel=True, + ) diff --git a/src/awkward/_v2/_connect/rdataframe/to_rdataframe.py b/src/awkward/_v2/_connect/rdataframe/to_rdataframe.py index 2222e2c015..f984ecbb1c 100644 --- a/src/awkward/_v2/_connect/rdataframe/to_rdataframe.py +++ b/src/awkward/_v2/_connect/rdataframe/to_rdataframe.py @@ -314,4 +314,6 @@ class {array_data_source} final (self.data_ptrs_list), ) + rdf = rdf.Define("awkward_index_", "(int64_t)rdfentry_") + return rdf diff --git a/src/awkward/_v2/cpp-headers/awkward/utils.h b/src/awkward/_v2/cpp-headers/awkward/utils.h index 7429497a54..6795b0d979 100644 --- a/src/awkward/_v2/cpp-headers/awkward/utils.h +++ b/src/awkward/_v2/cpp-headers/awkward/utils.h @@ -1,7 +1,7 @@ // BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE -#ifndef AWKWARD_UTILS_H_ -#define AWKWARD_UTILS_H_ +#ifndef AWKWARD_CPP_HEADERS_UTILS_H_ +#define AWKWARD_CPP_HEADERS_UTILS_H_ #include #include @@ -17,6 +17,7 @@ namespace awkward { template const std::string type_to_name() { + std::cout << "Type " << typeid(T).name() << " is not recognized." << std::endl; return typeid(T).name(); } @@ -249,6 +250,13 @@ namespace awkward { return "unsupported type"; } + /// @brief Check if an RDataFrame column is an Awkward Array. + template + bool + is_awkward_type() { + return (std::string(typeid(T).name()).find("awkward") != std::string::npos); + } + /// @class visit_impl /// /// @brief Class to index tuple at runtime. 
@@ -299,4 +307,4 @@ namespace awkward { } // namespace awkward -#endif // AWKWARD_UTILS_H_ +#endif // AWKWARD_CPP_HEADERS_UTILS_H_ diff --git a/src/awkward/_v2/cpp-headers/rdataframe/jagged_builders.h b/src/awkward/_v2/cpp-headers/rdataframe/jagged_builders.h index 914f567ab5..59ef4a6d50 100644 --- a/src/awkward/_v2/cpp-headers/rdataframe/jagged_builders.h +++ b/src/awkward/_v2/cpp-headers/rdataframe/jagged_builders.h @@ -44,85 +44,20 @@ namespace awkward { std::cout << std::endl; } - template + template void - fill_from(BUILDER& builder) const { - for (auto it : result_) { + fill_from(BUILDER& builder, ROOT::RDF::RResultPtr>& result) const { + for (auto it : result) { builder.append(it); } } - template + template void to_char_buffers(BUILDER& builder) { builder.to_char_buffers(buffers_uint8_ptr_); } - template - void - fill_offsets_and_flatten_2(BUILDER& builder) const { - for (auto const& vec : result_) { - auto& subbuilder = builder.begin_list(); - for (auto it : vec) { - subbuilder.append(it); - } - builder.end_list(); - } - } - - template - void - fill_offsets_and_flatten_3(BUILDER& builder) const { - for (auto const& vec_of_vecs : result_) { - auto& builder1 = builder.begin_list(); - for (auto const& vec : vec_of_vecs) { - auto& builder2 = builder1.begin_list(); - for (auto it : vec) { - builder2.append(it); - } - builder1.end_list(); - } - builder.end_list(); - } - } - - template - void - fill_offsets_and_flatten_4(BUILDER& builder) const { - for (auto const& vec_of_vecs_of_vecs : result_) { - auto& builder1 = builder.begin_list(); - for (auto const& vec_of_vecs : vec_of_vecs_of_vecs) { - auto& builder2 = builder1.begin_list(); - for (auto const& vec : vec_of_vecs) { - auto& builder3 = builder2.begin_list(); - for (auto it : vec) { - builder3.append(it); - } - builder2.end_list(); - } - builder1.end_list(); - } - builder.end_list(); - } - } - - template - void - recurse_fill_from(int64_t level, BUILDER& builder, ITERABLE& result) const { - if (level == 0) { 
- for (auto it : result) { - builder.append(it); - } - } - else { - auto& next_builder = builder.begin_list(); - for (auto& it : result) { - recurse_fill_from(level - 1, next_builder, it); - } - next_builder.end_list(); - } - } - private: ROOT::RDF::RResultPtr>& result_; std::map map_names_nbytes_; diff --git a/src/awkward/_v2/operations/ak_from_rdataframe.py b/src/awkward/_v2/operations/ak_from_rdataframe.py index c37f6f0ba9..20bf2da6b5 100644 --- a/src/awkward/_v2/operations/ak_from_rdataframe.py +++ b/src/awkward/_v2/operations/ak_from_rdataframe.py @@ -3,12 +3,12 @@ import awkward as ak -def from_rdataframe(data_frame, column): +def from_rdataframe(data_frame, columns): """ Args: data_frame (`ROOT.RDataFrame`): ROOT RDataFrame to convert into an Awkward Array. - column (str): A column to be converted to Awkward Array. + columns (str or tuple of str): A column or multiple columns to be converted to Awkward Array. Converts ROOT Data Frame columns into an Awkward Array. @@ -18,22 +18,22 @@ def from_rdataframe(data_frame, column): "ak._v2.from_rdataframe", dict( data_frame=data_frame, - column=column, + columns=columns, ), ): return _impl( data_frame, - column, + columns, ) def _impl( data_frame, - column, + columns, ): import awkward._v2._connect.rdataframe.from_rdataframe # noqa: F401 return ak._v2._connect.rdataframe.from_rdataframe.from_rdataframe( data_frame, - column, + columns, ) diff --git a/tests/v2/test_1374-to-rdataframe.py b/tests/v2/test_1374-to-rdataframe.py index de31930976..f6d917290c 100644 --- a/tests/v2/test_1374-to-rdataframe.py +++ b/tests/v2/test_1374-to-rdataframe.py @@ -26,7 +26,7 @@ def test_two_columns(): data_frame = ak._v2.to_rdataframe( {"x": ak_array_1, "y": ak_array_2}, flatlist_as_rvec=True ) - assert set(data_frame.GetColumnNames()) == {"x", "y"} + assert set(data_frame.GetColumnNames()) == {"x", "y", "awkward_index_"} assert data_frame.GetColumnType("x") == "ROOT::VecOps::RVec" assert 
data_frame.GetColumnType("y").startswith("awkward::ListArray_") @@ -38,7 +38,7 @@ def test_two_columns_as_rvecs(): ) data_frame = ak._v2.to_rdataframe({"x": ak_array_1, "y": ak_array_2}) - assert set(data_frame.GetColumnNames()) == {"x", "y"} + assert set(data_frame.GetColumnNames()) == {"x", "y", "awkward_index_"} assert data_frame.GetColumnType("x") == "double" assert data_frame.GetColumnType("y").startswith("awkward::Record_") @@ -120,7 +120,7 @@ def test_two_columns_as_vecs(): data_frame = ak._v2.operations.to_rdataframe( {"x": ak_array_1, "y": ak_array_2}, flatlist_as_rvec=False ) - assert set(data_frame.GetColumnNames()) == {"x", "y"} + assert set(data_frame.GetColumnNames()) == {"x", "y", "awkward_index_"} assert data_frame.GetColumnType("x") == "double" assert data_frame.GetColumnType("y").startswith("awkward::Record_") @@ -166,7 +166,7 @@ def test_two_columns_transform_filter(): ) data_frame = ak._v2.to_rdataframe({"one": example1, "two": example2}) - assert set(data_frame.GetColumnNames()) == {"one", "two"} + assert set(data_frame.GetColumnNames()) == {"one", "two", "awkward_index_"} compiler( """ @@ -181,7 +181,12 @@ def test_two_columns_transform_filter(): data_frame_transformed = ROOT.MyTransformation[data_frame.GetColumnType("one")]( ROOT.RDF.AsRNode(data_frame) ) - assert set(data_frame_transformed.GetColumnNames()) == {"neg_one", "one", "two"} + assert set(data_frame_transformed.GetColumnNames()) == { + "neg_one", + "one", + "two", + "awkward_index_", + } assert data_frame_transformed.Count().GetValue() == 5 data_frame2 = data_frame.Filter("one > 2.5") @@ -194,9 +199,9 @@ def test_two_columns_transform_filter(): def test_jims_example1(): array = ak._v2.Array([{"x": 1.1}, {"x": 2.2}, {"x": 3.3}, {"x": 4.4}, {"x": 5.5}]) data_frame = ak._v2.to_rdataframe({"some_array": array}) - assert set(data_frame.GetColumnNames()) == {"some_array"} + assert set(data_frame.GetColumnNames()) == {"some_array", "awkward_index_"} data_frame_y = data_frame.Define("y", 
"some_array.x()") - assert set(data_frame_y.GetColumnNames()) == {"some_array", "y"} + assert set(data_frame_y.GetColumnNames()) == {"some_array", "y", "awkward_index_"} cpp_list = ", ".join(str(e) for e in array.x.to_list()) diff --git a/tests/v2/test_1473-from-rdataframe.py b/tests/v2/test_1473-from-rdataframe.py index 9df4bc5638..cb98d70aaa 100644 --- a/tests/v2/test_1473-from-rdataframe.py +++ b/tests/v2/test_1473-from-rdataframe.py @@ -41,7 +41,7 @@ def test_to_from_data_frame_large(): ak_array_out = ak._v2.from_rdataframe( data_frame, - column="x", + columns=("x",), ) assert len(ak_array_in) == len(ak_array_out) @@ -56,7 +56,7 @@ def test_data_frame_boolean(): ak_array_out = ak._v2.from_rdataframe( data_frame, - column="x", + columns=("x",), ) assert ak_array_in.to_list() == ak_array_out.to_list() @@ -70,7 +70,7 @@ def test_data_frame_integers(): ak_array_out = ak._v2.from_rdataframe( data_frame, - column="x", + columns=("x",), ) assert ak_array_in.to_list() == ak_array_out["x"].to_list() @@ -84,7 +84,7 @@ def test_data_frame_real(): ak_array_out = ak._v2.from_rdataframe( data_frame, - column="x", + columns=("x",), ) assert ak_array_in.to_list() == ak_array_out["x"].to_list() @@ -100,7 +100,7 @@ def test_data_frame_complex(): ak_array_out = ak._v2.from_rdataframe( data_frame, - column="x", + columns=("x",), ) assert ak_array_in.to_list() == ak_array_out["x"].to_list() @@ -114,7 +114,7 @@ def test_data_frame_strings(): ak_array_out = ak._v2.from_rdataframe( data_frame, - column="x", + columns=("x",), ) assert ak_array_in.to_list() == ak_array_out["x"].to_list() @@ -128,7 +128,7 @@ def test_data_frame_vec_of_integers(): ak_array_out = ak._v2.from_rdataframe( data_frame, - column="x", + columns=("x",), ) assert ak_array_in.to_list() == ak_array_out["x"].to_list() @@ -142,7 +142,7 @@ def test_data_frame_vec_of_real(): ak_array_out = ak._v2.from_rdataframe( data_frame, - column="x", + columns=("x",), ) assert ak_array_in.to_list() == ak_array_out["x"].to_list() @@ 
-158,7 +158,7 @@ def test_data_frame_vec_of_complex(): ak_array_out = ak._v2.from_rdataframe( data_frame, - column="x", + columns=("x",), ) assert ak_array_in.to_list() == ak_array_out["x"].to_list() @@ -172,7 +172,7 @@ def test_data_frame_vec_of_strings(): ak_array_out = ak._v2.from_rdataframe( data_frame, - column="x", + columns=("x",), ) assert ak_array_in.to_list() == ak_array_out["x"].to_list() @@ -186,7 +186,7 @@ def test_data_frame_vec_of_vec_of_integers(): ak_array_out = ak._v2.from_rdataframe( data_frame, - column="x", + columns=("x",), ) assert ak_array_in.to_list() == ak_array_out["x"].to_list() @@ -200,7 +200,7 @@ def test_data_frame_vec_of_vec_of_real(): ak_array_out = ak._v2.from_rdataframe( data_frame, - column="x", + columns=("x",), ) assert ak_array_in.to_list() == ak_array_out["x"].to_list() @@ -216,7 +216,7 @@ def test_data_frame_vec_of_vec_of_complex(): ak_array_out = ak._v2.from_rdataframe( data_frame, - column="x", + columns=("x",), ) assert ak_array_in.to_list() == ak_array_out["x"].to_list() @@ -227,13 +227,13 @@ def test_rdata_frame_vecs_as_records(): ak_array_x = ak._v2.from_rdataframe( data_frame_xy, - column="x", + columns=("x",), ) assert ak_array_x["x"].layout.form == ak._v2.forms.NumpyForm("float64") ak_record_array_x = ak._v2.from_rdataframe( data_frame_xy, - column="x", + columns=("x",), ) assert ak_record_array_x.layout.form == ak._v2.forms.RecordForm( [ak._v2.forms.NumpyForm("float64")], "x" @@ -241,7 +241,7 @@ def test_rdata_frame_vecs_as_records(): ak_record_array_y = ak._v2.from_rdataframe( data_frame_xy, - column="y", + columns=("y",), ) ak_array = ak._v2.zip([ak_record_array_x, ak_record_array_y]) assert ak_array.layout.form == ak._v2.forms.RecordForm( @@ -259,7 +259,7 @@ def test_rdata_frame_vecs_of_complex(): ak_array_y = ak._v2.from_rdataframe( data_frame_xy, - column="y", + columns=("y",), ) assert ak_array_y["y"].layout.form == ak._v2.forms.NumpyForm("complex128") @@ -285,7 +285,7 @@ def 
test_rdata_frame_rvecs_as_records(): array = ak._v2.from_rdataframe( data_frame_x_y_r, - column="r", + columns=("r",), ) assert array.layout.form == ak._v2.forms.RecordForm( @@ -303,7 +303,7 @@ def test_to_from_data_frame(): ak_array_out = ak._v2.from_rdataframe( data_frame, - column="x", + columns=("x",), ) assert ak_array_out["x"].layout.content.is_contiguous is True @@ -318,7 +318,7 @@ def test_to_from_data_frame_rvec_of_rvec(): ak_array_out = ak._v2.from_rdataframe( data_frame, - column="x", + columns=("x",), ) assert ak_array_in.to_list() == ak_array_out["x"].to_list() @@ -333,7 +333,7 @@ def test_to_from_data_frame_rvec_of_rvec_of_rvec(): ak_array_out = ak._v2.from_rdataframe( data_frame, - column="x", + columns=("x",), ) assert ak_array_in.to_list() == ak_array_out["x"].to_list() diff --git a/tests/v2/test_1508-awkward-from-rdataframe.py b/tests/v2/test_1508-awkward-from-rdataframe.py index 1c3336aa82..602ddc73eb 100644 --- a/tests/v2/test_1508-awkward-from-rdataframe.py +++ b/tests/v2/test_1508-awkward-from-rdataframe.py @@ -37,7 +37,7 @@ def test_refcount(): array_out = ak._v2.from_rdataframe( data_frame, - column="x", + columns=("x",), ) assert array.to_list() == array_out["x"].to_list() @@ -122,7 +122,7 @@ def test_data_frame_vec_of_vec_of_integers(): ak_array_out = ak._v2.from_rdataframe( data_frame, - column="x", + columns=("x",), ) assert ak_array_in.to_list() == ak_array_out["x"].to_list() diff --git a/tests/v2/test_1613-generator-tolayout-records.py b/tests/v2/test_1613-generator-tolayout-records.py index 3125620832..bdbe628f49 100644 --- a/tests/v2/test_1613-generator-tolayout-records.py +++ b/tests/v2/test_1613-generator-tolayout-records.py @@ -334,6 +334,6 @@ def test_data_frame_from_json(): data_frame = ak._v2.to_rdataframe({"variants": array}) out = ak._v2.from_rdataframe( data_frame, - column="variants", + columns=("variants",), ) assert array.to_list() == out["variants"].to_list() diff --git a/tests/v2/test_1620-layout-builders.py 
b/tests/v2/test_1620-layout-builders.py index 52ef187d9c..4428c58f1a 100644 --- a/tests/v2/test_1620-layout-builders.py +++ b/tests/v2/test_1620-layout-builders.py @@ -20,7 +20,7 @@ def test_data_frame_integers(): ak_array_out = ak.from_rdataframe( data_frame, - column="x", + columns=("x",), ) assert ak_array_in.to_list() == ak_array_out["x"].to_list() @@ -34,7 +34,7 @@ def test_data_frame_double(): ak_array_out = ak.from_rdataframe( data_frame, - column="x", + columns=("x",), ) assert ak_array_in.to_list() == ak_array_out["x"].to_list() @@ -48,7 +48,7 @@ def test_data_frame_char(): ak_array_out = ak.from_rdataframe( data_frame, - column="x", + columns=("x",), ) assert ak_array_in.to_list() == ak_array_out["x"].to_list() @@ -62,7 +62,7 @@ def test_data_frame_complex(): ak_array_out = ak.from_rdataframe( data_frame, - column="x", + columns=("x",), ) assert ak_array_in.to_list() == ak_array_out["x"].to_list() @@ -76,7 +76,7 @@ def test_data_frame_listoffset_integers(): ak_array_out = ak.from_rdataframe( data_frame, - column="x", + columns=("x",), ) assert ak_array_in.to_list() == ak_array_out["x"].to_list() @@ -97,7 +97,7 @@ def test_data_frame_listoffset_listoffset_double(): ak_array_out = ak.from_rdataframe( data_frame, - column="x", + columns=("x",), ) assert ak_array_in.to_list() == ak_array_out["x"].to_list() @@ -141,7 +141,7 @@ def test_data_frame_vec_of_vec(): assert rdf3.GetColumnType("output") == "vector >" out = ak.from_rdataframe( rdf3, - column="output", + columns=("output",), ) assert out["output"].to_list() == (array["y"] * array["y"] * 1.0).to_list() @@ -172,7 +172,7 @@ def test_data_frame_vec_of_vec(): assert rdf3.GetColumnType("output2") == "vector > >" out = ak.from_rdataframe( # noqa: F841 rdf3, - column="output2", + columns=("output2",), ) result = ak.Array( [ diff --git a/tests/v2/test_1625-multiple-columns-from-rdataframe.py b/tests/v2/test_1625-multiple-columns-from-rdataframe.py new file mode 100644 index 0000000000..1d4d864205 --- /dev/null 
+++ b/tests/v2/test_1625-multiple-columns-from-rdataframe.py @@ -0,0 +1,266 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +import pytest # noqa: F401 +import numpy as np # noqa: F401 +import awkward._v2 as ak # noqa: F401 + + +ROOT = pytest.importorskip("ROOT") + + +compiler = ROOT.gInterpreter.Declare + + +def test_data_frame_integers(): + ak_array_x = ak.Array([1, 2, 3, 4, 5]) + ak_array_y = ak.Array([1.1, 2.2, 3.3, 4.4, 5.5]) + + data_frame = ak.to_rdataframe({"x": ak_array_x, "y": ak_array_y}) + + assert data_frame.GetColumnType("x") == "int64_t" + assert data_frame.GetColumnType("y") == "double" + + ak_array_out = ak.from_rdataframe( + data_frame, + columns=("x", "y"), + ) + assert ak_array_x.to_list() == ak_array_out["x"].to_list() + assert ak_array_y.to_list() == ak_array_out["y"].to_list() + + +def test_data_frame_vec_of_vec_of_real(): + ak_array_in = ak.Array([[[1.1], [2.2]], [[3.3], [4.4, 5.5]]]) + + data_frame = ak.to_rdataframe({"x": ak_array_in}) + + assert data_frame.GetColumnType("x").startswith("awkward::ListArray_") + + ak_array_out = ak.from_rdataframe( + data_frame, + columns=("x",), + ) + assert ak_array_in.to_list() == ak_array_out["x"].to_list() + + +def test_data_frame_filter(): + ak_array_x = ak.Array([1, 2, 3, 4, 5]) + ak_array_y = ak.Array([1.1, 2.2, 3.3, 4.4, 5.5]) + + data_frame = ak.to_rdataframe({"x": ak_array_x, "y": ak_array_y}) + rdf3 = data_frame.Filter("x > 3") + + assert data_frame.GetColumnType("x") == "int64_t" + assert data_frame.GetColumnType("y") == "double" + + ak_array_out = ak.from_rdataframe( + rdf3, + columns=( + "x", + "y", + ), + ) + assert ak_array_x[3:].to_list() == ak_array_out["x"].to_list() + assert ak_array_y[3:].to_list() == ak_array_out["y"].to_list() + + +def test_data_frame_rvec_filter(): + ak_array_x = ak.Array([[1, 2], [3], [4, 5]]) + ak_array_y = ak.Array([[1.0, 1.1], [2.2, 3.3, 4.4], [5.5]]) + + data_frame = ak.to_rdataframe({"x": ak_array_x, "y": 
ak_array_y}) + rdf3 = data_frame.Filter("x.size() >= 2") + + assert data_frame.GetColumnType("x") == "ROOT::VecOps::RVec" + assert data_frame.GetColumnType("y") == "ROOT::VecOps::RVec" + + ak_array_out = ak.from_rdataframe( + rdf3, + columns=( + "x", + "y", + ), + ) + assert ak_array_out["x"].to_list() == [[1, 2], [4, 5]] + assert ak_array_out["y"].to_list() == [[1.0, 1.1], [5.5]] + + rdf4 = data_frame.Filter("y.size() == 2") + ak_array_out = ak.from_rdataframe( + rdf4, + columns=( + "x", + "y", + ), + ) + assert ak_array_out["x"].to_list() == [[1, 2]] + assert ak_array_out["y"].to_list() == [[1.0, 1.1]] + + +def test_data_frame_double(): + ak_array_in = ak.Array([1.1, 2.2, 3.3, 4.4, 5.5]) + + data_frame = ak.to_rdataframe({"x": ak_array_in}) + + assert data_frame.GetColumnType("x") == "double" + + ak_array_out = ak.from_rdataframe(data_frame, columns=("x",)) + assert ak_array_in.to_list() == ak_array_out["x"].to_list() + + +def test_data_frame_vec_of_vec(): + array = ak.Array( + [ + [ + {"x": 1.1, "y": [1]}, + {"x": None, "y": [1, 2]}, + {"x": 3.3, "y": [1, 2, 3]}, + ], + [], + [{"x": None, "y": [1, 2, 3, 4]}, {"x": 5.5, "y": [1, 2, 3, 4, 5]}], + ] + ) + # ] * 10000) + + rdf2 = ak.to_rdataframe({"array": array}) + # We create a matrix RxC here + # Note when dimensions R and C are large, the following code suffers + # from potential performance penalties caused by frequent reallocation + # of memory by the push_back() function. This should be used only when + # vector dimensions are not known in advance. 
+ rdf3 = rdf2.Define( + "output", + """ + std::vector> tmp1; + + for (auto record : array) { + std::vector tmp2; + for (auto number : record.y()) { + tmp2.push_back(number * number); + } + tmp1.push_back(tmp2); + } + return tmp1; + """, + ) + + assert rdf3.GetColumnType("output") == "vector >" + + rdf4 = rdf3.Define( + "output2", + """ + std::vector>> tmp1; + + for (auto record : array) { + std::vector> tmp2; + // we can check if it's None: + // if (record.x().has_value()) + // or set it to 1 so that we do not scale: + double x_number = record.x().value_or(1); + for (auto number : record.y()) { + std::vector tmp3; + for (int64_t i = 0; i < std::rint(x_number); i++) { + double value = x_number * number; + tmp3.push_back(value); + } + tmp2.push_back(tmp3); + } + tmp1.push_back(tmp2); + } + return tmp1; + """, + ) + assert rdf4.GetColumnType("output2") == "vector > >" + + out = ak.from_rdataframe( # noqa: F841 + rdf4, + columns=( + "output", + "output2", + ), + ) + + assert out["output"].to_list() == (array["y"] * array["y"] * 1.0).to_list() + result = ak.Array( + [ + [ + [[1.1]], # "x" is 1.1 - "y" values are scaled by 1.1, and each is nested once + [ + [1.0], + [2.0], + ], # "x" is None - "y" values are unchanged, and each is nested + [ + [3.3, 3.3, 3.3], + [6.6, 6.6, 6.6], + [9.899999999999999, 9.899999999999999, 9.899999999999999], + ], # "x" is 3.3 - "y" values are scaled by 3.3 and each is nested 3 times + ], + [], + [ + [ + [1.0], + [2.0], + [3.0], + [4.0], + ], # "x" is None - "y" values are unchanged, and each is nested + [ + [5.5, 5.5, 5.5, 5.5, 5.5, 5.5], + [11.0, 11.0, 11.0, 11.0, 11.0, 11.0], + [16.5, 16.5, 16.5, 16.5, 16.5, 16.5], + [22.0, 22.0, 22.0, 22.0, 22.0, 22.0], + [27.5, 27.5, 27.5, 27.5, 27.5, 27.5], + ], # "x" is 5.5 - "y" values are scaled by 5.5 and each is nested 6 times + ], + ] + ) + assert out["output2"].to_list() == result.to_list() + + +def test_rdata_frame_rvecs_as_records(): + data_frame = ROOT.RDataFrame(1024) + coordDefineCode =
"""ROOT::VecOps::RVec {0}(len); + std::transform({0}.begin(), {0}.end(), {0}.begin(), [](double){{return gRandom->Uniform(-1.0, 1.0);}}); + return {0};""" + + data_frame_x_y = ( + data_frame.Define("len", "gRandom->Uniform(0, 16)") + .Define("x", coordDefineCode.format("x")) + .Define("y", coordDefineCode.format("y")) + ) + + # Now we have in hands d, a RDataFrame with two columns, x and y, which + # hold collections of coordinates. The size of these collections vary. + # Let's now define radii out of x and y. We'll do it treating the collections + # stored in the columns without looping on the individual elements. + data_frame_x_y_r = data_frame_x_y.Define("r", "sqrt(x*x + y*y)") + assert data_frame_x_y_r.GetColumnType("r") == "ROOT::VecOps::RVec" + + array = ak.from_rdataframe( + data_frame_x_y_r, + columns=( + "x", + "y", + "r", + ), + ) + + assert array["x"].layout.form == ak.forms.ListOffsetForm( + "i64", ak.forms.NumpyForm("float64") + ) + assert array["y"].layout.form == ak.forms.ListOffsetForm( + "i64", ak.forms.NumpyForm("float64") + ) + assert array["r"].layout.form == ak.forms.ListOffsetForm( + "i64", ak.forms.NumpyForm("float64") + ) + + assert array.layout.form == ak.forms.RecordForm( + [ + ak.forms.ListOffsetForm("i64", ak.forms.NumpyForm("float64")), + ak.forms.ListOffsetForm("i64", ak.forms.NumpyForm("float64")), + ak.forms.ListOffsetForm("i64", ak.forms.NumpyForm("float64")), + ], + [ + "x", + "y", + "r", + ], + ) From 9901b44d35ae60cc95cb81f5fe5e86b55d6898e9 Mon Sep 17 00:00:00 2001 From: Saransh Date: Wed, 31 Aug 2022 23:44:05 +0530 Subject: [PATCH 5/5] fix: pass a copy of `RecordArray`'s internal fields in HL API (#1650) * fix: pass a copy of `RecordArray`'s internal fields in HL API * We know that parameters is a dict. 
Co-authored-by: Jim Pivarski --- src/awkward/_v2/operations/ak_fields.py | 2 +- src/awkward/_v2/operations/ak_parameters.py | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/awkward/_v2/operations/ak_fields.py b/src/awkward/_v2/operations/ak_fields.py index 9c6571735a..2faa8a48a6 100644 --- a/src/awkward/_v2/operations/ak_fields.py +++ b/src/awkward/_v2/operations/ak_fields.py @@ -27,4 +27,4 @@ def fields(array): def _impl(array): layout = ak._v2.operations.to_layout(array, allow_record=True, allow_other=False) - return layout.fields + return layout.fields.copy() diff --git a/src/awkward/_v2/operations/ak_parameters.py b/src/awkward/_v2/operations/ak_parameters.py index efa034d746..1b90916cbc 100644 --- a/src/awkward/_v2/operations/ak_parameters.py +++ b/src/awkward/_v2/operations/ak_parameters.py @@ -1,5 +1,8 @@ # BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE +import copy +import numbers + import awkward as ak np = ak.nplike.NumpyMetadata.instance() @@ -27,13 +30,13 @@ def parameters(array): def _impl(array): if isinstance(array, (ak._v2.highlevel.Array, ak._v2.highlevel.Record)): - return array.layout.parameters + return _copy(array.layout.parameters) elif isinstance( array, (ak._v2.contents.Content, ak._v2.record.Record), ): - return array.parameters + return _copy(array.parameters) elif isinstance(array, ak._v2.highlevel.ArrayBuilder): return array.snapshot().layout.parameters @@ -43,3 +46,10 @@ def _impl(array): else: return {} + + +def _copy(what): + if all(isinstance(x, (str, numbers.Real)) for x in what.values()): + return what.copy() + else: + return copy.deepcopy(what)