From f13872784ebab7e3f611d2607f82ccd3b051fd93 Mon Sep 17 00:00:00 2001 From: Kouhei Sutou Date: Sun, 29 Oct 2017 01:14:05 +0900 Subject: [PATCH] [GLib] Add GArrowRecordBatchBuilder --- c_glib/arrow-glib/Makefile.am | 3 + c_glib/arrow-glib/array-builder.cpp | 28 +- c_glib/arrow-glib/array-builder.h | 2 + c_glib/arrow-glib/arrow-glib.h | 1 + c_glib/arrow-glib/arrow-glib.hpp | 1 + c_glib/arrow-glib/meson.build | 3 + c_glib/arrow-glib/table-builder.cpp | 303 ++++++++++++++++++++++ c_glib/arrow-glib/table-builder.h | 56 ++++ c_glib/arrow-glib/table-builder.hpp | 27 ++ c_glib/doc/reference/arrow-glib-docs.sgml | 4 + c_glib/test/test-record-batch-builder.rb | 84 ++++++ 11 files changed, 510 insertions(+), 2 deletions(-) create mode 100644 c_glib/arrow-glib/table-builder.cpp create mode 100644 c_glib/arrow-glib/table-builder.h create mode 100644 c_glib/arrow-glib/table-builder.hpp create mode 100644 c_glib/test/test-record-batch-builder.rb diff --git a/c_glib/arrow-glib/Makefile.am b/c_glib/arrow-glib/Makefile.am index 2066fa77cef84..bf68ec4910e77 100644 --- a/c_glib/arrow-glib/Makefile.am +++ b/c_glib/arrow-glib/Makefile.am @@ -57,6 +57,7 @@ libarrow_glib_la_headers = \ record-batch.h \ schema.h \ table.h \ + table-builder.h \ tensor.h \ type.h @@ -97,6 +98,7 @@ libarrow_glib_la_sources = \ record-batch.cpp \ schema.cpp \ table.cpp \ + table-builder.cpp \ tensor.cpp \ type.cpp \ $(libarrow_glib_la_headers) \ @@ -133,6 +135,7 @@ libarrow_glib_la_cpp_headers = \ record-batch.hpp \ schema.hpp \ table.hpp \ + table-builder.hpp \ tensor.hpp \ type.hpp diff --git a/c_glib/arrow-glib/array-builder.cpp b/c_glib/arrow-glib/array-builder.cpp index bea7e8342c797..86e7f985be4a4 100644 --- a/c_glib/arrow-glib/array-builder.cpp +++ b/c_glib/arrow-glib/array-builder.cpp @@ -202,6 +202,7 @@ G_BEGIN_DECLS typedef struct GArrowArrayBuilderPrivate_ { arrow::ArrayBuilder *array_builder; + gboolean have_ownership; } GArrowArrayBuilderPrivate; enum { @@ -225,7 +226,9 @@ 
garrow_array_builder_finalize(GObject *object) priv = GARROW_ARRAY_BUILDER_GET_PRIVATE(object); - delete priv->array_builder; + if (priv->have_ownership) { + delete priv->array_builder; + } G_OBJECT_CLASS(garrow_array_builder_parent_class)->finalize(object); } @@ -267,6 +270,10 @@ garrow_array_builder_get_property(GObject *object, static void garrow_array_builder_init(GArrowArrayBuilder *builder) { + GArrowArrayBuilderPrivate *priv; + + priv = GARROW_ARRAY_BUILDER_GET_PRIVATE(builder); + priv->have_ownership = TRUE; } static void @@ -301,7 +308,24 @@ garrow_array_builder_new(const std::shared_ptr &type, return NULL; } return garrow_array_builder_new_raw(arrow_builder.release()); -}; +} + +/** + * garrow_array_builder_release_ownership: (skip) + * @builder: A #GArrowArrayBuilder. + * + * Release ownership of `arrow::ArrayBuilder` in `builder`. + * + * Since: 0.8.0 + */ +void +garrow_array_builder_release_ownership(GArrowArrayBuilder *builder) +{ + GArrowArrayBuilderPrivate *priv; + + priv = GARROW_ARRAY_BUILDER_GET_PRIVATE(builder); + priv->have_ownership = FALSE; +} /** * garrow_array_builder_finish: diff --git a/c_glib/arrow-glib/array-builder.h b/c_glib/arrow-glib/array-builder.h index d9e91212e1663..19dadb30999bd 100644 --- a/c_glib/arrow-glib/array-builder.h +++ b/c_glib/arrow-glib/array-builder.h @@ -35,6 +35,8 @@ struct _GArrowArrayBuilderClass GObjectClass parent_class; }; +void garrow_array_builder_release_ownership(GArrowArrayBuilder *builder); + GArrowArray *garrow_array_builder_finish (GArrowArrayBuilder *builder, GError **error); diff --git a/c_glib/arrow-glib/arrow-glib.h b/c_glib/arrow-glib/arrow-glib.h index 47f324dc04640..fb1b37e2c688a 100644 --- a/c_glib/arrow-glib/arrow-glib.h +++ b/c_glib/arrow-glib/arrow-glib.h @@ -33,6 +33,7 @@ #include #include #include +#include #include #include diff --git a/c_glib/arrow-glib/arrow-glib.hpp b/c_glib/arrow-glib/arrow-glib.hpp index 7fc6c4828d8c5..0c411346ad430 100644 --- a/c_glib/arrow-glib/arrow-glib.hpp +++ 
b/c_glib/arrow-glib/arrow-glib.hpp @@ -32,6 +32,7 @@ #include #include #include +#include #include #include diff --git a/c_glib/arrow-glib/meson.build b/c_glib/arrow-glib/meson.build index 226c696bdcf37..464a002e78b0c 100644 --- a/c_glib/arrow-glib/meson.build +++ b/c_glib/arrow-glib/meson.build @@ -30,6 +30,7 @@ sources = files( 'record-batch.cpp', 'schema.cpp', 'table.cpp', + 'table-builder.cpp', 'tensor.cpp', 'type.cpp', ) @@ -70,6 +71,7 @@ c_headers = files( 'record-batch.h', 'schema.h', 'table.h', + 'table-builder.h', 'tensor.h', 'type.h', ) @@ -110,6 +112,7 @@ cpp_headers = files( 'record-batch.hpp', 'schema.hpp', 'table.hpp', + 'table-builder.hpp', 'tensor.hpp', 'type.hpp', ) diff --git a/c_glib/arrow-glib/table-builder.cpp b/c_glib/arrow-glib/table-builder.cpp new file mode 100644 index 0000000000000..e87314bf52b9f --- /dev/null +++ b/c_glib/arrow-glib/table-builder.cpp @@ -0,0 +1,303 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include +#include +#include +#include +#include + +G_BEGIN_DECLS + +/** + * SECTION: table-builder + * @section_id: table-builder-classes + * @title: Table builder classes + * @include: arrow-glib/arrow-glib.h + * + * #GArrowRecordBatchBuilder is a class to create + * new #GArrowRecordBatch. + */ + +typedef struct GArrowRecordBatchBuilderPrivate_ { + arrow::RecordBatchBuilder *record_batch_builder; + GPtrArray *fields; +} GArrowRecordBatchBuilderPrivate; + +enum { + PROP_0, + PROP_RECORD_BATCH_BUILDER +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowRecordBatchBuilder, + garrow_record_batch_builder, + G_TYPE_OBJECT) + +#define GARROW_RECORD_BATCH_BUILDER_GET_PRIVATE(object) \ + static_cast( \ + garrow_record_batch_builder_get_instance_private( \ + GARROW_RECORD_BATCH_BUILDER(object))) + +static void +garrow_record_batch_builder_constructed(GObject *object) +{ + auto priv = GARROW_RECORD_BATCH_BUILDER_GET_PRIVATE(object); + auto arrow_builder = priv->record_batch_builder; + auto n_fields = arrow_builder->num_fields(); + priv->fields = g_ptr_array_new_full(n_fields, g_object_unref); + for (int i = 0; i < n_fields; ++i) { + auto arrow_array_builder = arrow_builder->GetField(i); + auto array_builder = garrow_array_builder_new_raw(arrow_array_builder); + garrow_array_builder_release_ownership(array_builder); + g_ptr_array_add(priv->fields, array_builder); + } + + G_OBJECT_CLASS(garrow_record_batch_builder_parent_class)->constructed(object); +} + +static void +garrow_record_batch_builder_finalize(GObject *object) +{ + auto priv = GARROW_RECORD_BATCH_BUILDER_GET_PRIVATE(object); + + g_ptr_array_free(priv->fields, TRUE); + delete priv->record_batch_builder; + + G_OBJECT_CLASS(garrow_record_batch_builder_parent_class)->finalize(object); +} + +static void +garrow_record_batch_builder_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = 
GARROW_RECORD_BATCH_BUILDER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_RECORD_BATCH_BUILDER: + priv->record_batch_builder = + static_cast(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_record_batch_builder_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + switch (prop_id) { + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_record_batch_builder_init(GArrowRecordBatchBuilder *builder) +{ +} + +static void +garrow_record_batch_builder_class_init(GArrowRecordBatchBuilderClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->constructed = garrow_record_batch_builder_constructed; + gobject_class->finalize = garrow_record_batch_builder_finalize; + gobject_class->set_property = garrow_record_batch_builder_set_property; + gobject_class->get_property = garrow_record_batch_builder_get_property; + + GParamSpec *spec; + spec = g_param_spec_pointer("record-batch-builder", + "RecordBatch builder", + "The raw arrow::RecordBatchBuilder *", + static_cast(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, + PROP_RECORD_BATCH_BUILDER, + spec); +} + +/** + * garrow_record_batch_builder_new: + * @schema: A #GArrowSchema. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: A newly created #GArrowRecordBatchBuilder on success, + * %NULL on error. 
+ * + * Since: 0.8.0 + */ +GArrowRecordBatchBuilder * +garrow_record_batch_builder_new(GArrowSchema *schema, GError **error) +{ + auto arrow_schema = garrow_schema_get_raw(schema); + auto memory_pool = arrow::default_memory_pool(); + std::unique_ptr arrow_builder; + auto status = arrow::RecordBatchBuilder::Make(arrow_schema, + memory_pool, + &arrow_builder); + if (garrow_error_check(error, status, "[record-batch-builder][new]")) { + return garrow_record_batch_builder_new_raw(arrow_builder.release()); + } else { + return NULL; + } +} + +/** + * garrow_record_batch_builder_get_initial_capacity: + * @builder: A #GArrowRecordBatchBuilder. + * + * Returns: The initial capacity for array builders. + * + * Since: 0.8.0 + */ +gint64 +garrow_record_batch_builder_get_initial_capacity(GArrowRecordBatchBuilder *builder) +{ + auto arrow_builder = garrow_record_batch_builder_get_raw(builder); + return arrow_builder->initial_capacity(); +} + +/** + * garrow_record_batch_builder_set_initial_capacity: + * @builder: A #GArrowRecordBatchBuilder. + * @capacity: The new initial capacity for array builders. + * + * Since: 0.8.0 + */ +void +garrow_record_batch_builder_set_initial_capacity(GArrowRecordBatchBuilder *builder, + gint64 capacity) +{ + auto arrow_builder = garrow_record_batch_builder_get_raw(builder); + arrow_builder->SetInitialCapacity(capacity); +} + +/** + * garrow_record_batch_builder_get_schema: + * @builder: A #GArrowRecordBatchBuilder. + * + * Returns: (transfer full): The #GArrowSchema of the record batch builder. + * + * Since: 0.8.0 + */ +GArrowSchema * +garrow_record_batch_builder_get_schema(GArrowRecordBatchBuilder *builder) +{ + auto arrow_builder = garrow_record_batch_builder_get_raw(builder); + auto arrow_schema = arrow_builder->schema(); + return garrow_schema_new_raw(&arrow_schema); +} + +/** + * garrow_record_batch_builder_get_n_fields: + * @builder: A #GArrowRecordBatchBuilder. + * + * Returns: The number of fields. 
+ * + * Since: 0.8.0 + */ +gint +garrow_record_batch_builder_get_n_fields(GArrowRecordBatchBuilder *builder) +{ + auto arrow_builder = garrow_record_batch_builder_get_raw(builder); + return arrow_builder->num_fields(); +} + +/** + * garrow_record_batch_builder_get_field: + * @builder: A #GArrowRecordBatchBuilder. + * @i: The field index. If it's negative, index is counted backward + * from the end of the fields. `-1` means the last field. + * + * Returns: (transfer none) (nullable): The #GArrowArrayBuilder for + * the `i`-th field on success, %NULL on out of index. + * + * Since: 0.8.0 + */ +GArrowArrayBuilder * +garrow_record_batch_builder_get_field(GArrowRecordBatchBuilder *builder, + gint i) +{ + auto priv = GARROW_RECORD_BATCH_BUILDER_GET_PRIVATE(builder); + if (i < 0) { + i += priv->fields->len; + } + if (i < 0) { + return NULL; + } + if (static_cast(i) >= priv->fields->len) { + return NULL; + } + + return GARROW_ARRAY_BUILDER(g_ptr_array_index(priv->fields, i)); +} + +/** + * garrow_record_batch_builder_flush: + * @builder: A #GArrowRecordBatchBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): The built #GArrowRecordBatch on success, + * %NULL on error. 
+ * + * Since: 0.8.0 + */ +GArrowRecordBatch * +garrow_record_batch_builder_flush(GArrowRecordBatchBuilder *builder, + GError **error) +{ + auto arrow_builder = garrow_record_batch_builder_get_raw(builder); + std::shared_ptr arrow_record_batch; + auto status = arrow_builder->Flush(&arrow_record_batch); + if (garrow_error_check(error, status, "[record-batch-builder][flush]")) { + return garrow_record_batch_new_raw(&arrow_record_batch); + } else { + return NULL; + } +} + +G_END_DECLS + +GArrowRecordBatchBuilder * +garrow_record_batch_builder_new_raw(arrow::RecordBatchBuilder *arrow_builder) +{ + auto builder = g_object_new(GARROW_TYPE_RECORD_BATCH_BUILDER, + "record-batch-builder", arrow_builder, + NULL); + return GARROW_RECORD_BATCH_BUILDER(builder); +} + +arrow::RecordBatchBuilder * +garrow_record_batch_builder_get_raw(GArrowRecordBatchBuilder *builder) +{ + auto priv = GARROW_RECORD_BATCH_BUILDER_GET_PRIVATE(builder); + return priv->record_batch_builder; +} diff --git a/c_glib/arrow-glib/table-builder.h b/c_glib/arrow-glib/table-builder.h new file mode 100644 index 0000000000000..d05525e54f52e --- /dev/null +++ b/c_glib/arrow-glib/table-builder.h @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include + +G_BEGIN_DECLS + +#define GARROW_TYPE_RECORD_BATCH_BUILDER (garrow_record_batch_builder_get_type()) +G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchBuilder, + garrow_record_batch_builder, + GARROW, + RECORD_BATCH_BUILDER, + GObject) +struct _GArrowRecordBatchBuilderClass +{ + GObjectClass parent_class; +}; + +GArrowRecordBatchBuilder *garrow_record_batch_builder_new(GArrowSchema *schema, + GError **error); + +gint64 garrow_record_batch_builder_get_initial_capacity(GArrowRecordBatchBuilder *builder); +void garrow_record_batch_builder_set_initial_capacity(GArrowRecordBatchBuilder *builder, + gint64 capacity); +GArrowSchema *garrow_record_batch_builder_get_schema(GArrowRecordBatchBuilder *builder); + +gint garrow_record_batch_builder_get_n_fields(GArrowRecordBatchBuilder *builder); +GArrowArrayBuilder *garrow_record_batch_builder_get_field(GArrowRecordBatchBuilder *builder, + gint i); + +GArrowRecordBatch *garrow_record_batch_builder_flush(GArrowRecordBatchBuilder *builder, + GError **error); + + +G_END_DECLS diff --git a/c_glib/arrow-glib/table-builder.hpp b/c_glib/arrow-glib/table-builder.hpp new file mode 100644 index 0000000000000..cf93ded9b4b65 --- /dev/null +++ b/c_glib/arrow-glib/table-builder.hpp @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +#include + +GArrowRecordBatchBuilder *garrow_record_batch_builder_new_raw(arrow::RecordBatchBuilder *arrow_builder); +arrow::RecordBatchBuilder *garrow_record_batch_builder_get_raw(GArrowRecordBatchBuilder *builder); diff --git a/c_glib/doc/reference/arrow-glib-docs.sgml b/c_glib/doc/reference/arrow-glib-docs.sgml index fc161a5864c24..a504ef1148383 100644 --- a/c_glib/doc/reference/arrow-glib-docs.sgml +++ b/c_glib/doc/reference/arrow-glib-docs.sgml @@ -72,6 +72,10 @@ + + Table builder + + Buffer diff --git a/c_glib/test/test-record-batch-builder.rb b/c_glib/test/test-record-batch-builder.rb new file mode 100644 index 0000000000000..1bb72820a5860 --- /dev/null +++ b/c_glib/test/test-record-batch-builder.rb @@ -0,0 +1,84 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestRecordBatchBuilder < Test::Unit::TestCase + include Helper::Buildable + + def setup + @fields = [ + Arrow::Field.new("visible", Arrow::BooleanDataType.new), + Arrow::Field.new("point", Arrow::Int32DataType.new), + ] + @schema = Arrow::Schema.new(@fields) + @builder = Arrow::RecordBatchBuilder.new(@schema) + end + + def test_initial_capacity + @builder.initial_capacity = 128 + assert_equal(128, @builder.initial_capacity) + end + + def test_schema + assert_equal(@schema, @builder.schema) + end + + def test_n_fields + assert_equal(@fields.size, @builder.n_fields) + end + + sub_test_case("#get_field") do + def test_valid + assert_equal(Arrow::BooleanArrayBuilder, + @builder.get_field(0).class) + end + + def test_negative + assert_equal(Arrow::Int32ArrayBuilder, + @builder.get_field(-1).class) + end + + def test_too_negative + assert_nil(@builder.get_field(-@fields.size - 1)) + end + + def test_too_large + assert_nil(@builder.get_field(@fields.size)) + end + end + + def test_flush + arrays = { + "visible" => build_boolean_array([true, false, true]), + "point" => build_int32_array([1, -1, 0]), + } + arrays.each_with_index do |(_, array), i| + @builder.get_field(i).append_values(array.values, []) + end + assert_equal(build_record_batch(arrays), + @builder.flush) + + arrays = { + "visible" => build_boolean_array([false, true]), + "point" => build_int32_array([10, -10]), + } + arrays.each_with_index do |(_, array), i| + @builder.get_field(i).append_values(array.values, []) + end + assert_equal(build_record_batch(arrays), + @builder.flush) + end +end