Skip to content

Commit

Permalink
Bootstrap end-to-end exposure in Python, wrap DataType and Field types
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Mar 7, 2016
1 parent 572cdf2 commit ea2f3ec
Show file tree
Hide file tree
Showing 25 changed files with 417 additions and 160 deletions.
82 changes: 36 additions & 46 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,17 @@ if ("$ENV{CMAKE_EXPORT_COMPILE_COMMANDS}" STREQUAL "1")
set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
endif()

# Optional custom GCC toolchain: when ARROW_GCC_ROOT is set to a non-empty
# value in the configure-time environment, gcc and g++ are taken from that
# root's bin/ directory.
if(NOT "$ENV{ARROW_GCC_ROOT}" STREQUAL "")
  # Expansions are quoted so the root always reaches set() as one intact
  # argument, whatever characters the path contains.
  set(GCC_ROOT "$ENV{ARROW_GCC_ROOT}")
  set(CMAKE_C_COMPILER "${GCC_ROOT}/bin/gcc")
  set(CMAKE_CXX_COMPILER "${GCC_ROOT}/bin/g++")
endif()

# macOS only: enable CMAKE_MACOSX_RPATH explicitly. Newer CMake releases
# already default to this setting; older ones need it spelled out.
if(APPLE)
  set(CMAKE_MACOSX_RPATH 1)
endif()

# Use ccache as the launcher for compile and link rules when find_program()
# can locate it. CCACHE_FOUND holds the program's path on success and a false
# "-NOTFOUND" value otherwise, so it can be tested directly.
find_program(CCACHE_FOUND ccache)
if(CCACHE_FOUND)
  set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
  set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)
endif()

# ----------------------------------------------------------------------
# cmake options

Expand Down Expand Up @@ -126,38 +125,16 @@ endif ()
# Add common flags. The project-wide flags go first, so anything already in
# CMAKE_CXX_FLAGS comes later on the command line.
set(CMAKE_CXX_FLAGS "${CXX_COMMON_FLAGS} ${CMAKE_CXX_FLAGS}")

# Required to avoid static linking errors with dependencies.
# Position-independent code is requested through CMake's own switch rather
# than add_definitions(-fPIC): add_definitions() is intended for preprocessor
# definitions, while this variable lets CMake choose the appropriate flag for
# each compiler and target type. It only initializes targets created after
# this point, so it must stay ahead of every add_library()/add_executable().
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

# Determine compiler version
include(CompilerInfo)

# Clang-specific compiler flags. COMPILER_FAMILY is expected to have been set
# before this point (the CompilerInfo module is included just above).
if("${COMPILER_FAMILY}" STREQUAL "clang")
  # Clang helpfully provides a few extensions from C++11 such as the 'override'
  # keyword on methods. This doesn't change behavior, and we selectively enable
  # it in src/gutil/port.h only on clang. So, we can safely use it, and don't want
  # to trigger warnings when we do so.
  # (Currently disabled.)
  # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-c++11-extensions")

  # Using Clang with ccache causes a bunch of spurious warnings that are
  # purportedly fixed in the next version of ccache. See the following for details:
  #
  #   http://petereisentraut.blogspot.com/2011/05/ccache-and-clang.html
  #   http://petereisentraut.blogspot.com/2011/09/ccache-and-clang-part-2.html
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qunused-arguments")

  # Only hardcode -fcolor-diagnostics if stderr is opened on a terminal. Otherwise
  # the color codes show up as noisy artifacts.
  #
  # This test is imperfect because 'cmake' and 'make' can be run independently
  # (with different terminal options), and we're testing during the former.
  execute_process(COMMAND test -t 2 RESULT_VARIABLE ARROW_IS_TTY)

  # ARROW_IS_TTY is quoted on purpose: when the command cannot be started,
  # RESULT_VARIABLE receives a descriptive error string instead of an exit
  # code. Unquoted, that string would be split into several if() arguments and
  # abort configuration; quoted, the comparison is simply false.
  if(("${ARROW_IS_TTY}" EQUAL 0) AND (NOT ("$ENV{TERM}" STREQUAL "dumb")))
    message("Running in a controlling terminal")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fcolor-diagnostics")
  else()
    message("Running without a controlling terminal or in a dumb terminal")
  endif()
endif()

# Sanity check linking option.
Expand Down Expand Up @@ -278,12 +255,6 @@ set(LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
# Executables are written to the build output root.
set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}")

# Make headers reachable relative to src/ (for example "arrow/array.h").
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/src")

############################################################
# Visibility
############################################################
# Provides generate_export_header() and add_compiler_export_flags().
include(GenerateExportHeader)

############################################################
# Testing
############################################################
Expand Down Expand Up @@ -456,21 +427,33 @@ endif()
# Subdirectories
############################################################

# Descend into the source directories that carry their own CMakeLists.txt.
add_subdirectory(src/arrow)
add_subdirectory(src/arrow/util)
add_subdirectory(src/arrow/table)
add_subdirectory(src/arrow/types)

# Component libraries handed to target_link_libraries(arrow ${LINK_LIBS}).
set(LINK_LIBS
arrow_util
arrow_table
arrow_types)
# Libraries handed to target_link_libraries(arrow ${LIBARROW_LINK_LIBS}).
# No values are listed, so this call leaves the variable unset (expands to nothing).
set(LIBARROW_LINK_LIBS
)

# Source files passed to add_library(arrow ...), grouped by directory.
set(ARROW_SRCS
  # Top level
  src/arrow/array.cc
  src/arrow/builder.cc
  src/arrow/field.cc
  src/arrow/type.cc

  # table/
  src/arrow/table/column.cc
  src/arrow/table/schema.cc
  src/arrow/table/table.cc

  # types/
  src/arrow/types/construct.cc
  src/arrow/types/floating.cc
  src/arrow/types/integer.cc
  src/arrow/types/json.cc
  src/arrow/types/list.cc
  src/arrow/types/primitive.cc
  src/arrow/types/string.cc
  src/arrow/types/struct.cc
  src/arrow/types/union.cc

  # util/
  src/arrow/util/bit-util.cc
  src/arrow/util/buffer.cc
  src/arrow/util/memory-pool.cc
  src/arrow/util/status.cc
)

set(LIBARROW_LINKAGE "SHARED")
Expand All @@ -479,8 +462,15 @@ add_library(arrow
${LIBARROW_LINKAGE}
${ARROW_SRCS}
)
target_link_libraries(arrow ${LINK_LIBS})
set_target_properties(arrow PROPERTIES LINKER_LANGUAGE CXX)
# Emit the arrow library into the build output root and attach its link
# dependencies.
set_target_properties(arrow PROPERTIES
  LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}")
target_link_libraries(arrow ${LIBARROW_LINK_LIBS})

# Process the per-directory CMakeLists.txt files; the order of the list is the
# order in which the directories are added.
foreach(arrow_subdir
    src/arrow
    src/arrow/util
    src/arrow/table
    src/arrow/types)
  add_subdirectory(${arrow_subdir})
endforeach()

install(TARGETS arrow
LIBRARY DESTINATION lib
Expand Down
18 changes: 18 additions & 0 deletions cpp/src/arrow/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,25 @@
// specific language governing permissions and limitations
// under the License.

// Coarse public API while the library is in development

#ifndef ARROW_API_H
#define ARROW_API_H

#include "arrow/array.h"
#include "arrow/builder.h"
#include "arrow/field.h"
#include "arrow/type.h"

#include "arrow/table/column.h"
#include "arrow/table/schema.h"
#include "arrow/table/table.h"

#include "arrow/types/boolean.h"
#include "arrow/types/floating.h"
#include "arrow/types/integer.h"
#include "arrow/types/list.h"
#include "arrow/types/string.h"
#include "arrow/types/struct.h"

#endif // ARROW_API_H
15 changes: 0 additions & 15 deletions cpp/src/arrow/table/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,6 @@
# arrow_table
#######################################

# Sources of the arrow_table static library (paths relative to this directory).
set(TABLE_SRCS column.cc schema.cc table.cc)

# Link dependencies of arrow_table. No values are given, so the variable is
# left unset and expands to nothing below.
set(TABLE_LIBS)

add_library(arrow_table STATIC ${TABLE_SRCS})
target_link_libraries(arrow_table ${TABLE_LIBS})
# Link with the C++ driver regardless of what CMake would infer from the sources.
set_target_properties(arrow_table PROPERTIES LINKER_LANGUAGE CXX)

# Headers: top level
install(FILES
column.h
Expand Down
8 changes: 4 additions & 4 deletions cpp/src/arrow/table/schema-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,10 @@ TEST_F(TestSchema, ToString) {
auto schema = std::make_shared<Schema>(fields);

std::string result = schema->ToString();
std::string expected = R"(f0 ?int32
f1 uint8
f2 ?string
f3 ?list<?int16>
std::string expected = R"(f0 int32
f1 uint8 not null
f2 string
f3 list<int16>
)";

ASSERT_EQ(expected, result);
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/type.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

namespace arrow {

const std::shared_ptr<NullType> NA = std::make_shared<NullType>();
const std::shared_ptr<BooleanType> BOOL = std::make_shared<BooleanType>();
const std::shared_ptr<UInt8Type> UINT8 = std::make_shared<UInt8Type>();
const std::shared_ptr<UInt16Type> UINT16 = std::make_shared<UInt16Type>();
Expand Down
45 changes: 24 additions & 21 deletions cpp/src/arrow/type.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,49 +71,46 @@ struct LogicalType {
UINT64 = 7,
INT64 = 8,

// A boolean value represented as 1 byte
BOOL = 9,

// A boolean value represented as 1 bit
BIT = 10,
BOOL = 9,

// 4-byte floating point value
FLOAT = 11,
FLOAT = 10,

// 8-byte floating point value
DOUBLE = 12,
DOUBLE = 11,

// CHAR(N): fixed-length UTF8 string with length N
CHAR = 13,
CHAR = 12,

// UTF8 variable-length string as List<Char>
STRING = 14,
STRING = 13,

// VARCHAR(N): Null-terminated string type embedded in a CHAR(N + 1)
VARCHAR = 15,
VARCHAR = 14,

// Variable-length bytes (no guarantee of UTF8-ness)
BINARY = 16,
BINARY = 15,

// By default, int32 days since the UNIX epoch
DATE = 17,
DATE = 16,

// Exact timestamp encoded with int64 since UNIX epoch
// Default unit millisecond
TIMESTAMP = 18,
TIMESTAMP = 17,

// Timestamp as double seconds since the UNIX epoch
TIMESTAMP_DOUBLE = 19,
TIMESTAMP_DOUBLE = 18,

// Exact time encoded with int64, default unit millisecond
TIME = 20,
TIME = 19,

// Precision- and scale-based decimal type. Storage type depends on the
// parameters.
DECIMAL = 21,
DECIMAL = 20,

// Decimal value encoded as a text string
DECIMAL_TEXT = 22,
DECIMAL_TEXT = 21,

// A list of some logical data type
LIST = 30,
Expand Down Expand Up @@ -141,7 +138,9 @@ struct DataType {
type(type),
nullable(nullable) {}

virtual bool Equals(const DataType* other) {
virtual ~DataType() {}

bool Equals(const DataType* other) {
// Call with a pointer so more friendly to subclasses
return this == other || (this->type == other->type &&
this->nullable == other->nullable);
Expand Down Expand Up @@ -184,11 +183,10 @@ struct PrimitiveType : public DataType {
: DataType(Derived::type_enum, nullable) {}

virtual std::string ToString() const {
std::string result;
if (nullable) {
result.append("?");
std::string result(static_cast<const Derived*>(this)->name());
if (!nullable) {
result.append(" not null");
}
result.append(static_cast<const Derived*>(this)->name());
return result;
}
};
Expand All @@ -205,6 +203,10 @@ struct PrimitiveType : public DataType {
return NAME; \
}

// Type object for null values, declared through PRIMITIVE_DECL with `void`,
// the NA tag, 0 and the display name "null".
// NOTE(review): the macro definition is mostly outside this view; presumably
// its arguments are (class, C type, LogicalType tag, size, name) — confirm.
struct NullType : public PrimitiveType<NullType> {
PRIMITIVE_DECL(NullType, void, NA, 0, "null");
};

// Boolean type object, declared through PRIMITIVE_DECL with `uint8_t`, the
// BOOL tag, 1 and the display name "bool".
// NOTE(review): LogicalType::BOOL is commented as "represented as 1 bit",
// while this declaration passes uint8_t and 1 (presumably a size in bytes) —
// confirm the intended storage width.
struct BooleanType : public PrimitiveType<BooleanType> {
PRIMITIVE_DECL(BooleanType, uint8_t, BOOL, 1, "bool");
};
Expand Down Expand Up @@ -249,6 +251,7 @@ struct DoubleType : public PrimitiveType<DoubleType> {
PRIMITIVE_DECL(DoubleType, double, DOUBLE, 8, "double");
};

extern const std::shared_ptr<NullType> NA;
extern const std::shared_ptr<BooleanType> BOOL;
extern const std::shared_ptr<UInt8Type> UINT8;
extern const std::shared_ptr<UInt16Type> UINT16;
Expand Down
21 changes: 0 additions & 21 deletions cpp/src/arrow/types/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,6 @@
# arrow_types
#######################################

# Sources of the arrow_types static library (paths relative to this directory).
set(TYPES_SRCS
  construct.cc
  floating.cc
  integer.cc
  json.cc
  list.cc
  primitive.cc
  string.cc
  struct.cc
  union.cc)

# Link dependencies of arrow_types. No values are given, so the variable is
# left unset and expands to nothing below.
set(TYPES_LIBS)

add_library(arrow_types STATIC ${TYPES_SRCS})
target_link_libraries(arrow_types ${TYPES_LIBS})
# Link with the C++ driver regardless of what CMake would infer from the sources.
set_target_properties(arrow_types PROPERTIES LINKER_LANGUAGE CXX)

# Headers: top level
install(FILES
boolean.h
Expand Down
5 changes: 1 addition & 4 deletions cpp/src/arrow/types/json.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,7 @@

#include <vector>

#include "arrow/types/boolean.h"
#include "arrow/types/integer.h"
#include "arrow/types/floating.h"
#include "arrow/types/null.h"
#include "arrow/type.h"
#include "arrow/types/string.h"
#include "arrow/types/union.h"

Expand Down
7 changes: 4 additions & 3 deletions cpp/src/arrow/types/list-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,17 +47,18 @@ TEST(TypesTest, TestListType) {
ASSERT_EQ(list_type.type, LogicalType::LIST);

ASSERT_EQ(list_type.name(), string("list"));
ASSERT_EQ(list_type.ToString(), string("?list<?uint8>"));
ASSERT_EQ(list_type.ToString(), string("list<uint8>"));

ASSERT_EQ(list_type.value_type->type, vt->type);
ASSERT_EQ(list_type.value_type->type, vt->type);

std::shared_ptr<DataType> st = std::make_shared<StringType>(false);
std::shared_ptr<DataType> lt = std::make_shared<ListType>(st, false);
ASSERT_EQ(lt->ToString(), string("list<string>"));
ASSERT_EQ(lt->ToString(), string("list<string not null> not null"));

ListType lt2(lt, false);
ASSERT_EQ(lt2.ToString(), string("list<list<string>>"));
ASSERT_EQ(lt2.ToString(),
string("list<list<string not null> not null> not null"));
}

// ----------------------------------------------------------------------
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/arrow/types/list.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ namespace arrow {

// Renders this list type as text: "list<" + the value type's ToString() + ">",
// preceded by "?" when the type is nullable and followed by " not null" when
// it is not.
// NOTE(review): both nullability notations appear in this function as shown;
// they look like the before/after of one change (the visible tests contain
// expectations in both styles). Confirm which marker is meant to remain.
std::string ListType::ToString() const {
std::stringstream s;
// Prefix marker for nullable types.
if (this->nullable) {
s << "?";
}
// The element type is rendered by its own ToString(), so nesting recurses.
s << "list<" << value_type->ToString() << ">";
// Suffix marker for non-nullable types.
if (!this->nullable) {
s << " not null";
}
return s.str();
}

Expand Down
Loading

0 comments on commit ea2f3ec

Please sign in to comment.