Skip to content

Commit

Permalink
[FEAT][C++] Not allow setting custom namespace for code clarity (#415)
Browse files Browse the repository at this point in the history
Signed-off-by: acezen <[email protected]>
  • Loading branch information
acezen authored Mar 27, 2024
1 parent f77897e commit 6e77ed7
Show file tree
Hide file tree
Showing 65 changed files with 391 additions and 450 deletions.
6 changes: 0 additions & 6 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,10 @@ project(graph-archive LANGUAGES C CXX VERSION ${GAR_VERSION})
# cmake options
# ------------------------------------------------------------------------------

option(NAMESPACE "User specific namespace, default is graphar" OFF)
option(BUILD_TESTS "Build unit tests" OFF)
option(BUILD_EXAMPLES "Build examples" OFF)
option(BUILD_BENCHMARKS "Build benchmarks" OFF)

if (NAMESPACE)
add_definitions(-DGAR_NAMESPACE=${NAMESPACE})
else()
add_definitions(-DGAR_NAMESPACE=graphar)
endif()
# ------------------------------------------------------------------------------
# setting default cmake type to Release
# ------------------------------------------------------------------------------
Expand Down
13 changes: 0 additions & 13 deletions cpp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,19 +56,6 @@ Release build:
$ make -j8 # if you have 8 CPU cores, otherwise adjust, use -j`nproc` for all cores
```

Build with a custom namespace:

The `namespace` is configurable. By default,
the library is defined in `namespace graphar`; however, this can be changed by
setting the `NAMESPACE` option with cmake:

```bash
$ mkdir build
$ cd build
$ cmake -DNAMESPACE=MyNamespace ..
$ make -j8 # if you have 8 CPU cores, otherwise adjust, use -j`nproc` for all cores
```

Build the Apache Arrow dependency from source:

By default, GraphAr tries to find Apache Arrow on the system. It can instead be configured to build the Arrow dependency automatically from source:
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/arrow_chunk_reader_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#include "gar/reader/arrow_chunk_reader.h"
#include "gar/util/adj_list_type.h"

namespace GAR_NAMESPACE_INTERNAL {
namespace graphar {

BENCHMARK_DEFINE_F(BenchmarkFixture, CreateVertexPropertyArrowChunkReader)
(::benchmark::State& state) { // NOLINT
Expand Down Expand Up @@ -149,4 +149,4 @@ BENCHMARK_REGISTER_F(BenchmarkFixture, VertexPropertyArrowChunkReaderReadChunk);
BENCHMARK_REGISTER_F(BenchmarkFixture, AdjListArrowChunkReaderReadChunk);
BENCHMARK_REGISTER_F(BenchmarkFixture, AdjListOffsetArrowChunkReaderReadChunk);
BENCHMARK_REGISTER_F(BenchmarkFixture, AdjListOffsetArrowChunkReaderReadChunk);
} // namespace GAR_NAMESPACE_INTERNAL
} // namespace graphar
12 changes: 6 additions & 6 deletions cpp/benchmarks/benchmark_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,26 +26,26 @@
#ifndef CPP_BENCHMARKS_BENCHMARK_UTIL_H_
#define CPP_BENCHMARKS_BENCHMARK_UTIL_H_

namespace GAR_NAMESPACE_INTERNAL {
namespace graphar {

// Return the value of the GAR_TEST_DATA environment variable or return error
// Status
GAR_NAMESPACE::Status GetTestResourceRoot(std::string* out) {
Status GetTestResourceRoot(std::string* out) {
const char* c_root = std::getenv("GAR_TEST_DATA");
if (!c_root) {
return GAR_NAMESPACE::Status::IOError(
return Status::IOError(
"Test resources not found, set GAR_TEST_DATA to <repo root>/testing");
}
// FIXME(@acezen): This is a hack to get around the fact that the testing
*out = std::string(c_root);
return GAR_NAMESPACE::Status::OK();
return Status::OK();
}

class BenchmarkFixture : public ::benchmark::Fixture {
public:
void SetUp(const ::benchmark::State& state) override {
std::string root;
GAR_NAMESPACE::Status status = GetTestResourceRoot(&root);
Status status = GetTestResourceRoot(&root);
path_ = root + "/ldbc_sample/parquet/ldbc_sample.graph.yml";
auto maybe_graph_info = GraphInfo::Load(path_);
graph_info_ = maybe_graph_info.value();
Expand All @@ -57,6 +57,6 @@ class BenchmarkFixture : public ::benchmark::Fixture {
std::string path_;
std::shared_ptr<GraphInfo> graph_info_;
};
} // namespace GAR_NAMESPACE_INTERNAL
} // namespace graphar

#endif // CPP_BENCHMARKS_BENCHMARK_UTIL_H_
4 changes: 2 additions & 2 deletions cpp/benchmarks/graph_info_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#include "./benchmark_util.h"
#include "gar/graph_info.h"

namespace GAR_NAMESPACE_INTERNAL {
namespace graphar {

static void CreateGraphInfo(::benchmark::State& state, // NOLINT
const std::string& path) {
Expand All @@ -38,4 +38,4 @@ BENCHMARK_DEFINE_F(BenchmarkFixture, InitialGraphInfo)
}
BENCHMARK_REGISTER_F(BenchmarkFixture, InitialGraphInfo);

} // namespace GAR_NAMESPACE_INTERNAL
} // namespace graphar
51 changes: 24 additions & 27 deletions cpp/examples/bfs_father_example.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,37 +29,36 @@ int main(int argc, char* argv[]) {
// read file and construct graph info
std::string path =
TEST_DATA_DIR + "/ldbc_sample/parquet/ldbc_sample.graph.yml";
auto graph_info = GAR_NAMESPACE::GraphInfo::Load(path).value();
auto graph_info = graphar::GraphInfo::Load(path).value();

// get the person vertices of graph
std::string label = "person";
ASSERT(graph_info->GetVertexInfo(label) != nullptr);
auto maybe_vertices =
GAR_NAMESPACE::VerticesCollection::Make(graph_info, label);
auto maybe_vertices = graphar::VerticesCollection::Make(graph_info, label);
ASSERT(maybe_vertices.status().ok());
auto vertices = maybe_vertices.value();
int num_vertices = vertices->size();
std::cout << "num_vertices: " << num_vertices << std::endl;

// get the "person_knows_person" edges of graph
std::string src_label = "person", edge_label = "knows", dst_label = "person";
auto maybe_edges = GAR_NAMESPACE::EdgesCollection::Make(
auto maybe_edges = graphar::EdgesCollection::Make(
graph_info, src_label, edge_label, dst_label,
GAR_NAMESPACE::AdjListType::unordered_by_source);
graphar::AdjListType::unordered_by_source);
ASSERT(!maybe_edges.has_error());
auto& edges = maybe_edges.value();

// run bfs algorithm
GAR_NAMESPACE::IdType root = 0;
graphar::IdType root = 0;
std::vector<int32_t> distance(num_vertices);
std::vector<GAR_NAMESPACE::IdType> pre(num_vertices);
for (GAR_NAMESPACE::IdType i = 0; i < num_vertices; i++) {
std::vector<graphar::IdType> pre(num_vertices);
for (graphar::IdType i = 0; i < num_vertices; i++) {
distance[i] = (i == root ? 0 : -1);
pre[i] = (i == root ? root : -1);
}
auto it_begin = edges->begin(), it_end = edges->end();
for (int iter = 0;; iter++) {
GAR_NAMESPACE::IdType count = 0;
graphar::IdType count = 0;
for (auto it = it_begin; it != it_end; ++it) {
auto src = it.source(), dst = it.destination();
if (distance[src] == iter && distance[dst] == -1) {
Expand All @@ -80,11 +79,11 @@ int main(int argc, char* argv[]) {
// Append the bfs result to the vertex info as a property group
// and write to file
// construct property group
GAR_NAMESPACE::Property bfs("bfs", GAR_NAMESPACE::int32(), false);
GAR_NAMESPACE::Property father("father", GAR_NAMESPACE::int64(), false);
std::vector<GAR_NAMESPACE::Property> property_vector = {bfs, father};
auto group = GAR_NAMESPACE::CreatePropertyGroup(property_vector,
GAR_NAMESPACE::FileType::CSV);
graphar::Property bfs("bfs", graphar::int32(), false);
graphar::Property father("father", graphar::int64(), false);
std::vector<graphar::Property> property_vector = {bfs, father};
auto group =
graphar::CreatePropertyGroup(property_vector, graphar::FileType::CSV);

// extend the vertex_info
auto vertex_info = graph_info->GetVertexInfo(label);
Expand All @@ -97,15 +96,14 @@ int main(int argc, char* argv[]) {
ASSERT(extend_info->Dump().status().ok());
ASSERT(extend_info->Save("/tmp/person-new-bfs-father.vertex.yml").ok());
// construct vertex property writer
GAR_NAMESPACE::VertexPropertyWriter writer(extend_info, "file:///tmp/");
graphar::VertexPropertyWriter writer(extend_info, "file:///tmp/");
// convert results to arrow::Table
std::vector<std::shared_ptr<arrow::Array>> arrays;
std::vector<std::shared_ptr<arrow::Field>> schema_vector;
schema_vector.push_back(arrow::field(
bfs.name, GAR_NAMESPACE::DataType::DataTypeToArrowDataType(bfs.type)));
bfs.name, graphar::DataType::DataTypeToArrowDataType(bfs.type)));
schema_vector.push_back(arrow::field(
father.name,
GAR_NAMESPACE::DataType::DataTypeToArrowDataType(father.type)));
father.name, graphar::DataType::DataTypeToArrowDataType(father.type)));
arrow::Int32Builder array_builder1;
ASSERT(array_builder1.Reserve(num_vertices).ok());
ASSERT(array_builder1.AppendValues(distance).ok());
Expand Down Expand Up @@ -137,24 +135,23 @@ int main(int argc, char* argv[]) {
dst_label = "person";
int edge_chunk_size = 1024, src_chunk_size = 100, dst_chunk_size = 100;
bool directed = true;
auto version = GAR_NAMESPACE::InfoVersion::Parse("gar/v1").value();
auto al = GAR_NAMESPACE::CreateAdjacentList(
GAR_NAMESPACE::AdjListType::ordered_by_source,
GAR_NAMESPACE::FileType::CSV);
auto new_edge_info = GAR_NAMESPACE::CreateEdgeInfo(
auto version = graphar::InfoVersion::Parse("gar/v1").value();
auto al = graphar::CreateAdjacentList(graphar::AdjListType::ordered_by_source,
graphar::FileType::CSV);
auto new_edge_info = graphar::CreateEdgeInfo(
src_label, edge_label, dst_label, edge_chunk_size, src_chunk_size,
dst_chunk_size, directed, {al}, {}, "", version);
ASSERT(new_edge_info->IsValidated());
// save & dump
ASSERT(!new_edge_info->Dump().has_error());
ASSERT(new_edge_info->Save("/tmp/person_bfs_person.edge.yml").ok());
GAR_NAMESPACE::builder::EdgesBuilder edges_builder(
new_edge_info, "file:///tmp/",
GAR_NAMESPACE::AdjListType::ordered_by_source, num_vertices);
graphar::builder::EdgesBuilder edges_builder(
new_edge_info, "file:///tmp/", graphar::AdjListType::ordered_by_source,
num_vertices);
for (int i = 0; i < num_vertices; i++) {
if (i == root || pre[i] == -1)
continue;
GAR_NAMESPACE::builder::Edge e(pre[i], i);
graphar::builder::Edge e(pre[i], i);
ASSERT(edges_builder.AddEdge(e).ok());
}
ASSERT(edges_builder.Dump().ok());
Expand Down
31 changes: 15 additions & 16 deletions cpp/examples/bfs_pull_example.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,43 +28,42 @@ int main(int argc, char* argv[]) {
// read file and construct graph info
std::string path =
TEST_DATA_DIR + "/ldbc_sample/parquet/ldbc_sample.graph.yml";
auto graph_info = GAR_NAMESPACE::GraphInfo::Load(path).value();
auto graph_info = graphar::GraphInfo::Load(path).value();

// construct vertices collection
std::string label = "person";
ASSERT(graph_info->GetVertexInfo(label) != nullptr);
auto maybe_vertices =
GAR_NAMESPACE::VerticesCollection::Make(graph_info, label);
auto maybe_vertices = graphar::VerticesCollection::Make(graph_info, label);
ASSERT(maybe_vertices.status().ok());
auto& vertices = maybe_vertices.value();
int num_vertices = vertices->size();
std::cout << "num_vertices: " << num_vertices << std::endl;

// construct edges collection
std::string src_label = "person", edge_label = "knows", dst_label = "person";
auto maybe_edges = GAR_NAMESPACE::EdgesCollection::Make(
auto maybe_edges = graphar::EdgesCollection::Make(
graph_info, src_label, edge_label, dst_label,
GAR_NAMESPACE::AdjListType::ordered_by_dest);
graphar::AdjListType::ordered_by_dest);
ASSERT(!maybe_edges.has_error());
auto& edges = maybe_edges.value();

// run bfs algorithm
GAR_NAMESPACE::IdType root = 0;
graphar::IdType root = 0;
std::vector<int32_t> distance(num_vertices);
for (GAR_NAMESPACE::IdType i = 0; i < num_vertices; i++)
for (graphar::IdType i = 0; i < num_vertices; i++)
distance[i] = (i == root ? 0 : -1);
auto it_begin = edges->begin(), it_end = edges->end();
auto it = it_begin;
for (int iter = 0;; iter++) {
GAR_NAMESPACE::IdType count = 0;
graphar::IdType count = 0;
it.to_begin();
for (GAR_NAMESPACE::IdType vid = 0; vid < num_vertices; vid++) {
for (graphar::IdType vid = 0; vid < num_vertices; vid++) {
if (distance[vid] == -1) {
if (!it.first_dst(it, vid))
continue;
// if (!it.first_dst(it_begin, vid)) continue;
do {
GAR_NAMESPACE::IdType src = it.source(), dst = it.destination();
graphar::IdType src = it.source(), dst = it.destination();
if (distance[src] == iter) {
distance[dst] = distance[src] + 1;
count++;
Expand All @@ -83,10 +82,10 @@ int main(int argc, char* argv[]) {

// extend the original vertex info and write results to gar using writer
// construct property group
GAR_NAMESPACE::Property bfs("bfs-pull", GAR_NAMESPACE::int32(), false);
std::vector<GAR_NAMESPACE::Property> property_vector = {bfs};
auto group = GAR_NAMESPACE::CreatePropertyGroup(
property_vector, GAR_NAMESPACE::FileType::PARQUET);
graphar::Property bfs("bfs-pull", graphar::int32(), false);
std::vector<graphar::Property> property_vector = {bfs};
auto group =
graphar::CreatePropertyGroup(property_vector, graphar::FileType::PARQUET);
// extend the vertex_info
auto vertex_info = graph_info->GetVertexInfo(label);
auto maybe_extend_info = vertex_info->AddPropertyGroup(group);
Expand All @@ -97,12 +96,12 @@ int main(int argc, char* argv[]) {
ASSERT(extend_info->Dump().status().ok());
ASSERT(extend_info->Save("/tmp/person-new-bfs-pull.vertex.yml").ok());
// construct vertex property writer
GAR_NAMESPACE::VertexPropertyWriter writer(extend_info, "/tmp/");
graphar::VertexPropertyWriter writer(extend_info, "/tmp/");
// convert results to arrow::Table
std::vector<std::shared_ptr<arrow::Array>> arrays;
std::vector<std::shared_ptr<arrow::Field>> schema_vector;
schema_vector.push_back(arrow::field(
bfs.name, GAR_NAMESPACE::DataType::DataTypeToArrowDataType(bfs.type)));
bfs.name, graphar::DataType::DataTypeToArrowDataType(bfs.type)));
arrow::Int32Builder array_builder;
ASSERT(array_builder.Reserve(num_vertices).ok());
ASSERT(array_builder.AppendValues(distance).ok());
Expand Down
31 changes: 15 additions & 16 deletions cpp/examples/bfs_push_example.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,43 +28,42 @@ int main(int argc, char* argv[]) {
// read file and construct graph info
std::string path =
TEST_DATA_DIR + "/ldbc_sample/parquet/ldbc_sample.graph.yml";
auto graph_info = GAR_NAMESPACE::GraphInfo::Load(path).value();
auto graph_info = graphar::GraphInfo::Load(path).value();

// construct vertices collection
std::string label = "person";
ASSERT(graph_info->GetVertexInfo(label) != nullptr);
auto maybe_vertices =
GAR_NAMESPACE::VerticesCollection::Make(graph_info, label);
auto maybe_vertices = graphar::VerticesCollection::Make(graph_info, label);
ASSERT(maybe_vertices.status().ok());
auto& vertices = maybe_vertices.value();
int num_vertices = vertices->size();
std::cout << "num_vertices: " << num_vertices << std::endl;

// construct edges collection
std::string src_label = "person", edge_label = "knows", dst_label = "person";
auto maybe_edges = GAR_NAMESPACE::EdgesCollection::Make(
auto maybe_edges = graphar::EdgesCollection::Make(
graph_info, src_label, edge_label, dst_label,
GAR_NAMESPACE::AdjListType::ordered_by_source);
graphar::AdjListType::ordered_by_source);
ASSERT(!maybe_edges.has_error());
auto& edges = maybe_edges.value();

// run bfs algorithm
GAR_NAMESPACE::IdType root = 0;
graphar::IdType root = 0;
std::vector<int32_t> distance(num_vertices);
for (GAR_NAMESPACE::IdType i = 0; i < num_vertices; i++)
for (graphar::IdType i = 0; i < num_vertices; i++)
distance[i] = (i == root ? 0 : -1);
auto it_begin = edges->begin(), it_end = edges->end();
auto it = it_begin;
for (int iter = 0;; iter++) {
GAR_NAMESPACE::IdType count = 0;
graphar::IdType count = 0;
it.to_begin();
for (GAR_NAMESPACE::IdType vid = 0; vid < num_vertices; vid++) {
for (graphar::IdType vid = 0; vid < num_vertices; vid++) {
if (distance[vid] == iter) {
if (!it.first_src(it, vid))
continue;
// if (!it.first_src(it_begin, vid)) continue;
do {
GAR_NAMESPACE::IdType src = it.source(), dst = it.destination();
graphar::IdType src = it.source(), dst = it.destination();
if (distance[dst] == -1) {
distance[dst] = distance[src] + 1;
count++;
Expand All @@ -82,10 +81,10 @@ int main(int argc, char* argv[]) {

// extend the original vertex info and write results to gar using writer
// construct property group
GAR_NAMESPACE::Property bfs("bfs-push", GAR_NAMESPACE::int32(), false);
std::vector<GAR_NAMESPACE::Property> property_vector = {bfs};
auto group = GAR_NAMESPACE::CreatePropertyGroup(
property_vector, GAR_NAMESPACE::FileType::PARQUET);
graphar::Property bfs("bfs-push", graphar::int32(), false);
std::vector<graphar::Property> property_vector = {bfs};
auto group =
graphar::CreatePropertyGroup(property_vector, graphar::FileType::PARQUET);
// extend the vertex_info
auto vertex_info = graph_info->GetVertexInfo(label);
auto maybe_extend_info = vertex_info->AddPropertyGroup(group);
Expand All @@ -96,12 +95,12 @@ int main(int argc, char* argv[]) {
ASSERT(extend_info->Dump().status().ok());
ASSERT(extend_info->Save("/tmp/person-new-bfs-push.vertex.yml").ok());
// construct vertex property writer
GAR_NAMESPACE::VertexPropertyWriter writer(extend_info, "/tmp/");
graphar::VertexPropertyWriter writer(extend_info, "/tmp/");
// convert results to arrow::Table
std::vector<std::shared_ptr<arrow::Array>> arrays;
std::vector<std::shared_ptr<arrow::Field>> schema_vector;
schema_vector.push_back(arrow::field(
bfs.name, GAR_NAMESPACE::DataType::DataTypeToArrowDataType(bfs.type)));
bfs.name, graphar::DataType::DataTypeToArrowDataType(bfs.type)));
arrow::Int32Builder array_builder;
ASSERT(array_builder.Reserve(num_vertices).ok());
ASSERT(array_builder.AppendValues(distance).ok());
Expand Down
Loading

0 comments on commit 6e77ed7

Please sign in to comment.