From f722f4549c1aba14155a31b0cbaa90d76f6f45eb Mon Sep 17 00:00:00 2001 From: Arthur Peters Date: Fri, 18 Dec 2020 12:30:21 -0600 Subject: [PATCH 1/5] Wrap jaccard for C++ and Python --- libgalois/CMakeLists.txt | 1 + libgalois/include/galois/analytics/bfs/bfs.h | 2 +- .../galois/analytics/jaccard/jaccard.h | 70 +++++++ .../include/galois/analytics/sssp/sssp.h | 2 +- libgalois/src/analytics/jaccard/jaccard.cpp | 178 ++++++++++++++++++ lonestar/analytics/cpu/jaccard/jaccard.cpp | 92 +++------ python/galois/CMakeLists.txt | 2 +- python/galois/analytics/CMakeLists.txt | 10 +- python/galois/analytics/__init__.py | 1 + python/galois/analytics/_wrappers.pyx | 129 ++++++++++--- python/galois/analytics/plan.pxd | 11 ++ python/galois/analytics/plan.pyx | 15 ++ tests/test_cpp_algos.py | 40 +++- 13 files changed, 451 insertions(+), 102 deletions(-) create mode 100644 libgalois/include/galois/analytics/jaccard/jaccard.h create mode 100644 libgalois/src/analytics/jaccard/jaccard.cpp create mode 100644 python/galois/analytics/plan.pxd create mode 100644 python/galois/analytics/plan.pyx diff --git a/libgalois/CMakeLists.txt b/libgalois/CMakeLists.txt index 14eed1ee32..13536fdcca 100644 --- a/libgalois/CMakeLists.txt +++ b/libgalois/CMakeLists.txt @@ -47,6 +47,7 @@ set(sources src/analytics/bfs/bfs.cpp src/analytics/sssp/sssp.cpp src/analytics/connected_components/connected_components.cpp + src/analytics/jaccard/jaccard.cpp ) if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") diff --git a/libgalois/include/galois/analytics/bfs/bfs.h b/libgalois/include/galois/analytics/bfs/bfs.h index fe6df654da..f941479c45 100644 --- a/libgalois/include/galois/analytics/bfs/bfs.h +++ b/libgalois/include/galois/analytics/bfs/bfs.h @@ -10,7 +10,7 @@ namespace galois::analytics { /// A computational plan to for BFS, specifying the algorithm and any parameters /// associated with it. 
-class BfsPlan : Plan { +class BfsPlan : public Plan { public: enum Algorithm { kAsyncTile = 0, kAsync, kSyncTile, kSync }; diff --git a/libgalois/include/galois/analytics/jaccard/jaccard.h b/libgalois/include/galois/analytics/jaccard/jaccard.h new file mode 100644 index 0000000000..d2d1ad4878 --- /dev/null +++ b/libgalois/include/galois/analytics/jaccard/jaccard.h @@ -0,0 +1,70 @@ +#ifndef GALOIS_LIBGALOIS_GALOIS_ANALYTICS_JACCARD_JACCARD_H_ +#define GALOIS_LIBGALOIS_GALOIS_ANALYTICS_JACCARD_JACCARD_H_ + +#include "galois/Properties.h" +#include "galois/analytics/Plan.h" +#include "galois/graphs/PropertyFileGraph.h" +#include "galois/graphs/PropertyGraph.h" + +namespace galois::analytics { + +/// A computational plan to for Jaccard, specifying the algorithm and any parameters +/// associated with it. +class JaccardPlan : public Plan { +public: + enum EdgeSorting { + /// The edges may be sorted, but may not. + /// Jaccard may optimistically use a sorted algorithm and fail over to an + /// unsorted one if unsorted edges are detected. + kUnknown, + /// The edges are known to be sorted by destination. + /// Use faster sorted intersection algorithm. + kSorted, + /// The edges are known to be unsorted. + /// Use slower hash-table intersection algorithm. + kUnsorted, + }; + +private: + EdgeSorting edge_sorting_; + + JaccardPlan(Architecture architecture, EdgeSorting edge_sorting) + : Plan(architecture), edge_sorting_(edge_sorting) {} + +public: + JaccardPlan() : JaccardPlan(kCPU, kUnknown) {} + JaccardPlan& operator=(const JaccardPlan&) = default; + + EdgeSorting edge_sorting() const { return edge_sorting_; } + + /// The graph's edge lists are not sorted; use an algorithm that handles that. + static JaccardPlan Unsorted() { return {kCPU, kUnsorted}; } + + /// The graph's edge lists are sorted; optimize based on this. + static JaccardPlan Sorted() { return {kCPU, kSorted}; } + + /// Automatically choose an algorithm. 
+ /// May either use the unsorted algorithm, or use an algorithm that attempts + /// the sorted algorithm, but checks for out of order edges. + static JaccardPlan Automatic() { return {}; } +}; + +/// The tag for the output property of Jaccard in PropertyGraphs. +using JaccardSimilarity = galois::PODProperty; + +// TODO: Do we need to support float output? (For large graphs that want to use +// less memory, maybe) + +/// Compute the Jaccard similarity between each node and compare_node. The +/// result is stored in a property named by output_property_name. The plan +/// controls the assumptions made about edge list ordering. +/// The property named output_property_name is created by this function and may +/// not exist before the call. +GALOIS_EXPORT Result Jaccard( + graphs::PropertyFileGraph* pfg, size_t compare_node, + const std::string& output_property_name, + JaccardPlan plan = JaccardPlan::Automatic()); + +} // namespace galois::analytics + +#endif diff --git a/libgalois/include/galois/analytics/sssp/sssp.h b/libgalois/include/galois/analytics/sssp/sssp.h index fc5455fbf6..36dd6badef 100644 --- a/libgalois/include/galois/analytics/sssp/sssp.h +++ b/libgalois/include/galois/analytics/sssp/sssp.h @@ -13,7 +13,7 @@ namespace galois::analytics { /// A computational plan to for SSSP, specifying the algorithm and any /// parameters associated with it. -class SsspPlan : Plan { +class SsspPlan : public Plan { public: /// Algorithm selectors for Single-Source Shortest Path enum Algorithm { diff --git a/libgalois/src/analytics/jaccard/jaccard.cpp b/libgalois/src/analytics/jaccard/jaccard.cpp new file mode 100644 index 0000000000..d2f3db6110 --- /dev/null +++ b/libgalois/src/analytics/jaccard/jaccard.cpp @@ -0,0 +1,178 @@ +/* + * This file belongs to the Galois project, a C++ library for exploiting + * parallelism. The code is being released under the terms of the 3-Clause BSD + * License (a copy is located in LICENSE.txt at the top-level directory). 
+ * + * Copyright (C) 2019, The University of Texas at Austin. All rights reserved. + * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS + * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF + * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF + * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH + * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances + * shall University be liable for incidental, special, indirect, direct or + * consequential damages or loss of profits, interruption of business, or + * related expenses which may arise from use of Software or Documentation, + * including but not limited to those resulting from defects in Software and/or + * Documentation, or loss or inaccuracy of data of any kind. + */ + +#include "galois/analytics/jaccard/jaccard.h" + +#include +#include +#include +#include + +#include "galois/analytics/Utils.h" + +using namespace galois::analytics; + +using NodeData = std::tuple; +using EdgeData = std::tuple<>; + +typedef galois::graphs::PropertyGraph Graph; +typedef typename Graph::Node GNode; + +namespace { + +struct IntersectWithSortedEdgeList { +private: + const GNode base_; + const Graph& graph_; + +public: + IntersectWithSortedEdgeList(const Graph& graph, GNode base) + : base_(base), graph_(graph) {} + + uint32_t operator()(GNode n2) { + uint32_t intersection_size = 0; + // Iterate over the edges of both n2 and base in sync, based on the + // assumption that edges lists are sorted. 
+ auto edges_n2_iter = graph_.edge_begin(n2); + auto edges_n2_end = graph_.edge_end(n2); + auto edges_base_iter = graph_.edge_begin(base_); + auto edges_base_end = graph_.edge_end(base_); + while (edges_n2_iter != edges_n2_end && edges_base_iter != edges_base_end) { + auto edge_n2_dst = graph_.GetEdgeDest(*edges_n2_iter); + auto edge_base_dst = graph_.GetEdgeDest(*edges_base_iter); + if (edge_n2_dst == edge_base_dst) { + intersection_size++; + edges_n2_iter++; + edges_base_iter++; + } else if (edge_n2_dst > edge_base_dst) { + edges_base_iter++; + } else if (edge_n2_dst < edge_base_dst) { + edges_n2_iter++; + } + } + return intersection_size; + } +}; + +struct IntersectWithUnsortedEdgeList { +private: + std::unordered_set base_neighbors; + const Graph& graph_; + +public: + IntersectWithUnsortedEdgeList(const Graph& graph, GNode base) + : graph_(graph) { + // Collect all the neighbors of the base node into a hash set. + for (const auto& e : graph.edges(base)) { + auto dest = graph.GetEdgeDest(e); + base_neighbors.emplace(*dest); + } + } + + uint32_t operator()(GNode n2) { + uint32_t intersection_size = 0; + for (const auto& e : graph_.edges(n2)) { + auto neighbor = graph_.GetEdgeDest(e); + if (base_neighbors.count(*neighbor) > 0) + intersection_size++; + } + return intersection_size; + } +}; + +template +galois::Result +JaccardImpl( + galois::graphs::PropertyGraph, std::tuple<>>& + graph, + size_t compare_node, JaccardPlan /*plan*/) { + if (compare_node >= graph.size()) { + return galois::ErrorCode::InvalidArgument; + } + + auto it = graph.begin(); + std::advance(it, compare_node); + Graph::Node base = *it; + + uint32_t base_size = graph.edge_end(base) - graph.edge_begin(base); + + IntersectAlgorithm intersect_with_base{graph, base}; + + // Compute the similarity for each node + galois::do_all( + galois::iterate(graph), + [&](const GNode& n2) { + double& n2_data = graph.GetData(n2); + uint32_t n2_size = graph.edge_end(n2) - graph.edge_begin(n2); + // Count the number 
of neighbors of n2 and the number that are shared + // with base + uint32_t intersection_size = intersect_with_base(n2); + // Compute the similarity + uint32_t union_size = base_size + n2_size - intersection_size; + double similarity = + union_size > 0 ? (double)intersection_size / union_size : 1; + // Store the similarity back into the graph. + n2_data = similarity; + }, + galois::loopname("Jaccard")); + + return galois::ResultSuccess(); +} + +} // namespace + +galois::Result +galois::analytics::Jaccard( + graphs::PropertyFileGraph* pfg, size_t compare_node, + const std::string& output_property_name, JaccardPlan plan) { + if (auto result = + ConstructNodeProperties(pfg, {output_property_name}); + !result) { + return result.error(); + } + + auto pg_result = Graph::Make(pfg, {output_property_name}, {}); + if (!pg_result) { + return pg_result.error(); + } + + galois::Result r = galois::ResultSuccess(); + switch (plan.edge_sorting()) { + case JaccardPlan::kUnknown: + // TODO: It would be possible to start with the sorted case and then + // fail to the unsorted case if unsorted nodes are detected. + case JaccardPlan::kUnsorted: + r = JaccardImpl( + pg_result.value(), compare_node, plan); + break; + case JaccardPlan::kSorted: + r = JaccardImpl( + pg_result.value(), compare_node, plan); + break; + } + + if (!r) { + // Undo property creation. 
+ if (auto r1 = pfg->RemoveNodeProperty(output_property_name); !r1) { + GALOIS_LOG_WARN("{}", r1.error()); + } + } + + return r; +} diff --git a/lonestar/analytics/cpu/jaccard/jaccard.cpp b/lonestar/analytics/cpu/jaccard/jaccard.cpp index 34c772ce86..df3f56defc 100644 --- a/lonestar/analytics/cpu/jaccard/jaccard.cpp +++ b/lonestar/analytics/cpu/jaccard/jaccard.cpp @@ -19,9 +19,10 @@ #include #include -#include #include +#include + #include "Lonestar/BoilerPlate.h" namespace cll = llvm::cl; @@ -44,7 +45,7 @@ static cll::opt report_node( cll::desc("Node to report the similarity of (default value 1)"), cll::init(1)); -struct NodeValue : public galois::PODProperty {}; +using NodeValue = galois::PODProperty; using NodeData = std::tuple; using EdgeData = std::tuple<>; @@ -52,42 +53,6 @@ using EdgeData = std::tuple<>; typedef galois::graphs::PropertyGraph Graph; typedef typename Graph::Node GNode; -void -algo(Graph* graph, const GNode& base) { - std::unordered_set base_neighbors; - - // Collect all the neighbors of the base node into a hash set. - for (const auto& e : graph->edges(base)) { - auto dest = graph->GetEdgeDest(e); - base_neighbors.emplace(*dest); - } - - // Compute the similarity for each node - galois::do_all( - galois::iterate(*graph), - [&](const GNode& n2) { - double& n2_data = graph->GetData(n2); - uint32_t n2_size = 0, intersection_size = 0; - // TODO: Using a sorted edge list would allow a much faster intersection - // operation. Use that here. - // Count the number of neighbors of n2 and the number that are shared - // with base - for (const auto& e : graph->edges(n2)) { - auto neighbor = graph->GetEdgeDest(e); - if (base_neighbors.count(*neighbor) > 0) - intersection_size++; - n2_size++; - } - uint32_t union_size = - base_neighbors.size() + n2_size - intersection_size; - double similarity = - union_size > 0 ? (double)intersection_size / union_size : 1; - // Store the similarity back into the graph. 
- n2_data = similarity; - }, - galois::steal(), galois::loopname("jaccard")); -} - int main(int argc, char** argv) { std::unique_ptr G = @@ -99,48 +64,45 @@ main(int argc, char** argv) { std::cout << "Reading from file: " << inputFile << "\n"; std::unique_ptr pfg = MakeFileGraph(inputFile, edge_property_name); + std::string output_property_name = "jaccard_output_property"; - auto result = ConstructNodeProperties(pfg.get()); - if (!result) { - GALOIS_LOG_FATAL("failed to construct node properties: {}", result.error()); - } + std::cout << "Read " << pfg->topology().num_nodes() << " nodes, " + << pfg->topology().num_edges() << " edges\n"; - auto pg_result = - galois::graphs::PropertyGraph::Make(pfg.get()); - if (!pg_result) { - GALOIS_LOG_FATAL("could not make property graph: {}", pg_result.error()); - } - Graph graph = pg_result.value(); - - std::cout << "Read " << graph.num_nodes() << " nodes, " << graph.num_edges() - << " edges\n"; - - if (base_node >= graph.size() || report_node >= graph.size()) { + if (base_node >= pfg->topology().num_nodes() || + report_node >= pfg->topology().num_nodes()) { std::cerr << "failed to set report: " << report_node << " or failed to set base: " << base_node << "\n"; abort(); } - auto it = graph.begin(); - std::advance(it, base_node.getValue()); - GNode base = *it; - it = graph.begin(); - std::advance(it, report_node.getValue()); - GNode report = *it; - galois::reportPageAlloc("MeminfoPre"); galois::StatTimer execTime("Timer_0"); execTime.start(); - algo(&graph, base); + if (auto r = galois::analytics::Jaccard( + pfg.get(), base_node, output_property_name, + galois::analytics::JaccardPlan::Automatic()); + !r) { + GALOIS_LOG_FATAL( + "Jaccard failed: {} {}", r.error().category().name(), + r.error().message()); + } execTime.stop(); galois::reportPageAlloc("MeminfoPost"); + auto pg_result = galois::graphs::PropertyGraph::Make( + pfg.get(), {output_property_name}, {}); + if (!pg_result) { + GALOIS_LOG_FATAL("could not make property graph: 
{}", pg_result.error()); + } + Graph graph = pg_result.value(); + std::cout << "Node " << report_node << " has similarity " - << graph.GetData(report) << "\n"; + << graph.GetData(report_node) << "\n"; // Sanity checking code galois::GReduceMax max_similarity; @@ -152,7 +114,7 @@ main(int argc, char** argv) { galois::iterate(graph), [&](const GNode& i) { double similarity = graph.GetData(i); - if ((unsigned int)i != (unsigned int)base) { + if ((unsigned int)i != (unsigned int)base_node) { max_similarity.update(similarity); min_similarity.update(similarity); } @@ -162,12 +124,12 @@ main(int argc, char** argv) { galois::gInfo( "Maximum similarity (excluding base) is ", max_similarity.reduce()); galois::gInfo("Minimum similarity is ", min_similarity.reduce()); - galois::gInfo("Base similarity is ", graph.GetData(base)); + galois::gInfo("Base similarity is ", graph.GetData(base_node)); // TODO: Verify? if (!skipVerify) { - if (graph.GetData(base) == 1.0) { + if (graph.GetData(base_node) == 1.0) { std::cout << "Verification successful.\n"; } else { GALOIS_LOG_FATAL( diff --git a/python/galois/CMakeLists.txt b/python/galois/CMakeLists.txt index b1a9307fd6..bc12d64437 100644 --- a/python/galois/CMakeLists.txt +++ b/python/galois/CMakeLists.txt @@ -87,10 +87,10 @@ python_extension_module(timer) target_link_libraries(timer Galois::shmem) # Symlink all python source file into binary directory so that documentation generation can load python modules correctly. 
+file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/analytics) file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lonestar/analytics) file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/numba_support) file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/util) -file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/analytics) file(GLOB_RECURSE PY_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.py) foreach(X IN LISTS PY_FILES) file(CREATE_LINK ${CMAKE_CURRENT_SOURCE_DIR}/${X} ${CMAKE_CURRENT_BINARY_DIR}/${X} SYMBOLIC) diff --git a/python/galois/analytics/CMakeLists.txt b/python/galois/analytics/CMakeLists.txt index 2acc1fd3b1..9df5859f83 100644 --- a/python/galois/analytics/CMakeLists.txt +++ b/python/galois/analytics/CMakeLists.txt @@ -1,12 +1,18 @@ # Avoid collisions with existing application targets with the same name. # The expected public names (without _) are provided using pure python modules. + +add_cython_target(plan plan.pyx CXX OUTPUT_VAR PLAN_SOURCES) +add_library(plan MODULE ${PLAN_SOURCES}) +python_extension_module(plan) +target_link_libraries(plan Galois::shmem) + add_cython_target(_wrappers _wrappers.pyx CXX OUTPUT_VAR WRAPPERS_SOURCES) add_library(_wrappers MODULE ${WRAPPERS_SOURCES}) python_extension_module(_wrappers) -add_dependencies(_wrappers graphs) +add_dependencies(_wrappers plan) target_link_libraries(_wrappers Galois::shmem) install( - TARGETS _wrappers + TARGETS _wrappers plan LIBRARY DESTINATION python/galois/analytics ) diff --git a/python/galois/analytics/__init__.py b/python/galois/analytics/__init__.py index 3d6ae2c1f1..b2e7c40214 100644 --- a/python/galois/analytics/__init__.py +++ b/python/galois/analytics/__init__.py @@ -1,2 +1,3 @@ from galois.analytics._wrappers import bfs, bfs_assert_valid, BfsPlan, BfsStatistics from galois.analytics._wrappers import sssp, sssp_assert_valid, SsspPlan, SsspStatistics +from galois.analytics._wrappers import jaccard, JaccardPlan diff --git a/python/galois/analytics/_wrappers.pyx b/python/galois/analytics/_wrappers.pyx 
index 00942de1f7..5c2d315978 100644 --- a/python/galois/analytics/_wrappers.pyx +++ b/python/galois/analytics/_wrappers.pyx @@ -5,23 +5,20 @@ from libcpp.string cimport string from libcpp cimport bool from galois.cpp.libgalois.graphs.Graph cimport PropertyFileGraph from galois.property_graph cimport PropertyGraph +from galois.analytics.plan cimport _Plan, Plan from libc.stdint cimport uint64_t, uint32_t from enum import Enum -cdef extern from "galois/Analytics.h" namespace "galois::analytics" nogil: - enum Architecture: - kCPU - kGPU - kDistributed - - cppclass Plan: - Architecture architecture() const +cdef inline default_value(v, d): + if v is None: + return d + return v # BFS cdef extern from "galois/Analytics.h" namespace "galois::analytics" nogil: - cppclass _BfsPlan "galois::analytics::BfsPlan": + cppclass _BfsPlan "galois::analytics::BfsPlan" (_Plan): enum Algorithm: kAsyncTile "galois::analytics::BfsPlan::kAsyncTile" kAsync "galois::analytics::BfsPlan::kAsync" @@ -79,25 +76,28 @@ class _BfsAlgorithm(Enum): Sync = _BfsPlan.Algorithm.kSync -cdef class BfsPlan: +cdef class BfsPlan(Plan): cdef: - _BfsPlan underlying + _BfsPlan underlying_ + + cdef _Plan* underlying(self) except NULL: + return &self.underlying_ @staticmethod cdef BfsPlan make(_BfsPlan u): f = BfsPlan.__new__(BfsPlan) - f.underlying = u + f.underlying_ = u return f Algorithm = _BfsAlgorithm @property def algorithm(self) -> _BfsAlgorithm: - return _BfsAlgorithm(self.underlying.algorithm()) + return _BfsAlgorithm(self.underlying_.algorithm()) @property def edge_tile_size(self) -> int: - return self.underlying.edge_tile_size() + return self.underlying_.edge_tile_size() @staticmethod def async_tile(edge_tile_size=None): @@ -129,7 +129,7 @@ def bfs(PropertyGraph pg, size_t start_node, str output_property_name, BfsPlan p output_property_name_bytes = bytes(output_property_name, "utf-8") output_property_name_cstr = output_property_name_bytes with nogil: - handle_result_void(Bfs(pg.underlying.get(), 
start_node, output_property_name_cstr, plan.underlying)) + handle_result_void(Bfs(pg.underlying.get(), start_node, output_property_name_cstr, plan.underlying_)) def bfs_assert_valid(PropertyGraph pg, str property_name): output_property_name_bytes = bytes(property_name, "utf-8") @@ -181,7 +181,7 @@ cdef class BfsStatistics: # SSSP cdef extern from "galois/Analytics.h" namespace "galois::analytics" nogil: - cppclass _SsspPlan "galois::analytics::SsspPlan": + cppclass _SsspPlan "galois::analytics::SsspPlan" (_Plan): enum Algorithm: kDeltaTile "galois::analytics::SsspPlan::kDeltaTile" kDeltaStep "galois::analytics::SsspPlan::kDeltaStep" @@ -275,44 +275,42 @@ class _SsspAlgorithm(Enum): Automatic = _SsspPlan.Algorithm.kAutomatic -cdef default_value(v, d): - if v is None: - return d - return v - -cdef class SsspPlan: +cdef class SsspPlan(Plan): cdef: - _SsspPlan underlying + _SsspPlan underlying_ + + cdef _Plan* underlying(self) except NULL: + return &self.underlying_ @staticmethod cdef SsspPlan make(_SsspPlan u): f = SsspPlan.__new__(SsspPlan) - f.underlying = u + f.underlying_ = u return f def __init__(self, graph = None): if graph is None: - self.underlying = _SsspPlan() + self.underlying_ = _SsspPlan() else: if not isinstance(graph, PropertyGraph): raise TypeError(graph) - self.underlying = _SsspPlan((graph).underlying.get()) + self.underlying_ = _SsspPlan((graph).underlying.get()) Algorithm = _SsspAlgorithm @property def algorithm(self) -> _SsspAlgorithm: - return _BfsAlgorithm(self.underlying.algorithm()) + return _SsspAlgorithm(self.underlying_.algorithm()) @property def delta(self) -> int: - return self.underlying.delta() + return self.underlying_.delta() @property def edge_tile_size(self) -> int: - return self.underlying.edge_tile_size() + return self.underlying_.edge_tile_size() @staticmethod def delta_tile(delta=None, edge_tile_size=None): @@ -381,7 +379,7 @@ def sssp(PropertyGraph pg, size_t start_node, str edge_weight_property_name, str
output_property_name_cstr = output_property_name_bytes with nogil: handle_result_void(Sssp(pg.underlying.get(), start_node, edge_weight_property_name_cstr, - output_property_name_cstr, plan.underlying)) + output_property_name_cstr, plan.underlying_)) def sssp_assert_valid(PropertyGraph pg, size_t start_node, str edge_weight_property_name, str output_property_name): edge_weight_property_name_bytes = bytes(edge_weight_property_name, "utf-8") @@ -429,4 +427,75 @@ cdef class SsspStatistics: return str(ss.str(), "ascii") +# Jaccard + + +cdef extern from "galois/analytics/jaccard/jaccard.h" namespace "galois::analytics" nogil: + cppclass _JaccardPlan "galois::analytics::JaccardPlan" (_Plan): + enum EdgeSorting: + kSorted "galois::analytics::JaccardPlan::kSorted" + kUnsorted "galois::analytics::JaccardPlan::kUnsorted" + kUnknown "galois::analytics::JaccardPlan::kUnknown" + + _JaccardPlan.EdgeSorting edge_sorting() const + + @staticmethod + _JaccardPlan Sorted() + + @staticmethod + _JaccardPlan Unsorted() + + @staticmethod + _JaccardPlan Automatic() + + + std_result[void] Jaccard(PropertyFileGraph* pfg, size_t compare_node, + string output_property_name, _JaccardPlan plan) + + +class _JaccardEdgeSorting(Enum): + Sorted = _JaccardPlan.EdgeSorting.kSorted + Unsorted = _JaccardPlan.EdgeSorting.kUnsorted + kUnknown = _JaccardPlan.EdgeSorting.kUnknown + + +cdef class JaccardPlan(Plan): + cdef: + _JaccardPlan underlying_ + + cdef _Plan* underlying(self) except NULL: + return &self.underlying_ + + EdgeSorting = _JaccardEdgeSorting + + @staticmethod + cdef JaccardPlan make(_JaccardPlan u): + f = JaccardPlan.__new__(JaccardPlan) + f.underlying_ = u + return f + + @property + def edge_sorting(self) -> _JaccardEdgeSorting: + return _JaccardEdgeSorting(self.underlying_.edge_sorting()) + + @staticmethod + def sorted(): + return JaccardPlan.make(_JaccardPlan.Sorted()) + + @staticmethod + def unsorted(): + return JaccardPlan.make(_JaccardPlan.Unsorted()) + + @staticmethod + def 
automatic(): + return JaccardPlan.make(_JaccardPlan.Automatic()) + + +def jaccard(PropertyGraph pg, size_t compare_node, str output_property_name, + JaccardPlan plan = JaccardPlan.automatic()): + output_property_name_bytes = bytes(output_property_name, "utf-8") + output_property_name_cstr = output_property_name_bytes + with nogil: + handle_result_void(Jaccard(pg.underlying.get(), compare_node, output_property_name_cstr, plan.underlying_)) + # TODO(amp): Wrap ConnectedComponents diff --git a/python/galois/analytics/plan.pxd b/python/galois/analytics/plan.pxd new file mode 100644 index 0000000000..c5e2fc3b5d --- /dev/null +++ b/python/galois/analytics/plan.pxd @@ -0,0 +1,11 @@ +cdef extern from "galois/analytics/Plan.h" namespace "galois::analytics" nogil: + enum _Architecture "galois::analytics::Architecture": + kCPU + kGPU + kDistributed + + cppclass _Plan "galois::analytics::Plan": + _Architecture architecture() const + +cdef class Plan: + cdef _Plan* underlying(self) except NULL diff --git a/python/galois/analytics/plan.pyx b/python/galois/analytics/plan.pyx new file mode 100644 index 0000000000..20986413a1 --- /dev/null +++ b/python/galois/analytics/plan.pyx @@ -0,0 +1,15 @@ +from enum import Enum + + +class Architecture(Enum): + CPU = _Architecture.kCPU + GPU = _Architecture.kGPU + Distributed = _Architecture.kDistributed + + +cdef class Plan: + cdef _Plan* underlying(self) except NULL: + raise NotImplementedError() + + def architecture(self) -> Architecture: + return Architecture(self.underlying().architecture()) diff --git a/tests/test_cpp_algos.py b/tests/test_cpp_algos.py index ce2cbed1ba..57da98febd 100644 --- a/tests/test_cpp_algos.py +++ b/tests/test_cpp_algos.py @@ -1,9 +1,13 @@ -from pytest import raises +import pytest +from pytest import raises, approx -from galois.analytics import bfs, bfs_assert_valid, BfsStatistics, sssp, sssp_assert_valid, SsspStatistics +from galois.analytics import bfs, bfs_assert_valid, BfsStatistics, sssp, sssp_assert_valid, 
SsspStatistics, jaccard, JaccardPlan from galois.property_graph import PropertyGraph from pyarrow import Schema, table +import numpy as np + +from galois.property_graph import PropertyGraph from galois.lonestar.analytics.bfs import verify_bfs from galois.lonestar.analytics.sssp import verify_sssp @@ -72,4 +76,36 @@ def test_sssp(property_graph: PropertyGraph): verify_sssp(property_graph, start_node, new_property_id) +def test_jaccard(property_graph: PropertyGraph): + property_name = "NewProp" + compare_node = 0 + + jaccard(property_graph, compare_node, property_name) + + node_schema: Schema = property_graph.node_schema() + num_node_properties = len(node_schema) + new_property_id = num_node_properties - 1 + assert node_schema.names[new_property_id] == property_name + + similarities: np.ndarray = property_graph.get_node_property(property_name).to_numpy() + assert similarities[compare_node] == 1 + assert similarities[1917] == approx(0.28571428) + assert similarities[2812] == approx(0.01428571) + + +@pytest.mark.skip("Not supported yet") +def test_jaccard_sorted(property_graph: PropertyGraph): + sort_all_edges_by_dest(property_graph) + + property_name = "NewProp" + compare_node = 0 + + jaccard(property_graph, compare_node, property_name, JaccardPlan.sorted()) + + similarities: np.ndarray = property_graph.get_node_property(property_name).to_numpy() + assert similarities[compare_node] == 1 + assert similarities[1917] == approx(0.28571428) + assert similarities[2812] == approx(0.01428571) + + # TODO: Add more tests. From 5068111a64cc33fe2372424866aa0451dc36332c Mon Sep 17 00:00:00 2001 From: Arthur Peters Date: Mon, 21 Dec 2020 16:39:09 -0600 Subject: [PATCH 2/5] Make SortAllEdgesByDest return an arrow Array. 
Also, more consistently use PropertyFileGraph* instead of & --- .../include/galois/graphs/PropertyFileGraph.h | 4 +- .../include/galois/graphs/PropertyGraph.h | 2 +- libgalois/src/PropertyFileGraph.cpp | 39 ++++++++++++++----- 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/libgalois/include/galois/graphs/PropertyFileGraph.h b/libgalois/include/galois/graphs/PropertyFileGraph.h index 5e9090d20c..736ef5aaef 100644 --- a/libgalois/include/galois/graphs/PropertyFileGraph.h +++ b/libgalois/include/galois/graphs/PropertyFileGraph.h @@ -362,7 +362,7 @@ class GALOIS_EXPORT PropertyFileGraph { /// ascending order. /// This also returns the permutation vector (mapping from old /// indices to the new indices) which results due to the sorting. -GALOIS_EXPORT Result> SortAllEdgesByDest( +GALOIS_EXPORT Result> SortAllEdgesByDest( PropertyFileGraph* pfg); /// FindEdgeSortedByDest finds the "node_to_find" id in the @@ -371,7 +371,7 @@ GALOIS_EXPORT Result> SortAllEdgesByDest( /// This returns the matched edge index if 'node_to_find' is present /// in the edgelist of 'node' else edge end if 'node_to_find' is not found. 
GALOIS_EXPORT uint64_t FindEdgeSortedByDest( - const PropertyFileGraph& graph, uint32_t node, uint32_t node_to_find); + const PropertyFileGraph* graph, uint32_t node, uint32_t node_to_find); /// SortNodesByDegree relables node ids by sorting in the descending /// order by node degree diff --git a/libgalois/include/galois/graphs/PropertyGraph.h b/libgalois/include/galois/graphs/PropertyGraph.h index 4b17a3e478..f57b9a6229 100644 --- a/libgalois/include/galois/graphs/PropertyGraph.h +++ b/libgalois/include/galois/graphs/PropertyGraph.h @@ -195,7 +195,7 @@ FindEdgeSortedByDest( const GraphTy& graph, typename GraphTy::Node node, typename GraphTy::Node node_to_find) { auto edge_matched = galois::graphs::FindEdgeSortedByDest( - graph.GetPropertyFileGraph(), node, node_to_find); + &graph.GetPropertyFileGraph(), node, node_to_find); return typename GraphTy::edge_iterator(edge_matched); } diff --git a/libgalois/src/PropertyFileGraph.cpp b/libgalois/src/PropertyFileGraph.cpp index bff2060e58..5489380967 100644 --- a/libgalois/src/PropertyFileGraph.cpp +++ b/libgalois/src/PropertyFileGraph.cpp @@ -315,7 +315,7 @@ galois::graphs::PropertyFileGraph::SetTopology( return galois::ResultSuccess(); } -galois::Result> +galois::Result> galois::graphs::SortAllEdgesByDest(galois::graphs::PropertyFileGraph* pfg) { auto view_result_dests = galois::ConstructPropertyView( @@ -326,8 +326,19 @@ galois::graphs::SortAllEdgesByDest(galois::graphs::PropertyFileGraph* pfg) { auto out_dests_view = std::move(view_result_dests.value()); - std::vector permutation_vec(pfg->topology().num_edges()); - std::iota(permutation_vec.begin(), permutation_vec.end(), uint64_t{0}); + arrow::UInt64Builder permutation_vec_builder; + if (auto r = permutation_vec_builder.Resize(pfg->topology().num_edges()); + !r.ok()) { + return ErrorCode::ArrowError; + } + // Getting a mutable reference to an index is definitely allowed. It's + // less clear if taking a pointer to it and using offsets is officially + // supported. 
But, ArrayBuilder::Advance explicitly mentions memcpy into the + // internal buffer. So I think it actually is. + uint64_t* permutation_vec_data = &permutation_vec_builder[0]; + std::iota( + permutation_vec_data, + permutation_vec_data + permutation_vec_builder.capacity(), uint64_t{0}); auto comparator = [&](uint64_t a, uint64_t b) { return out_dests_view[a] < out_dests_view[b]; }; @@ -337,24 +348,34 @@ galois::graphs::SortAllEdgesByDest(galois::graphs::PropertyFileGraph* pfg) { [&](uint64_t n) { auto edge_range = pfg->topology().edge_range(n); std::sort( - permutation_vec.begin() + edge_range.first, - permutation_vec.begin() + edge_range.second, comparator); + permutation_vec_data + edge_range.first, + permutation_vec_data + edge_range.second, comparator); std::sort( &out_dests_view[0] + edge_range.first, &out_dests_view[0] + edge_range.second); }, galois::steal()); - return permutation_vec; + if (auto r = permutation_vec_builder.Advance(pfg->topology().num_edges()); + !r.ok()) { + return ErrorCode::ArrowError; + } + + std::shared_ptr out; + if (permutation_vec_builder.Finish(&out).ok()) { + return out; + } else { + return ErrorCode::ArrowError; + } } uint64_t galois::graphs::FindEdgeSortedByDest( - const galois::graphs::PropertyFileGraph& graph, uint32_t node, + const galois::graphs::PropertyFileGraph* graph, uint32_t node, uint32_t node_to_find) { auto view_result_dests = galois::ConstructPropertyView( - graph.topology().out_dests.get()); + graph->topology().out_dests.get()); if (!view_result_dests) { GALOIS_LOG_FATAL( "Unable to construct property view on topology destinations : {}", @@ -363,7 +384,7 @@ galois::graphs::FindEdgeSortedByDest( auto out_dests_view = std::move(view_result_dests.value()); - auto edge_range = graph.topology().edge_range(node); + auto edge_range = graph->topology().edge_range(node); using edge_iterator = boost::counting_iterator; auto edge_matched = std::lower_bound( edge_iterator(edge_range.first), edge_iterator(edge_range.second), 
From 93cb4e6215f8d33b71b48ea21bb389c6d3b61b90 Mon Sep 17 00:00:00 2001 From: Arthur Peters Date: Mon, 21 Dec 2020 19:28:36 -0600 Subject: [PATCH 3/5] Add Python wrappers for utility algorithms in PropertyFileGraph.h. --- python/galois/analytics/__init__.py | 1 + python/galois/analytics/_wrappers.pyx | 48 +++++++++++++++++++++++--- python/galois/property_graph.pyx.jinja | 5 +-- tests/test_cpp_algos.py | 42 ++++++++++++++++++++-- 4 files changed, 88 insertions(+), 8 deletions(-) diff --git a/python/galois/analytics/__init__.py b/python/galois/analytics/__init__.py index b2e7c40214..f7bdd2da73 100644 --- a/python/galois/analytics/__init__.py +++ b/python/galois/analytics/__init__.py @@ -1,3 +1,4 @@ from galois.analytics._wrappers import bfs, bfs_assert_valid, BfsPlan, BfsStatistics from galois.analytics._wrappers import sssp, sssp_assert_valid, SsspPlan, SsspStatistics from galois.analytics._wrappers import jaccard, JaccardPlan +from galois.analytics._wrappers import sort_all_edges_by_dest, find_edge_sorted_by_dest, sort_nodes_by_degree diff --git a/python/galois/analytics/_wrappers.pyx b/python/galois/analytics/_wrappers.pyx index 5c2d315978..3cb558a1a5 100644 --- a/python/galois/analytics/_wrappers.pyx +++ b/python/galois/analytics/_wrappers.pyx @@ -1,12 +1,15 @@ -from galois.cpp.libstd.boost cimport std_result, handle_result_void, handle_result_assert, raise_error_code -from galois.cpp.libstd.iostream cimport ostream, ostringstream from libc.stddef cimport ptrdiff_t +from libc.stdint cimport uint64_t, uint32_t from libcpp.string cimport string -from libcpp cimport bool +from libcpp.memory cimport shared_ptr, static_pointer_cast + +from pyarrow.lib cimport CArray, CUInt64Array, pyarrow_wrap_array + +from galois.cpp.libstd.boost cimport std_result, handle_result_void, handle_result_assert, raise_error_code +from galois.cpp.libstd.iostream cimport ostream, ostringstream from galois.cpp.libgalois.graphs.Graph cimport PropertyFileGraph from galois.property_graph cimport 
PropertyGraph from galois.analytics.plan cimport _Plan, Plan -from libc.stdint cimport uint64_t, uint32_t from enum import Enum @@ -15,6 +18,43 @@ cdef inline default_value(v, d): return d return v +cdef shared_ptr[CUInt64Array] handle_result_shared_cuint64array(std_result[shared_ptr[CUInt64Array]] res) \ + nogil except *: + if not res.has_value(): + with gil: + raise_error_code(res.error()) + return res.value() + + +# "Algorithms" from PropertyFileGraph + +cdef extern from "galois/graphs/PropertyFileGraph.h" namespace "galois::graphs" nogil: + std_result[shared_ptr[CUInt64Array]] SortAllEdgesByDest(PropertyFileGraph* pfg); + + uint64_t FindEdgeSortedByDest(const PropertyFileGraph* graph, uint32_t node, uint32_t node_to_find); + + std_result[void] SortNodesByDegree(PropertyFileGraph* pfg); + + +def sort_all_edges_by_dest(PropertyGraph pg): + with nogil: + res = handle_result_shared_cuint64array(SortAllEdgesByDest(pg.underlying.get())) + return pyarrow_wrap_array(static_pointer_cast[CArray, CUInt64Array](res)) + + +def find_edge_sorted_by_dest(PropertyGraph pg, uint32_t node, uint32_t node_to_find): + with nogil: + res = FindEdgeSortedByDest(pg.underlying.get(), node, node_to_find) + if res == pg.edges(node)[-1] + 1: + return None + return res + + +def sort_nodes_by_degree(PropertyGraph pg): + with nogil: + handle_result_void(SortNodesByDegree(pg.underlying.get())) + + # BFS cdef extern from "galois/Analytics.h" namespace "galois::analytics" nogil: diff --git a/python/galois/property_graph.pyx.jinja b/python/galois/property_graph.pyx.jinja index 87b67559ad..9da0bfb224 100644 --- a/python/galois/property_graph.pyx.jinja +++ b/python/galois/property_graph.pyx.jinja @@ -17,9 +17,10 @@ cdef _convert_string_list(l): return [bytes(s, "utf-8") for s in l or []] -cdef shared_ptr[PropertyFileGraph] handle_result_value(std_result[unique_ptr[PropertyFileGraph]] res) except *: +cdef shared_ptr[PropertyFileGraph] handle_result_value(std_result[unique_ptr[PropertyFileGraph]] res) 
nogil except *: if not res.has_value(): - raise_error_code(res.error()) + with gil: + raise_error_code(res.error()) return to_shared(res.value()) # diff --git a/tests/test_cpp_algos.py b/tests/test_cpp_algos.py index 57da98febd..2a8941fcbf 100644 --- a/tests/test_cpp_algos.py +++ b/tests/test_cpp_algos.py @@ -1,13 +1,24 @@ import pytest from pytest import raises, approx -from galois.analytics import bfs, bfs_assert_valid, BfsStatistics, sssp, sssp_assert_valid, SsspStatistics, jaccard, JaccardPlan -from galois.property_graph import PropertyGraph from pyarrow import Schema, table import numpy as np from galois.property_graph import PropertyGraph +from galois.analytics import ( + bfs, + bfs_assert_valid, + BfsStatistics, + sssp, + sssp_assert_valid, + SsspStatistics, + jaccard, + JaccardPlan, + sort_all_edges_by_dest, + find_edge_sorted_by_dest, + sort_nodes_by_degree, +) from galois.lonestar.analytics.bfs import verify_bfs from galois.lonestar.analytics.sssp import verify_sssp @@ -28,6 +39,33 @@ def test_assert_valid(property_graph: PropertyGraph): bfs_assert_valid(property_graph, "Prop2") +def test_sort_all_edges_by_dest(property_graph: PropertyGraph): + nodes_to_check = 10 + original_dests = [[property_graph.get_edge_dst(e) for e in property_graph.edges(n)] for n in range(nodes_to_check)] + print(original_dests[0]) + mapping = sort_all_edges_by_dest(property_graph) + new_dests = [[property_graph.get_edge_dst(e) for e in property_graph.edges(n)] for n in range(nodes_to_check)] + for n in range(nodes_to_check): + assert len(original_dests[n]) == len(new_dests[n]) + my_mapping = [mapping[e].as_py() for e in property_graph.edges(n)] + for i in range(len(my_mapping)): + assert original_dests[n][i] == new_dests[n][my_mapping[i] - property_graph.edges(n)[0]] + original_dests[n].sort() + assert original_dests[n] == new_dests[n] + + +def test_find_edge_sorted_by_dest(property_graph: PropertyGraph): + sort_all_edges_by_dest(property_graph) + assert 
find_edge_sorted_by_dest(property_graph, 0, 1000) is None + assert find_edge_sorted_by_dest(property_graph, 0, 1967) == 2 + + +def test_sort_nodes_by_degree(property_graph: PropertyGraph): + sort_nodes_by_degree(property_graph) + assert len(property_graph.edges(0)) == 108 + # TODO: More detailed check. + + def test_bfs(property_graph: PropertyGraph): property_name = "NewProp" start_node = 0 From ff61671cfd8644e7fbac4e67c2f9d1b1def67c27 Mon Sep 17 00:00:00 2001 From: Arthur Peters Date: Fri, 8 Jan 2021 11:44:37 -0600 Subject: [PATCH 4/5] Add Jaccard utility routines. --- .../galois/analytics/jaccard/jaccard.h | 39 ++++++--- libgalois/src/analytics/jaccard/jaccard.cpp | 79 +++++++++++++++++-- lonestar/analytics/cpu/jaccard/CMakeLists.txt | 2 +- lonestar/analytics/cpu/jaccard/jaccard.cpp | 36 +++------ python/galois/analytics/__init__.py | 2 +- python/galois/analytics/_wrappers.pyx | 68 +++++++++++++--- python/galois/analytics/plan.pxd | 1 + tests/test_cpp_algos.py | 31 ++++++-- 8 files changed, 197 insertions(+), 61 deletions(-) diff --git a/libgalois/include/galois/analytics/jaccard/jaccard.h b/libgalois/include/galois/analytics/jaccard/jaccard.h index d2d1ad4878..29ad155951 100644 --- a/libgalois/include/galois/analytics/jaccard/jaccard.h +++ b/libgalois/include/galois/analytics/jaccard/jaccard.h @@ -1,6 +1,8 @@ #ifndef GALOIS_LIBGALOIS_GALOIS_ANALYTICS_JACCARD_JACCARD_H_ #define GALOIS_LIBGALOIS_GALOIS_ANALYTICS_JACCARD_JACCARD_H_ +#include + #include "galois/Properties.h" #include "galois/analytics/Plan.h" #include "galois/graphs/PropertyFileGraph.h" @@ -32,7 +34,11 @@ class JaccardPlan : public Plan { : Plan(architecture), edge_sorting_(edge_sorting) {} public: + /// Automatically choose an algorithm. + /// May either use the unsorted algorithm, or use an algorithm that attempts + /// the sorted algorithm, but checks for out of order edges. 
JaccardPlan() : JaccardPlan(kCPU, kUnknown) {} + JaccardPlan& operator=(const JaccardPlan&) = default; EdgeSorting edge_sorting() const { return edge_sorting_; } @@ -42,28 +48,39 @@ class JaccardPlan : public Plan { /// The graph's edge lists are sorted; optimize based on this. static JaccardPlan Sorted() { return {kCPU, kSorted}; } - - /// Automatically choose an algorithm. - /// May either use the unsorted algorithm, or use an algorithm that attempts - /// the sorted algorithm, but checks for out of order edges. - static JaccardPlan Automatic() { return {}; } }; /// The tag for the output property of Jaccard in PropertyGraphs. using JaccardSimilarity = galois::PODProperty; -// TODO: Do we need to support float output? (For large graphs that want to use -// less memory, maybe) - /// Compute the Jaccard similarity between each node and compare_node. The /// result is stored in a property named by output_property_name. The plan /// controls the assumptions made about edge list ordering. /// The property named output_property_name is created by this function and may /// not exist before the call. GALOIS_EXPORT Result Jaccard( - graphs::PropertyFileGraph* pfg, size_t compare_node, - const std::string& output_property_name, - JaccardPlan plan = JaccardPlan::Automatic()); + graphs::PropertyFileGraph* pfg, uint32_t compare_node, + const std::string& output_property_name, JaccardPlan plan = {}); + +GALOIS_EXPORT Result JaccardAssertValid( + graphs::PropertyFileGraph* pfg, uint32_t compare_node, + const std::string& property_name); + +struct GALOIS_EXPORT JaccardStatistics { + /// The maximum similarity excluding the comparison node. + double max_similarity; + /// The minimum similarity + double min_similarity; + /// The average similarity excluding the comparison node. + double average_similarity; + + /// Print the statistics in a human readable form. 
+ void Print(std::ostream& os = std::cout); + + static galois::Result Compute( + galois::graphs::PropertyFileGraph* pfg, uint32_t compare_node, + const std::string& property_name); +}; } // namespace galois::analytics diff --git a/libgalois/src/analytics/jaccard/jaccard.cpp b/libgalois/src/analytics/jaccard/jaccard.cpp index d2f3db6110..0ad99d4536 100644 --- a/libgalois/src/analytics/jaccard/jaccard.cpp +++ b/libgalois/src/analytics/jaccard/jaccard.cpp @@ -139,7 +139,7 @@ JaccardImpl( galois::Result galois::analytics::Jaccard( - graphs::PropertyFileGraph* pfg, size_t compare_node, + graphs::PropertyFileGraph* pfg, uint32_t compare_node, const std::string& output_property_name, JaccardPlan plan) { if (auto result = ConstructNodeProperties(pfg, {output_property_name}); @@ -155,7 +155,7 @@ galois::analytics::Jaccard( galois::Result r = galois::ResultSuccess(); switch (plan.edge_sorting()) { case JaccardPlan::kUnknown: - // TODO: It would be possible to start with the sorted case and then + // TODO(amp): It would be possible to start with the sorted case and then // fail to the unsorted case if unsorted nodes are detected. case JaccardPlan::kUnsorted: r = JaccardImpl( @@ -167,12 +167,77 @@ galois::analytics::Jaccard( break; } - if (!r) { - // Undo property creation. 
- if (auto r1 = pfg->RemoveNodeProperty(output_property_name); !r1) { - GALOIS_LOG_WARN("{}", r1.error()); + return r; +} + +constexpr static const double EPSILON = 1e-6; + +galois::Result +galois::analytics::JaccardAssertValid( + galois::graphs::PropertyFileGraph* pfg, uint32_t compare_node, + const std::string& property_name) { + auto pg_result = galois::graphs::PropertyGraph::Make( + pfg, {property_name}, {}); + if (!pg_result) { + return pg_result.error(); + } + Graph graph = pg_result.value(); + + if (abs(graph.GetData(compare_node) - 1.0) > EPSILON) { + return galois::ErrorCode::AssertionFailed; + } + + auto is_bad = [&graph](const GNode& n) { + auto& similarity = graph.template GetData(n); + if (similarity > 1 || similarity < 0) { + return true; } + return false; + }; + + if (galois::ParallelSTL::find_if(graph.begin(), graph.end(), is_bad) != + graph.end()) { + return galois::ErrorCode::AssertionFailed; } - return r; + return galois::ResultSuccess(); +} + +galois::Result +galois::analytics::JaccardStatistics::Compute( + galois::graphs::PropertyFileGraph* pfg, uint32_t compare_node, + const std::string& property_name) { + auto pg_result = galois::graphs::PropertyGraph::Make( + pfg, {property_name}, {}); + if (!pg_result) { + return pg_result.error(); + } + Graph graph = pg_result.value(); + + galois::GReduceMax max_similarity; + galois::GReduceMin min_similarity; + galois::GAccumulator total_similarity; + + galois::do_all( + galois::iterate(graph), + [&](const GNode& i) { + double similarity = graph.GetData(i); + if ((unsigned int)i != (unsigned int)compare_node) { + max_similarity.update(similarity); + min_similarity.update(similarity); + total_similarity += similarity; + } + }, + galois::loopname("Jaccard Statistics"), galois::no_stats()); + + return JaccardStatistics{ + max_similarity.reduce(), min_similarity.reduce(), + total_similarity.reduce() / (graph.size() - 1)}; +} + +void +galois::analytics::JaccardStatistics::Print(std::ostream& os) { + os << 
"Maximum similarity = " << max_similarity << std::endl; + os << "Minimum similarity = " << min_similarity << std::endl; + os << "Average similarity = " << average_similarity << std::endl; } diff --git a/lonestar/analytics/cpu/jaccard/CMakeLists.txt b/lonestar/analytics/cpu/jaccard/CMakeLists.txt index 66f99aa7c5..b66b9b3334 100644 --- a/lonestar/analytics/cpu/jaccard/CMakeLists.txt +++ b/lonestar/analytics/cpu/jaccard/CMakeLists.txt @@ -3,4 +3,4 @@ add_dependencies(apps jaccard-cpu) target_link_libraries(jaccard-cpu PRIVATE Galois::shmem lonestar) install(TARGETS jaccard-cpu DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT apps EXCLUDE_FROM_ALL) # add_test_scale(small1 jaccard-cpu "${BASEINPUT}/reference/structured/rome99.gr") -add_test_scale(small2 jaccard-cpu INPUT rmat15 INPUT_URI "${BASEINPUT}/propertygraphs/rmat15_cleaned_symmetric" --noverify NO_VERIFY) +add_test_scale(small2 jaccard-cpu INPUT rmat15 INPUT_URI "${BASEINPUT}/propertygraphs/rmat15_cleaned_symmetric" NO_VERIFY) diff --git a/lonestar/analytics/cpu/jaccard/jaccard.cpp b/lonestar/analytics/cpu/jaccard/jaccard.cpp index df3f56defc..7e09daab2b 100644 --- a/lonestar/analytics/cpu/jaccard/jaccard.cpp +++ b/lonestar/analytics/cpu/jaccard/jaccard.cpp @@ -83,7 +83,7 @@ main(int argc, char** argv) { if (auto r = galois::analytics::Jaccard( pfg.get(), base_node, output_property_name, - galois::analytics::JaccardPlan::Automatic()); + galois::analytics::JaccardPlan()); !r) { GALOIS_LOG_FATAL( "Jaccard failed: {} {}", r.error().category().name(), @@ -104,32 +104,18 @@ main(int argc, char** argv) { std::cout << "Node " << report_node << " has similarity " << graph.GetData(report_node) << "\n"; - // Sanity checking code - galois::GReduceMax max_similarity; - galois::GReduceMin min_similarity; - max_similarity.reset(); - min_similarity.reset(); - - galois::do_all( - galois::iterate(graph), - [&](const GNode& i) { - double similarity = graph.GetData(i); - if ((unsigned int)i != (unsigned int)base_node) { - 
max_similarity.update(similarity); - min_similarity.update(similarity); - } - }, - galois::loopname("Sanity check"), galois::no_stats()); - - galois::gInfo( - "Maximum similarity (excluding base) is ", max_similarity.reduce()); - galois::gInfo("Minimum similarity is ", min_similarity.reduce()); - galois::gInfo("Base similarity is ", graph.GetData(base_node)); - - // TODO: Verify? + auto stats_result = galois::analytics::JaccardStatistics::Compute( + pfg.get(), base_node, output_property_name); + if (!stats_result) { + GALOIS_LOG_FATAL( + "could not make compute statistics: {}", stats_result.error()); + } + + stats_result.value().Print(); if (!skipVerify) { - if (graph.GetData(base_node) == 1.0) { + if (galois::analytics::JaccardAssertValid( + pfg.get(), base_node, output_property_name)) { std::cout << "Verification successful.\n"; } else { GALOIS_LOG_FATAL( diff --git a/python/galois/analytics/__init__.py b/python/galois/analytics/__init__.py index f7bdd2da73..b978adcba7 100644 --- a/python/galois/analytics/__init__.py +++ b/python/galois/analytics/__init__.py @@ -1,4 +1,4 @@ from galois.analytics._wrappers import bfs, bfs_assert_valid, BfsPlan, BfsStatistics from galois.analytics._wrappers import sssp, sssp_assert_valid, SsspPlan, SsspStatistics -from galois.analytics._wrappers import jaccard, JaccardPlan +from galois.analytics._wrappers import jaccard, jaccard_assert_valid, JaccardPlan, JaccardStatistics from galois.analytics._wrappers import sort_all_edges_by_dest, find_edge_sorted_by_dest, sort_nodes_by_degree diff --git a/python/galois/analytics/_wrappers.pyx b/python/galois/analytics/_wrappers.pyx index 3cb558a1a5..befb7844ec 100644 --- a/python/galois/analytics/_wrappers.pyx +++ b/python/galois/analytics/_wrappers.pyx @@ -479,19 +479,30 @@ cdef extern from "galois/analytics/jaccard/jaccard.h" namespace "galois::analyti _JaccardPlan.EdgeSorting edge_sorting() const + _JaccardPlan() + @staticmethod _JaccardPlan Sorted() @staticmethod _JaccardPlan Unsorted() - 
@staticmethod - _JaccardPlan Automatic() - - std_result[void] Jaccard(PropertyFileGraph* pfg, size_t compare_node, string output_property_name, _JaccardPlan plan) + std_result[void] JaccardAssertValid(PropertyFileGraph* pfg, size_t compare_node, + string output_property_name) + + cppclass _JaccardStatistics "galois::analytics::JaccardStatistics": + double max_similarity + double min_similarity + double average_similarity + void Print(ostream) + + @staticmethod + std_result[_JaccardStatistics] Compute(PropertyFileGraph* pfg, size_t compare_node, + string output_property_name) + class _JaccardEdgeSorting(Enum): Sorted = _JaccardPlan.EdgeSorting.kSorted @@ -526,16 +537,55 @@ cdef class JaccardPlan(Plan): def unsorted(): return JaccardPlan.make(_JaccardPlan.Unsorted()) - @staticmethod - def automatic(): - return JaccardPlan.make(_JaccardPlan.Automatic()) - def jaccard(PropertyGraph pg, size_t compare_node, str output_property_name, - JaccardPlan plan = JaccardPlan.automatic()): + JaccardPlan plan = JaccardPlan()): output_property_name_bytes = bytes(output_property_name, "utf-8") output_property_name_cstr = output_property_name_bytes with nogil: handle_result_void(Jaccard(pg.underlying.get(), compare_node, output_property_name_cstr, plan.underlying_)) + +def jaccard_assert_valid(PropertyGraph pg, size_t compare_node, str output_property_name): + output_property_name_bytes = bytes(output_property_name, "utf-8") + output_property_name_cstr = output_property_name_bytes + with nogil: + handle_result_assert(JaccardAssertValid(pg.underlying.get(), compare_node, output_property_name_cstr)) + + +cdef _JaccardStatistics handle_result_JaccardStatistics(std_result[_JaccardStatistics] res) nogil except *: + if not res.has_value(): + with gil: + raise_error_code(res.error()) + return res.value() + + +cdef class JaccardStatistics: + cdef _JaccardStatistics underlying + + def __init__(self, PropertyGraph pg, size_t compare_node, str output_property_name): + output_property_name_bytes 
= bytes(output_property_name, "utf-8") + output_property_name_cstr = output_property_name_bytes + with nogil: + self.underlying = handle_result_JaccardStatistics(_JaccardStatistics.Compute( + pg.underlying.get(), compare_node, output_property_name_cstr)) + + @property + def max_similarity(self): + return self.underlying.max_similarity + + @property + def min_similarity(self): + return self.underlying.min_similarity + + @property + def average_similarity(self): + return self.underlying.average_similarity + + def __str__(self) -> str: + cdef ostringstream ss + self.underlying.Print(ss) + return str(ss.str(), "ascii") + + # TODO(amp): Wrap ConnectedComponents diff --git a/python/galois/analytics/plan.pxd b/python/galois/analytics/plan.pxd index c5e2fc3b5d..0368c92766 100644 --- a/python/galois/analytics/plan.pxd +++ b/python/galois/analytics/plan.pxd @@ -7,5 +7,6 @@ cdef extern from "galois/analytics/Plan.h" namespace "galois::analytics" nogil: cppclass _Plan "galois::analytics::Plan": _Architecture architecture() const + cdef class Plan: cdef _Plan* underlying(self) except NULL diff --git a/tests/test_cpp_algos.py b/tests/test_cpp_algos.py index 2a8941fcbf..8e4487c4c9 100644 --- a/tests/test_cpp_algos.py +++ b/tests/test_cpp_algos.py @@ -1,4 +1,3 @@ -import pytest from pytest import raises, approx from pyarrow import Schema, table @@ -18,11 +17,16 @@ sort_all_edges_by_dest, find_edge_sorted_by_dest, sort_nodes_by_degree, + jaccard_assert_valid, + JaccardStatistics, ) from galois.lonestar.analytics.bfs import verify_bfs from galois.lonestar.analytics.sssp import verify_sssp +NODES_TO_SAMPLE = 10 + + def test_assert_valid(property_graph: PropertyGraph): with raises(AssertionError): bfs_assert_valid(property_graph, "workFrom") @@ -40,17 +44,17 @@ def test_assert_valid(property_graph: PropertyGraph): def test_sort_all_edges_by_dest(property_graph: PropertyGraph): - nodes_to_check = 10 - original_dests = [[property_graph.get_edge_dst(e) for e in property_graph.edges(n)] 
for n in range(nodes_to_check)] + original_dests = [[property_graph.get_edge_dst(e) for e in property_graph.edges(n)] for n in range(NODES_TO_SAMPLE)] print(original_dests[0]) mapping = sort_all_edges_by_dest(property_graph) - new_dests = [[property_graph.get_edge_dst(e) for e in property_graph.edges(n)] for n in range(nodes_to_check)] - for n in range(nodes_to_check): + new_dests = [[property_graph.get_edge_dst(e) for e in property_graph.edges(n)] for n in range(NODES_TO_SAMPLE)] + for n in range(NODES_TO_SAMPLE): assert len(original_dests[n]) == len(new_dests[n]) my_mapping = [mapping[e].as_py() for e in property_graph.edges(n)] for i in range(len(my_mapping)): assert original_dests[n][i] == new_dests[n][my_mapping[i] - property_graph.edges(n)[0]] original_dests[n].sort() + assert original_dests[n] == new_dests[n] @@ -63,7 +67,11 @@ def test_find_edge_sorted_by_dest(property_graph: PropertyGraph): def test_sort_nodes_by_degree(property_graph: PropertyGraph): sort_nodes_by_degree(property_graph) assert len(property_graph.edges(0)) == 108 - # TODO: More detailed check. 
+ last_node_n_edges = 108 + for n in range(1, NODES_TO_SAMPLE): + v = len(property_graph.edges(n)) + assert v <= last_node_n_edges + last_node_n_edges = v def test_bfs(property_graph: PropertyGraph): @@ -125,13 +133,20 @@ def test_jaccard(property_graph: PropertyGraph): new_property_id = num_node_properties - 1 assert node_schema.names[new_property_id] == property_name + jaccard_assert_valid(property_graph, compare_node, property_name) + + stats = JaccardStatistics(property_graph, compare_node, property_name) + + assert stats.max_similarity == approx(1) + assert stats.min_similarity == approx(0) + assert stats.average_similarity == approx(0.000637853) + similarities: np.ndarray = property_graph.get_node_property(property_name).to_numpy() assert similarities[compare_node] == 1 assert similarities[1917] == approx(0.28571428) assert similarities[2812] == approx(0.01428571) -@pytest.mark.skip("Not supported yet") def test_jaccard_sorted(property_graph: PropertyGraph): sort_all_edges_by_dest(property_graph) @@ -140,6 +155,8 @@ def test_jaccard_sorted(property_graph: PropertyGraph): jaccard(property_graph, compare_node, property_name, JaccardPlan.sorted()) + jaccard_assert_valid(property_graph, compare_node, property_name) + similarities: np.ndarray = property_graph.get_node_property(property_name).to_numpy() assert similarities[compare_node] == 1 assert similarities[1917] == approx(0.28571428) From 0ad61a0ab562f9674100b09928b70034708450ae Mon Sep 17 00:00:00 2001 From: Arthur Peters Date: Fri, 8 Jan 2021 12:47:10 -0600 Subject: [PATCH 5/5] Clean up files and file names. 
--- .../ConnectedComponents.cpp | 1465 ----------------- lonestar/analytics/cpu/jaccard/CMakeLists.txt | 2 +- .../jaccard/{jaccard.cpp => jaccard_cli.cpp} | 0 3 files changed, 1 insertion(+), 1466 deletions(-) delete mode 100644 lonestar/analytics/cpu/connected-components/ConnectedComponents.cpp rename lonestar/analytics/cpu/jaccard/{jaccard.cpp => jaccard_cli.cpp} (100%) diff --git a/lonestar/analytics/cpu/connected-components/ConnectedComponents.cpp b/lonestar/analytics/cpu/connected-components/ConnectedComponents.cpp deleted file mode 100644 index a1bd604a6b..0000000000 --- a/lonestar/analytics/cpu/connected-components/ConnectedComponents.cpp +++ /dev/null @@ -1,1465 +0,0 @@ -/* - * This file belongs to the Galois project, a C++ library for exploiting - * parallelism. The code is being released under the terms of the 3-Clause BSD - * License (a copy is located in LICENSE.txt at the top-level directory). - * - * Copyright (C) 2018, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. 
- */ - -#include -#include -#include -#include -#include -#include - -#include "Lonestar/BoilerPlate.h" -#include "galois/AtomicHelpers.h" -#include "galois/Bag.h" -#include "galois/Galois.h" -#include "galois/ParallelSTL.h" -#include "galois/Reduction.h" -#include "galois/Timer.h" -#include "galois/UnionFind.h" -#include "galois/graphs/LCGraph.h" -#include "galois/graphs/OCGraph.h" -#include "galois/graphs/TypeTraits.h" -#include "galois/runtime/Profile.h" -#include "llvm/Support/CommandLine.h" - -const char* name = "Connected Components"; -const char* desc = "Computes the connected components of a graph"; - -namespace cll = llvm::cl; - -enum Algo { - serial, - labelProp, - synchronous, - async, - edgeasync, - blockedasync, - edgetiledasync, - afforest, - edgeafforest, - edgetiledafforest, -}; - -static cll::opt inputFile( - cll::Positional, cll::desc(""), cll::Required); -static cll::opt algo( - "algo", cll::desc("Choose an algorithm:"), - cll::values( - clEnumValN(Algo::async, "Async", "Asynchronous"), - clEnumValN(Algo::edgeasync, "EdgeAsync", "Edge-Asynchronous"), - clEnumValN( - Algo::edgetiledasync, "EdgetiledAsync", - "EdgeTiled-Asynchronous (default)"), - clEnumValN(Algo::blockedasync, "BlockedAsync", "Blocked asynchronous"), - clEnumValN( - Algo::labelProp, "LabelProp", "Using label propagation algorithm"), - clEnumValN(Algo::serial, "Serial", "Serial"), - clEnumValN(Algo::synchronous, "Sync", "Synchronous"), - clEnumValN(Algo::afforest, "Afforest", "Using Afforest sampling"), - clEnumValN( - Algo::edgeafforest, "EdgeAfforest", - "Using Afforest sampling, Edge-wise"), - clEnumValN( - Algo::edgetiledafforest, "EdgetiledAfforest", - "Using Afforest sampling, EdgeTiled") - - ), - cll::init(Algo::edgetiledasync)); - -static cll::opt largestComponentFilename( - "outputLargestComponent", cll::desc("[output graph file]"), cll::init("")); -static cll::opt permutationFilename( - "outputNodePermutation", cll::desc("[output node permutation file]"), - 
cll::init("")); -#ifndef NDEBUG -enum OutputEdgeType { void_, int32_, int64_ }; -static cll::opt memoryLimit( - "memoryLimit", cll::desc("Memory limit for out-of-core algorithms (in MB)"), - cll::init(~0U)); -static cll::opt writeEdgeType( - "edgeType", cll::desc("Input/Output edge type:"), - cll::values( - clEnumValN(OutputEdgeType::void_, "void", "no edge values"), - clEnumValN(OutputEdgeType::int32_, "int32", "32 bit edge values"), - clEnumValN(OutputEdgeType::int64_, "int64", "64 bit edge values")), - cll::init(OutputEdgeType::void_)); -#endif - -// TODO (bozhi) LLVM commandline library now supports option categorization. -// Categorize params when libllvm is updated to make -help beautiful! -// static cll::OptionCategory ParamCat("Algorithm-Specific Parameters", -// "Only used for specific algorithms."); -static cll::opt EDGE_TILE_SIZE( - "edgeTileSize", - cll::desc("(For Edgetiled algos) Size of edge tiles " - "(default 512)"), - // cll::cat(ParamCat), - cll::init(512)); // 512 -> 64 -static const int CHUNK_SIZE = 1; -//! parameter for the Vertex Neighbor Sampling step of Afforest algorithm -static cll::opt NEIGHBOR_SAMPLES( - "vns", - cll::desc("(For Afforest and its variants) number of edges " - "per vertice to process initially for exposing " - "partial connectivity (default 2)"), - // cll::cat(ParamCat), - cll::init(2)); -//! 
parameter for the Large Component Skipping step of Afforest algorithm -static cll::opt COMPONENT_SAMPLES( - "lcs", - cll::desc("(For Afforest and its variants) number of times " - "randomly sampling over vertices to approximately " - "capture the largest intermediate component " - "(default 1024)"), - // cll::cat(ParamCat), - cll::init(1024)); - -struct Node : public galois::UnionFindNode { - using ComponentType = Node*; - - Node() : galois::UnionFindNode(const_cast(this)) {} - Node(const Node& o) : galois::UnionFindNode(o.m_component) {} - - Node& operator=(const Node& o) { - Node c(o); - std::swap(c, *this); - return *this; - } - - ComponentType component() { return this->get(); } - bool isRepComp(unsigned int) { return false; } -}; - -const unsigned int LABEL_INF = std::numeric_limits::max(); - -/** - * Serial connected components algorithm. Just use union-find. - */ -struct SerialAlgo { - using ComponentType = Node*; - struct NodeComponent { - using ArrowType = arrow::CTypeTraits::ArrowType; - using ViewType = galois::PODPropertyView; - }; - - using NodeData = std::tuple; - using EdgeData = std::tuple<>; - typedef galois::graphs::PropertyGraph Graph; - typedef typename Graph::Node GNode; - - void Initialize(Graph* graph) { - galois::do_all(galois::iterate(*graph), [&](const GNode& node) { - graph->GetData(node) = new Node(); - }); - } - - void Deallocate(Graph* graph) { - galois::do_all(galois::iterate(*graph), [&](const GNode& node) { - delete graph->GetData(node); - }); - } - - void operator()(Graph* graph) { - for (const GNode& src : *graph) { - auto& sdata = graph->GetData(src); - for (const auto& ii : graph->edges(src)) { - auto dest = graph->GetEdgeDest(ii); - auto& ddata = graph->GetData(dest); - sdata->merge(ddata); - } - } - - for (const GNode& src : *graph) { - auto& sdata = graph->GetData(src); - sdata->compress(); - } - } -}; - -struct LabelPropAlgo { - using ComponentType = unsigned int; - struct NodeComponent { - using ArrowType = 
arrow::CTypeTraits::ArrowType; - using ViewType = galois::PODPropertyView>; - }; - struct NodeOldComponent : public galois::PODProperty {}; - - using NodeData = std::tuple; - using EdgeData = std::tuple<>; - typedef galois::graphs::PropertyGraph Graph; - typedef typename Graph::Node GNode; - - void Initialize(Graph* graph) { - galois::do_all(galois::iterate(*graph), [&](const GNode& node) { - graph->GetData(node).store(node); - graph->GetData(node) = LABEL_INF; - }); - } - - void Deallocate(Graph*) {} - - void operator()(Graph* graph) { - galois::GReduceLogicalOr changed; - do { - changed.reset(); - galois::do_all( - galois::iterate(*graph), - [&](const GNode& src) { - auto& sdata_current_comp = graph->GetData(src); - auto& sdata_old_comp = graph->GetData(src); - if (sdata_old_comp > sdata_current_comp) { - sdata_old_comp = sdata_current_comp; - - changed.update(true); - - for (auto e : graph->edges(src)) { - auto dest = graph->GetEdgeDest(e); - auto& ddata_current_comp = graph->GetData(dest); - ComponentType label_new = sdata_current_comp; - galois::atomicMin(ddata_current_comp, label_new); - } - } - }, - galois::disable_conflict_detection(), galois::steal(), - galois::loopname("LabelPropAlgo")); - } while (changed.reduce()); - } -}; - -/** - * Synchronous connected components algorithm. Initially all nodes are in - * their own component. Then, we merge endpoints of edges to form the spanning - * tree. Merging is done in two phases to simplify concurrent updates: (1) - * find components and (2) union components. Since the merge phase does not - * do any finds, we only process a fraction of edges at a time; otherwise, - * the union phase may unnecessarily merge two endpoints in the same - * component. 
- */ -struct SynchronousAlgo { - using ComponentType = Node*; - struct NodeComponent { - using ArrowType = arrow::CTypeTraits::ArrowType; - using ViewType = galois::PODPropertyView; - }; - - using NodeData = std::tuple; - using EdgeData = std::tuple<>; - typedef galois::graphs::PropertyGraph Graph; - typedef typename Graph::Node GNode; - - struct Edge { - GNode src; - Node* ddata; - int count; - Edge(GNode src, Node* ddata, int count) - : src(src), ddata(ddata), count(count) {} - }; - - void Initialize(Graph* graph) { - galois::do_all(galois::iterate(*graph), [&](const GNode& node) { - graph->GetData(node) = new Node(); - }); - } - - void Deallocate(Graph* graph) { - galois::do_all(galois::iterate(*graph), [&](const GNode& node) { - delete graph->GetData(node); - }); - } - - void operator()(Graph* graph) { - size_t rounds = 0; - galois::GAccumulator empty_merges; - - galois::InsertBag wls[2]; - galois::InsertBag* next_bag; - galois::InsertBag* current_bag; - - current_bag = &wls[0]; - next_bag = &wls[1]; - - galois::do_all(galois::iterate(*graph), [&](const GNode& src) { - for (auto ii : graph->edges(src)) { - auto dest = graph->GetEdgeDest(ii); - if (src >= *dest) - continue; - auto& ddata = graph->GetData(dest); - current_bag->push(Edge(src, ddata, 0)); - break; - } - }); - - while (!current_bag->empty()) { - galois::do_all( - galois::iterate(*current_bag), - [&](const Edge& edge) { - auto& sdata = graph->GetData(edge.src); - if (!sdata->merge(edge.ddata)) - empty_merges += 1; - }, - galois::loopname("Merge")); - - galois::do_all( - galois::iterate(*current_bag), - [&](const Edge& edge) { - GNode src = edge.src; - auto& sdata = graph->GetData(src); - Node* src_component = sdata->findAndCompress(); - Graph::edge_iterator ii = graph->edge_begin(src); - Graph::edge_iterator ei = graph->edge_end(src); - int count = edge.count + 1; - std::advance(ii, count); - for (; ii != ei; ++ii, ++count) { - auto dest = graph->GetEdgeDest(ii); - if (src >= *dest) - continue; - 
auto& ddata = graph->GetData(dest); - Node* dest_component = ddata->findAndCompress(); - if (src_component != dest_component) { - next_bag->push(Edge(src, dest_component, count)); - break; - } - } - }, - galois::loopname("Find")); - - current_bag->clear(); - std::swap(current_bag, next_bag); - rounds += 1; - } - - galois::do_all( - galois::iterate(*graph), - [&](const GNode& src) { - auto& sdata = graph->GetData(src); - sdata->compress(); - }, - galois::steal(), galois::loopname("Compress")); - - galois::ReportStatSingle("CC-Sync", "rounds", rounds); - galois::ReportStatSingle("CC-Sync", "empty_merges", empty_merges.reduce()); - } -}; - -/** - * Like synchronous algorithm, but if we restrict path compression (as done is - * @link{UnionFindNode}), we can perform unions and finds concurrently. - */ -struct AsyncAlgo { - using ComponentType = Node*; - struct NodeComponent { - using ArrowType = arrow::CTypeTraits::ArrowType; - using ViewType = galois::PODPropertyView; - }; - - using NodeData = std::tuple; - using EdgeData = std::tuple<>; - typedef galois::graphs::PropertyGraph Graph; - typedef typename Graph::Node GNode; - - void Initialize(Graph* graph) { - galois::do_all(galois::iterate(*graph), [&](const GNode& node) { - graph->GetData(node) = new Node(); - }); - } - - void Deallocate(Graph* graph) { - galois::do_all(galois::iterate(*graph), [&](const GNode& node) { - delete graph->GetData(node); - }); - } - - void operator()(Graph* graph) { - galois::GAccumulator empty_merges; - - galois::do_all( - galois::iterate(*graph), - [&](const GNode& src) { - auto& sdata = graph->GetData(src); - - for (const auto& ii : graph->edges(src)) { - auto dest = graph->GetEdgeDest(ii); - auto& ddata = graph->GetData(dest); - - if (src >= *dest) - continue; - - if (!sdata->merge(ddata)) - empty_merges += 1; - } - }, - galois::loopname("CC-Async")); - - galois::do_all( - galois::iterate(*graph), - [&](const GNode& src) { - auto& sdata = graph->GetData(src); - sdata->compress(); - }, - 
galois::steal(), galois::loopname("CC-Async-Compress")); - - galois::ReportStatSingle("CC-Async", "empty_merges", empty_merges.reduce()); - } -}; - -struct EdgeAsyncAlgo { - using ComponentType = Node*; - struct NodeComponent { - using ArrowType = arrow::CTypeTraits::ArrowType; - using ViewType = galois::PODPropertyView; - }; - - using NodeData = std::tuple; - using EdgeData = std::tuple<>; - typedef galois::graphs::PropertyGraph Graph; - typedef typename Graph::Node GNode; - using Edge = std::pair; - - void Initialize(Graph* graph) { - galois::do_all(galois::iterate(*graph), [&](const GNode& node) { - graph->GetData(node) = new Node(); - }); - } - - void Deallocate(Graph* graph) { - galois::do_all(galois::iterate(*graph), [&](const GNode& node) { - delete graph->GetData(node); - }); - } - - void operator()(Graph* graph) { - galois::GAccumulator empty_merges; - - galois::InsertBag works; - - galois::do_all( - galois::iterate(*graph), - [&](const GNode& src) { - for (const auto& ii : graph->edges(src)) { - if (src < *(graph->GetEdgeDest(ii))) { - works.push_back(std::make_pair(src, ii)); - } - } - }, - galois::loopname("CC-EdgeAsyncInit"), galois::steal()); - - galois::do_all( - galois::iterate(works), - [&](Edge& e) { - auto& sdata = graph->GetData(e.first); - auto dest = graph->GetEdgeDest(e.second); - auto& ddata = graph->GetData(dest); - - if (e.first > *dest) - // continue; - ; - else if (!sdata->merge(ddata)) { - empty_merges += 1; - } - }, - galois::loopname("CC-EdgeAsync"), galois::steal()); - - galois::do_all( - galois::iterate(*graph), - [&](const GNode& src) { - auto& sdata = graph->GetData(src); - sdata->compress(); - }, - galois::steal(), galois::loopname("CC-Async-Compress")); - - galois::ReportStatSingle("CC-Async", "empty_merges", empty_merges.reduce()); - } -}; - -/** - * Improve performance of async algorithm by following machine topology. 
- */ -struct BlockedAsyncAlgo { - using ComponentType = Node*; - struct NodeComponent { - using ArrowType = arrow::CTypeTraits::ArrowType; - using ViewType = galois::PODPropertyView; - }; - - using NodeData = std::tuple; - using EdgeData = std::tuple<>; - typedef galois::graphs::PropertyGraph Graph; - typedef typename Graph::Node GNode; - using Edge = std::pair; - - void Initialize(Graph* graph) { - galois::do_all(galois::iterate(*graph), [&](const GNode& node) { - graph->GetData(node) = new Node(); - }); - } - - void Deallocate(Graph* graph) { - galois::do_all(galois::iterate(*graph), [&](const GNode& node) { - delete graph->GetData(node); - }); - } - - struct WorkItem { - GNode src; - Graph::edge_iterator start; - }; - - //! Add the next edge between components to the worklist - template - static void process( - Graph* graph, const GNode& src, const Graph::edge_iterator& start, - Pusher& pusher) { - auto& sdata = graph->GetData(src); - int count = 1; - for (Graph::edge_iterator ii = start, ei = graph->edge_end(src); ii != ei; - ++ii, ++count) { - auto dest = graph->GetEdgeDest(ii); - auto& ddata = graph->GetData(dest); - - if (src >= *dest) - continue; - - if (sdata->merge(ddata)) { - if (Limit == 0 || count != Limit) - continue; - } - - if (MakeContinuation || (Limit != 0 && count == Limit)) { - WorkItem item = {src, ii + 1}; - pusher.push(item); - break; - } - } - } - - void operator()(Graph* graph) { - galois::InsertBag items; - - galois::do_all( - galois::iterate(*graph), - [&](const GNode& src) { - auto start = graph->edge_begin(src); - if (galois::substrate::ThreadPool::getSocket() == 0) { - process(graph, src, start, items); - } else { - process(graph, src, start, items); - } - }, - galois::loopname("Initialize")); - - galois::for_each( - galois::iterate(items), - [&](const WorkItem& item, auto& ctx) { - process(graph, item.src, item.start, ctx); - }, - galois::loopname("Merge"), - galois::wl>()); - - galois::do_all( - galois::iterate(*graph), - [&](const 
GNode& src) { - auto& sdata = graph->GetData(src); - sdata->compress(); - }, - galois::steal(), galois::loopname("CC-Async-Compress")); - } -}; - -struct EdgeTiledAsyncAlgo { - using ComponentType = Node*; - struct NodeComponent { - using ArrowType = arrow::CTypeTraits::ArrowType; - using ViewType = galois::PODPropertyView; - }; - - using NodeData = std::tuple; - using EdgeData = std::tuple<>; - typedef galois::graphs::PropertyGraph Graph; - typedef typename Graph::Node GNode; - using Edge = std::pair; - - void Initialize(Graph* graph) { - galois::do_all(galois::iterate(*graph), [&](const GNode& node) { - graph->GetData(node) = new Node(); - }); - } - - void Deallocate(Graph* graph) { - galois::do_all(galois::iterate(*graph), [&](const GNode& node) { - delete graph->GetData(node); - }); - } - - struct EdgeTile { - // Node* sData; - GNode src; - Graph::edge_iterator beg; - Graph::edge_iterator end; - }; - - /*struct EdgeTileMaker { - EdgeTile operator() (Node* sdata, Graph::edge_iterator beg, - Graph::edge_iterator end) const{ return EdgeTile{sdata, beg, end}; - } - };*/ - - void operator()(Graph* graph) { - galois::GAccumulator empty_merges; - - galois::InsertBag works; - - std::cout << "INFO: Using edge tile size of " << EDGE_TILE_SIZE - << " and chunk size of " << CHUNK_SIZE << "\n"; - std::cout << "WARNING: Performance varies considerably due to parameter.\n"; - std::cout - << "WARNING: Do not expect the default to be good for your graph.\n"; - - galois::do_all( - galois::iterate(*graph), - [&](const GNode& src) { - auto beg = graph->edge_begin(src); - const auto& end = graph->edge_end(src); - - assert(beg <= end); - if ((end - beg) > EDGE_TILE_SIZE) { - for (; beg + EDGE_TILE_SIZE < end;) { - const auto& ne = beg + EDGE_TILE_SIZE; - assert(ne < end); - works.push_back(EdgeTile{src, beg, ne}); - beg = ne; - } - } - - if ((end - beg) > 0) { - works.push_back(EdgeTile{src, beg, end}); - } - }, - galois::loopname("CC-EdgeTiledAsyncInit"), galois::steal()); - - 
galois::do_all( - galois::iterate(works), - [&](const EdgeTile& tile) { - const auto& src = tile.src; - auto& sdata = graph->GetData(src); - - for (auto ii = tile.beg; ii != tile.end; ++ii) { - auto dest = graph->GetEdgeDest(ii); - if (src >= *dest) - continue; - - auto& ddata = graph->GetData(dest); - if (!sdata->merge(ddata)) - empty_merges += 1; - } - }, - galois::loopname("CC-edgetiledAsync"), galois::steal(), - galois::chunk_size() // 16 -> 1 - ); - - galois::do_all( - galois::iterate(*graph), - [&](const GNode& src) { - auto& sdata = graph->GetData(src); - sdata->compress(); - }, - galois::steal(), galois::loopname("CC-Async-Compress")); - - galois::ReportStatSingle( - "CC-edgeTiledAsync", "empty_merges", empty_merges.reduce()); - } -}; - -template -ComponentType -approxLargestComponent(Graph* graph) { - using map_type = std::unordered_map< - ComponentType, int, std::hash, - std::equal_to, - galois::gstl::Pow2Alloc>>; - using pair_type = std::pair; - - map_type comp_freq(COMPONENT_SAMPLES); - std::random_device rd; - std::mt19937 rng(rd()); - std::uniform_int_distribution dist(0, graph->size() - 1); - for (uint32_t i = 0; i < COMPONENT_SAMPLES; i++) { - ComponentType ndata = graph->template GetData(dist(rng)); - comp_freq[ndata->component()]++; - } - - assert(!comp_freq.empty()); - auto most_frequent = std::max_element( - comp_freq.begin(), comp_freq.end(), - [](const pair_type& a, const pair_type& b) { - return a.second < b.second; - }); - - galois::gDebug( - "Approximate largest intermediate component: ", most_frequent->first, - " (hit rate ", 100.0 * (most_frequent->second) / COMPONENT_SAMPLES, "%)"); - - return most_frequent->first; -} - -/** - * CC w/ Afforest sampling. - * - * [1] M. Sutton, T. Ben-Nun and A. Barak, "Optimizing Parallel Graph - * Connectivity Computation via Subgraph Sampling," 2018 IEEE International - * Parallel and Distributed Processing Symposium (IPDPS), Vancouver, BC, 2018, - * pp. 12-21. 
- */ -struct AfforestAlgo { - struct NodeAfforest : public galois::UnionFindNode { - using ComponentType = NodeAfforest*; - - NodeAfforest() - : galois::UnionFindNode(const_cast(this)) { - } - NodeAfforest(const NodeAfforest& o) - : galois::UnionFindNode(o.m_component) {} - - ComponentType component() { return this->get(); } - bool isRepComp(unsigned int) { return false; } // verify - - public: - void link(NodeAfforest* b) { - NodeAfforest* a = m_component.load(std::memory_order_relaxed); - b = b->m_component.load(std::memory_order_relaxed); - while (a != b) { - if (a < b) - std::swap(a, b); - // Now a > b - NodeAfforest* ac = a->m_component.load(std::memory_order_relaxed); - if ((ac == a && a->m_component.compare_exchange_strong(a, b)) || - (b == ac)) - break; - a = (a->m_component.load(std::memory_order_relaxed)) - ->m_component.load(std::memory_order_relaxed); - b = b->m_component.load(std::memory_order_relaxed); - } - } - }; - - struct NodeComponent { - using ArrowType = arrow::CTypeTraits::ArrowType; - using ViewType = galois::PODPropertyView; - }; - - using NodeData = std::tuple; - using EdgeData = std::tuple<>; - typedef galois::graphs::PropertyGraph Graph; - typedef typename Graph::Node GNode; - - void Initialize(Graph* graph) { - galois::do_all(galois::iterate(*graph), [&](const GNode& node) { - graph->GetData(node) = new NodeAfforest(); - }); - } - - void Deallocate(Graph* graph) { - galois::do_all(galois::iterate(*graph), [&](const GNode& node) { - delete graph->GetData(node); - }); - } - using ComponentType = NodeAfforest::ComponentType; - - void operator()(Graph* graph) { - // (bozhi) should NOT go through single direction in sampling step: nodes - // with edges less than NEIGHBOR_SAMPLES will fail - for (uint32_t r = 0; r < NEIGHBOR_SAMPLES; ++r) { - galois::do_all( - galois::iterate(*graph), - [&](const GNode& src) { - Graph::edge_iterator ii = graph->edge_begin(src); - Graph::edge_iterator ei = graph->edge_end(src); - for (std::advance(ii, r); ii < 
ei; ii++) { - auto dest = graph->GetEdgeDest(ii); - auto& sdata = graph->GetData(src); - ComponentType ddata = graph->GetData(dest); - sdata->link(ddata); - break; - } - }, - galois::steal(), galois::loopname("Afforest-VNS-Link")); - - galois::do_all( - galois::iterate(*graph), - [&](const GNode& src) { - auto& sdata = graph->GetData(src); - sdata->compress(); - }, - galois::steal(), galois::loopname("Afforest-VNS-Compress")); - } - - galois::StatTimer StatTimer_Sampling("Afforest-LCS-Sampling"); - StatTimer_Sampling.start(); - const ComponentType c = - approxLargestComponent(graph); - StatTimer_Sampling.stop(); - - galois::do_all( - galois::iterate(*graph), - [&](const GNode& src) { - auto& sdata = graph->GetData(src); - if (sdata->component() == c) - return; - Graph::edge_iterator ii = graph->edge_begin(src); - Graph::edge_iterator ei = graph->edge_end(src); - for (std::advance(ii, NEIGHBOR_SAMPLES.getValue()); ii < ei; ++ii) { - auto dest = graph->GetEdgeDest(ii); - auto& ddata = graph->GetData(dest); - sdata->link(ddata); - } - }, - galois::steal(), galois::loopname("Afforest-LCS-Link")); - - galois::do_all( - galois::iterate(*graph), - [&](const GNode& src) { - auto& sdata = graph->GetData(src); - sdata->compress(); - }, - galois::steal(), galois::loopname("Afforest-LCS-Compress")); - } -}; - -/** - * Edge CC w/ Afforest sampling - */ -struct EdgeAfforestAlgo { - struct NodeAfforestEdge : public galois::UnionFindNode { - using ComponentType = NodeAfforestEdge*; - - NodeAfforestEdge() - : galois::UnionFindNode( - const_cast(this)) {} - NodeAfforestEdge(const NodeAfforestEdge& o) - : galois::UnionFindNode(o.m_component) {} - - ComponentType component() { return this->get(); } - bool isRepComp(unsigned int) { return false; } // verify - - public: - NodeAfforestEdge* hook_min(NodeAfforestEdge* b, NodeAfforestEdge* c = 0) { - NodeAfforestEdge* a = m_component.load(std::memory_order_relaxed); - b = b->m_component.load(std::memory_order_relaxed); - while (a != b) { - 
if (a < b) - std::swap(a, b); - // Now a > b - NodeAfforestEdge* ac = a->m_component.load(std::memory_order_relaxed); - if (ac == a && a->m_component.compare_exchange_strong(a, b)) { - if (b == c) - return a; //! return victim - return 0; - } - if (b == ac) { - return 0; - } - a = (a->m_component.load(std::memory_order_relaxed)) - ->m_component.load(std::memory_order_relaxed); - b = b->m_component.load(std::memory_order_relaxed); - } - return 0; - } - }; - - using ComponentType = NodeAfforestEdge::ComponentType; - struct NodeComponent { - using ArrowType = arrow::CTypeTraits::ArrowType; - using ViewType = galois::PODPropertyView; - }; - - using NodeData = std::tuple; - using EdgeData = std::tuple<>; - typedef galois::graphs::PropertyGraph Graph; - typedef typename Graph::Node GNode; - - using Edge = std::pair; - - void Initialize(Graph* graph) { - galois::do_all(galois::iterate(*graph), [&](const GNode& node) { - graph->GetData(node) = new NodeAfforestEdge(); - }); - } - - void Deallocate(Graph* graph) { - galois::do_all(galois::iterate(*graph), [&](const GNode& node) { - delete graph->GetData(node); - }); - } - void operator()(Graph* graph) { - // (bozhi) should NOT go through single direction in sampling step: nodes - // with edges less than NEIGHBOR_SAMPLES will fail - for (uint32_t r = 0; r < NEIGHBOR_SAMPLES; ++r) { - galois::do_all( - galois::iterate(*graph), - [&](const GNode& src) { - Graph::edge_iterator ii = graph->edge_begin(src); - Graph::edge_iterator ei = graph->edge_end(src); - std::advance(ii, r); - if (ii < ei) { - auto dest = graph->GetEdgeDest(ii); - auto& sdata = graph->GetData(src); - auto& ddata = graph->GetData(dest); - sdata->hook_min(ddata); - } - }, - galois::steal(), galois::loopname("EdgeAfforest-VNS-Link")); - } - galois::do_all( - galois::iterate(*graph), - [&](const GNode& src) { - auto& sdata = graph->GetData(src); - sdata->compress(); - }, - galois::steal(), galois::loopname("EdgeAfforest-VNS-Compress")); - - galois::StatTimer 
StatTimer_Sampling("EdgeAfforest-LCS-Sampling"); - StatTimer_Sampling.start(); - const ComponentType c = - approxLargestComponent(graph); - StatTimer_Sampling.stop(); - const ComponentType c0 = (graph->GetData(0)); - - galois::InsertBag works; - - galois::do_all( - galois::iterate(*graph), - [&](const GNode& src) { - auto& sdata = graph->GetData(src); - if (sdata->component() == c) - return; - auto beg = graph->edge_begin(src); - const auto end = graph->edge_end(src); - - for (std::advance(beg, NEIGHBOR_SAMPLES.getValue()); beg < end; - beg++) { - auto dest = graph->GetEdgeDest(beg); - auto& ddata = graph->GetData(dest); - if (src < *dest || c == ddata->component()) { - works.push_back(std::make_pair(src, *dest)); - } - } - }, - galois::loopname("EdgeAfforest-LCS-Assembling"), galois::steal()); - - galois::for_each( - galois::iterate(works), - [&](const Edge& e, auto& ctx) { - auto& sdata = graph->GetData(e.first); - if (sdata->component() == c) - return; - auto& ddata = graph->GetData(e.second); - ComponentType victim = sdata->hook_min(ddata, c); - if (victim) { - auto src = victim - c0; // TODO (bozhi) tricky! 
- for (auto ii : graph->edges(src)) { - auto dest = graph->GetEdgeDest(ii); - ctx.push_back(std::make_pair(*dest, src)); - } - } - }, - galois::disable_conflict_detection(), - galois::loopname("EdgeAfforest-LCS-Link")); - - galois::do_all( - galois::iterate(*graph), - [&](const GNode& src) { - auto& sdata = graph->GetData(src); - sdata->compress(); - }, - galois::steal(), galois::loopname("EdgeAfforest-LCS-Compress")); - } -}; - -/** - * Edgetiled CC w/ Afforest sampling - */ -struct EdgeTiledAfforestAlgo { - struct NodeAfforest : public galois::UnionFindNode { - using ComponentType = NodeAfforest*; - - NodeAfforest() - : galois::UnionFindNode(const_cast(this)) { - } - NodeAfforest(const NodeAfforest& o) - : galois::UnionFindNode(o.m_component) {} - - ComponentType component() { return this->get(); } - bool isRepComp(unsigned int) { return false; } // verify - - public: - void link(NodeAfforest* b) { - NodeAfforest* a = m_component.load(std::memory_order_relaxed); - b = b->m_component.load(std::memory_order_relaxed); - while (a != b) { - if (a < b) - std::swap(a, b); - // Now a > b - NodeAfforest* ac = a->m_component.load(std::memory_order_relaxed); - if ((ac == a && a->m_component.compare_exchange_strong(a, b)) || - (b == ac)) - break; - a = (a->m_component.load(std::memory_order_relaxed)) - ->m_component.load(std::memory_order_relaxed); - b = b->m_component.load(std::memory_order_relaxed); - } - } - }; - - struct NodeComponent { - using ArrowType = arrow::CTypeTraits::ArrowType; - using ViewType = galois::PODPropertyView; - }; - - using NodeData = std::tuple; - using EdgeData = std::tuple<>; - typedef galois::graphs::PropertyGraph Graph; - typedef typename Graph::Node GNode; - - void Initialize(Graph* graph) { - galois::do_all(galois::iterate(*graph), [&](const GNode& node) { - graph->GetData(node) = new NodeAfforest(); - }); - } - - void Deallocate(Graph* graph) { - galois::do_all(galois::iterate(*graph), [&](const GNode& node) { - delete graph->GetData(node); - 
}); - } - - using ComponentType = NodeAfforest::ComponentType; - - struct EdgeTile { - GNode src; - Graph::edge_iterator beg; - Graph::edge_iterator end; - }; - - void operator()(Graph* graph) { - // (bozhi) should NOT go through single direction in sampling step: nodes - // with edges less than NEIGHBOR_SAMPLES will fail - galois::do_all( - galois::iterate(*graph), - [&](const GNode& src) { - auto ii = graph->edge_begin(src); - const auto end = graph->edge_end(src); - for (uint32_t r = 0; r < NEIGHBOR_SAMPLES && ii < end; ++r, ++ii) { - auto dest = graph->GetEdgeDest(ii); - auto& sdata = graph->GetData(src); - auto& ddata = graph->GetData(dest); - sdata->link(ddata); - } - }, - galois::steal(), galois::loopname("EdgetiledAfforest-VNS-Link")); - - galois::do_all( - galois::iterate(*graph), - [&](const GNode& src) { - auto& sdata = graph->GetData(src); - sdata->compress(); - }, - galois::steal(), galois::loopname("EdgetiledAfforest-VNS-Compress")); - - galois::StatTimer StatTimer_Sampling("EdgetiledAfforest-LCS-Sampling"); - StatTimer_Sampling.start(); - const ComponentType c = - approxLargestComponent(graph); - StatTimer_Sampling.stop(); - - galois::InsertBag works; - std::cout << "INFO: Using edge tile size of " << EDGE_TILE_SIZE - << " and chunk size of " << CHUNK_SIZE << "\n"; - galois::do_all( - galois::iterate(*graph), - [&](const GNode& src) { - auto& sdata = graph->GetData(src); - if (sdata->component() == c) - return; - auto beg = graph->edge_begin(src); - const auto end = graph->edge_end(src); - - for (std::advance(beg, NEIGHBOR_SAMPLES.getValue()); - beg + EDGE_TILE_SIZE < end;) { - auto ne = beg + EDGE_TILE_SIZE; - assert(ne < end); - works.push_back(EdgeTile{src, beg, ne}); - beg = ne; - } - - if ((end - beg) > 0) { - works.push_back(EdgeTile{src, beg, end}); - } - }, - galois::loopname("EdgetiledAfforest-LCS-Tiling"), galois::steal()); - - galois::do_all( - galois::iterate(works), - [&](const EdgeTile& tile) { - auto& sdata = graph->GetData(tile.src); 
- if (sdata->component() == c) - return; - for (auto ii = tile.beg; ii < tile.end; ++ii) { - auto dest = graph->GetEdgeDest(ii); - auto& ddata = graph->GetData(dest); - sdata->link(ddata); - } - }, - galois::steal(), galois::chunk_size(), - galois::loopname("EdgetiledAfforest-LCS-Link")); - - galois::do_all( - galois::iterate(*graph), - [&](const GNode& src) { - auto& sdata = graph->GetData(src); - sdata->compress(); - }, - galois::steal(), galois::loopname("EdgetiledAfforest-LCS-Compress")); - } -}; - -template -bool -verify( - Graph*, - typename std::enable_if::value>::type* = - 0) { - return true; -} - -template -bool -verify( - Graph* graph, typename std::enable_if< - !galois::graphs::is_segmented::value>::type* = 0) { - using GNode = typename Graph::Node; - - auto is_bad = [&graph](const GNode& n) { - auto& me = graph->template GetData(n); - for (auto ii : graph->edges(n)) { - auto dest = graph->GetEdgeDest(ii); - auto& data = graph->template GetData(dest); - if (data->component() != me->component()) { - std::cerr << std::dec << "not in same component: " << (unsigned int)n - << " (" << me->component() << ")" - << " and " << (unsigned int)(*dest) << " (" - << data->component() << ")" - << "\n"; - return true; - } - } - return false; - }; - - return galois::ParallelSTL::find_if(graph->begin(), graph->end(), is_bad) == - graph->end(); -} - -template <> -bool -verify( - LabelPropAlgo::Graph* graph, - typename std::enable_if< - !galois::graphs::is_segmented::value>::type*) { - using GNode = typename LabelPropAlgo::Graph::Node; - auto is_bad = [&graph](const GNode& n) { - auto& me = graph->template GetData(n); - for (auto ii : graph->edges(n)) { - auto dest = graph->GetEdgeDest(ii); - auto& data = graph->template GetData(dest); - if (data != me) { - std::cerr << std::dec << "not in same component: " << (unsigned int)n - << " (" << me << ")" - << " and " << (unsigned int)(*dest) << " (" << data << ")" - << "\n"; - return true; - } - } - return false; - }; - - return 
galois::ParallelSTL::find_if(graph->begin(), graph->end(), is_bad) == - graph->end(); -} - -template -typename Algo::ComponentType -findLargest(Graph* graph) { - using GNode = typename Graph::Node; - using ComponentType = typename Algo::ComponentType; - - using Map = galois::gstl::Map; - - auto reduce = [](Map& lhs, Map&& rhs) -> Map& { - Map v{std::move(rhs)}; - - for (auto& kv : v) { - if (lhs.count(kv.first) == 0) { - lhs[kv.first] = 0; - } - lhs[kv.first] += kv.second; - } - - return lhs; - }; - - auto mapIdentity = []() { return Map(); }; - - auto accumMap = galois::make_reducible(reduce, mapIdentity); - - galois::GAccumulator accumReps; - - galois::do_all( - galois::iterate(*graph), - [&](const GNode& x) { - auto& n = graph->template GetData(x); - if (n->isRep()) { - accumReps += 1; - return; - } - - // Don't add reps to table to avoid adding components of size - // 1 - accumMap.update(Map{std::make_pair(n->component(), 1)}); - }, - galois::loopname("CountLargest")); - - Map& map = accumMap.reduce(); - size_t reps = accumReps.reduce(); - - using ComponentSizePair = std::pair; - - auto sizeMax = [](const ComponentSizePair& a, const ComponentSizePair& b) { - if (a.second > b.second) { - return a; - } - return b; - }; - - auto identity = []() { return ComponentSizePair{}; }; - - auto maxComp = galois::make_reducible(sizeMax, identity); - - galois::do_all(galois::iterate(map), [&](const ComponentSizePair& x) { - maxComp.update(x); - }); - - ComponentSizePair largest = maxComp.reduce(); - - // Compensate for dropping representative node of components - double ratio = graph->size() - reps + map.size(); - size_t largestSize = largest.second + 1; - if (ratio) { - ratio = largestSize / ratio; - } - - std::cout << "Total components: " << reps << "\n"; - std::cout << "Number of non-trivial components: " << map.size() - << " (largest size: " << largestSize << " [" << ratio << "])\n"; - - return largest.first; -} - -template <> -typename LabelPropAlgo::ComponentType 
-findLargest(LabelPropAlgo::Graph* graph) { - using GNode = typename LabelPropAlgo::Graph::Node; - using ComponentType = typename LabelPropAlgo::ComponentType; - - using Map = galois::gstl::Map; - - auto reduce = [](Map& lhs, Map&& rhs) -> Map& { - Map v{std::move(rhs)}; - - for (auto& kv : v) { - if (lhs.count(kv.first) == 0) { - lhs[kv.first] = 0; - } - lhs[kv.first] += kv.second; - } - - return lhs; - }; - - auto mapIdentity = []() { return Map(); }; - - auto accumMap = galois::make_reducible(reduce, mapIdentity); - - galois::GAccumulator accumReps; - - galois::do_all( - galois::iterate(*graph), - [&](const GNode& x) { - auto& n = - graph->template GetData(x); - if (n.load() == x) { - accumReps += 1; - return; - } - - // Don't add reps to table to avoid adding components of size - // 1 - accumMap.update(Map{std::make_pair(n.load(), 1)}); - }, - galois::loopname("CountLargest")); - - Map& map = accumMap.reduce(); - size_t reps = accumReps.reduce(); - - using ComponentSizePair = std::pair; - - auto sizeMax = [](const ComponentSizePair& a, const ComponentSizePair& b) { - if (a.second > b.second) { - return a; - } - return b; - }; - - auto identity = []() { return ComponentSizePair{}; }; - - auto maxComp = galois::make_reducible(sizeMax, identity); - - galois::do_all(galois::iterate(map), [&](const ComponentSizePair& x) { - maxComp.update(x); - }); - - ComponentSizePair largest = maxComp.reduce(); - - // Compensate for dropping representative node of components - double ratio = graph->size() - reps + map.size(); - size_t largestSize = largest.second + 1; - if (ratio) { - ratio = largestSize / ratio; - } - - std::cout << "Total components: " << reps << "\n"; - std::cout << "Number of non-trivial components: " << map.size() - << " (largest size: " << largestSize << " [" << ratio << "])\n"; - - return largest.first; -} - -template -void -run() { - using Graph = typename Algo::Graph; - - Algo algo; - - std::cout << "Reading from file: " << inputFile << "\n"; - 
std::unique_ptr pfg = - MakeFileGraph(inputFile, edge_property_name); - - auto result = ConstructNodeProperties(pfg.get()); - if (!result) { - GALOIS_LOG_FATAL("cannot make graph: {}", result.error()); - } - - auto pg_result = galois::graphs::PropertyGraph< - typename Algo::NodeData, typename Algo::EdgeData>::Make(pfg.get()); - if (!pg_result) { - GALOIS_LOG_FATAL("could not make property graph: {}", pg_result.error()); - } - Graph graph = pg_result.value(); - - std::cout << "Read " << graph.num_nodes() << " nodes, " << graph.num_edges() - << " edges\n"; - - algo.Initialize(&graph); - - galois::Prealloc(1, 3 * graph.size() * sizeof(typename Algo::NodeData)); - galois::reportPageAlloc("MeminfoPre"); - - galois::StatTimer execTime("Timer_0"); - execTime.start(); - algo(&graph); - execTime.stop(); - - galois::reportPageAlloc("MeminfoPost"); - - if (!skipVerify || largestComponentFilename != "" || - permutationFilename != "") { - findLargest(&graph); - if (!verify(&graph)) { - algo.Initialize(&graph); - GALOIS_DIE("verification failed"); - } - } - algo.Deallocate(&graph); -} - -int -main(int argc, char** argv) { - std::unique_ptr G = - LonestarStart(argc, argv, name, desc, nullptr, &inputFile); - - galois::StatTimer totalTime("TimerTotal"); - totalTime.start(); - - if (!symmetricGraph) { - GALOIS_DIE( - "This application requires a symmetric graph input;" - " please use the -symmetricGraph flag " - " to indicate the input is a symmetric graph."); - } - - switch (algo) { - case Algo::async: - run(); - break; - case Algo::edgeasync: - run(); - break; - case Algo::edgetiledasync: - run(); - break; - case Algo::blockedasync: - run(); - break; - case Algo::labelProp: - run(); - break; - case Algo::serial: - run(); - break; - case Algo::synchronous: - run(); - break; - case Algo::afforest: - run(); - break; - case Algo::edgeafforest: - run(); - break; - case Algo::edgetiledafforest: - run(); - break; - - default: - std::cerr << "Unknown algorithm\n"; - abort(); - } - - 
totalTime.stop(); - - return 0; -} diff --git a/lonestar/analytics/cpu/jaccard/CMakeLists.txt b/lonestar/analytics/cpu/jaccard/CMakeLists.txt index b66b9b3334..799cb08693 100644 --- a/lonestar/analytics/cpu/jaccard/CMakeLists.txt +++ b/lonestar/analytics/cpu/jaccard/CMakeLists.txt @@ -1,4 +1,4 @@ -add_executable(jaccard-cpu jaccard.cpp) +add_executable(jaccard-cpu jaccard_cli.cpp) add_dependencies(apps jaccard-cpu) target_link_libraries(jaccard-cpu PRIVATE Galois::shmem lonestar) install(TARGETS jaccard-cpu DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT apps EXCLUDE_FROM_ALL) diff --git a/lonestar/analytics/cpu/jaccard/jaccard.cpp b/lonestar/analytics/cpu/jaccard/jaccard_cli.cpp similarity index 100% rename from lonestar/analytics/cpu/jaccard/jaccard.cpp rename to lonestar/analytics/cpu/jaccard/jaccard_cli.cpp