Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENG-276] Wrap graph algorithms for Python #177

Merged
merged 6 commits into from
Apr 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 43 additions & 15 deletions libgalois/include/katana/analytics/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include <algorithm>
#include <random>
#include <utility>

#include "katana/ErrorCode.h"
#include "katana/Properties.h"
Expand Down Expand Up @@ -72,38 +73,65 @@ ConstructEdgeProperties(
}

class TemporaryPropertyGuard {
katana::PropertyGraph* pfg_;
static thread_local int temporary_property_counter;

katana::PropertyGraph* pg_{nullptr};
std::string name_;

std::string GetPropertyName() {
// Use this as part of the property name since this will delete the property
// when it is deconstructed so this name should be unique at any given time.
// Use a thread local counter and the thread ID to get a unique name.
// `this` is not unique because we support moves.
return fmt::format(
"__katana_temporary_property_{}", reinterpret_cast<uintptr_t>(this));
"__katana_temporary_property_{}_{}", std::this_thread::get_id(),
temporary_property_counter++);
}

void Deinit() {
if (!pg_) {
return;
}

if (auto r = pg_->RemoveNodeProperty(name_); !r) {
if (r.error() != ErrorCode::PropertyNotFound) {
// Log an error if something goes wrong other than the property not
// existing.
KATANA_LOG_WARN("Failed to remove temporary property: {}", r.error());
}
}
Clear();
}

// Forgets the graph pointer without touching the graph. Deinit() returns
// early when pg_ is null, so a cleared guard removes nothing.
void Clear() {
  pg_ = nullptr;
}

public:
TemporaryPropertyGuard() = default;

TemporaryPropertyGuard(PropertyGraph* pg, std::string name)
: pfg_(pg), name_(name) {}
: pg_(pg), name_(std::move(name)) {}

explicit TemporaryPropertyGuard(katana::PropertyGraph* pg)
explicit TemporaryPropertyGuard(PropertyGraph* pg)
: TemporaryPropertyGuard(pg, GetPropertyName()) {}

const TemporaryPropertyGuard& operator=(const TemporaryPropertyGuard&) =
delete;
TemporaryPropertyGuard(const TemporaryPropertyGuard&) = delete;

std::string name() const { return name_; }
/// Move constructor: takes over the graph pointer and property name held by
/// `rhs`. `rhs` is left with a null graph pointer, so its Deinit() becomes a
/// no-op and the property is removed only by the new owner.
TemporaryPropertyGuard(TemporaryPropertyGuard&& rhs) noexcept
    : pg_(std::exchange(rhs.pg_, nullptr)), name_(std::move(rhs.name_)) {}

~TemporaryPropertyGuard() {
if (auto r = pfg_->RemoveNodeProperty(name_); !r) {
if (r.error() != katana::ErrorCode::PropertyNotFound) {
// Log an error if something goes wrong other than the property not
// existing.
KATANA_LOG_WARN("Failed to remove temporary property: {}", r.error());
}
}
/// Move assignment: first removes the property currently guarded by this
/// object (via Deinit()), then takes over the graph pointer and property
/// name of `rhs`. `rhs` is left with a null graph pointer so it will not
/// remove the property it handed over.
TemporaryPropertyGuard& operator=(TemporaryPropertyGuard&& rhs) noexcept {
  // Self-move must be a no-op. Without this check, Deinit() would remove the
  // property that is still supposed to be guarded and null out pg_, and the
  // self-move of name_ would leave it in an unspecified state.
  if (this == &rhs) {
    return *this;
  }
  Deinit();
  pg_ = rhs.pg_;
  name_ = std::move(rhs.name_);
  rhs.Clear();
  return *this;
}

/// Returns a copy of the property name this guard was constructed with.
std::string name() const {
  return name_;
}

/// Destructor: delegates to Deinit(), which removes the guarded property
/// unless the graph pointer has already been cleared.
~TemporaryPropertyGuard() {
  Deinit();
}
};

} // namespace katana::analytics
Expand Down
2 changes: 2 additions & 0 deletions libgalois/include/katana/analytics/k_truss/k_truss.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ class KTrussPlan : public Plan {
/// but have reasonable defaults.
/// The property named output_property_name is created by this function and may
/// not exist before the call.
///
/// @warning This algorithm will reorder nodes and edges in the graph.
KATANA_EXPORT Result<void> KTruss(
PropertyGraph* pg, uint32_t k_truss_number,
const std::string& output_property_name, KTrussPlan plan = KTrussPlan());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,30 +23,31 @@ class LocalClusteringCoefficientPlan : public Plan {
};

static const Relabeling kDefaultRelabeling = kAutoRelabel;
static const bool kDefaultEdgeSorted = false;
static const bool kDefaultEdgesSorted = false;

private:
Algorithm algorithm_;
Relabeling relabeling_;
bool edges_sorted_;
Relabeling relabeling_;

LocalClusteringCoefficientPlan(
Architecture architecture, Algorithm algorithm, bool edges_sorted,
Relabeling relabeling)
: Plan(architecture),
algorithm_(algorithm),
relabeling_(relabeling),
edges_sorted_(edges_sorted) {}
edges_sorted_(edges_sorted),
relabeling_(relabeling) {}

public:
LocalClusteringCoefficientPlan()
: LocalClusteringCoefficientPlan{
kCPU, kOrderedCountPerThread, kDefaultEdgeSorted,
kCPU, kOrderedCountPerThread, kDefaultEdgesSorted,
kDefaultRelabeling} {}

Algorithm algorithm() const { return algorithm_; }
Relabeling relabeling() const { return relabeling_; }
// TODO(amp): These parameters should be documented.
bool edges_sorted() const { return edges_sorted_; }
Relabeling relabeling() const { return relabeling_; }

/**
* An ordered count algorithm that sorts the nodes by degree before
Expand All @@ -57,28 +58,28 @@ class LocalClusteringCoefficientPlan : public Plan {
* @param edges_sorted Are the edges of the graph already sorted.
* @param relabeling Should the algorithm relabel the nodes.
*/
static LocalClusteringCoefficientPlan LocalClusteringCoefficientAtomics(
bool edges_sorted = kDefaultEdgeSorted,
static LocalClusteringCoefficientPlan OrderedCountAtomics(
bool edges_sorted = kDefaultEdgesSorted,
Relabeling relabeling = kDefaultRelabeling) {
return {kCPU, kOrderedCountAtomics, edges_sorted, relabeling};
}

static LocalClusteringCoefficientPlan LocalClusteringCoefficientPerThread(
bool edges_sorted = kDefaultEdgeSorted,
static LocalClusteringCoefficientPlan OrderedCountPerThread(
bool edges_sorted = kDefaultEdgesSorted,
Relabeling relabeling = kDefaultRelabeling) {
return {kCPU, kOrderedCountPerThread, edges_sorted, relabeling};
}
};

/**
* Count the total number of triangles in the graph. The graph must be
* symmetric!
*
* This algorithm copies the graph internally.
* Compute the local clustering coefficient for each node in the graph.
* The graph must be symmetric!
*
* @param pg The graph to process.
* @param output_property_name name of the output property
* @param plan
*
* @warning This algorithm will reorder nodes and edges in the graph.
*/
KATANA_EXPORT Result<void> LocalClusteringCoefficient(
PropertyGraph* pg, const std::string& output_property_name,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,30 +13,24 @@ namespace katana::analytics {
/// parameters associated with it.
class LouvainClusteringPlan : public Plan {
public:
/// Algorithm selectors for Single-Source Shortest Path
enum Algorithm {
kDoAll,
};

static const bool kEnableVF = false;
static constexpr double kModularityThresholdPerRound = 0.01;
static constexpr double kModularityThresholdTotal = 0.01;
static const uint32_t kMaxIterations = 10;
static const uint32_t kMinGraphSize = 100;
static const bool kDefaultEnableVF = false;
static constexpr double kDefaultModularityThresholdPerRound = 0.01;
static constexpr double kDefaultModularityThresholdTotal = 0.01;
static const uint32_t kDefaultMaxIterations = 10;
static const uint32_t kDefaultMinGraphSize = 100;

// Don't allow people to directly construct these, so as to have only one
// consistent way to configure.
private:
Algorithm algorithm_;
//Flag to enable vertex following optimization.
bool enable_vf_;
//Threshold for modularity gain per round.
double modularity_threshold_per_round_;
//Threshold for overall modularity gain.
double modularity_threshold_total_;
//Maximum number of iterations to execute.
uint32_t max_iterations_;
//Minimum coarsened graph size
uint32_t min_graph_size_;

LouvainClusteringPlan(
Expand All @@ -53,25 +47,38 @@ class LouvainClusteringPlan : public Plan {

public:
LouvainClusteringPlan()
: LouvainClusteringPlan{kCPU, kDoAll, false, 0.01, 0.01, 10, 100} {}
: LouvainClusteringPlan{
kCPU,
kDoAll,
kDefaultEnableVF,
kDefaultModularityThresholdPerRound,
kDefaultModularityThresholdTotal,
kDefaultMaxIterations,
kDefaultMinGraphSize} {}

Algorithm algorithm() const { return algorithm_; }
bool is_enable_vf() const { return enable_vf_; }
/// Enable vertex following optimization
bool enable_vf() const { return enable_vf_; }
/// Threshold for modularity gain per round.
double modularity_threshold_per_round() const {
return modularity_threshold_per_round_;
}
/// Threshold for overall modularity gain.
double modularity_threshold_total() const {
return modularity_threshold_total_;
}
/// Maximum number of iterations to execute.
uint32_t max_iterations() const { return max_iterations_; }
/// Minimum coarsened graph size
uint32_t min_graph_size() const { return min_graph_size_; }

static LouvainClusteringPlan DoAll(
bool enable_vf = kEnableVF,
double modularity_threshold_per_round = kModularityThresholdPerRound,
double modularity_threshold_total = kModularityThresholdTotal,
uint32_t max_iterations = kMaxIterations,
uint32_t min_graph_size = kMinGraphSize) {
bool enable_vf = kDefaultEnableVF,
double modularity_threshold_per_round =
kDefaultModularityThresholdPerRound,
double modularity_threshold_total = kDefaultModularityThresholdTotal,
uint32_t max_iterations = kDefaultMaxIterations,
uint32_t min_graph_size = kDefaultMinGraphSize) {
return {
kCPU,
kDoAll,
Expand All @@ -86,7 +93,7 @@ class LouvainClusteringPlan : public Plan {
/// Compute the Louvain Clustering for pg.
/// The edge weights are taken from the property named
/// edge_weight_property_name (which may be a 32- or 64-bit signed or unsigned
/// int), and the computed cluster ids are stored in the property named
/// int), and the computed cluster IDs are stored in the property named
/// output_property_name (as uint32_t).
/// The property named output_property_name is created by this function and may
/// not exist before the call.
Expand Down
68 changes: 52 additions & 16 deletions libgalois/include/katana/analytics/random_walks/random_walks.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,19 @@ namespace katana::analytics {

/// A computational plan for random walks, specifying the algorithm and any
/// parameters associated with it.
class RandomWalksPlan : Plan {
enum Algo { node2vec, edge2vec };

class RandomWalksPlan : public Plan {
public:
/// Algorithm selectors for Connected-components
/// Algorithm selectors for random walks
enum Algorithm { kNode2Vec, kEdge2Vec };

static const Algorithm kDefaultAlgorithm = kNode2Vec;
static const uint32_t kDefaultWalkLength = 1;
static const uint32_t kDefaultNumberOfWalks = 1;
constexpr static const double kDefaultBackwardProbability = 1.0;
constexpr static const double kDefaultForwardProbability = 1.0;
static const uint32_t kDefaultMaxIterations = 10;
static const uint32_t kDefaultNumberOfEdgeTypes = 1;

// Don't allow people to directly construct these, so as to have only one
// consistent way to configure.
private:
Expand Down Expand Up @@ -50,23 +56,51 @@ class RandomWalksPlan : Plan {
number_of_edge_types_(number_of_edge_types) {}

public:
// kChunkSize is a fixed const int (default value: 1)
// kChunkSize is fixed at 1
static const int kChunkSize;

RandomWalksPlan() : RandomWalksPlan{kCPU, kNode2Vec, 1, 1, 1.0, 1.0, 10, 1} {}
RandomWalksPlan()
: RandomWalksPlan{
kCPU,
kDefaultAlgorithm,
kDefaultWalkLength,
kDefaultNumberOfWalks,
kDefaultBackwardProbability,
kDefaultForwardProbability,
kDefaultMaxIterations,
kDefaultNumberOfEdgeTypes} {}

Algorithm algorithm() const { return algorithm_; }

// TODO(amp): The parameters walk_length, number_of_walks,
// backward_probability, and forward_probability control the expected output,
// not the algorithm used to compute the output. So they need to be parameters
// on the algorithm function, not in the plan. The plan should be parameters
// which do not change the expected output (though they may cause selecting a
// different correct output).

/// Length of random walks.
uint32_t walk_length() const { return walk_length_; }

/// Number of walks per node.
uint32_t number_of_walks() const { return number_of_walks_; }

/// Probability of moving back to parent.
double backward_probability() const { return backward_probability_; }

/// Probability of moving forward (2-hops).
double forward_probability() const { return forward_probability_; }

uint32_t max_iterations() const { return max_iterations_; }

uint32_t number_of_edge_types() const { return number_of_edge_types_; }

/// Node2Vec algorithm to generate random walks on the graph
static RandomWalksPlan Node2Vec(
uint32_t walk_length, uint32_t number_of_walks,
double backward_probability, double forward_probability) {
uint32_t walk_length = kDefaultWalkLength,
uint32_t number_of_walks = kDefaultNumberOfWalks,
double backward_probability = kDefaultBackwardProbability,
double forward_probability = kDefaultBackwardProbability) {
return {
kCPU,
kNode2Vec,
Expand All @@ -81,9 +115,12 @@ class RandomWalksPlan : Plan {
/// Edge2Vec algorithm to generate random walks on the graph.
/// Takes the heterogeneity of the edges into account
static RandomWalksPlan Edge2Vec(
uint32_t walk_length, uint32_t number_of_walks,
double backward_probability, double forward_probability,
uint32_t max_iterations, uint32_t number_of_edge_types) {
uint32_t walk_length = kDefaultWalkLength,
uint32_t number_of_walks = kDefaultNumberOfWalks,
double backward_probability = kDefaultBackwardProbability,
double forward_probability = kDefaultBackwardProbability,
uint32_t max_iterations = kDefaultMaxIterations,
uint32_t number_of_edge_types = kDefaultNumberOfEdgeTypes) {
return {
kCPU,
kNode2Vec,
Expand All @@ -96,11 +133,10 @@ class RandomWalksPlan : Plan {
}
};

/// Compute the random-walks for pg. The pg is expected to be
/// symmetric.
/// The parameters can be specified, but have reasonable defaults. Not all parameters
/// are used by the algorithms.
/// The generated random-walks generated are return in Katana::InsertBag.
/// Compute the random-walks for pg. The pg is expected to be symmetric. The
/// parameters can be specified, but have reasonable defaults. Not all
/// parameters are used by the algorithms. The generated random-walks are
/// returned as a vector of vectors.
KATANA_EXPORT Result<std::vector<std::vector<uint32_t>>> RandomWalks(
PropertyGraph* pg, RandomWalksPlan plan = RandomWalksPlan());

Expand Down
Loading