Skip to content

Commit

Permalink
Clean up libgalois algorithms and add TODOs for notable issues.
Browse files Browse the repository at this point in the history
  • Loading branch information
arthurp committed Apr 21, 2021
1 parent 44ef570 commit e62ff06
Show file tree
Hide file tree
Showing 6 changed files with 100 additions and 47 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class LocalClusteringCoefficientPlan : public Plan {
kDefaultRelabeling} {}

Algorithm algorithm() const { return algorithm_; }
// TODO(amp): These parameters should be documented.
Relabeling relabeling() const { return relabeling_; }
bool edges_sorted() const { return edges_sorted_; }

Expand All @@ -70,6 +71,7 @@ class LocalClusteringCoefficientPlan : public Plan {
}
};

// TODO(amp): The doc string was not updated.
/**
* Count the total number of triangles in the graph. The graph must be
* symmetric!
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ namespace katana::analytics {
/// parameters associated with it.
class LouvainClusteringPlan : public Plan {
public:
/// Algorithm selectors for Single-Source Shortest Path
enum Algorithm {
kDoAll,
};
Expand Down Expand Up @@ -53,9 +52,17 @@ class LouvainClusteringPlan : public Plan {

public:
LouvainClusteringPlan()
: LouvainClusteringPlan{kCPU, kDoAll, false, 0.01, 0.01, 10, 100} {}
: LouvainClusteringPlan{
kCPU,
kDoAll,
kEnableVF,
kModularityThresholdPerRound,
kModularityThresholdTotal,
kMaxIterations,
kMinGraphSize} {}

Algorithm algorithm() const { return algorithm_; }
// TODO(amp): These parameters should be documented.
bool is_enable_vf() const { return enable_vf_; }
double modularity_threshold_per_round() const {
return modularity_threshold_per_round_;
Expand Down Expand Up @@ -86,7 +93,7 @@ class LouvainClusteringPlan : public Plan {
/// Compute the Louvain Clustering for pg.
/// The edge weights are taken from the property named
/// edge_weight_property_name (which may be a 32- or 64-bit sign or unsigned
/// int), and the computed cluster ids are stored in the property named
/// int), and the computed cluster IDs are stored in the property named
/// output_property_name (as uint32_t).
/// The property named output_property_name is created by this function and may
/// not exist before the call.
Expand Down
66 changes: 51 additions & 15 deletions libgalois/include/katana/analytics/random_walks/random_walks.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,19 @@ namespace katana::analytics {

/// A computational plan to for random walks, specifying the algorithm and any
/// parameters associated with it.
class RandomWalksPlan : Plan {
enum Algo { node2vec, edge2vec };

class RandomWalksPlan : public Plan {
public:
/// Algorithm selectors for Connected-components
enum Algorithm { kNode2Vec, kEdge2Vec };

static const Algorithm kDefaultAlgorithm = kNode2Vec;
static const uint32_t kDefaultWalkLength = 1;
static const uint32_t kDefaultNumberOfWalks = 1;
constexpr static const double kDefaultBackwardProbability = 1.0;
constexpr static const double kDefaultForwardProbability = 1.0;
static const uint32_t kDefaultMaxIterations = 10;
static const uint32_t kDefaultNumberOfEdgeTypes = 1;

// Don't allow people to directly construct these, so as to have only one
// consistent way to configure.
private:
Expand Down Expand Up @@ -50,23 +56,51 @@ class RandomWalksPlan : Plan {
number_of_edge_types_(number_of_edge_types) {}

public:
// kChunkSize is a fixed const int (default value: 1)
// kChunkSize is fixed at 1
static const int kChunkSize;

RandomWalksPlan() : RandomWalksPlan{kCPU, kNode2Vec, 1, 1, 1.0, 1.0, 10, 1} {}
RandomWalksPlan()
: RandomWalksPlan{
kCPU,
kDefaultAlgorithm,
kDefaultWalkLength,
kDefaultNumberOfWalks,
kDefaultBackwardProbability,
kDefaultForwardProbability,
kDefaultMaxIterations,
kDefaultNumberOfEdgeTypes} {}

Algorithm algorithm() const { return algorithm_; }

// TODO(amp): The parameters walk_length, number_of_walks,
// backward_probability, and forward_probability control the expected output,
// not the algorithm used to compute the output. So they need to be parameters
// on the algorithm function, not in the plan. The plan should be parameters
// which do not change the expected output (though they may cause selecting a
// different correct output).

/// Length of random walks.
uint32_t walk_length() const { return walk_length_; }

/// Number of walks per node.
uint32_t number_of_walks() const { return number_of_walks_; }

/// Probability of moving back to parent.
double backward_probability() const { return backward_probability_; }

/// Probability of moving forward (2-hops).
double forward_probability() const { return forward_probability_; }

uint32_t max_iterations() const { return max_iterations_; }

uint32_t number_of_edge_types() const { return number_of_edge_types_; }

/// Node2Vec algorithm to generate random walks on the graph
static RandomWalksPlan Node2Vec(
uint32_t walk_length, uint32_t number_of_walks,
double backward_probability, double forward_probability) {
uint32_t walk_length = kDefaultWalkLength,
uint32_t number_of_walks = kDefaultNumberOfWalks,
double backward_probability = kDefaultBackwardProbability,
double forward_probability = kDefaultBackwardProbability) {
return {
kCPU,
kNode2Vec,
Expand All @@ -81,9 +115,12 @@ class RandomWalksPlan : Plan {
/// Edge2Vec algorithm to generate random walks on the graph.
/// Takes the heterogeneity of the edges into account
static RandomWalksPlan Edge2Vec(
uint32_t walk_length, uint32_t number_of_walks,
double backward_probability, double forward_probability,
uint32_t max_iterations, uint32_t number_of_edge_types) {
uint32_t walk_length = kDefaultWalkLength,
uint32_t number_of_walks = kDefaultNumberOfWalks,
double backward_probability = kDefaultBackwardProbability,
double forward_probability = kDefaultBackwardProbability,
uint32_t max_iterations = kDefaultMaxIterations,
uint32_t number_of_edge_types = kDefaultNumberOfEdgeTypes) {
return {
kCPU,
kNode2Vec,
Expand All @@ -96,11 +133,10 @@ class RandomWalksPlan : Plan {
}
};

/// Compute the random-walks for pg. The pg is expected to be
/// symmetric.
/// The parameters can be specified, but have reasonable defaults. Not all parameters
/// are used by the algorithms.
/// The generated random-walks generated are return in Katana::InsertBag.
/// Compute the random-walks for pg. The pg is expected to be symmetric. The
/// parameters can be specified, but have reasonable defaults. Not all
/// parameters are used by the algorithms. The generated random-walks generated
/// are returned as a vector of vectors.
KATANA_EXPORT Result<std::vector<std::vector<uint32_t>>> RandomWalks(
PropertyGraph* pg, RandomWalksPlan plan = RandomWalksPlan());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ class SubGraphExtractionPlan : public Plan {

Algorithm algorithm() const { return algorithm_; }

// TODO(amp): This algorithm defines the semantics of the call. If there were
// an algorithm that, for instance, took a list of edges, that would need to
// be a different function, not just a different plan, since it takes
// semantically different arguments. I do think this should have a plan, even
// if there is only one concrete algorithm, but it should be defined and
// documented in terms of the concrete algorithm, not the semantics of the
// function (which is described well below).

/**
* The node-set algorithm:
* Given a set of node ids, this algorithm constructs a new sub-graph
Expand All @@ -41,12 +49,13 @@ class SubGraphExtractionPlan : public Plan {
* The new sub-graph is independent of the original graph.
*
* @param pg The graph to process.
* @param node_vec Set of node ids
* @param node_vec Set of node IDs
* @param plan
*/
KATANA_EXPORT katana::Result<std::unique_ptr<katana::PropertyGraph>>
SubGraphExtraction(
katana::PropertyGraph* pg, const std::vector<uint32_t>& node_vec,
katana::PropertyGraph* pg,
const std::vector<katana::PropertyGraph::Node>& node_vec,
SubGraphExtractionPlan plan = {});
// const std::vector<std::string>& node_properties_to_copy, const std::vector<std::string>& edge_properties_to_copy);

Expand Down
49 changes: 24 additions & 25 deletions libgalois/src/analytics/random_walks/random_walks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,8 @@ struct Node2VecAlgo {
return graph.num_nodes();
}
double total_wt = degree[n];
prob = prob * total_wt;

uint32_t edge_index = std::floor(prob);
uint32_t edge_index = std::floor(prob * total_wt);
auto edge = graph.edge_begin(n) + edge_index;
return *graph.GetEdgeDest(edge);
}
Expand Down Expand Up @@ -79,7 +78,7 @@ struct Node2VecAlgo {
uint64_t total_walks = graph.size() * plan_.number_of_walks();

katana::do_all(
katana::iterate((uint64_t)0, total_walks),
katana::iterate(0UL, total_walks),
[&](uint64_t idx) {
GNode n = idx % graph.size();

Expand Down Expand Up @@ -144,7 +143,7 @@ struct Node2VecAlgo {
alpha = prob_forward;
}

if (alpha >= y) {
if (y <= alpha) {
//accept y
walk.push_back(std::move(nbr));
break;
Expand All @@ -153,7 +152,7 @@ struct Node2VecAlgo {
}
}

(*walks).push(std::move(walk));
walks->push(std::move(walk));
},
katana::steal(), katana::chunk_size<RandomWalksPlan::kChunkSize>(),
katana::loopname("Node2vec walks"), katana::no_stats());
Expand Down Expand Up @@ -234,7 +233,7 @@ struct Edge2VecAlgo {
uint64_t total_walks = graph.size() * plan_.number_of_walks();

katana::do_all(
katana::iterate((uint64_t)0, total_walks),
katana::iterate(0UL, total_walks),
[&](uint64_t idx) {
GNode n = idx % graph.size();

Expand Down Expand Up @@ -338,8 +337,8 @@ struct Edge2VecAlgo {
num_edge_types[type]++;
}

(*per_thread_num_edge_types_walks.getLocal())
.emplace_back(std::move(num_edge_types));
per_thread_num_edge_types_walks.getLocal()->emplace_back(
std::move(num_edge_types));
});

for (unsigned j = 0; j < katana::getActiveThreads(); ++j) {
Expand All @@ -359,8 +358,7 @@ struct Edge2VecAlgo {
transformed_num_edge_types_walks.resize(plan_.number_of_edge_types() + 1);

katana::do_all(
katana::iterate((uint32_t)0, plan_.number_of_edge_types() + 1),
[&](uint32_t j) {
katana::iterate(0U, plan_.number_of_edge_types() + 1), [&](uint32_t j) {
for (uint32_t i = 0; i < rows; i++) {
transformed_num_edge_types_walks[j].push_back(
num_edge_types_walks[i][j]);
Expand Down Expand Up @@ -395,8 +393,8 @@ struct Edge2VecAlgo {
const std::vector<std::vector<uint32_t>>&
transformed_num_edge_types_walks,
const std::vector<double>& means) {
std::vector<uint32_t> x = transformed_num_edge_types_walks[i];
std::vector<uint32_t> y = transformed_num_edge_types_walks[j];
const std::vector<uint32_t>& x = transformed_num_edge_types_walks[i];
const std::vector<uint32_t>& y = transformed_num_edge_types_walks[j];

double sum = 0.0;
double sig1 = 0.0;
Expand Down Expand Up @@ -425,8 +423,7 @@ struct Edge2VecAlgo {
transformed_num_edge_types_walks,
const std::vector<double>& means) {
katana::do_all(
katana::iterate((uint32_t)1, plan_.number_of_edge_types() + 1),
[&](uint32_t i) {
katana::iterate(1U, plan_.number_of_edge_types() + 1), [&](uint32_t i) {
for (uint32_t j = 1; j <= plan_.number_of_edge_types(); j++) {
double pearson_corr =
pearsonCorr(i, j, transformed_num_edge_types_walks, means);
Expand Down Expand Up @@ -468,12 +465,11 @@ struct Edge2VecAlgo {
template <typename Graph>
void
InitializeDegrees(const Graph& graph, katana::LargeArray<uint64_t>* degree) {
katana::do_all(
katana::iterate(graph),
[&](typename Graph::Node n) {
(*degree)[n] = std::distance(graph.edge_begin(n), graph.edge_end(n));
},
katana::steal());
katana::do_all(katana::iterate(graph), [&](typename Graph::Node n) {
// Treat this as O(1) time because subtracting iterators is just pointer
// or number subtraction. So don't use steal().
(*degree)[n] = graph.edges(n).size();
});
}

} //namespace
Expand All @@ -485,6 +481,12 @@ RandomWalksWithWrap(katana::PropertyGraph* pg, RandomWalksPlan plan) {
return res.error();
}

// TODO(amp): This is incorrect. For Node2vec this needs to be:
// Algorithm::Graph::Make(pg, {}, {}) // Ignoring all properties.
// For Edge2vec this needs to be:
// Algorithm::Graph::Make(pg, {}, {edge_type_property_name})
// The current version requires the input to have exactly the properties
// expected by the algorithm implementation.
auto pg_result = Algorithm::Graph::Make(pg);
if (!pg_result) {
return pg_result.error();
Expand All @@ -510,11 +512,8 @@ RandomWalksWithWrap(katana::PropertyGraph* pg, RandomWalksPlan plan) {
degree.deallocate();

std::vector<std::vector<uint32_t>> walks_in_vector;
walks_in_vector.reserve(pg->num_nodes() * plan.number_of_walks());
// Copy to vector of vectors.
for (auto walk : walks) {
walks_in_vector.push_back(walk);
}
walks_in_vector.reserve(plan.number_of_walks());
std::move(walks.begin(), walks.end(), std::back_inserter(walks_in_vector));
return walks_in_vector;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ SubGraphNodeSet(
katana::PropertyGraph* graph, const std::vector<uint32_t>& node_set) {
auto subgraph = std::make_unique<katana::PropertyGraph>();
if (node_set.empty()) {
return std::unique_ptr<katana::PropertyGraph>(std::move(subgraph));
return subgraph;
}

uint64_t num_nodes = node_set.size();
Expand Down Expand Up @@ -106,7 +106,7 @@ SubGraphNodeSet(
return r.error();
}

return std::unique_ptr<katana::PropertyGraph>(std::move(subgraph));
return subgraph;
}
} // namespace

Expand Down

0 comments on commit e62ff06

Please sign in to comment.