Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up libgalois algorithms and add TODOs for notable issues. #171

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class LocalClusteringCoefficientPlan : public Plan {
kDefaultRelabeling} {}

Algorithm algorithm() const { return algorithm_; }
// TODO(amp): These parameters should be documented.
Relabeling relabeling() const { return relabeling_; }
bool edges_sorted() const { return edges_sorted_; }

Expand All @@ -70,6 +71,7 @@ class LocalClusteringCoefficientPlan : public Plan {
}
};

// TODO(amp): The doc string was not updated.
/**
* Count the total number of triangles in the graph. The graph must be
* symmetric!
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ namespace katana::analytics {
/// parameters associated with it.
class LouvainClusteringPlan : public Plan {
public:
/// Algorithm selectors for Single-Source Shortest Path
enum Algorithm {
kDoAll,
};
Expand Down Expand Up @@ -53,9 +52,17 @@ class LouvainClusteringPlan : public Plan {

public:
LouvainClusteringPlan()
: LouvainClusteringPlan{kCPU, kDoAll, false, 0.01, 0.01, 10, 100} {}
: LouvainClusteringPlan{
kCPU,
kDoAll,
kEnableVF,
kModularityThresholdPerRound,
kModularityThresholdTotal,
kMaxIterations,
kMinGraphSize} {}

Algorithm algorithm() const { return algorithm_; }
// TODO(amp): These parameters should be documented.
bool is_enable_vf() const { return enable_vf_; }
double modularity_threshold_per_round() const {
return modularity_threshold_per_round_;
Expand Down Expand Up @@ -86,7 +93,7 @@ class LouvainClusteringPlan : public Plan {
/// Compute the Louvain Clustering for pg.
/// The edge weights are taken from the property named
/// edge_weight_property_name (which may be a 32- or 64-bit sign or unsigned
/// int), and the computed cluster ids are stored in the property named
/// int), and the computed cluster IDs are stored in the property named
/// output_property_name (as uint32_t).
/// The property named output_property_name is created by this function and may
/// not exist before the call.
Expand Down
66 changes: 51 additions & 15 deletions libgalois/include/katana/analytics/random_walks/random_walks.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,19 @@ namespace katana::analytics {

/// A computational plan to for random walks, specifying the algorithm and any
/// parameters associated with it.
class RandomWalksPlan : Plan {
enum Algo { node2vec, edge2vec };

class RandomWalksPlan : public Plan {
public:
/// Algorithm selectors for Connected-components
enum Algorithm { kNode2Vec, kEdge2Vec };

static const Algorithm kDefaultAlgorithm = kNode2Vec;
static const uint32_t kDefaultWalkLength = 1;
static const uint32_t kDefaultNumberOfWalks = 1;
constexpr static const double kDefaultBackwardProbability = 1.0;
constexpr static const double kDefaultForwardProbability = 1.0;
static const uint32_t kDefaultMaxIterations = 10;
static const uint32_t kDefaultNumberOfEdgeTypes = 1;

// Don't allow people to directly construct these, so as to have only one
// consistent way to configure.
private:
Expand Down Expand Up @@ -50,23 +56,51 @@ class RandomWalksPlan : Plan {
number_of_edge_types_(number_of_edge_types) {}

public:
// kChunkSize is a fixed const int (default value: 1)
// kChunkSize is fixed at 1
static const int kChunkSize;

RandomWalksPlan() : RandomWalksPlan{kCPU, kNode2Vec, 1, 1, 1.0, 1.0, 10, 1} {}
RandomWalksPlan()
: RandomWalksPlan{
kCPU,
kDefaultAlgorithm,
kDefaultWalkLength,
kDefaultNumberOfWalks,
kDefaultBackwardProbability,
kDefaultForwardProbability,
kDefaultMaxIterations,
kDefaultNumberOfEdgeTypes} {}

Algorithm algorithm() const { return algorithm_; }

// TODO(amp): The parameters walk_length, number_of_walks,
// backward_probability, and forward_probability control the expected output,
// not the algorithm used to compute the output. So they need to be parameters
// on the algorithm function, not in the plan. The plan should be parameters
// which do not change the expected output (though they may cause selecting a
// different correct output).

/// Length of random walks.
uint32_t walk_length() const { return walk_length_; }

/// Number of walks per node.
uint32_t number_of_walks() const { return number_of_walks_; }

/// Probability of moving back to parent.
double backward_probability() const { return backward_probability_; }

/// Probability of moving forward (2-hops).
double forward_probability() const { return forward_probability_; }

uint32_t max_iterations() const { return max_iterations_; }

uint32_t number_of_edge_types() const { return number_of_edge_types_; }

/// Node2Vec algorithm to generate random walks on the graph
static RandomWalksPlan Node2Vec(
uint32_t walk_length, uint32_t number_of_walks,
double backward_probability, double forward_probability) {
uint32_t walk_length = kDefaultWalkLength,
uint32_t number_of_walks = kDefaultNumberOfWalks,
double backward_probability = kDefaultBackwardProbability,
double forward_probability = kDefaultBackwardProbability) {
return {
kCPU,
kNode2Vec,
Expand All @@ -81,9 +115,12 @@ class RandomWalksPlan : Plan {
/// Edge2Vec algorithm to generate random walks on the graph.
/// Takes the heterogeneity of the edges into account
static RandomWalksPlan Edge2Vec(
uint32_t walk_length, uint32_t number_of_walks,
double backward_probability, double forward_probability,
uint32_t max_iterations, uint32_t number_of_edge_types) {
uint32_t walk_length = kDefaultWalkLength,
uint32_t number_of_walks = kDefaultNumberOfWalks,
double backward_probability = kDefaultBackwardProbability,
double forward_probability = kDefaultBackwardProbability,
uint32_t max_iterations = kDefaultMaxIterations,
uint32_t number_of_edge_types = kDefaultNumberOfEdgeTypes) {
return {
kCPU,
kNode2Vec,
Expand All @@ -96,11 +133,10 @@ class RandomWalksPlan : Plan {
}
};

/// Compute the random-walks for pg. The pg is expected to be
/// symmetric.
/// The parameters can be specified, but have reasonable defaults. Not all parameters
/// are used by the algorithms.
/// The generated random-walks generated are return in Katana::InsertBag.
/// Compute the random-walks for pg. The pg is expected to be symmetric. The
/// parameters can be specified, but have reasonable defaults. Not all
/// parameters are used by the algorithms. The generated random-walks generated
/// are returned as a vector of vectors.
KATANA_EXPORT Result<std::vector<std::vector<uint32_t>>> RandomWalks(
PropertyGraph* pg, RandomWalksPlan plan = RandomWalksPlan());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ class SubGraphExtractionPlan : public Plan {

Algorithm algorithm() const { return algorithm_; }

// TODO(amp): This algorithm defines the semantics of the call. If there were
// an algorithm that, for instance, took a list of edges, that would need to
// be a different function, not just a different plan, since it takes
// semantically different arguments. I do think this should have a plan, even
// if there is only one concrete algorithm, but it should be defined and
// documented in terms of the concrete algorithm, not the semantics of the
// function (which is described well below).

/**
* The node-set algorithm:
* Given a set of node ids, this algorithm constructs a new sub-graph
Expand All @@ -41,12 +49,13 @@ class SubGraphExtractionPlan : public Plan {
* The new sub-graph is independent of the original graph.
*
* @param pg The graph to process.
* @param node_vec Set of node ids
* @param node_vec Set of node IDs
* @param plan
*/
KATANA_EXPORT katana::Result<std::unique_ptr<katana::PropertyGraph>>
SubGraphExtraction(
katana::PropertyGraph* pg, const std::vector<uint32_t>& node_vec,
katana::PropertyGraph* pg,
const std::vector<katana::PropertyGraph::Node>& node_vec,
SubGraphExtractionPlan plan = {});
// const std::vector<std::string>& node_properties_to_copy, const std::vector<std::string>& edge_properties_to_copy);

Expand Down
47 changes: 24 additions & 23 deletions libgalois/src/analytics/random_walks/random_walks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,8 @@ struct Node2VecAlgo {
return graph.num_nodes();
}
double total_wt = degree[n];
prob = prob * total_wt;

uint32_t edge_index = std::floor(prob);
uint32_t edge_index = std::floor(prob * total_wt);
auto edge = graph.edge_begin(n) + edge_index;
return *graph.GetEdgeDest(edge);
}
Expand Down Expand Up @@ -79,7 +78,7 @@ struct Node2VecAlgo {
uint64_t total_walks = graph.size() * plan_.number_of_walks();

katana::do_all(
katana::iterate((uint64_t)0, total_walks),
katana::iterate(uint64_t(0), total_walks),
[&](uint64_t idx) {
GNode n = idx % graph.size();

Expand Down Expand Up @@ -144,7 +143,7 @@ struct Node2VecAlgo {
alpha = prob_forward;
}

if (alpha >= y) {
if (y <= alpha) {
//accept y
walk.push_back(std::move(nbr));
break;
Expand All @@ -153,7 +152,7 @@ struct Node2VecAlgo {
}
}

(*walks).push(std::move(walk));
walks->push(std::move(walk));
},
katana::steal(), katana::chunk_size<RandomWalksPlan::kChunkSize>(),
katana::loopname("Node2vec walks"), katana::no_stats());
Expand Down Expand Up @@ -234,7 +233,7 @@ struct Edge2VecAlgo {
uint64_t total_walks = graph.size() * plan_.number_of_walks();

katana::do_all(
katana::iterate((uint64_t)0, total_walks),
katana::iterate(uint64_t(0), total_walks),
[&](uint64_t idx) {
GNode n = idx % graph.size();

Expand Down Expand Up @@ -338,8 +337,8 @@ struct Edge2VecAlgo {
num_edge_types[type]++;
}

(*per_thread_num_edge_types_walks.getLocal())
.emplace_back(std::move(num_edge_types));
per_thread_num_edge_types_walks.getLocal()->emplace_back(
std::move(num_edge_types));
});

for (unsigned j = 0; j < katana::getActiveThreads(); ++j) {
Expand All @@ -359,7 +358,7 @@ struct Edge2VecAlgo {
transformed_num_edge_types_walks.resize(plan_.number_of_edge_types() + 1);

katana::do_all(
katana::iterate((uint32_t)0, plan_.number_of_edge_types() + 1),
katana::iterate(uint32_t(0), plan_.number_of_edge_types() + 1),
[&](uint32_t j) {
for (uint32_t i = 0; i < rows; i++) {
transformed_num_edge_types_walks[j].push_back(
Expand Down Expand Up @@ -395,8 +394,8 @@ struct Edge2VecAlgo {
const std::vector<std::vector<uint32_t>>&
transformed_num_edge_types_walks,
const std::vector<double>& means) {
std::vector<uint32_t> x = transformed_num_edge_types_walks[i];
std::vector<uint32_t> y = transformed_num_edge_types_walks[j];
const std::vector<uint32_t>& x = transformed_num_edge_types_walks[i];
const std::vector<uint32_t>& y = transformed_num_edge_types_walks[j];

double sum = 0.0;
double sig1 = 0.0;
Expand Down Expand Up @@ -425,7 +424,7 @@ struct Edge2VecAlgo {
transformed_num_edge_types_walks,
const std::vector<double>& means) {
katana::do_all(
katana::iterate((uint32_t)1, plan_.number_of_edge_types() + 1),
katana::iterate(uint32_t(1), plan_.number_of_edge_types() + 1),
[&](uint32_t i) {
for (uint32_t j = 1; j <= plan_.number_of_edge_types(); j++) {
double pearson_corr =
Expand Down Expand Up @@ -468,12 +467,11 @@ struct Edge2VecAlgo {
template <typename Graph>
void
InitializeDegrees(const Graph& graph, katana::LargeArray<uint64_t>* degree) {
katana::do_all(
katana::iterate(graph),
[&](typename Graph::Node n) {
(*degree)[n] = std::distance(graph.edge_begin(n), graph.edge_end(n));
},
katana::steal());
katana::do_all(katana::iterate(graph), [&](typename Graph::Node n) {
// Treat this as O(1) time because subtracting iterators is just pointer
// or number subtraction. So don't use steal().
(*degree)[n] = graph.edges(n).size();
});
}

} //namespace
Expand All @@ -485,6 +483,12 @@ RandomWalksWithWrap(katana::PropertyGraph* pg, RandomWalksPlan plan) {
return res.error();
}

// TODO(amp): This is incorrect. For Node2vec this needs to be:
// Algorithm::Graph::Make(pg, {}, {}) // Ignoring all properties.
// For Edge2vec this needs to be:
// Algorithm::Graph::Make(pg, {}, {edge_type_property_name})
// The current version requires the input to have exactly the properties
// expected by the algorithm implementation.
Comment on lines +486 to +491
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This issue needs to be addressed to wrap RandomWalks. The algorithm will currently not work.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. We need to separate them out into different applications.

auto pg_result = Algorithm::Graph::Make(pg);
if (!pg_result) {
return pg_result.error();
Expand All @@ -510,11 +514,8 @@ RandomWalksWithWrap(katana::PropertyGraph* pg, RandomWalksPlan plan) {
degree.deallocate();

std::vector<std::vector<uint32_t>> walks_in_vector;
walks_in_vector.reserve(pg->num_nodes() * plan.number_of_walks());
// Copy to vector of vectors.
for (auto walk : walks) {
walks_in_vector.push_back(walk);
}
walks_in_vector.reserve(plan.number_of_walks());
std::move(walks.begin(), walks.end(), std::back_inserter(walks_in_vector));
return walks_in_vector;
}

Expand Down