-
Notifications
You must be signed in to change notification settings - Fork 65
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
local_clustering_coefficient, louvain_clustering, subgraph_extraction XXX: This commit adds failing tests. I think the algorithms are wrong.
- Loading branch information
Showing
8 changed files
with
448 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
119 changes: 119 additions & 0 deletions
119
python/katana/analytics/_local_clustering_coefficient.pyx
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
from katana.cpp.libstd.iostream cimport ostream, ostringstream | ||
from katana.cpp.libgalois.graphs.Graph cimport _PropertyGraph | ||
from katana.cpp.libsupport.result cimport handle_result_void, handle_result_assert, raise_error_code, Result | ||
from katana.analytics.plan cimport Plan, _Plan | ||
from katana.property_graph cimport PropertyGraph | ||
|
||
from libcpp.string cimport string | ||
from libcpp cimport bool | ||
|
||
from enum import Enum | ||
|
||
# TODO(amp): Module needs documenting. | ||
|
||
|
||
cdef extern from "katana/analytics/local_clustering_coefficient/local_clustering_coefficient.h" namespace "katana::analytics" nogil:
    # Cython view of the C++ plan class; it derives from _Plan.
    cppclass _LocalClusteringCoefficientPlan "katana::analytics::LocalClusteringCoefficientPlan" (_Plan):
        # Algorithm selector values, bound by their fully qualified C++ names.
        enum Algorithm:
            kOrderedCountAtomics "katana::analytics::LocalClusteringCoefficientPlan::kOrderedCountAtomics"
            kOrderedCountPerThread "katana::analytics::LocalClusteringCoefficientPlan::kOrderedCountPerThread"

        # Relabeling selector values. The Python layer in this module maps
        # kRelabel/kNoRelabel/kAutoRelabel to True/False/None.
        enum Relabeling:
            kRelabel "katana::analytics::LocalClusteringCoefficientPlan::kRelabel"
            kNoRelabel "katana::analytics::LocalClusteringCoefficientPlan::kNoRelabel"
            kAutoRelabel "katana::analytics::LocalClusteringCoefficientPlan::kAutoRelabel"

        # Const accessors for the settings stored in a plan.
        _LocalClusteringCoefficientPlan.Algorithm algorithm() const
        _LocalClusteringCoefficientPlan.Relabeling relabeling() const
        bool edges_sorted() const

        # LocalClusteringCoefficientPlan()

        # Static factories, one per Algorithm value.
        @staticmethod
        _LocalClusteringCoefficientPlan OrderedCountAtomics(bool edges_sorted, _LocalClusteringCoefficientPlan.Relabeling relabeling)

        @staticmethod
        _LocalClusteringCoefficientPlan OrderedCountPerThread(bool edges_sorted, _LocalClusteringCoefficientPlan.Relabeling relabeling)

    # Default settings published by the C++ class; exposed here as
    # module-level names so they can be used as Python default arguments.
    _LocalClusteringCoefficientPlan.Relabeling kDefaultRelabeling "katana::analytics::LocalClusteringCoefficientPlan::kDefaultRelabeling"
    bool kDefaultEdgesSorted "katana::analytics::LocalClusteringCoefficientPlan::kDefaultEdgesSorted"

    # Algorithm entry point; success or failure is reported through Result[void].
    Result[void] LocalClusteringCoefficient(
        _PropertyGraph* pfg,
        const string& output_property_name,
        _LocalClusteringCoefficientPlan plan
    )
|
||
|
||
class _LocalClusteringCoefficientPlanAlgorithm(Enum):
    """Python enum mirroring the C++ ``LocalClusteringCoefficientPlan::Algorithm`` values."""

    OrderedCountAtomics = _LocalClusteringCoefficientPlan.Algorithm.kOrderedCountAtomics
    OrderedCountPerThread = _LocalClusteringCoefficientPlan.Algorithm.kOrderedCountPerThread
|
||
|
||
cdef _relabeling_to_python(v):
    # Map a C++ Relabeling value onto the Python-facing tri-state:
    # kRelabel -> True, kNoRelabel -> False, any other value -> None.
    if v == _LocalClusteringCoefficientPlan.Relabeling.kRelabel:
        return True
    if v == _LocalClusteringCoefficientPlan.Relabeling.kNoRelabel:
        return False
    return None
|
||
|
||
cdef _relabeling_from_python(v):
    # Inverse of the Python-facing tri-state: None selects kAutoRelabel;
    # otherwise the truthiness of ``v`` picks kRelabel or kNoRelabel.
    if v is None:
        return _LocalClusteringCoefficientPlan.Relabeling.kAutoRelabel
    return (
        _LocalClusteringCoefficientPlan.Relabeling.kRelabel
        if v
        else _LocalClusteringCoefficientPlan.Relabeling.kNoRelabel
    )
|
||
|
||
cdef class LocalClusteringCoefficientPlan(Plan):
    """
    A computational plan for :py:func:`local_clustering_coefficient`.

    Wraps a C++ ``katana::analytics::LocalClusteringCoefficientPlan`` by value.
    Build instances with the static factory methods
    :py:meth:`ordered_count_atomics` and :py:meth:`ordered_count_per_thread`;
    calling the class with no arguments wraps a default-constructed C++ plan.
    """
    cdef:
        _LocalClusteringCoefficientPlan underlying_

    cdef _Plan* underlying(self) except NULL:
        # Pointer into this object's own storage; valid only while self is alive.
        return &self.underlying_

    Algorithm = _LocalClusteringCoefficientPlanAlgorithm

    @staticmethod
    cdef LocalClusteringCoefficientPlan make(_LocalClusteringCoefficientPlan u):
        # Bypass __init__ and copy the C++ plan into a fresh wrapper.
        f = <LocalClusteringCoefficientPlan>LocalClusteringCoefficientPlan.__new__(LocalClusteringCoefficientPlan)
        f.underlying_ = u
        return f

    @property
    def algorithm(self) -> Algorithm:
        """The algorithm selected by this plan, as an :py:attr:`Algorithm` member."""
        return _LocalClusteringCoefficientPlanAlgorithm(self.underlying_.algorithm())

    @property
    def relabeling(self):
        """
        The relabeling setting, in the same form the factory methods accept:
        ``True`` (relabel), ``False`` (do not relabel) or ``None`` (automatic).
        """
        # Convert the raw C++ enum value. The factories interpret their
        # ``relabeling`` argument via ``is None`` / truthiness, so returning
        # the bare integer would not round-trip through them.
        return _relabeling_to_python(self.underlying_.relabeling())

    @property
    def edges_sorted(self) -> bool:
        """The ``edges_sorted`` flag stored in the underlying plan."""
        return self.underlying_.edges_sorted()

    @staticmethod
    def ordered_count_atomics(
            relabeling = _relabeling_to_python(kDefaultRelabeling),
            bool edges_sorted = kDefaultEdgesSorted
        ):
        """
        Create a plan using the ``OrderedCountAtomics`` algorithm.

        :param relabeling: ``True``, ``False`` or ``None`` (automatic).
        :param edges_sorted: forwarded to the C++ factory unchanged.
        """
        return LocalClusteringCoefficientPlan.make(_LocalClusteringCoefficientPlan.OrderedCountAtomics(
            edges_sorted, _relabeling_from_python(relabeling)))

    @staticmethod
    def ordered_count_per_thread(
            relabeling = _relabeling_to_python(kDefaultRelabeling),
            bool edges_sorted = kDefaultEdgesSorted
        ):
        """
        Create a plan using the ``OrderedCountPerThread`` algorithm.

        :param relabeling: ``True``, ``False`` or ``None`` (automatic).
        :param edges_sorted: forwarded to the C++ factory unchanged.
        """
        return LocalClusteringCoefficientPlan.make(_LocalClusteringCoefficientPlan.OrderedCountPerThread(
            edges_sorted, _relabeling_from_python(relabeling)))
|
||
|
||
def local_clustering_coefficient(PropertyGraph pg not None, str output_property_name, LocalClusteringCoefficientPlan plan not None = LocalClusteringCoefficientPlan()):
    """
    Run the C++ ``LocalClusteringCoefficient`` routine on ``pg``.

    :param pg: the graph to analyze; must not be ``None``.
    :param output_property_name: name passed to the C++ routine as the output
        property; encoded as UTF-8.
    :param plan: the plan to execute; must not be ``None``. Defaults to a
        default-constructed plan.
    :raises TypeError: if ``pg`` or ``plan`` is ``None``.

    Any error reported by the C++ routine is surfaced through
    ``handle_result_void``.
    """
    # ``not None`` matters here: the body reads C-level attributes of ``pg``
    # and ``plan`` directly, which Cython does not guard against None.
    cdef string output_property_name_str = bytes(output_property_name, "utf-8")
    with nogil:
        handle_result_void(LocalClusteringCoefficient(pg.underlying.get(), output_property_name_str, plan.underlying_))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,188 @@ | ||
from katana.cpp.libstd.iostream cimport ostream, ostringstream | ||
from katana.cpp.libgalois.graphs.Graph cimport _PropertyGraph | ||
from katana.cpp.libsupport.result cimport handle_result_void, handle_result_assert, raise_error_code, Result | ||
from katana.analytics.plan cimport Plan, _Plan | ||
from katana.property_graph cimport PropertyGraph | ||
|
||
from libc.stdint cimport uint32_t, uint64_t | ||
from libcpp.string cimport string | ||
from libcpp cimport bool | ||
|
||
from enum import Enum | ||
|
||
# TODO(amp): Module needs documenting. | ||
|
||
|
||
cdef extern from "katana/analytics/louvain_clustering/louvain_clustering.h" namespace "katana::analytics" nogil:
    # Cython view of the C++ plan class; it derives from _Plan.
    cppclass _LouvainClusteringPlan "katana::analytics::LouvainClusteringPlan" (_Plan):
        # Algorithm selector values, bound by their fully qualified C++ names.
        enum Algorithm:
            kDoAll "katana::analytics::LouvainClusteringPlan::kDoAll"

        # Const accessors for the settings stored in a plan.
        _LouvainClusteringPlan.Algorithm algorithm() const
        bool enable_vf() const
        double modularity_threshold_per_round() const
        double modularity_threshold_total() const
        uint32_t max_iterations() const
        uint32_t min_graph_size() const

        # LouvainClusteringPlan()

        # Static factory for the kDoAll algorithm.
        @staticmethod
        _LouvainClusteringPlan DoAll(
            bool enable_vf,
            double modularity_threshold_per_round,
            double modularity_threshold_total,
            uint32_t max_iterations,
            uint32_t min_graph_size
        )

    # Default settings published by the C++ class; exposed here as
    # module-level names so they can be used as Python default arguments.
    bool kDefaultEnableVF "katana::analytics::LouvainClusteringPlan::kDefaultEnableVF"
    double kDefaultModularityThresholdPerRound "katana::analytics::LouvainClusteringPlan::kDefaultModularityThresholdPerRound"
    double kDefaultModularityThresholdTotal "katana::analytics::LouvainClusteringPlan::kDefaultModularityThresholdTotal"
    uint32_t kDefaultMaxIterations "katana::analytics::LouvainClusteringPlan::kDefaultMaxIterations"
    uint32_t kDefaultMinGraphSize "katana::analytics::LouvainClusteringPlan::kDefaultMinGraphSize"

    # Algorithm entry point; success or failure is reported through Result[void].
    Result[void] LouvainClustering(
        _PropertyGraph* pfg,
        const string& edge_weight_property_name,
        const string& output_property_name,
        _LouvainClusteringPlan plan
    )

    # Validity check over a previously written output property.
    Result[void] LouvainClusteringAssertValid(
        _PropertyGraph* pfg,
        const string& edge_weight_property_name,
        const string& output_property_name
    )

    # Plain statistics record plus the static routine that fills it in.
    cppclass _LouvainClusteringStatistics "katana::analytics::LouvainClusteringStatistics":
        uint64_t n_clusters
        uint64_t n_non_trivial_clusters
        uint64_t largest_cluster_size
        double largest_cluster_proportion
        double modularity

        void Print(ostream os)

        @staticmethod
        Result[_LouvainClusteringStatistics] Compute(
            _PropertyGraph* pfg,
            const string& edge_weight_property_name,
            const string& output_property_name
        )
|
||
|
||
class _LouvainClusteringPlanAlgorithm(Enum):
    """Python enum mirroring the C++ ``LouvainClusteringPlan::Algorithm`` values."""

    DoAll = _LouvainClusteringPlan.Algorithm.kDoAll
|
||
|
||
cdef class LouvainClusteringPlan(Plan):
    """
    A computational plan for :py:func:`louvain_clustering`.

    Holds a C++ ``katana::analytics::LouvainClusteringPlan`` by value and
    exposes its settings as read-only properties. Use :py:meth:`do_all` to
    build a configured plan.
    """
    cdef:
        _LouvainClusteringPlan underlying_

    cdef _Plan* underlying(self) except NULL:
        # Address of the embedded C++ plan.
        return &self.underlying_

    Algorithm = _LouvainClusteringPlanAlgorithm

    @staticmethod
    cdef LouvainClusteringPlan make(_LouvainClusteringPlan u):
        # Allocate without running __init__, then copy the C++ plan in.
        cdef LouvainClusteringPlan wrapper = <LouvainClusteringPlan>LouvainClusteringPlan.__new__(LouvainClusteringPlan)
        wrapper.underlying_ = u
        return wrapper

    @property
    def algorithm(self) -> Algorithm:
        """The algorithm selected by this plan, as an :py:attr:`Algorithm` member."""
        return _LouvainClusteringPlanAlgorithm(self.underlying_.algorithm())

    @property
    def enable_vf(self) -> bool:
        """The ``enable_vf`` flag stored in the underlying plan."""
        return self.underlying_.enable_vf()

    @property
    def modularity_threshold_per_round(self) -> double:
        """The ``modularity_threshold_per_round`` value stored in the underlying plan."""
        return self.underlying_.modularity_threshold_per_round()

    @property
    def modularity_threshold_total(self) -> double:
        """The ``modularity_threshold_total`` value stored in the underlying plan."""
        return self.underlying_.modularity_threshold_total()

    @property
    def max_iterations(self) -> uint32_t:
        """The ``max_iterations`` value stored in the underlying plan."""
        return self.underlying_.max_iterations()

    @property
    def min_graph_size(self) -> uint32_t:
        """The ``min_graph_size`` value stored in the underlying plan."""
        return self.underlying_.min_graph_size()

    @staticmethod
    def do_all(
            bool enable_vf = kDefaultEnableVF,
            double modularity_threshold_per_round = kDefaultModularityThresholdPerRound,
            double modularity_threshold_total = kDefaultModularityThresholdTotal,
            uint32_t max_iterations = kDefaultMaxIterations,
            uint32_t min_graph_size = kDefaultMinGraphSize
        ) -> LouvainClusteringPlan:
        """
        Create a plan using the ``DoAll`` algorithm.

        Every argument is forwarded unchanged to the C++ ``DoAll`` factory and
        defaults to the corresponding ``kDefault*`` constant of the C++ class.
        """
        cdef _LouvainClusteringPlan cpp_plan = _LouvainClusteringPlan.DoAll(
            enable_vf,
            modularity_threshold_per_round,
            modularity_threshold_total,
            max_iterations,
            min_graph_size,
        )
        return LouvainClusteringPlan.make(cpp_plan)
|
||
|
||
def louvain_clustering(PropertyGraph pg not None, str edge_weight_property_name, str output_property_name, LouvainClusteringPlan plan not None = LouvainClusteringPlan()):
    """
    Run the C++ ``LouvainClustering`` routine on ``pg``.

    :param pg: the graph to analyze; must not be ``None``.
    :param edge_weight_property_name: name passed to the C++ routine as the
        edge weight property; encoded as UTF-8.
    :param output_property_name: name passed to the C++ routine as the output
        property; encoded as UTF-8.
    :param plan: the plan to execute; must not be ``None``. Defaults to a
        default-constructed plan.
    :raises TypeError: if ``pg`` or ``plan`` is ``None``.

    Any error reported by the C++ routine is surfaced through
    ``handle_result_void``.
    """
    # ``not None`` matters here: the body reads C-level attributes of ``pg``
    # and ``plan`` directly, which Cython does not guard against None.
    cdef string edge_weight_property_name_str = bytes(edge_weight_property_name, "utf-8")
    cdef string output_property_name_str = bytes(output_property_name, "utf-8")
    with nogil:
        handle_result_void(LouvainClustering(pg.underlying.get(), edge_weight_property_name_str, output_property_name_str, plan.underlying_))
|
||
|
||
def louvain_clustering_assert_valid(PropertyGraph pg not None, str edge_weight_property_name, str output_property_name):
    """
    Run the C++ ``LouvainClusteringAssertValid`` check on ``pg``.

    :param pg: the graph to check; must not be ``None``.
    :param edge_weight_property_name: name of the edge weight property;
        encoded as UTF-8.
    :param output_property_name: name of the property holding the clustering
        output to check; encoded as UTF-8.
    :raises TypeError: if ``pg`` is ``None``.

    A failed check is surfaced through ``handle_result_assert``.
    """
    # ``not None`` matters here: ``pg.underlying`` is a C-level attribute
    # access, which Cython does not guard against None.
    cdef string edge_weight_property_name_str = bytes(edge_weight_property_name, "utf-8")
    cdef string output_property_name_str = bytes(output_property_name, "utf-8")
    with nogil:
        handle_result_assert(LouvainClusteringAssertValid(
            pg.underlying.get(),
            edge_weight_property_name_str,
            output_property_name_str
        ))
|
||
|
||
cdef _LouvainClusteringStatistics handle_result_LouvainClusteringStatistics(Result[_LouvainClusteringStatistics] res) nogil except *:
    # Unwrap a Result holding Louvain statistics. When the Result carries no
    # value, its error is handed to raise_error_code, which needs the GIL,
    # so the GIL is re-acquired only on that path.
    cdef bint failed = not res.has_value()
    if failed:
        with gil:
            raise_error_code(res.error())
    return res.value()
|
||
|
||
cdef class LouvainClusteringStatistics:
    """
    Statistics about a Louvain clustering stored on a graph.

    The values are computed once, at construction, by the C++
    ``LouvainClusteringStatistics::Compute`` routine and then exposed as
    read-only properties.
    """
    cdef _LouvainClusteringStatistics underlying

    def __init__(self, PropertyGraph pg not None,
                 str edge_weight_property_name,
                 str output_property_name
                 ):
        """
        Compute the statistics for ``pg``.

        :param pg: the graph to inspect; must not be ``None``.
        :param edge_weight_property_name: name of the edge weight property;
            encoded as UTF-8.
        :param output_property_name: name of the property holding the
            clustering output; encoded as UTF-8.
        :raises TypeError: if ``pg`` is ``None``.

        An error reported by the C++ routine is raised via ``raise_error_code``.
        """
        # ``not None`` matters here: ``pg.underlying`` is a C-level attribute
        # access, which Cython does not guard against None.
        cdef string edge_weight_property_name_str = bytes(edge_weight_property_name, "utf-8")
        cdef string output_property_name_str = bytes(output_property_name, "utf-8")
        with nogil:
            self.underlying = handle_result_LouvainClusteringStatistics(_LouvainClusteringStatistics.Compute(
                pg.underlying.get(),
                edge_weight_property_name_str,
                output_property_name_str
            ))

    @property
    def n_clusters(self) -> uint64_t:
        """The ``n_clusters`` field of the computed statistics."""
        return self.underlying.n_clusters

    @property
    def n_non_trivial_clusters(self) -> uint64_t:
        """The ``n_non_trivial_clusters`` field of the computed statistics."""
        return self.underlying.n_non_trivial_clusters

    @property
    def largest_cluster_size(self) -> uint64_t:
        """The ``largest_cluster_size`` field of the computed statistics."""
        return self.underlying.largest_cluster_size

    @property
    def largest_cluster_proportion(self) -> double:
        """The ``largest_cluster_proportion`` field of the computed statistics."""
        return self.underlying.largest_cluster_proportion

    @property
    def modularity(self) -> double:
        """The ``modularity`` field of the computed statistics."""
        return self.underlying.modularity

    def __str__(self) -> str:
        """Return the text written by the C++ ``Print`` method, decoded as ASCII."""
        cdef ostringstream ss
        self.underlying.Print(ss)
        return str(ss.str(), "ascii")
Oops, something went wrong.