Skip to content

Commit

Permalink
Wrap algorithms for Python.
Browse files Browse the repository at this point in the history
local_clustering_coefficient, louvain_clustering,
subgraph_extraction

XXX: This commit adds failing tests. I think the algorithms are
wrong.
  • Loading branch information
arthurp committed Apr 22, 2021
1 parent 81b8738 commit f6a9c07
Show file tree
Hide file tree
Showing 8 changed files with 448 additions and 1 deletion.
14 changes: 14 additions & 0 deletions python/katana/analytics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@
.. automodule:: katana.analytics._independent_set
.. automodule:: katana.analytics._louvain_clustering
.. automodule:: katana.analytics._local_clustering_coefficient
.. automodule:: katana.analytics._subgraph_extraction
.. automodule:: katana.analytics._jaccard
.. automodule:: katana.analytics._k_core
Expand Down Expand Up @@ -67,6 +73,14 @@
IndependentSetStatistics,
)
from katana.analytics._jaccard import jaccard, jaccard_assert_valid, JaccardPlan, JaccardStatistics
from katana.analytics._louvain_clustering import (
louvain_clustering,
louvain_clustering_assert_valid,
LouvainClusteringPlan,
LouvainClusteringStatistics,
)
from katana.analytics._local_clustering_coefficient import local_clustering_coefficient, LocalClusteringCoefficientPlan
from katana.analytics._subgraph_extraction import subgraph_extraction, SubGraphExtractionPlan
from katana.analytics._k_core import k_core, k_core_assert_valid, KCorePlan, KCoreStatistics
from katana.analytics._k_truss import k_truss, k_truss_assert_valid, KTrussPlan, KTrussStatistics
from katana.analytics._pagerank import pagerank, pagerank_assert_valid, PagerankPlan, PagerankStatistics
Expand Down
119 changes: 119 additions & 0 deletions python/katana/analytics/_local_clustering_coefficient.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
from katana.cpp.libstd.iostream cimport ostream, ostringstream
from katana.cpp.libgalois.graphs.Graph cimport _PropertyGraph
from katana.cpp.libsupport.result cimport handle_result_void, handle_result_assert, raise_error_code, Result
from katana.analytics.plan cimport Plan, _Plan
from katana.property_graph cimport PropertyGraph

from libcpp.string cimport string
from libcpp cimport bool

from enum import Enum

# TODO(amp): Module needs documenting.


# C++ declarations pulled in from the local clustering coefficient header.
# The whole block is declared ``nogil`` so the wrappers in this module can call
# these entry points after releasing the GIL.
cdef extern from "katana/analytics/local_clustering_coefficient/local_clustering_coefficient.h" namespace "katana::analytics" nogil:
    cppclass _LocalClusteringCoefficientPlan "katana::analytics::LocalClusteringCoefficientPlan" (_Plan):
        # Algorithm variants exposed by the C++ plan.
        enum Algorithm:
            kOrderedCountAtomics "katana::analytics::LocalClusteringCoefficientPlan::kOrderedCountAtomics"
            kOrderedCountPerThread "katana::analytics::LocalClusteringCoefficientPlan::kOrderedCountPerThread"

        # Tri-state relabeling setting (relabel / do not relabel / automatic).
        enum Relabeling:
            kRelabel "katana::analytics::LocalClusteringCoefficientPlan::kRelabel"
            kNoRelabel "katana::analytics::LocalClusteringCoefficientPlan::kNoRelabel"
            kAutoRelabel "katana::analytics::LocalClusteringCoefficientPlan::kAutoRelabel"

        # Read-only accessors for the plan's configuration.
        _LocalClusteringCoefficientPlan.Algorithm algorithm() const
        _LocalClusteringCoefficientPlan.Relabeling relabeling() const
        bool edges_sorted() const

        # LocalClusteringCoefficientPlan()

        # Static factories, one per algorithm variant.
        @staticmethod
        _LocalClusteringCoefficientPlan OrderedCountAtomics(bool edges_sorted, _LocalClusteringCoefficientPlan.Relabeling relabeling)

        @staticmethod
        _LocalClusteringCoefficientPlan OrderedCountPerThread(bool edges_sorted, _LocalClusteringCoefficientPlan.Relabeling relabeling)

    # Default parameter values published by the C++ plan class.
    _LocalClusteringCoefficientPlan.Relabeling kDefaultRelabeling "katana::analytics::LocalClusteringCoefficientPlan::kDefaultRelabeling"
    bool kDefaultEdgesSorted "katana::analytics::LocalClusteringCoefficientPlan::kDefaultEdgesSorted"

    # Algorithm entry point; the outcome is reported through Result[void].
    Result[void] LocalClusteringCoefficient(_PropertyGraph* pfg, const string& output_property_name, _LocalClusteringCoefficientPlan plan)


class _LocalClusteringCoefficientPlanAlgorithm(Enum):
    """
    Python-level mirror of the C++ ``LocalClusteringCoefficientPlan::Algorithm`` enum.

    Each member's value is the corresponding C++ enumerator, so a value read
    from the C++ plan can be turned back into a member by calling the class.
    """

    # One member per enumerator declared in the extern block.
    OrderedCountAtomics = _LocalClusteringCoefficientPlan.Algorithm.kOrderedCountAtomics
    OrderedCountPerThread = _LocalClusteringCoefficientPlan.Algorithm.kOrderedCountPerThread


cdef _relabeling_to_python(v):
    # Translate a C++ Relabeling enumerator into the Python tri-state:
    #   kRelabel -> True, kNoRelabel -> False, anything else -> None.
    if v == _LocalClusteringCoefficientPlan.Relabeling.kRelabel:
        return True
    if v == _LocalClusteringCoefficientPlan.Relabeling.kNoRelabel:
        return False
    # Every remaining value (kAutoRelabel in particular) is reported as None.
    return None


cdef _relabeling_from_python(v):
    # Translate the Python tri-state into a C++ Relabeling enumerator:
    #   None -> kAutoRelabel, truthy -> kRelabel, falsy -> kNoRelabel.
    # None must be tested first because it is itself falsy.
    if v is None:
        return _LocalClusteringCoefficientPlan.Relabeling.kAutoRelabel
    if v:
        return _LocalClusteringCoefficientPlan.Relabeling.kRelabel
    return _LocalClusteringCoefficientPlan.Relabeling.kNoRelabel


cdef class LocalClusteringCoefficientPlan(Plan):
    """
    Python wrapper around the C++ ``katana::analytics::LocalClusteringCoefficientPlan``.

    Instances are normally created with :py:meth:`ordered_count_atomics` or
    :py:meth:`ordered_count_per_thread`. An instance created by calling the
    class directly holds a default-constructed C++ plan.

    Relabeling is exposed to Python as a tri-state: ``True`` (relabel),
    ``False`` (do not relabel) or ``None`` (let the algorithm decide).
    """
    cdef:
        # The wrapped C++ plan, stored by value.
        _LocalClusteringCoefficientPlan underlying_

    cdef _Plan* underlying(self) except NULL:
        # Expose the wrapped plan through the generic Plan interface.
        return &self.underlying_

    # Make the algorithm enum reachable as LocalClusteringCoefficientPlan.Algorithm.
    Algorithm = _LocalClusteringCoefficientPlanAlgorithm

    @staticmethod
    cdef LocalClusteringCoefficientPlan make(_LocalClusteringCoefficientPlan u):
        # __new__ is used instead of a normal call so that __init__ is bypassed
        # and the C++ plan can be copied straight into the new instance.
        f = <LocalClusteringCoefficientPlan>LocalClusteringCoefficientPlan.__new__(LocalClusteringCoefficientPlan)
        f.underlying_ = u
        return f

    @property
    def algorithm(self) -> Algorithm:
        """The algorithm variant selected by this plan."""
        return _LocalClusteringCoefficientPlanAlgorithm(self.underlying_.algorithm())

    @property
    def relabeling(self):
        """
        The relabeling setting as a Python tri-state.

        :return: ``True`` for relabel, ``False`` for no relabel, ``None`` for
            automatic. This is the same representation accepted by the
            ``relabeling`` argument of the factory methods, so the value can
            be passed back to them unchanged.
        """
        # Convert the raw C++ enumerator; returning it directly would leak an
        # integer whose truthiness does not match the tri-state the factories
        # expect.
        return _relabeling_to_python(self.underlying_.relabeling())

    @property
    def edges_sorted(self) -> bool:
        """The ``edges_sorted`` flag stored in the plan."""
        return self.underlying_.edges_sorted()

    @staticmethod
    def ordered_count_atomics(
            relabeling = _relabeling_to_python(kDefaultRelabeling),
            bool edges_sorted = kDefaultEdgesSorted
        ):
        """
        Build a plan using the C++ ``OrderedCountAtomics`` factory.

        :param relabeling: ``True``, ``False`` or ``None`` (automatic).
        :param edges_sorted: forwarded to the C++ factory unchanged.
        """
        return LocalClusteringCoefficientPlan.make(_LocalClusteringCoefficientPlan.OrderedCountAtomics(
            edges_sorted, _relabeling_from_python(relabeling)))

    @staticmethod
    def ordered_count_per_thread(
            relabeling = _relabeling_to_python(kDefaultRelabeling),
            bool edges_sorted = kDefaultEdgesSorted
        ):
        """
        Build a plan using the C++ ``OrderedCountPerThread`` factory.

        :param relabeling: ``True``, ``False`` or ``None`` (automatic).
        :param edges_sorted: forwarded to the C++ factory unchanged.
        """
        return LocalClusteringCoefficientPlan.make(_LocalClusteringCoefficientPlan.OrderedCountPerThread(
            edges_sorted, _relabeling_from_python(relabeling)))


def local_clustering_coefficient(PropertyGraph pg not None, str output_property_name, LocalClusteringCoefficientPlan plan not None = LocalClusteringCoefficientPlan()):
    """
    Run the C++ ``LocalClusteringCoefficient`` routine on ``pg``.

    :param pg: the graph to operate on; must not be ``None``.
    :param output_property_name: name handed to the C++ routine as the output
        property (encoded as UTF-8).
    :param plan: the execution plan; must not be ``None``. Defaults to a
        default-constructed plan.

    The result of the C++ call is passed to ``handle_result_void``
    (presumably raising a Python exception on failure -- confirm in
    ``katana.cpp.libsupport.result``).
    """
    # ``not None`` on the typed arguments matters: their C-level fields are
    # read below without the GIL, and doing that on None is undefined behaviour.
    cdef string output_property_name_str = bytes(output_property_name, "utf-8")
    with nogil:
        handle_result_void(LocalClusteringCoefficient(pg.underlying.get(), output_property_name_str, plan.underlying_))
188 changes: 188 additions & 0 deletions python/katana/analytics/_louvain_clustering.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
from katana.cpp.libstd.iostream cimport ostream, ostringstream
from katana.cpp.libgalois.graphs.Graph cimport _PropertyGraph
from katana.cpp.libsupport.result cimport handle_result_void, handle_result_assert, raise_error_code, Result
from katana.analytics.plan cimport Plan, _Plan
from katana.property_graph cimport PropertyGraph

from libc.stdint cimport uint32_t, uint64_t
from libcpp.string cimport string
from libcpp cimport bool

from enum import Enum

# TODO(amp): Module needs documenting.


# C++ declarations pulled in from the Louvain clustering header. The whole
# block is declared ``nogil`` so the wrappers in this module can call these
# entry points after releasing the GIL.
cdef extern from "katana/analytics/louvain_clustering/louvain_clustering.h" namespace "katana::analytics" nogil:
    cppclass _LouvainClusteringPlan "katana::analytics::LouvainClusteringPlan" (_Plan):
        # Algorithm variants exposed by the C++ plan.
        enum Algorithm:
            kDoAll "katana::analytics::LouvainClusteringPlan::kDoAll"

        # Read-only accessors for the plan's configuration.
        _LouvainClusteringPlan.Algorithm algorithm() const
        bool enable_vf() const
        double modularity_threshold_per_round() const
        double modularity_threshold_total() const
        uint32_t max_iterations() const
        uint32_t min_graph_size() const

        # LouvainClusteringPlan()

        # Static factory for the kDoAll variant.
        @staticmethod
        _LouvainClusteringPlan DoAll(
            bool enable_vf,
            double modularity_threshold_per_round,
            double modularity_threshold_total,
            uint32_t max_iterations,
            uint32_t min_graph_size
        )

    # Default parameter values published by the C++ plan class.
    bool kDefaultEnableVF "katana::analytics::LouvainClusteringPlan::kDefaultEnableVF"
    double kDefaultModularityThresholdPerRound "katana::analytics::LouvainClusteringPlan::kDefaultModularityThresholdPerRound"
    double kDefaultModularityThresholdTotal "katana::analytics::LouvainClusteringPlan::kDefaultModularityThresholdTotal"
    uint32_t kDefaultMaxIterations "katana::analytics::LouvainClusteringPlan::kDefaultMaxIterations"
    uint32_t kDefaultMinGraphSize "katana::analytics::LouvainClusteringPlan::kDefaultMinGraphSize"

    # Algorithm entry point; the outcome is reported through Result[void].
    Result[void] LouvainClustering(
        _PropertyGraph* pfg,
        const string& edge_weight_property_name,
        const string& output_property_name,
        _LouvainClusteringPlan plan
    )

    # Validation entry point for a previously computed output property.
    Result[void] LouvainClusteringAssertValid(
        _PropertyGraph* pfg,
        const string& edge_weight_property_name,
        const string& output_property_name
    )

    cppclass _LouvainClusteringStatistics "katana::analytics::LouvainClusteringStatistics":
        # Plain data members read by the Python wrapper's properties.
        uint64_t n_clusters
        uint64_t n_non_trivial_clusters
        uint64_t largest_cluster_size
        double largest_cluster_proportion
        double modularity

        # Writes the statistics to the given stream.
        void Print(ostream os)

        # Static factory that computes the statistics for a graph.
        @staticmethod
        Result[_LouvainClusteringStatistics] Compute(
            _PropertyGraph* pfg,
            const string& edge_weight_property_name,
            const string& output_property_name
        )


class _LouvainClusteringPlanAlgorithm(Enum):
    """
    Python-level mirror of the C++ ``LouvainClusteringPlan::Algorithm`` enum.

    The member's value is the corresponding C++ enumerator, so a value read
    from the C++ plan can be turned back into a member by calling the class.
    """

    # The only enumerator declared in the extern block.
    DoAll = _LouvainClusteringPlan.Algorithm.kDoAll


cdef class LouvainClusteringPlan(Plan):
    """
    Python wrapper around the C++ ``katana::analytics::LouvainClusteringPlan``.

    Instances are normally created with :py:meth:`do_all`. An instance created
    by calling the class directly holds a default-constructed C++ plan.
    """
    cdef:
        # The wrapped C++ plan, stored by value.
        _LouvainClusteringPlan underlying_

    cdef _Plan* underlying(self) except NULL:
        # Expose the wrapped plan through the generic Plan interface.
        return &self.underlying_

    # Make the algorithm enum reachable as LouvainClusteringPlan.Algorithm.
    Algorithm = _LouvainClusteringPlanAlgorithm

    @staticmethod
    cdef LouvainClusteringPlan make(_LouvainClusteringPlan u):
        # Allocate through __new__ so __init__ is bypassed, then copy the C++
        # plan into the fresh wrapper.
        wrapper = <LouvainClusteringPlan>LouvainClusteringPlan.__new__(LouvainClusteringPlan)
        wrapper.underlying_ = u
        return wrapper

    @property
    def algorithm(self) -> Algorithm:
        """The algorithm variant selected by this plan."""
        return _LouvainClusteringPlanAlgorithm(self.underlying_.algorithm())

    @property
    def enable_vf(self) -> bool:
        """The ``enable_vf`` flag stored in the plan."""
        return self.underlying_.enable_vf()

    @property
    def modularity_threshold_per_round(self) -> double:
        """The ``modularity_threshold_per_round`` value stored in the plan."""
        return self.underlying_.modularity_threshold_per_round()

    @property
    def modularity_threshold_total(self) -> double:
        """The ``modularity_threshold_total`` value stored in the plan."""
        return self.underlying_.modularity_threshold_total()

    @property
    def max_iterations(self) -> uint32_t:
        """The ``max_iterations`` value stored in the plan."""
        return self.underlying_.max_iterations()

    @property
    def min_graph_size(self) -> uint32_t:
        """The ``min_graph_size`` value stored in the plan."""
        return self.underlying_.min_graph_size()

    @staticmethod
    def do_all(
            bool enable_vf = kDefaultEnableVF,
            double modularity_threshold_per_round = kDefaultModularityThresholdPerRound,
            double modularity_threshold_total = kDefaultModularityThresholdTotal,
            uint32_t max_iterations = kDefaultMaxIterations,
            uint32_t min_graph_size = kDefaultMinGraphSize
        ) -> LouvainClusteringPlan:
        """
        Build a plan using the C++ ``DoAll`` factory.

        Every argument is forwarded to the C++ factory unchanged; each default
        is the matching ``kDefault*`` constant of the C++ plan class.
        """
        cdef _LouvainClusteringPlan cpp_plan = _LouvainClusteringPlan.DoAll(
            enable_vf,
            modularity_threshold_per_round,
            modularity_threshold_total,
            max_iterations,
            min_graph_size,
        )
        return LouvainClusteringPlan.make(cpp_plan)


def louvain_clustering(PropertyGraph pg not None, str edge_weight_property_name, str output_property_name, LouvainClusteringPlan plan not None = LouvainClusteringPlan()):
    """
    Run the C++ ``LouvainClustering`` routine on ``pg``.

    :param pg: the graph to operate on; must not be ``None``.
    :param edge_weight_property_name: name handed to the C++ routine as the
        edge weight property (encoded as UTF-8).
    :param output_property_name: name handed to the C++ routine as the output
        property (encoded as UTF-8).
    :param plan: the execution plan; must not be ``None``. Defaults to a
        default-constructed plan.

    The result of the C++ call is passed to ``handle_result_void``
    (presumably raising a Python exception on failure -- confirm in
    ``katana.cpp.libsupport.result``).
    """
    # ``not None`` on the typed arguments matters: their C-level fields are
    # read below without the GIL, and doing that on None is undefined behaviour.
    cdef string edge_weight_property_name_str = bytes(edge_weight_property_name, "utf-8")
    cdef string output_property_name_str = bytes(output_property_name, "utf-8")
    with nogil:
        handle_result_void(LouvainClustering(pg.underlying.get(), edge_weight_property_name_str, output_property_name_str, plan.underlying_))


def louvain_clustering_assert_valid(PropertyGraph pg not None, str edge_weight_property_name, str output_property_name):
    """
    Run the C++ ``LouvainClusteringAssertValid`` check on ``pg``.

    :param pg: the graph to check; must not be ``None``.
    :param edge_weight_property_name: name handed to the C++ routine as the
        edge weight property (encoded as UTF-8).
    :param output_property_name: name handed to the C++ routine as the output
        property (encoded as UTF-8).

    The result of the C++ call is passed to ``handle_result_assert``
    (presumably raising an assertion-style exception on failure -- confirm in
    ``katana.cpp.libsupport.result``).
    """
    # ``not None`` matters: ``pg.underlying`` is read below without the GIL,
    # and doing that on None is undefined behaviour.
    cdef string edge_weight_property_name_str = bytes(edge_weight_property_name, "utf-8")
    cdef string output_property_name_str = bytes(output_property_name, "utf-8")
    with nogil:
        handle_result_assert(LouvainClusteringAssertValid(
            pg.underlying.get(),
            edge_weight_property_name_str,
            output_property_name_str
        ))


# Unwrap a Result holding Louvain statistics. Callable without the GIL; the GIL
# is re-acquired only when an error has to be reported via raise_error_code.
cdef _LouvainClusteringStatistics handle_result_LouvainClusteringStatistics(Result[_LouvainClusteringStatistics] res) nogil except *:
    cdef bint failed = not res.has_value()
    if failed:
        # Reporting the error touches Python state, so the GIL is required here.
        with gil:
            raise_error_code(res.error())
    return res.value()


cdef class LouvainClusteringStatistics:
    """
    Python wrapper around the C++ ``katana::analytics::LouvainClusteringStatistics``.

    The statistics are computed once, in the constructor, by the C++
    ``Compute`` factory; the properties then read the stored fields.
    """
    # The wrapped C++ statistics object, stored by value.
    cdef _LouvainClusteringStatistics underlying

    def __init__(self, PropertyGraph pg not None,
                 str edge_weight_property_name,
                 str output_property_name
                 ):
        """
        Compute the statistics for ``pg``.

        :param pg: the graph to inspect; must not be ``None``.
        :param edge_weight_property_name: name handed to the C++ routine as
            the edge weight property (encoded as UTF-8).
        :param output_property_name: name handed to the C++ routine as the
            output property (encoded as UTF-8).
        """
        # ``not None`` matters: ``pg.underlying`` is read below without the
        # GIL, and doing that on None is undefined behaviour.
        cdef string edge_weight_property_name_str = bytes(edge_weight_property_name, "utf-8")
        cdef string output_property_name_str = bytes(output_property_name, "utf-8")
        with nogil:
            self.underlying = handle_result_LouvainClusteringStatistics(_LouvainClusteringStatistics.Compute(
                pg.underlying.get(),
                edge_weight_property_name_str,
                output_property_name_str
            ))

    @property
    def n_clusters(self) -> uint64_t:
        """The ``n_clusters`` field of the C++ statistics."""
        return self.underlying.n_clusters

    @property
    def n_non_trivial_clusters(self) -> uint64_t:
        """The ``n_non_trivial_clusters`` field of the C++ statistics."""
        return self.underlying.n_non_trivial_clusters

    @property
    def largest_cluster_size(self) -> uint64_t:
        """The ``largest_cluster_size`` field of the C++ statistics."""
        return self.underlying.largest_cluster_size

    @property
    def largest_cluster_proportion(self) -> double:
        """The ``largest_cluster_proportion`` field of the C++ statistics."""
        return self.underlying.largest_cluster_proportion

    @property
    def modularity(self) -> double:
        """The ``modularity`` field of the C++ statistics."""
        return self.underlying.modularity

    def __str__(self) -> str:
        """Return the text produced by the C++ ``Print`` method, decoded as ASCII."""
        cdef ostringstream ss
        self.underlying.Print(ss)
        return str(ss.str(), "ascii")
Loading

0 comments on commit f6a9c07

Please sign in to comment.