Skip to content

Commit

Permalink
optimize filterNonShortestPath() for betweeness_centrality (#12)
Browse files Browse the repository at this point in the history
network10000 dataset test:
                   before  after
(depth=4 sample=1) 395s    25s
(depth=3 sample=2) 4300s   35s

The same applies to closeness_centrality.

Change-Id: Ia0c557434bf25f9d13a0b1dc19f66024e08c89df
  • Loading branch information
javeme authored and imbajin committed Nov 9, 2022
1 parent fa3ace6 commit 7a72d7d
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 44 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -401,10 +401,10 @@ protected void commitIfNeeded() {
}
}

public static final class TopMap {
public static final class TopMap<K> {

private final long topN;
private Map<Id, MutableLong> tops;
private Map<K, MutableLong> tops;

public TopMap(long topN) {
this.topN = topN;
Expand All @@ -415,19 +415,28 @@ public int size() {
return this.tops.size();
}

public void put(Id key, long value) {
this.put(key, Long.valueOf(value));
/**
 * Looks up the counter currently stored for the given key.
 *
 * @param key the key to look up
 * @return the counter held by the backing map for {@code key}, or
 *         {@code null} when the map has no entry for it
 */
public MutableLong get(K key) {
    final MutableLong counter = this.tops.get(key);
    return counter;
}

public void put(Id key, Long value) {
/**
 * Adds {@code value} to the counter stored under {@code key}, creating the
 * counter when the key is seen for the first time.
 *
 * @param key   the key whose counter is incremented
 * @param value the amount to add to the counter
 */
public void add(K key, long value) {
    MutableLong mlong = this.tops.get(key);
    if (mlong == null) {
        /*
         * Start a new counter at zero: the add() below is shared by both
         * paths, so seeding it with `value` would count the first
         * contribution twice.
         */
        mlong = new MutableLong(0L);
        this.tops.put(key, mlong);
    }
    mlong.add(value);
}

/**
 * Stores {@code value} under {@code key}, replacing any counter previously
 * held for that key, then requests a shrink once the map holds more than
 * twice {@code topN} entries.
 *
 * @param key   the key to store
 * @param value the value wrapped into a fresh counter
 */
public void put(K key, long value) {
    MutableLong counter = new MutableLong(value);
    this.tops.put(key, counter);

    // Tolerate up to 2x topN entries before asking for a shrink
    long bufferLimit = this.topN * 2;
    if (this.tops.size() <= bufferLimit) {
        return;
    }
    // NOTE(review): presumably trims the map back to topN entries - confirm
    this.shrinkIfNeeded(this.topN);
}

public Set<Map.Entry<Id, MutableLong>> entrySet() {
/**
 * Returns the entries of the backing map after first invoking
 * {@code shrinkIfNeeded(topN)}.
 *
 * @return the entry set of the backing map as it is after the shrink call
 */
public Set<Map.Entry<K, MutableLong>> entrySet() {
    // Shrink before reading the map: put() lets it grow past topN
    this.shrinkIfNeeded(this.topN);
    Set<Map.Entry<K, MutableLong>> entries = this.tops.entrySet();
    return entries;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,20 @@

package com.baidu.hugegraph.job.algorithm.cent;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang3.tuple.Pair;
import org.apache.tinkerpop.gremlin.process.traversal.Pop;
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal;
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__;
import org.apache.tinkerpop.gremlin.structure.Vertex;

import com.baidu.hugegraph.backend.id.Id;
import com.baidu.hugegraph.job.Job;
import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm;
import com.baidu.hugegraph.structure.HugeElement;

public abstract class AbstractCentAlgorithm extends AbstractAlgorithm {

Expand Down Expand Up @@ -106,5 +112,28 @@ protected GraphTraversal<Vertex, Vertex> constructPathUnit(
}
return unit;
}

/**
 * Appends a filter step to the traversal that, for every pair of
 * (first "v", last "v") vertex ids, keeps only paths whose length equals
 * the length of the first path seen for that pair.
 *
 * NOTE(review): this only yields shortest paths if the traversal emits
 * shorter paths before longer ones - confirm against the repeat/emit
 * strategy used by the callers.
 *
 * @param t the path traversal whose steps are labelled "v"
 * @return the traversal with the filter step appended
 */
protected GraphTraversal<Vertex, Vertex> filterNonShortestPath(
                                         GraphTraversal<Vertex, Vertex>
                                         t) {
    // Vertex count (capped by MAX_QUERY_LIMIT) is used to presize the map
    long vertexCount = this.graph().traversal().V()
                           .limit(MAX_QUERY_LIMIT).count().next();
    int capacity = (int) vertexCount;
    // (start id, end id) -> length of the first path seen for the pair
    Map<Pair<Id, Id>, Integer> firstSeenLens = new HashMap<>(capacity);

    return t.filter(traverser -> {
        Id source = traverser.<HugeElement>path(Pop.first, "v").id();
        Id target = traverser.<HugeElement>path(Pop.last, "v").id();
        int pathLen = traverser.<List<?>>path(Pop.all, "v").size();

        // Records the length for a new pair, else returns the stored one
        Integer recorded = firstSeenLens.putIfAbsent(
                           Pair.of(source, target), pathLen);
        // Keep the first path of a pair and any later path of equal length
        return recorded == null || recorded.intValue() == pathLen;
    });
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -73,25 +73,12 @@ public Object betweenessCentrality(int depth,
sourceCLabel);
t = constructPath(t, degree, sample, sourceLabel, sourceCLabel);
t = t.emit().until(__.loops().is(P.gte(depth)));
t = filterNonShortestPath(t);

@SuppressWarnings({ "unchecked", "deprecation" })
GraphTraversal<Vertex, Vertex> tf = t.filter(
__.project("x","y","z")
.by(__.select(Pop.first, "v").id())
.by(__.select(Pop.last, "v").id())
.by(__.select(Pop.all, "v").count(Scope.local))
.as("triple")
.coalesce(__.select("x","y").as("a")
.select("triples").unfold().as("t")
.select("x","y").where(P.eq("a")).select("t"),
__.store("triples"))
.select("z").as("length")
.select("triple").select("z").where(P.eq("length")));

GraphTraversal<Vertex, ?> tg = tf.select(Pop.all, "v")
.unfold().id()
.groupCount().order(Scope.local)
.by(Column.values, Order.desc);
GraphTraversal<Vertex, ?> tg = t.select(Pop.all, "v")
.unfold().id()
.groupCount().order(Scope.local)
.by(Column.values, Order.desc);
GraphTraversal<Vertex, ?> tLimit = topN <= 0L ? tg :
tg.limit(Scope.local, topN);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,26 +82,13 @@ public Object closenessCentrality(int depth,
sourceCLabel);
t = constructPath(t, degree, sample, sourceLabel, sourceCLabel);
t = t.emit().until(__.loops().is(P.gte(depth)));

@SuppressWarnings({ "unchecked", "deprecation" })
GraphTraversal<Vertex, Vertex> tf = t.filter(
__.project("x","y","z")
.by(__.select(Pop.first, "v").id())
.by(__.select(Pop.last, "v").id())
.by(__.select(Pop.all, "v").count(Scope.local))
.as("triple")
.coalesce(__.select("x","y").as("a")
.select("triples").unfold().as("t")
.select("x","y").where(P.eq("a")).select("t"),
__.store("triples"))
.select("z").as("length")
.select("triple").select("z").where(P.eq("length")));
t = filterNonShortestPath(t);

GraphTraversal<Vertex, ?> tg;
tg = tf.group().by(__.select(Pop.first, "v").id())
.by(__.select(Pop.all, "v").count(Scope.local)
.sack(Operator.div).sack().sum())
.order(Scope.local).by(Column.values, Order.desc);
tg = t.group().by(__.select(Pop.first, "v").id())
.by(__.select(Pop.all, "v").count(Scope.local)
.sack(Operator.div).sack().sum())
.order(Scope.local).by(Column.values, Order.desc);
GraphTraversal<Vertex, ?> tLimit = topN <= 0L ? tg :
tg.limit(Scope.local, topN);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ public Object degreeCentrality(Directions direction, long topN) {
Iterator<Edge> edges = this.edges(direction);

JsonMap degrees = new JsonMap();
TopMap tops = new TopMap(topN);
TopMap<Id> tops = new TopMap<>(topN);
Id vertex = null;
long degree = 0L;
long total = 0L;
Expand Down Expand Up @@ -111,7 +111,7 @@ protected Object degreeCentrality(long topN) {
assert topN >= 0L;
long total = 0L;
JsonMap degrees = new JsonMap();
TopMap tops = new TopMap(topN);
TopMap<Id> tops = new TopMap<>(topN);

GraphTraversalSource traversal = this.graph().traversal();
Iterator<Vertex> vertices = this.vertices();
Expand Down

0 comments on commit 7a72d7d

Please sign in to comment.