From 56bf814287c501f9f382f944d66728938ea2b006 Mon Sep 17 00:00:00 2001 From: Jermy Li Date: Wed, 15 Apr 2020 11:03:59 +0800 Subject: [PATCH] optimize filterNonShortestPath() for betweeness_centrality (#12) network10000 dataset test: before after (depth=4 sample=1) 395s 25s (depth=3 sample=2) 4300s 35s same as the closeness_centrality Change-Id: Ia0c557434bf25f9d13a0b1dc19f66024e08c89df --- .../job/algorithm/AbstractAlgorithm.java | 21 ++++++++++---- .../algorithm/cent/AbstractCentAlgorithm.java | 29 +++++++++++++++++++ .../cent/BetweenessCentralityAlgorithm.java | 23 ++++----------- .../cent/ClosenessCentralityAlgorithm.java | 23 ++++----------- .../cent/DegreeCentralityAlgorithm.java | 4 +-- 5 files changed, 56 insertions(+), 44 deletions(-) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java index 8387a69f92..e77473668c 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java @@ -401,10 +401,10 @@ protected void commitIfNeeded() { } } - public static final class TopMap { + public static final class TopMap { private final long topN; - private Map tops; + private Map tops; public TopMap(long topN) { this.topN = topN; @@ -415,11 +415,20 @@ public int size() { return this.tops.size(); } - public void put(Id key, long value) { - this.put(key, Long.valueOf(value)); + public MutableLong get(K key) { + return this.tops.get(key); } - public void put(Id key, Long value) { + public void add(K key, long value) { + MutableLong mlong = this.tops.get(key); + if (mlong == null) { + mlong = new MutableLong(0L); + this.tops.put(key, mlong); + } + mlong.add(value); + } + + public void put(K key, long value) { this.tops.put(key, new MutableLong(value)); // keep 2x buffer if (this.tops.size() > this.topN * 2) { @@ 
-427,7 +436,7 @@ public void put(Id key, Long value) { } } - public Set> entrySet() { + public Set> entrySet() { this.shrinkIfNeeded(this.topN); return this.tops.entrySet(); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java index 37492e456f..c36743176c 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java @@ -19,14 +19,20 @@ package com.baidu.hugegraph.job.algorithm.cent; +import java.util.HashMap; +import java.util.List; import java.util.Map; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.tinkerpop.gremlin.process.traversal.Pop; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; import org.apache.tinkerpop.gremlin.structure.Vertex; +import com.baidu.hugegraph.backend.id.Id; import com.baidu.hugegraph.job.Job; import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm; +import com.baidu.hugegraph.structure.HugeElement; public abstract class AbstractCentAlgorithm extends AbstractAlgorithm { @@ -106,5 +112,28 @@ protected GraphTraversal constructPathUnit( } return unit; } + + protected GraphTraversal filterNonShortestPath( + GraphTraversal + t) { + long size = this.graph().traversal().V().limit(MAX_QUERY_LIMIT) + .count().next(); + Map, Integer> triples = new HashMap<>((int) size); + return t.filter(it -> { + Id start = it.path(Pop.first, "v").id(); + Id end = it.path(Pop.last, "v").id(); + int len = it.>path(Pop.all, "v").size(); + Pair key = Pair.of(start, end); + Integer shortest = triples.get(key); + if (shortest != null && shortest != len) { + // ignore non shortest path + return false; + } + if (shortest == null) { + triples.put(key, len); + } 
+ return true; + }); + } } } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java index ae1b8bb743..9a72d2f626 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java @@ -73,25 +73,12 @@ public Object betweenessCentrality(int depth, sourceCLabel); t = constructPath(t, degree, sample, sourceLabel, sourceCLabel); t = t.emit().until(__.loops().is(P.gte(depth))); + t = filterNonShortestPath(t); - @SuppressWarnings({ "unchecked", "deprecation" }) - GraphTraversal tf = t.filter( - __.project("x","y","z") - .by(__.select(Pop.first, "v").id()) - .by(__.select(Pop.last, "v").id()) - .by(__.select(Pop.all, "v").count(Scope.local)) - .as("triple") - .coalesce(__.select("x","y").as("a") - .select("triples").unfold().as("t") - .select("x","y").where(P.eq("a")).select("t"), - __.store("triples")) - .select("z").as("length") - .select("triple").select("z").where(P.eq("length"))); - - GraphTraversal tg = tf.select(Pop.all, "v") - .unfold().id() - .groupCount().order(Scope.local) - .by(Column.values, Order.desc); + GraphTraversal tg = t.select(Pop.all, "v") + .unfold().id() + .groupCount().order(Scope.local) + .by(Column.values, Order.desc); GraphTraversal tLimit = topN <= 0L ? 
tg : tg.limit(Scope.local, topN); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java index d890db8087..96e9709fef 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java @@ -82,26 +82,13 @@ public Object closenessCentrality(int depth, sourceCLabel); t = constructPath(t, degree, sample, sourceLabel, sourceCLabel); t = t.emit().until(__.loops().is(P.gte(depth))); - - @SuppressWarnings({ "unchecked", "deprecation" }) - GraphTraversal tf = t.filter( - __.project("x","y","z") - .by(__.select(Pop.first, "v").id()) - .by(__.select(Pop.last, "v").id()) - .by(__.select(Pop.all, "v").count(Scope.local)) - .as("triple") - .coalesce(__.select("x","y").as("a") - .select("triples").unfold().as("t") - .select("x","y").where(P.eq("a")).select("t"), - __.store("triples")) - .select("z").as("length") - .select("triple").select("z").where(P.eq("length"))); + t = filterNonShortestPath(t); GraphTraversal tg; - tg = tf.group().by(__.select(Pop.first, "v").id()) - .by(__.select(Pop.all, "v").count(Scope.local) - .sack(Operator.div).sack().sum()) - .order(Scope.local).by(Column.values, Order.desc); + tg = t.group().by(__.select(Pop.first, "v").id()) + .by(__.select(Pop.all, "v").count(Scope.local) + .sack(Operator.div).sack().sum()) + .order(Scope.local).by(Column.values, Order.desc); GraphTraversal tLimit = topN <= 0L ? 
tg : tg.limit(Scope.local, topN); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java index 81bd336729..5f6781b21b 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java @@ -67,7 +67,7 @@ public Object degreeCentrality(Directions direction, long topN) { Iterator edges = this.edges(direction); JsonMap degrees = new JsonMap(); - TopMap tops = new TopMap(topN); + TopMap tops = new TopMap<>(topN); Id vertex = null; long degree = 0L; long total = 0L; @@ -111,7 +111,7 @@ protected Object degreeCentrality(long topN) { assert topN >= 0L; long total = 0L; JsonMap degrees = new JsonMap(); - TopMap tops = new TopMap(topN); + TopMap tops = new TopMap<>(topN); GraphTraversalSource traversal = this.graph().traversal(); Iterator vertices = this.vertices();