From 6ff96f92a09317b99f86f032dc1846b4271cb08c Mon Sep 17 00:00:00 2001
From: Jermy Li
Date: Thu, 9 Apr 2020 11:22:56 +0800
Subject: [PATCH 01/33] implement 8 olap algorithms (#4)

* add olap algo api
* improve source filter
* fix louvain shaking with limit degree
* catch exception for lpa and louvain
* add 3 params for lpa: label, source_label, precision
* improve louvain node store
* remove vertices from class Community
* move showCommunity to AbstractCommAlgorithm
* add some parameters to AbstractAlgorithm
* improve louvain log
* improve clearPass and communities check
* split louvain cache
* fix degreeCentrality bug: degree is always < 500

Change-Id: I2341b981dab44f43ac50ae0f8fa5e51b7acc1b5a
---
 .../baidu/hugegraph/api/job/AlgorithmAPI.java | 84 ++
 .../com/baidu/hugegraph/job/AlgorithmJob.java | 71 ++
 .../job/algorithm/AbstractAlgorithm.java | 516 +++++++++++++
 .../hugegraph/job/algorithm/Algorithm.java | 35 +
 .../job/algorithm/AlgorithmPool.java | 71 ++
 .../job/algorithm/CountEdgeAlgorithm.java | 79 ++
 .../job/algorithm/CountVertexAlgorithm.java | 79 ++
 .../algorithm/cent/AbstractCentAlgorithm.java | 113 +++
 .../cent/BetweenessCentralityAlgorithm.java | 101 +++
 .../cent/ClosenessCentralityAlgorithm.java | 111 +++
 .../cent/DegreeCentralityAlgorithm.java | 140 ++++
 .../cent/EigenvectorCentralityAlgorithm.java | 100 +++
 .../algorithm/comm/AbstractCommAlgorithm.java | 78 ++
 .../comm/ClusterCoeffcientAlgorithm.java | 70 ++
 .../job/algorithm/comm/LouvainAlgorithm.java | 83 ++
 .../job/algorithm/comm/LouvainTraverser.java | 715 ++++++++++++++++++
 .../job/algorithm/comm/LpaAlgorithm.java | 263 +++++++
 .../comm/TriangleCountAlgorithm.java | 153 ++++
 18 files changed, 2862 insertions(+)
 create mode 100644 hugegraph-api/src/main/java/com/baidu/hugegraph/api/job/AlgorithmAPI.java
 create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/AlgorithmJob.java
 create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java
 create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Algorithm.java
 create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java
 create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/CountEdgeAlgorithm.java
 create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/CountVertexAlgorithm.java
 create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java
 create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java
 create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java
 create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java
 create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java
 create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/AbstractCommAlgorithm.java
 create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java
 create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java
 create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java
 create mode 100644
hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/TriangleCountAlgorithm.java diff --git a/hugegraph-api/src/main/java/com/baidu/hugegraph/api/job/AlgorithmAPI.java b/hugegraph-api/src/main/java/com/baidu/hugegraph/api/job/AlgorithmAPI.java new file mode 100644 index 0000000000..c965e02a56 --- /dev/null +++ b/hugegraph-api/src/main/java/com/baidu/hugegraph/api/job/AlgorithmAPI.java @@ -0,0 +1,84 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +package com.baidu.hugegraph.api.job; + +import java.util.Map; + +import javax.inject.Singleton; +import javax.ws.rs.Consumes; +import javax.ws.rs.NotFoundException; +import javax.ws.rs.POST; +import javax.ws.rs.Path; +import javax.ws.rs.PathParam; +import javax.ws.rs.Produces; +import javax.ws.rs.core.Context; + +import org.slf4j.Logger; + +import com.baidu.hugegraph.HugeGraph; +import com.baidu.hugegraph.api.API; +import com.baidu.hugegraph.api.filter.StatusFilter.Status; +import com.baidu.hugegraph.backend.id.Id; +import com.baidu.hugegraph.core.GraphManager; +import com.baidu.hugegraph.job.AlgorithmJob; +import com.baidu.hugegraph.job.JobBuilder; +import com.baidu.hugegraph.server.RestServer; +import com.baidu.hugegraph.util.E; +import com.baidu.hugegraph.util.JsonUtil; +import com.baidu.hugegraph.util.Log; +import com.codahale.metrics.annotation.Timed; +import com.google.common.collect.ImmutableMap; + +@Path("graphs/{graph}/jobs/algorithm") +@Singleton +public class AlgorithmAPI extends API { + + private static final Logger LOG = Log.logger(RestServer.class); + + @POST + @Timed + @Path("/{name}") + @Status(Status.CREATED) + @Consumes(APPLICATION_JSON) + @Produces(APPLICATION_JSON_WITH_CHARSET) + public Map post(@Context GraphManager manager, + @PathParam("graph") String graph, + @PathParam("name") String algorithm, + Map parameters) { + LOG.debug("Graph [{}] schedule algorithm job: {}", graph, parameters); + E.checkArgument(algorithm != null && !algorithm.isEmpty(), + "The algorithm name can't be empty"); + if (parameters == null) { + parameters = ImmutableMap.of(); + } + if (!AlgorithmJob.check(algorithm, parameters)) { + throw new NotFoundException("Not found algorithm: " + algorithm); + } + + HugeGraph g = graph(manager, graph); + Map input = ImmutableMap.of("algorithm", algorithm, + "parameters", parameters); + JobBuilder builder = JobBuilder.of(g); + builder.name("algorithm:" + algorithm) + .input(JsonUtil.toJson(input)) + .job(new AlgorithmJob()); + return ImmutableMap.of("task_id", builder.schedule().id()); + } +} diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/AlgorithmJob.java 
b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/AlgorithmJob.java new file mode 100644 index 0000000000..7e752ac429 --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/AlgorithmJob.java @@ -0,0 +1,71 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +package com.baidu.hugegraph.job; + +import java.util.Map; + +import com.baidu.hugegraph.job.algorithm.Algorithm; +import com.baidu.hugegraph.job.algorithm.AlgorithmPool; +import com.baidu.hugegraph.util.E; +import com.baidu.hugegraph.util.JsonUtil; + +public class AlgorithmJob extends Job { + + public static final String TASK_TYPE = "algorithm"; + + public static boolean check(String name, Map parameters) { + Algorithm algorithm = AlgorithmPool.instance().find(name); + if (algorithm == null) { + return false; + } + algorithm.checkParameters(parameters); + return true; + } + + @Override + public String type() { + return TASK_TYPE; + } + + @Override + public Object execute() throws Exception { + String input = this.task().input(); + E.checkArgumentNotNull(input, "The input can't be null"); + @SuppressWarnings("unchecked") + Map map = JsonUtil.fromJson(input, Map.class); + + Object value = map.get("algorithm"); + E.checkArgument(value instanceof String, + "Invalid algorithm name '%s'", value); + String name = (String) value; + + value = map.get("parameters"); + E.checkArgument(value instanceof Map, + "Invalid algorithm parameters '%s'", value); + @SuppressWarnings("unchecked") + Map parameters = (Map) value; + + AlgorithmPool pool = AlgorithmPool.instance(); + Algorithm algorithm = pool.find(name); + E.checkArgument(algorithm != null, + "There is no algorithm named '%s'", name); + return algorithm.call(this, parameters); + } +} diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java new file mode 100644 index 0000000000..660ef9f8f8 --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java @@ -0,0 +1,516 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +package com.baidu.hugegraph.job.algorithm; + + +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.concurrent.Callable; + +import org.apache.commons.lang3.StringEscapeUtils; +import org.apache.commons.lang3.mutable.MutableLong; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; +import org.apache.tinkerpop.gremlin.structure.Edge; +import org.apache.tinkerpop.gremlin.structure.Element; +import org.apache.tinkerpop.gremlin.structure.Property; +import org.apache.tinkerpop.gremlin.structure.Transaction; +import org.apache.tinkerpop.gremlin.structure.Vertex; + +import com.baidu.hugegraph.HugeException; +import com.baidu.hugegraph.backend.id.Id; +import com.baidu.hugegraph.backend.query.ConditionQuery; +import com.baidu.hugegraph.backend.query.Query; +import com.baidu.hugegraph.iterator.FilterIterator; +import com.baidu.hugegraph.iterator.FlatMapperIterator; +import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.testutil.Whitebox; +import com.baidu.hugegraph.traversal.algorithm.HugeTraverser; +import com.baidu.hugegraph.type.HugeType; +import com.baidu.hugegraph.type.define.Directions; +import com.baidu.hugegraph.type.define.HugeKeys; +import com.baidu.hugegraph.util.Bytes; +import com.baidu.hugegraph.util.E; +import com.baidu.hugegraph.util.JsonUtil; + +import jersey.repackaged.com.google.common.base.Objects; + +@SuppressWarnings("deprecation") // StringEscapeUtils +public abstract class AbstractAlgorithm implements Algorithm { + + public static final long MAX_RESULT_SIZE = 100L * Bytes.MB; + public static final long MAX_QUERY_LIMIT = 10000000L; // about 10GB + public static final int BATCH = 500; + + public static final String CATEGORY_AGGR = "aggregate"; + public static final String CATEGORY_PATH = "path"; + public static final String CATEGORY_RANK = "rank"; + public static final String CATEGORY_SIMI = "similarity"; + public static final String CATEGORY_COMM = "community"; + public static final String CATEGORY_CENT = "centrality"; + + public static final String KEY_DIRECTION = "direction"; + public static final String KEY_LABEL = "label"; + public static final String KEY_DEPTH = "depth"; + public static final String KEY_DEGREE = "degree"; + public static final String KEY_SAMPLE = "sample"; + public static final String KEY_SOURCE_SAMPLE = "source_sample"; + public static final String KEY_SOURCE_LABEL = "source_label"; + public static final String KEY_SOURCE_CLABEL = "source_clabel"; + public static final String KEY_TOP = "top"; + public static final String KEY_TIMES = "times"; + public static final String KEY_STABLE_TIMES = "stable_times"; + public static final String KEY_PRECISION = "precision"; + public static final String KEY_SHOW_COMM = "show_community"; + public static final String KEY_CLEAR = "clear"; + public static final String KEY_CAPACITY = "capacity"; + public static final String KEY_LIMIT = "limit"; + + public static final long DEFAULT_CAPACITY = 10000000L; + public static final long DEFAULT_LIMIT = 100L; + public static 
final long DEFAULT_DEGREE = 100L; + public static final long DEFAULT_SAMPLE = 1L; + public static final long DEFAULT_TIMES = 20L; + public static final long DEFAULT_STABLE_TIMES= 3L; + public static final double DEFAULT_PRECISION = 1.0 / 1000; + + @Override + public void checkParameters(Map parameters) { + E.checkArgument(parameters.isEmpty(), + "Unnecessary parameters: %s", parameters); + } + + protected static int depth(Map parameters) { + int depth = parameterInt(parameters, KEY_DEPTH); + E.checkArgument(depth > 0, + "The value of %s must be > 0, but got %s", + KEY_DEPTH, depth); + return depth; + } + + protected static String edgeLabel(Map parameters) { + if (!parameters.containsKey(KEY_LABEL)) { + return null; + } + return parameterString(parameters, KEY_LABEL); + } + + protected static Directions direction(Map parameters) { + Object direction = parameter(parameters, KEY_DIRECTION); + return parseDirection(direction); + } + + protected static long top(Map parameters) { + if (!parameters.containsKey(KEY_TOP)) { + return 0L; + } + long top = parameterLong(parameters, KEY_TOP); + E.checkArgument(top >= 0L, + "The value of %s must be >= 0, but got %s", + KEY_TOP, top); + return top; + } + + protected static long degree(Map parameters) { + if (!parameters.containsKey(KEY_DEGREE)) { + return DEFAULT_DEGREE; + } + long degree = parameterLong(parameters, KEY_DEGREE); + HugeTraverser.checkDegree(degree); + return degree; + } + + protected static long capacity(Map parameters) { + if (!parameters.containsKey(KEY_CAPACITY)) { + return DEFAULT_CAPACITY; + } + long capacity = parameterLong(parameters, KEY_CAPACITY); + HugeTraverser.checkCapacity(capacity); + return capacity; + } + + protected static long limit(Map parameters) { + if (!parameters.containsKey(KEY_LIMIT)) { + return DEFAULT_LIMIT; + } + long limit = parameterLong(parameters, KEY_LIMIT); + HugeTraverser.checkLimit(limit); + return limit; + } + + protected static long sample(Map parameters) { + if (!parameters.containsKey(KEY_SAMPLE)) { + return DEFAULT_SAMPLE; + } + long sample = parameterLong(parameters, KEY_SAMPLE); + HugeTraverser.checkPositiveOrNoLimit(sample, KEY_SAMPLE); + return sample; + } + + protected static long sourceSample(Map parameters) { + if (!parameters.containsKey(KEY_SOURCE_SAMPLE)) { + return HugeTraverser.NO_LIMIT; + } + long sample = parameterLong(parameters, KEY_SOURCE_SAMPLE); + HugeTraverser.checkPositiveOrNoLimit(sample, KEY_SOURCE_SAMPLE); + return sample; + } + + protected static String sourceLabel(Map parameters) { + if (!parameters.containsKey(KEY_SOURCE_LABEL)) { + return null; + } + return parameterString(parameters, KEY_SOURCE_LABEL); + } + + protected static String sourceCLabel(Map parameters) { + if (!parameters.containsKey(KEY_SOURCE_CLABEL)) { + return null; + } + return parameterString(parameters, KEY_SOURCE_CLABEL); + } + + public static Object parameter(Map parameters, String key) { + Object value = parameters.get(key); + E.checkArgument(value != null, + "Expect '%s' in parameters: %s", + key, parameters); + return value; + } + + public static String parameterString(Map parameters, + String key) { + Object value = parameter(parameters, key); + E.checkArgument(value instanceof String, + "Expect string value for parameter '%s': '%s'", + key, value); + return (String) value; + } + + public static int parameterInt(Map parameters, + String key) { + Object value = parameter(parameters, key); + E.checkArgument(value instanceof Number, + "Expect int value for parameter '%s': '%s'", + key, value); + 
return ((Number) value).intValue(); + } + + public static long parameterLong(Map parameters, + String key) { + Object value = parameter(parameters, key); + E.checkArgument(value instanceof Number, + "Expect long value for parameter '%s': '%s'", + key, value); + return ((Number) value).longValue(); + } + + public static double parameterDouble(Map parameters, + String key) { + Object value = parameter(parameters, key); + E.checkArgument(value instanceof Number, + "Expect double value for parameter '%s': '%s'", + key, value); + return ((Number) value).doubleValue(); + } + + public static boolean parameterBoolean(Map parameters, + String key) { + Object value = parameter(parameters, key); + E.checkArgument(value instanceof Boolean, + "Expect boolean value for parameter '%s': '%s'", + key, value); + return ((Boolean) value); + } + + public static Directions parseDirection(Object direction) { + if (direction.equals(Directions.BOTH.toString())) { + return Directions.BOTH; + } else if (direction.equals(Directions.OUT.toString())) { + return Directions.OUT; + } else if (direction.equals(Directions.IN.toString())) { + return Directions.IN; + } else { + throw new IllegalArgumentException(String.format( + "The value of direction must be in [OUT, IN, BOTH], " + + "but got '%s'", direction)); + } + } + + public static class AlgoTraverser extends HugeTraverser { + + private final Job job; + protected long progress; + + public AlgoTraverser(Job job) { + super(job.graph()); + this.job = job; + } + + public void updateProgress(long progress) { + this.job.updateProgress((int) progress); + } + + protected Iterator vertices() { + return this.vertices(Query.NO_LIMIT); + } + + protected Iterator vertices(long limit) { + Query query = new Query(HugeType.VERTEX); + query.capacity(Query.NO_CAPACITY); + query.limit(limit); + return this.graph().vertices(query); + } + + protected Iterator vertices(Object label, String key, + Object value, long limit) { + Iterator vertices = this.vertices(label, limit); + if (key != null) { + vertices = filter(vertices, key, value); + } + return vertices; + } + + protected Iterator vertices(Object label, long limit) { + if (label == null) { + return this.vertices(limit); + } + ConditionQuery query = new ConditionQuery(HugeType.VERTEX); + query.capacity(Query.NO_CAPACITY); + query.limit(limit); + if (label != null) { + query.eq(HugeKeys.LABEL, this.getVertexLabelId(label)); + } + return this.graph().vertices(query); + } + + protected Iterator vertices(Iterator ids) { + return new FlatMapperIterator<>(ids, id -> { + return this.graph().vertices(id); + }); + } + + protected Iterator filter(Iterator vertices, + String key, Object value) { + return new FilterIterator<>(vertices, vertex -> { + boolean matched = match(vertex, key, value); + if (!matched) { + this.updateProgress(++this.progress); + } + return matched; + }); + } + + protected static boolean match(Element elem, String key, Object value) { + Property p = elem.property(key); + return p.isPresent() && Objects.equal(p.value(), value); + } + + protected Iterator edges(Directions dir) { + HugeType type = dir == null ? 
HugeType.EDGE : dir.type(); + Query query = new Query(type); + query.capacity(Query.NO_CAPACITY); + query.limit(Query.NO_LIMIT); + return this.graph().edges(query); + } + + protected void drop(GraphTraversal traversal) { + this.execute(traversal, () -> { + while (traversal.hasNext()) { + this.updateProgress(++this.progress); + traversal.next().remove(); + this.commitIfNeeded(); + } + return null; + }); + this.graph().tx().commit(); + } + + protected V execute(GraphTraversal traversal, + Callable callback) { + long capacity = Query.defaultCapacity(MAX_QUERY_LIMIT); + try { + return callback.call(); + } catch (Exception e) { + throw new HugeException("Failed to execute algorithm", e); + } finally { + Query.defaultCapacity(capacity); + try { + traversal.close(); + } catch (Exception e) { + throw new HugeException("Can't close traversal", e); + } + } + } + + protected void commitIfNeeded() { + // commit if needed + Transaction tx = this.graph().tx(); + Whitebox.invoke(tx.getClass(), "commitIfGtSize", tx, BATCH); + } + } + + public static final class TopMap { + + private final long topN; + private Map tops; + + public TopMap(long topN) { + this.topN = topN; + this.tops = new HashMap<>(); + } + + public int size() { + return this.tops.size(); + } + + public void put(Id key, long value) { + this.put(key, Long.valueOf(value)); + } + + public void put(Id key, Long value) { + this.tops.put(key, new MutableLong(value)); + // keep 2x buffer + if (this.tops.size() > this.topN * 2) { + this.shrinkIfNeeded(this.topN); + } + } + + public Set> entrySet() { + this.shrinkIfNeeded(this.topN); + return this.tops.entrySet(); + } + + private void shrinkIfNeeded(long limit) { + if (this.tops.size() >= limit && limit != HugeTraverser.NO_LIMIT) { + this.tops = HugeTraverser.topN(this.tops, true, limit); + } + } + } + + public static final class JsonMap { + + private final StringBuilder json; + + public JsonMap() { + this(4 * (int) Bytes.KB); + } + + public JsonMap(int initCapaticy) { + this.json = new StringBuilder(initCapaticy); + } + + public void startObject() { + this.json.append('{'); + } + + public void endObject() { + this.deleteLastComma(); + this.json.append('}'); + } + + public void startList() { + this.json.append('['); + } + + public void endList() { + this.deleteLastComma(); + this.json.append(']'); + } + + public void deleteLastComma() { + int last = this.json.length() - 1; + if (last >= 0 && this.json.charAt(last) == ',') { + this.json.deleteCharAt(last); + } + } + + public void appendKey(String key) { + this.appendString(key).append(':'); + } + + public void append(long value) { + this.json.append(value).append(','); + this.checkSizeLimit(); + } + + public void append(String value) { + this.appendString(value).append(','); + this.checkSizeLimit(); + } + + public void append(Object key, long value) { + this.append(key.toString(), value); + } + + public void append(String key, long value) { + this.appendString(key).append(':'); + this.json.append(value).append(','); + this.checkSizeLimit(); + } + + public void append(Object key, Number value) { + this.append(key.toString(), value); + } + + public void append(String key, Number value) { + this.appendString(key).append(':'); + this.json.append(value).append(','); + this.checkSizeLimit(); + } + + public void append(String key, String value) { + this.appendString(key).append(':'); + this.appendString(value).append(','); + this.checkSizeLimit(); + } + + public void appendRaw(String key, String rawJson) { + this.appendString(key).append(':'); + 
this.json.append(rawJson).append(','); + this.checkSizeLimit(); + } + + public void append(Set> kvs) { + for (Map.Entry top : kvs) { + this.append(top.getKey(), top.getValue()); + } + } + + private StringBuilder appendString(String str) { + if (str.indexOf('"') >= 0) { + str = StringEscapeUtils.escapeJson(str); + } + return this.json.append('"').append(str).append('"'); + } + + public void checkSizeLimit() { + E.checkArgument(this.json.length() < MAX_RESULT_SIZE, + "The result size exceeds limit %s", + MAX_RESULT_SIZE); + } + + public Object asJson() { + return JsonUtil.asJson(this.json.toString()); + } + } +} diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Algorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Algorithm.java new file mode 100644 index 0000000000..6ad200157a --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Algorithm.java @@ -0,0 +1,35 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +package com.baidu.hugegraph.job.algorithm; + +import java.util.Map; + +import com.baidu.hugegraph.job.Job; + +public interface Algorithm { + + public String name(); + + public String category(); + + public Object call(Job job, Map parameters); + + public void checkParameters(Map parameters); +} diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java new file mode 100644 index 0000000000..98f7c89dc6 --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java @@ -0,0 +1,71 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ + +package com.baidu.hugegraph.job.algorithm; + +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +import com.baidu.hugegraph.job.algorithm.cent.BetweenessCentralityAlgorithm; +import com.baidu.hugegraph.job.algorithm.cent.ClosenessCentralityAlgorithm; +import com.baidu.hugegraph.job.algorithm.cent.DegreeCentralityAlgorithm; +import com.baidu.hugegraph.job.algorithm.cent.EigenvectorCentralityAlgorithm; +import com.baidu.hugegraph.job.algorithm.comm.ClusterCoeffcientAlgorithm; +import com.baidu.hugegraph.job.algorithm.comm.LouvainAlgorithm; +import com.baidu.hugegraph.job.algorithm.comm.LpaAlgorithm; +import com.baidu.hugegraph.job.algorithm.comm.TriangleCountAlgorithm; + +public class AlgorithmPool { + + private static final AlgorithmPool INSTANCE = new AlgorithmPool(); + + static { + INSTANCE.register(new CountVertexAlgorithm()); + INSTANCE.register(new CountEdgeAlgorithm()); + + INSTANCE.register(new DegreeCentralityAlgorithm()); + INSTANCE.register(new BetweenessCentralityAlgorithm()); + INSTANCE.register(new ClosenessCentralityAlgorithm()); + INSTANCE.register(new EigenvectorCentralityAlgorithm()); + + INSTANCE.register(new TriangleCountAlgorithm()); + INSTANCE.register(new ClusterCoeffcientAlgorithm()); + INSTANCE.register(new LpaAlgorithm()); + INSTANCE.register(new LouvainAlgorithm()); + } + + private final Map algorithms; + + public AlgorithmPool() { + this.algorithms = new ConcurrentHashMap<>(); + } + + public Algorithm register(Algorithm algo) { + assert !this.algorithms.containsKey(algo.name()); + return this.algorithms.put(algo.name(), algo); + } + + public Algorithm find(String name) { + return this.algorithms.get(name); + } + + public static AlgorithmPool instance() { + return INSTANCE; + } +} diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/CountEdgeAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/CountEdgeAlgorithm.java new file mode 100644 index 0000000000..9fb1223483 --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/CountEdgeAlgorithm.java @@ -0,0 +1,79 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ + +package com.baidu.hugegraph.job.algorithm; + +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +import org.apache.commons.lang3.mutable.MutableLong; +import org.apache.tinkerpop.gremlin.structure.Edge; + +import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.util.JsonUtil; + +public class CountEdgeAlgorithm extends AbstractAlgorithm { + + @Override + public String name() { + return "count_edge"; + } + + @Override + public String category() { + return CATEGORY_AGGR; + } + + @Override + public Object call(Job job, Map parameters) { + Traverser traverser = new Traverser(job); + return traverser.count(); + } + + private static class Traverser extends AlgoTraverser { + + public Traverser(Job job) { + super(job); + } + + public Object count() { + Iterator edges = this.edges(null); + + Map counts = new HashMap<>(); + long total = 0L; + + while (edges.hasNext()) { + Edge edge = edges.next(); + String label = edge.label(); + MutableLong count = counts.get(label); + if (count != null) { + count.increment(); + } else { + counts.put(label, new MutableLong(1L)); + } + total++; + this.updateProgress(total); + } + counts.put("*", new MutableLong(total)); + + return JsonUtil.asJson(counts); + } + } +} diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/CountVertexAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/CountVertexAlgorithm.java new file mode 100644 index 0000000000..582e0bb691 --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/CountVertexAlgorithm.java @@ -0,0 +1,79 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ + +package com.baidu.hugegraph.job.algorithm; + +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +import org.apache.commons.lang3.mutable.MutableLong; +import org.apache.tinkerpop.gremlin.structure.Vertex; + +import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.util.JsonUtil; + +public class CountVertexAlgorithm extends AbstractAlgorithm { + + @Override + public String name() { + return "count_vertex"; + } + + @Override + public String category() { + return CATEGORY_AGGR; + } + + @Override + public Object call(Job job, Map parameters) { + Traverser traverser = new Traverser(job); + return traverser.count(); + } + + private static class Traverser extends AlgoTraverser { + + public Traverser(Job job) { + super(job); + } + + public Object count() { + Iterator vertices = this.vertices(); + + Map counts = new HashMap<>(); + long total = 0L; + + while (vertices.hasNext()) { + Vertex vertex = vertices.next(); + String label = vertex.label(); + MutableLong count = counts.get(label); + if (count != null) { + count.increment(); + } else { + counts.put(label, new MutableLong(1L)); + } + total++; + this.updateProgress(total); + } + counts.put("*", new MutableLong(total)); + + return JsonUtil.asJson(counts); + } + } +} diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java new file mode 100644 index 0000000000..14841043a3 --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java @@ -0,0 +1,113 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ + +package com.baidu.hugegraph.job.algorithm.cent; + +import java.util.Map; + +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; +import org.apache.tinkerpop.gremlin.structure.Vertex; + +import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm; +import com.baidu.hugegraph.job.algorithm.comm.LpaAlgorithm; + +public abstract class AbstractCentAlgorithm extends AbstractAlgorithm { + + protected static final String C_LABEL = LpaAlgorithm.Traverser.C_LABEL; + + @Override + public String category() { + return CATEGORY_CENT; + } + + @Override + public void checkParameters(Map parameters) { + depth(parameters); + degree(parameters); + sample(parameters); + sourceSample(parameters); + sourceLabel(parameters); + sourceCLabel(parameters); + top(parameters); + } + + public static class Traverser extends AlgoTraverser { + + public Traverser(Job job) { + super(job); + } + + protected GraphTraversal constructSource( + String sourceLabel, + long sourceSample, + String sourceCLabel) { + GraphTraversal t = this.graph().traversal() + .withSack(1f).V(); + + if (sourceLabel != null) { + t = t.hasLabel(sourceLabel); + } + + t = t.filter(it -> { + this.updateProgress(++this.progress); + return sourceCLabel == null ? true : + match(it.get(), C_LABEL, sourceCLabel); + }); + + if (sourceSample > 0L) { + t = t.sample((int) sourceSample); + } + + return t; + } + + protected GraphTraversal constructPath( + GraphTraversal t, long degree, + long sample, String sourceLabel, String sourceCLabel) { + GraphTraversal unit = constructPathUnit(degree, sample, + sourceLabel, + sourceCLabel); + t = t.as("v").repeat(__.local(unit).simplePath().as("v")); + + return t; + } + + protected GraphTraversal constructPathUnit( + long degree, long sample, + String sourceLabel, + String sourceCLabel) { + GraphTraversal unit = __.both(); + if (sourceLabel != null) { + unit = unit.hasLabel(sourceLabel); + } + if (sourceCLabel != null) { + unit = unit.has(C_LABEL, sourceCLabel); + } + if (degree != NO_LIMIT) { + unit = unit.limit(degree); + } + if (sample > 0L) { + unit = unit.sample((int) sample); + } + return unit; + } + } +} diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java new file mode 100644 index 0000000000..ae1b8bb743 --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java @@ -0,0 +1,101 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ + +package com.baidu.hugegraph.job.algorithm.cent; + +import java.util.Map; + +import org.apache.tinkerpop.gremlin.process.traversal.Order; +import org.apache.tinkerpop.gremlin.process.traversal.P; +import org.apache.tinkerpop.gremlin.process.traversal.Pop; +import org.apache.tinkerpop.gremlin.process.traversal.Scope; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; +import org.apache.tinkerpop.gremlin.structure.Column; +import org.apache.tinkerpop.gremlin.structure.Vertex; + +import com.baidu.hugegraph.job.Job; + +public class BetweenessCentralityAlgorithm extends AbstractCentAlgorithm { + + @Override + public String name() { + return "betweeness_centrality"; + } + + @Override + public Object call(Job job, Map parameters) { + Traverser traverser = new Traverser(job); + return traverser.betweenessCentrality(depth(parameters), + degree(parameters), + sample(parameters), + sourceLabel(parameters), + sourceSample(parameters), + sourceCLabel(parameters), + top(parameters)); + } + + private static class Traverser extends AbstractCentAlgorithm.Traverser { + + public Traverser(Job job) { + super(job); + } + + public Object betweenessCentrality(int depth, + long degree, + long sample, + String sourceLabel, + long sourceSample, + String sourceCLabel, + long topN) { + assert depth > 0; + assert degree > 0L; + assert topN >= 0L; + + GraphTraversal t = constructSource(sourceLabel, + sourceSample, + sourceCLabel); + t = constructPath(t, degree, sample, sourceLabel, sourceCLabel); + t = t.emit().until(__.loops().is(P.gte(depth))); + + @SuppressWarnings({ "unchecked", "deprecation" }) + GraphTraversal tf = t.filter( + __.project("x","y","z") + .by(__.select(Pop.first, "v").id()) + .by(__.select(Pop.last, "v").id()) + .by(__.select(Pop.all, "v").count(Scope.local)) + .as("triple") + .coalesce(__.select("x","y").as("a") + .select("triples").unfold().as("t") + .select("x","y").where(P.eq("a")).select("t"), + __.store("triples")) + .select("z").as("length") + .select("triple").select("z").where(P.eq("length"))); + + GraphTraversal tg = tf.select(Pop.all, "v") + .unfold().id() + .groupCount().order(Scope.local) + .by(Column.values, Order.desc); + GraphTraversal tLimit = topN <= 0L ? tg : + tg.limit(Scope.local, topN); + + return this.execute(tLimit, () -> tLimit.next()); + } + } +} diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java new file mode 100644 index 0000000000..d890db8087 --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java @@ -0,0 +1,111 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +package com.baidu.hugegraph.job.algorithm.cent; + +import java.util.Map; + +import org.apache.tinkerpop.gremlin.process.traversal.Operator; +import org.apache.tinkerpop.gremlin.process.traversal.Order; +import org.apache.tinkerpop.gremlin.process.traversal.P; +import org.apache.tinkerpop.gremlin.process.traversal.Pop; +import org.apache.tinkerpop.gremlin.process.traversal.Scope; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; +import org.apache.tinkerpop.gremlin.structure.Column; +import org.apache.tinkerpop.gremlin.structure.Vertex; + +import com.baidu.hugegraph.job.Job; + +public class ClosenessCentralityAlgorithm extends AbstractCentAlgorithm { + + public static final long DEFAULT_DEGREE = 100L; + public static final long DEFAULT_SAMPLE = 1L; + + @Override + public String name() { + return "closeness_centrality"; + } + + @Override + public void checkParameters(Map parameters) { + depth(parameters); + } + + @Override + public Object call(Job job, Map parameters) { + Traverser traverser = new Traverser(job); + return traverser.closenessCentrality(depth(parameters), + degree(parameters), + sample(parameters), + sourceLabel(parameters), + sourceSample(parameters), + sourceCLabel(parameters), + top(parameters)); + } + + private static class Traverser extends AbstractCentAlgorithm.Traverser { + + public Traverser(Job job) { + super(job); + } + + public Object closenessCentrality(int depth, + long degree, + long sample, + String sourceLabel, + long sourceSample, + String sourceCLabel, + long topN) { + assert depth > 0; + assert degree > 0L; + assert topN >= 0L; + + GraphTraversal t = constructSource(sourceLabel, + sourceSample, + sourceCLabel); + t = constructPath(t, degree, sample, sourceLabel, sourceCLabel); + t = t.emit().until(__.loops().is(P.gte(depth))); + + @SuppressWarnings({ "unchecked", "deprecation" }) + GraphTraversal tf = t.filter( + __.project("x","y","z") + .by(__.select(Pop.first, "v").id()) + .by(__.select(Pop.last, "v").id()) + .by(__.select(Pop.all, "v").count(Scope.local)) + .as("triple") + .coalesce(__.select("x","y").as("a") + .select("triples").unfold().as("t") + .select("x","y").where(P.eq("a")).select("t"), + __.store("triples")) + .select("z").as("length") + .select("triple").select("z").where(P.eq("length"))); + + GraphTraversal tg; + tg = tf.group().by(__.select(Pop.first, "v").id()) + .by(__.select(Pop.all, "v").count(Scope.local) + .sack(Operator.div).sack().sum()) + .order(Scope.local).by(Column.values, Order.desc); + GraphTraversal tLimit = topN <= 0L ? 
tg : + tg.limit(Scope.local, topN); + + return this.execute(tLimit, () -> tLimit.next()); + } + } +} diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java new file mode 100644 index 0000000000..81bd336729 --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java @@ -0,0 +1,140 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +package com.baidu.hugegraph.job.algorithm.cent; + +import java.util.Iterator; +import java.util.Map; + +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource; +import org.apache.tinkerpop.gremlin.structure.Edge; +import org.apache.tinkerpop.gremlin.structure.Vertex; + +import com.baidu.hugegraph.backend.id.Id; +import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.structure.HugeEdge; +import com.baidu.hugegraph.type.define.Directions; + +public class DegreeCentralityAlgorithm extends AbstractCentAlgorithm { + + @Override + public String name() { + return "degree_centrality"; + } + + @Override + public void checkParameters(Map parameters) { + direction(parameters); + top(parameters); + } + + @Override + public Object call(Job job, Map parameters) { + Traverser traverser = new Traverser(job); + return traverser.degreeCentrality(direction(parameters), + top(parameters)); + } + + private static class Traverser extends AlgoTraverser { + + public Traverser(Job job) { + super(job); + } + + public Object degreeCentrality(Directions direction, long topN) { + if (direction == null || direction == Directions.BOTH) { + return degreeCentrality(topN); + } + assert direction == Directions.OUT || direction == Directions.IN; + assert topN >= 0L; + + Iterator edges = this.edges(direction); + + JsonMap degrees = new JsonMap(); + TopMap tops = new TopMap(topN); + Id vertex = null; + long degree = 0L; + long total = 0L; + + degrees.startObject(); + while (edges.hasNext()) { + HugeEdge edge = (HugeEdge) edges.next(); + this.updateProgress(++total); + + Id source = edge.ownerVertex().id(); + if (source.equals(vertex)) { + degree++; + continue; + } + if (vertex != null) { + if (topN <= 0L) { + degrees.append(vertex, degree); + } else { + tops.put(vertex, degree); + } + } + vertex = source; + degree = 1L; + } + + if (vertex != null) { + if (topN <= 0L) { + degrees.append(vertex, degree); + } else { + tops.put(vertex, degree); + degrees.append(tops.entrySet()); + } + } + + degrees.endObject(); + + return degrees.asJson(); + } + + protected Object degreeCentrality(long topN) { + assert topN >= 0L; + long total = 0L; + JsonMap degrees = new 
JsonMap(); + TopMap tops = new TopMap(topN); + + GraphTraversalSource traversal = this.graph().traversal(); + Iterator vertices = this.vertices(); + + degrees.startObject(); + while (vertices.hasNext()) { + Vertex source = vertices.next(); + this.updateProgress(++total); + + Long degree = traversal.V(source).bothE().count().next(); + if (topN <= 0L) { + degrees.append(source.id(), degree); + } else { + tops.put((Id) source.id(), degree); + } + } + + if (tops.size() > 0) { + degrees.append(tops.entrySet()); + } + degrees.endObject(); + + return degrees.asJson(); + } + } +} diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java new file mode 100644 index 0000000000..d87fc79316 --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java @@ -0,0 +1,100 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ + +package com.baidu.hugegraph.job.algorithm.cent; + +import java.util.Map; + +import org.apache.tinkerpop.gremlin.process.traversal.Order; +import org.apache.tinkerpop.gremlin.process.traversal.Scope; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; +import org.apache.tinkerpop.gremlin.structure.Column; +import org.apache.tinkerpop.gremlin.structure.T; +import org.apache.tinkerpop.gremlin.structure.Vertex; + +import com.baidu.hugegraph.job.Job; + +public class EigenvectorCentralityAlgorithm extends AbstractCentAlgorithm { + + public static final long DEFAULT_DEGREE = 100L; + public static final long DEFAULT_SAMPLE = 1L; + + @Override + public String name() { + return "eigenvector_centrality"; + } + + @Override + public Object call(Job job, Map parameters) { + Traverser traverser = new Traverser(job); + return traverser.eigenvectorCentrality(depth(parameters), + degree(parameters), + sample(parameters), + sourceLabel(parameters), + sourceSample(parameters), + sourceCLabel(parameters), + top(parameters)); + } + + private static class Traverser extends AbstractCentAlgorithm.Traverser { + + public Traverser(Job job) { + super(job); + } + + public Object eigenvectorCentrality(int depth, + long degree, + long sample, + String sourceLabel, + long sourceSample, + String sourceCLabel, + long topN) { + assert depth > 0; + assert degree > 0L; + assert topN >= 0L; + + // TODO: support parameters: Directions dir, String label + /* + * g.V().repeat(groupCount('m').by(id) + * .local(both().limit(50).sample(1)) + * .simplePath()) + * .times(4).cap('m') + * .order(local).by(values, desc) + * .limit(local, 100) + */ + + GraphTraversal t = constructSource(sourceLabel, + sourceSample, + sourceCLabel); + GraphTraversal unit = constructPathUnit(degree, sample, + sourceLabel, + sourceCLabel); + t = t.repeat(__.groupCount("m").by(T.id) + .local(unit).simplePath()).times(depth); + + GraphTraversal tCap; + tCap = t.cap("m").order(Scope.local).by(Column.values, Order.desc); + GraphTraversal tLimit = topN <= 0L ? tCap : + tCap.limit(Scope.local, topN); + + return this.execute(tLimit, () -> tLimit.next()); + } + } +} diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/AbstractCommAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/AbstractCommAlgorithm.java new file mode 100644 index 0000000000..74b884a063 --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/AbstractCommAlgorithm.java @@ -0,0 +1,78 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ + +package com.baidu.hugegraph.job.algorithm.comm; + +import java.util.Map; + +import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm; +import com.baidu.hugegraph.traversal.algorithm.HugeTraverser; +import com.baidu.hugegraph.util.E; + +public abstract class AbstractCommAlgorithm extends AbstractAlgorithm { + + private static final int MAX_TIMES = 2048; + + @Override + public String category() { + return CATEGORY_COMM; + } + + protected static int times(Map parameters) { + if (!parameters.containsKey(KEY_TIMES)) { + return (int) DEFAULT_TIMES; + } + int times = parameterInt(parameters, KEY_TIMES); + HugeTraverser.checkPositiveOrNoLimit(times, KEY_TIMES); + E.checkArgument(times <= MAX_TIMES, + "The maximum number of iterations is %s, but got %s", + MAX_TIMES, times); + return times; + } + + protected static int stableTimes(Map parameters) { + if (!parameters.containsKey(KEY_STABLE_TIMES)) { + return (int) DEFAULT_STABLE_TIMES; + } + int times = parameterInt(parameters, KEY_STABLE_TIMES); + HugeTraverser.checkPositiveOrNoLimit(times, KEY_STABLE_TIMES); + E.checkArgument(times <= MAX_TIMES, + "The maximum number of stable iterations is %s, " + + "but got %s", MAX_TIMES, times); + return times; + } + + protected static double precision(Map parameters) { + if (!parameters.containsKey(KEY_PRECISION)) { + return DEFAULT_PRECISION; + } + double precision = parameterDouble(parameters, KEY_PRECISION); + E.checkArgument(0d < precision && precision < 1d, + "The %s parameter must be in range(0,1), but got: %s", + KEY_PRECISION, precision); + return precision; + } + + protected static String showCommunity(Map parameters) { + if (!parameters.containsKey(KEY_SHOW_COMM)) { + return null; + } + return parameterString(parameters, KEY_SHOW_COMM); + } +} diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java new file mode 100644 index 0000000000..cc893fc1f0 --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java @@ -0,0 +1,70 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ + +package com.baidu.hugegraph.job.algorithm.comm; + +import java.util.Map; + +import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.type.define.Directions; +import com.baidu.hugegraph.util.InsertionOrderUtil; + +public class ClusterCoeffcientAlgorithm extends AbstractCommAlgorithm { + + @Override + public String name() { + return "cluster_coeffcient"; + } + + @Override + public void checkParameters(Map parameters) { + direction(parameters); + degree(parameters); + } + + @Override + public Object call(Job job, Map parameters) { + Traverser traverser = new Traverser(job); + return traverser.clusterCoeffcient(direction(parameters), + degree(parameters)); + } + + private static class Traverser extends TriangleCountAlgorithm.Traverser { + + public Traverser(Job job) { + super(job); + } + + public Object clusterCoeffcient(Directions direction, long degree) { + Map results = this.triangles(direction, degree); + results = InsertionOrderUtil.newMap(results); + + long triangles = results.remove(KEY_TRIANGLES); + long triads = results.remove(KEY_TRIADS); + assert triangles <= triads; + double coeffcient = triads == 0L ? 0d : 1d * triangles / triads; + + @SuppressWarnings({ "unchecked", "rawtypes" }) + Map converted = (Map) results; + converted.put("cluster_coeffcient", coeffcient); + + return results; + } + } +} diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java new file mode 100644 index 0000000000..3f6de63e8c --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java @@ -0,0 +1,83 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
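The value reported above is the graph-level clustering coefficient, the ratio of triangles to connected triples: with 2 triangles and 10 triads the coefficient is 2 / 10 = 0.2, and a graph with no triads at all is defined as 0. A minimal sketch of that final division, with the result-map bookkeeping stripped away:

    public class ClusterCoefficientSketch {

        // Global clustering coefficient from the counts produced by the
        // triangle counting pass; triangles can never exceed triads.
        public static double coefficient(long triangles, long triads) {
            assert triangles <= triads;
            return triads == 0L ? 0d : (double) triangles / triads;
        }
    }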
+ */ + +package com.baidu.hugegraph.job.algorithm.comm; + +import java.util.Map; + +import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.util.E; + +public class LouvainAlgorithm extends AbstractCommAlgorithm { + + @Override + public String name() { + return "louvain"; + } + + @Override + public void checkParameters(Map parameters) { + times(parameters); + stableTimes(parameters); + precision(parameters); + degree(parameters); + sourceLabel(parameters); + sourceCLabel(parameters); + showCommunity(parameters); + clearPass(parameters); + } + + @Override + public Object call(Job job, Map parameters) { + String label = sourceLabel(parameters); + String clabel = sourceCLabel(parameters); + long degree = degree(parameters); + + LouvainTraverser traverser = new LouvainTraverser(job, degree, + label, clabel); + Long clearPass = clearPass(parameters); + String showComm = showCommunity(parameters); + try { + if (clearPass != null) { + return traverser.clearPass(clearPass.intValue()); + } else if (showComm != null) { + return traverser.showCommunity(showComm); + } else { + return traverser.louvain(times(parameters), + stableTimes(parameters), + precision(parameters)); + } + } catch (Throwable e) { + job.graph().tx().rollback(); + throw e; + } + } + + protected static Long clearPass(Map parameters) { + if (!parameters.containsKey(KEY_CLEAR)) { + return null; + } + long pass = parameterLong(parameters, KEY_CLEAR); + // TODO: change to checkNonNegative() + E.checkArgument(pass >= 0 || pass == -1, + "The %s parameter must be >= 0 or == -1, but got %s", + KEY_CLEAR, pass); + return pass; + } +} diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java new file mode 100644 index 0000000000..0b3d674aaf --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java @@ -0,0 +1,715 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
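Besides a normal run, the louvain job exposes two maintenance modes that short-circuit the algorithm: a clear parameter drops intermediate pass data (-1 means every pass, N >= 0 means only pass N), and show_community expands a merged community id back into its original member vertices. A hedged sketch of the corresponding parameter maps; the key names are inferred from the KEY_CLEAR and KEY_SHOW_COMM constants and are assumptions, not documented API:

    import java.util.Map;

    import com.google.common.collect.ImmutableMap;

    public class LouvainMaintenanceRequests {

        // Drop the intermediate c_pass-* data of every pass.
        public static Map<String, Object> clearAll() {
            return ImmutableMap.<String, Object>of("clear", -1);
        }

        // Drop only the data produced by pass N.
        public static Map<String, Object> clearPass(int n) {
            return ImmutableMap.<String, Object>of("clear", n);
        }

        // Expand a merged community vertex id back to its original members.
        public static Map<String, Object> showCommunity(String communityId) {
            return ImmutableMap.<String, Object>of("show_community", communityId);
        }
    }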
+ */ + +package com.baidu.hugegraph.job.algorithm.comm; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +import org.apache.commons.lang3.mutable.MutableInt; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource; +import org.apache.tinkerpop.gremlin.structure.Edge; +import org.apache.tinkerpop.gremlin.structure.T; +import org.apache.tinkerpop.gremlin.structure.Vertex; +import org.slf4j.Logger; + +import com.baidu.hugegraph.backend.id.Id; +import com.baidu.hugegraph.backend.id.IdGenerator; +import com.baidu.hugegraph.exception.ExistedException; +import com.baidu.hugegraph.iterator.ListIterator; +import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm; +import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm.AlgoTraverser; +import com.baidu.hugegraph.schema.SchemaLabel; +import com.baidu.hugegraph.schema.SchemaManager; +import com.baidu.hugegraph.schema.VertexLabel; +import com.baidu.hugegraph.structure.HugeEdge; +import com.baidu.hugegraph.structure.HugeVertex; +import com.baidu.hugegraph.type.define.Directions; +import com.baidu.hugegraph.util.Log; +import com.google.common.collect.ImmutableMap; + +public class LouvainTraverser extends AlgoTraverser { + + public static final String C_PASS = "c_pass-"; + public static final String C_KIN = "c_kin"; + public static final String C_WEIGHT = "c_weight"; + public static final String C_MEMBERS = "c_members"; + + public static final String C_LABEL = LpaAlgorithm.Traverser.C_LABEL; + + private static final long LIMIT = AbstractAlgorithm.MAX_QUERY_LIMIT; + + private static final Logger LOG = Log.logger(LouvainTraverser.class); + + private final GraphTraversalSource g; + private final long m; + private final String sourceLabel; + private final String sourceCLabel; + private final long degree; + private final Cache cache; + + private String passLabel; + + public LouvainTraverser(Job job, long degree, + String sourceLabel, String sourceCLabel) { + super(job); + this.g = this.graph().traversal(); + this.m = this.g.E().count().next(); + this.sourceLabel = sourceLabel; + this.sourceCLabel = sourceCLabel; + this.degree = degree; + this.passLabel = ""; + + this.cache = new Cache(); + } + + @SuppressWarnings("unused") + private Id genId2(int pass, Id cid) { + // gen id for merge-community vertex + String id = cid.toString(); + if (pass == 0) { + // conncat pass with cid + id = pass + "~" + id; + } else { + // replace last pass with current pass + String lastPass = String.valueOf(pass - 1); + assert id.startsWith(lastPass); + id = id.substring(lastPass.length()); + id = pass + id; + } + return IdGenerator.of(id); + } + + private void defineSchemaOfPk() { + String label = this.labelOfPassN(0); + if (this.graph().existsVertexLabel(label) || + this.graph().existsEdgeLabel(label)) { + throw new IllegalArgumentException( + "Please clear historical results before proceeding"); + } + + SchemaManager schema = this.graph().schema(); + schema.propertyKey(C_KIN).asInt() + .ifNotExist().create(); + schema.propertyKey(C_MEMBERS).valueSet().asText() + .ifNotExist().create(); + schema.propertyKey(C_WEIGHT).asFloat() + .ifNotExist().create(); + } + + private 
void defineSchemaOfPassN(int pass) { + this.passLabel = labelOfPassN(pass); + + SchemaManager schema = this.graph().schema(); + try { + schema.vertexLabel(this.passLabel).useCustomizeStringId() + .properties(C_KIN, C_MEMBERS) + .nullableKeys(C_KIN, C_MEMBERS) + .create(); + schema.edgeLabel(this.passLabel) + .sourceLabel(this.passLabel) + .targetLabel(this.passLabel) + .properties(C_WEIGHT) + .create(); + } catch (ExistedException e) { + throw new IllegalArgumentException( + "Please clear historical results before proceeding", e); + } + } + + private List cpassEdgeLabels() { + List names = new ArrayList<>(); + for (SchemaLabel label : this.graph().schema().getEdgeLabels()) { + String name = label.name(); + if (name.startsWith(C_PASS)) { + names.add(name); + } + } + return names; + } + + private List cpassVertexLabels() { + List names = new ArrayList<>(); + for (SchemaLabel label : this.graph().schema().getVertexLabels()) { + String name = label.name(); + if (name.startsWith(C_PASS)) { + names.add(name); + } + } + return names; + } + + private String labelOfPassN(int n) { + return C_PASS + n; + } + + private float weightOfEdge(Edge e) { + if (e.label().startsWith(C_PASS)) { + assert e.property(C_WEIGHT).isPresent(); + return e.value(C_WEIGHT); + } else if (e.property(C_WEIGHT).isPresent()) { + return e.value(C_WEIGHT); + } + return 1f; + } + + private float weightOfEdges(List edges) { + float weight = 0f; + for (Edge edge : edges) { + weight += weightOfEdge(edge); + } + return weight; + } + + private Vertex newCommunityNode(Id cid, int kin, List members) { + assert !members.isEmpty() : members; + return this.graph().addVertex(T.label, this.passLabel, T.id, cid, + C_KIN, kin, C_MEMBERS, members); + } + + private Vertex makeCommunityNode(Id cid) { + VertexLabel vl = this.graph().vertexLabel(this.passLabel); + return new HugeVertex(this.graph(), cid, vl); + } + + private Edge newCommunityEdge(Vertex source, Vertex target, float weight) { + return source.addEdge(this.passLabel, target, C_WEIGHT, weight); + } + + private void insertNewCommunity(int pass, Id cid, int kin, + List members, + Map cedges) { + // create backend vertex if it's the first time + Id vid = this.cache.genId(pass, cid); + Vertex node = this.newCommunityNode(vid, kin, members); + commitIfNeeded(); + // update backend vertex edges + for (Map.Entry e : cedges.entrySet()) { + float weight = e.getValue().floatValue(); + vid = this.cache.genId(pass, e.getKey()); + Vertex targetV = this.makeCommunityNode(vid); + this.newCommunityEdge(node, targetV, weight); + commitIfNeeded(); + } + LOG.debug("Add new comm: {} kin={} size={}", node, kin, members.size()); + } + + private boolean needSkipVertex(int pass, Vertex v) { + // skip the old intermediate data when first pass + String label = v.label(); + if (label.startsWith(C_PASS)) { + if (pass == 0) { + return true; + } + String lastPassLabel = labelOfPassN(pass - 1); + if (!label.equals(lastPassLabel)) { + return true; + } + } + // skip the vertex with unmatched clabel + if (this.sourceCLabel != null && + !match(v, C_LABEL, this.sourceCLabel)) { + return true; + } + return false; + } + + private Iterator sourceVertices(int pass) { + if (pass > 0) { + // all vertices of merged community + String lastPassLabel = labelOfPassN(pass - 1); + return this.vertices(lastPassLabel, LIMIT); + } else { + assert pass == 0; + // all vertices at the first time + return this.vertices(this.sourceLabel, LIMIT); + } + } + + private List neighbors(Id vid) { + Iterator nbs = this.edgesOfVertex(vid, 
Directions.BOTH, + (Id) null, this.degree); + @SuppressWarnings("resource") + ListIterator list = new ListIterator<>(LIMIT, nbs); + return (List) list.list(); + } + + private float weightOfVertex(Vertex v, List edges) { + Float value = this.cache.vertexWeight((Id) v.id()); + if (value != null) { + return value; + } + if (edges == null) { + edges = neighbors((Id) v.id()); + } + float weight = weightOfEdges(edges); + this.cache.vertexWeight((Id) v.id(), weight); + return weight; + } + + private int kinOfVertex(Vertex v) { + if (v.label().startsWith(C_PASS) && v.property(C_KIN).isPresent()) { + return v.value(C_KIN); + } + return 0; + } + + private Id cidOfVertex(Vertex v) { + Id vid = (Id) v.id(); + Community c = this.cache.vertex2Community(vid); + return c != null ? c.cid : vid; + } + + // 1: wrap original vertex as community node + // 2: add original vertices to community node, + // and save as community vertex when merge() + // 3: wrap community vertex as community node, + // and repeat step 2 and step 3. + private Community wrapCommunity(Vertex otherV) { + Id vid = (Id) otherV.id(); + Community comm = this.cache.vertex2Community(vid); + if (comm != null) { + return comm; + } + + comm = new Community(vid); + comm.add(this, otherV, null); // will traverse the neighbors of otherV + this.cache.vertex2Community(vid, comm); + return comm; + } + + private Collection> nbCommunities( + int pass, + List edges) { + // comms is a map of cid:[community,weight] + Map> comms = new HashMap<>(); + for (Edge edge : edges) { + Vertex otherV = ((HugeEdge) edge).otherVertex(); + if (needSkipVertex(pass, otherV)) { + // skip the old intermediate data, or filter clabel + continue; + } + Community c = wrapCommunity(otherV); + if (!comms.containsKey(c.cid)) { + comms.put(c.cid, Pair.of(c, new MutableInt(0))); + } + // calc weight between source vertex and neighbor community + comms.get(c.cid).getRight().add(2 * weightOfEdge(edge)); + } + return comms.values(); + } + + private void moveCommunity(Vertex v, List nbs, Community newC) { + Id vid = (Id) v.id(); + + // remove v from old community + Community oldC = this.cache.vertex2Community(vid); + if (oldC != null) { + oldC.remove(this, v, nbs); + } + + // add v to new community + newC.add(this, v, nbs); + LOG.debug("Move {} to comm: {}", v, newC); + + // update community of v + this.cache.vertex2Community(vid, newC); + } + + private double moveCommunities(int pass) { + Iterator vertices = this.sourceVertices(pass); + + // shuffle + //r = r.order().by(shuffle); + + long total = 0L; + long moved = 0L; + while (vertices.hasNext()) { + this.updateProgress(++this.progress); + Vertex v = vertices.next(); + if (needSkipVertex(pass, v)) { + // skip the old intermediate data, or filter clabel + continue; + } + total++; + Id cid = cidOfVertex(v); + List nbs = neighbors((Id) v.id()); + double ki = kinOfVertex(v) + weightOfVertex(v, nbs); + // update community of v if △Q changed + double maxDeltaQ = 0d; + Community bestComm = null; + // list all neighbor communities of v + for (Pair nbc : nbCommunities(pass, nbs)) { + // △Q = (Ki_in - Ki * Etot / m) / 2m + Community otherC = nbc.getLeft(); + // weight between c and otherC + double kiin = nbc.getRight().floatValue(); + // weight of otherC + int tot = otherC.kin() + otherC.kout(); + if (cid.equals(otherC.cid)) { + tot -= ki; + assert tot >= 0; + // expect tot >= 0, but may be something wrong? 
+ if (tot < 0) { + tot = 0; + } + } + double deltaQ = kiin - ki * tot / this.m; + if (deltaQ > maxDeltaQ) { + // TODO: cache otherC for neighbors the same community + maxDeltaQ = deltaQ; + bestComm = otherC; + } + } + if (maxDeltaQ > 0d && !cid.equals(bestComm.cid)) { + moved++; + // move v to the community of maxQ neighbor + moveCommunity(v, nbs, bestComm); + } + } + + // maybe always shocking when set degree limit + return total == 0L ? 0d : (double) moved / total; + } + + private void mergeCommunities(int pass) { + // merge each community as a vertex + Collection>> comms = this.cache.communities(); + this.cache.resetVertexWeight(); + for (Pair> pair : comms) { + Community c = pair.getKey(); + if (c.empty()) { + continue; + } + // update kin and edges between communities + int kin = c.kin(); + Set vertices = pair.getRight(); + assert !vertices.isEmpty(); + List members = new ArrayList<>(vertices.size()); + Map cedges = new HashMap<>(vertices.size()); + for (Id v : vertices) { + members.add(v.toString()); + // collect edges between this community and other communities + List neighbors = neighbors(v); + for (Edge edge : neighbors) { + Vertex otherV = ((HugeEdge) edge).otherVertex(); + if (vertices.contains(otherV.id())) { + // inner edges of this community, will be calc twice + // due to both e-in and e-out are in vertices, + kin += weightOfEdge(edge); + continue; + } + Id otherCid = cidOfVertex(otherV); + if (otherCid.compareTo(c.cid) < 0) { + // skip if it should be collected by otherC + continue; + } + if (!cedges.containsKey(otherCid)) { + cedges.put(otherCid, new MutableInt(0)); + } + cedges.get(otherCid).add(weightOfEdge(edge)); + } + } + // insert new community vertex and edges into storage + this.insertNewCommunity(pass, c.cid, kin, members, cedges); + } + this.graph().tx().commit(); + // reset communities + this.cache.reset(); + } + + public Object louvain(int maxTimes, int stableTimes, double precision) { + assert maxTimes > 0; + assert precision > 0d; + + this.defineSchemaOfPk(); + + /* + * iterate until it has stabilized or + * the maximum number of times is reached + */ + int times = maxTimes; + int movedTimes = 0; + double movedPercent = 0d; + double lastMovedPercent = 0d; + + for (int i = 0; i < maxTimes; i++) { + boolean finished = true; + movedPercent = 0d; + lastMovedPercent = 1d; + int tinyChanges = 0; + while ((movedPercent = this.moveCommunities(i)) > 0d) { + movedTimes++; + finished = false; + if (lastMovedPercent - movedPercent < precision) { + tinyChanges++; + } + if (i == 0 && movedPercent < precision) { + // stop the first round of iterations early + break; + } + if (tinyChanges >= stableTimes) { + // maybe always shaking and falling into an dead loop + break; + } + lastMovedPercent = movedPercent; + } + if (finished) { + times = i; + break; + } else { + this.defineSchemaOfPassN(i); + this.mergeCommunities(i); + } + } + + long communities = 0L; + String commLabel = this.passLabel; + if (!commLabel.isEmpty()) { + GraphTraversal t = this.g.V().hasLabel(commLabel).count(); + communities = this.execute(t, t::next); + } + return ImmutableMap.of("pass_times", times, + "phase1_times", movedTimes, + "last_precision", movedPercent, + "times", maxTimes, + "communities", communities); + } + + public double modularity(int pass) { + // pass: label the last pass + String label = labelOfPassN(pass); + Number kin = this.g.V().hasLabel(label).values(C_KIN).sum().next(); + Number weight = this.g.E().hasLabel(label).values(C_WEIGHT).sum().next(); + double m = kin.intValue() + 
weight.floatValue() * 2.0d; + double q = 0.0d; + Iterator coms = this.g.V().hasLabel(label); + while (coms.hasNext()) { + Vertex com = coms.next(); + int cin = com.value(C_KIN); + Number cout = this.g.V(com).bothE().values(C_WEIGHT).sum().next(); + double cdegree = cin + cout.floatValue(); + // Q = ∑(I/M - ((2I+O)/2M)^2) + q += cin / m - Math.pow(cdegree / m, 2); + } + return q; + } + + public Collection showCommunity(String community) { + final String C_PASS0 = labelOfPassN(0); + Collection comms = Arrays.asList(community); + boolean reachPass0 = false; + while (comms.size() > 0 && !reachPass0) { + Iterator subComms = this.vertices(comms.iterator()); + comms = new HashSet<>(); + while (subComms.hasNext()) { + this.updateProgress(++this.progress); + Vertex sub = subComms.next(); + if (sub.property(C_MEMBERS).isPresent()) { + Set members = sub.value(C_MEMBERS); + reachPass0 = sub.label().equals(C_PASS0); + comms.addAll(members); + } + } + } + return comms; + } + + public long clearPass(int pass) { + GraphTraversal te = this.g.E(); + if (pass < 0) { + // drop edges of all pass + List els = this.cpassEdgeLabels(); + if (els.size() > 0) { + String first = els.remove(0); + te = te.hasLabel(first, els.toArray(new String[els.size()])); + this.drop(te); + } + // drop schema + for (String label : this.cpassEdgeLabels()) { + this.graph().schema().edgeLabel(label).remove(); + } + } else { + // drop edges of pass N + String label = labelOfPassN(pass); + if (this.graph().existsEdgeLabel(label)) { + te = te.hasLabel(label); + this.drop(te); + // drop schema + this.graph().schema().edgeLabel(label).remove(); + } + } + + GraphTraversal tv = this.g.V(); + if (pass < 0) { + // drop vertices of all pass + List vls = this.cpassVertexLabels(); + if (vls.size() > 0) { + String first = vls.remove(0); + tv = tv.hasLabel(first, vls.toArray(new String[vls.size()])); + this.drop(tv); + } + // drop schema + for (String label : this.cpassVertexLabels()) { + this.graph().schema().vertexLabel(label).remove(); + } + } else { + // drop vertices of pass N + String label = labelOfPassN(pass); + if (this.graph().existsVertexLabel(label)) { + tv = tv.hasLabel(label); + this.drop(tv); + // drop schema + this.graph().schema().vertexLabel(label).remove(); + } + } + + return this.progress; + } + + private static class Community { + + // community id (stored as a backend vertex) + private final Id cid; + // community members size + private int size = 0; + /* + * weight of all edges in community(2X), sum of kin of new members + * [each is from the last pass, stored in backend vertex] + */ + private int kin = 0; + /* + * weight of all edges between communities, sum of kout of new members + * [each is last pass, calculated in real time by neighbors] + */ + // + private int kout = 0; + + public Community(Id cid) { + this.cid = cid; + } + + public boolean empty() { + return this.size <= 0; + } + + public void add(LouvainTraverser t, Vertex v, List nbs) { + this.size++; + this.kin += t.kinOfVertex(v); + this.kout += t.weightOfVertex(v, nbs); + } + + public void remove(LouvainTraverser t, Vertex v, List nbs) { + this.size--; + this.kin -= t.kinOfVertex(v); + this.kout -= t.weightOfVertex(v, nbs); + } + + public int kin() { + return this.kin; + } + + public int kout() { + return this.kout; + } + + @Override + public String toString() { + return String.format("[%s](size=%s kin=%s kout=%s)", + this.cid , this.size, this.kin, this.kout); + } + } + + private static class Cache { + + private final Map vertexWeightCache; + private final Map 
vertex2Community; + private final Map genIds; + + public Cache() { + this.vertexWeightCache = new HashMap<>(); + this.vertex2Community = new HashMap<>(); + this.genIds = new HashMap<>(); + } + + public Community vertex2Community(Id id) { + return this.vertex2Community.get(id); + } + + public void vertex2Community(Id id, Community c) { + this.vertex2Community.put(id, c); + } + + public Float vertexWeight(Id id) { + return this.vertexWeightCache.get(id); + } + + public void vertexWeight(Id id, float weight) { + this.vertexWeightCache.put(id, weight); + } + + public void reset() { + this.vertexWeightCache.clear(); + this.vertex2Community.clear(); + this.genIds.clear(); + } + + public void resetVertexWeight() { + this.vertexWeightCache.clear(); + } + + public Id genId(int pass, Id cid) { + if (!this.genIds.containsKey(cid)) { + this.genIds.put(cid, this.genIds.size() + 1); + } + String id = pass + "~" + this.genIds.get(cid); + return IdGenerator.of(id); + } + + public Collection>> communities(){ + // TODO: get communities from backend store instead of ram + Map>> comms = new HashMap<>(); + for (Entry e : this.vertex2Community.entrySet()) { + Community c = e.getValue(); + if (c.empty()) { + continue; + } + Pair> pair = comms.get(c.cid); + if (pair == null) { + pair = Pair.of(c, new HashSet<>()); + comms.put(c.cid, pair); + } + // collect members joined to the community [current pass] + pair.getRight().add(e.getKey()); + } + return comms.values(); + } + } +} diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java new file mode 100644 index 0000000000..af7b299ae7 --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java @@ -0,0 +1,263 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
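To recap the math the traverser above implements: phase one moves each vertex i into the neighboring community with the largest modularity gain, and phase two merges every community into a single c_pass-N vertex so the next pass repeats the process on the condensed graph. In the usual notation (k_i,in the weight between i and the candidate community, Sigma_tot the candidate's total incident weight, Sigma_in its internal weight counted from both endpoints, m the total edge weight), the simplified gain and the modularity that modularity() estimates are:

    \Delta Q \;\propto\; k_{i,\mathrm{in}} \;-\; \frac{k_i \,\Sigma_{\mathrm{tot}}}{2m}
    \qquad\qquad
    Q \;=\; \sum_{c}\left[\frac{\Sigma_{\mathrm{in}}^{\,c}}{2m} \;-\; \left(\frac{\Sigma_{\mathrm{tot}}^{\,c}}{2m}\right)^{2}\right]

The code accumulates k_i,in from both endpoints (2 * weightOfEdge per edge) and uses the edge count as m, so the quantity it compares is a constant multiple of this gain; the community that wins the comparison is the same either way.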
+ */ + +package com.baidu.hugegraph.job.algorithm.comm; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Random; + +import org.apache.commons.lang3.mutable.MutableInt; +import org.apache.tinkerpop.gremlin.process.traversal.Scope; +import org.apache.tinkerpop.gremlin.structure.Vertex; + +import com.baidu.hugegraph.backend.id.Id; +import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.schema.SchemaManager; +import com.baidu.hugegraph.schema.VertexLabel; +import com.baidu.hugegraph.type.define.Directions; +import com.google.common.collect.ImmutableMap; + +public class LpaAlgorithm extends AbstractCommAlgorithm { + + @Override + public String name() { + return "lpa"; + } + + @Override + public void checkParameters(Map parameters) { + times(parameters); + precision(parameters); + sourceLabel(parameters); + edgeLabel(parameters); + direction(parameters); + degree(parameters); + showCommunity(parameters); + } + + @Override + public Object call(Job job, Map parameters) { + Traverser traverser = new Traverser(job); + String showComm = showCommunity(parameters); + + try { + if (showComm != null) { + return traverser.showCommunity(showComm); + } else { + return traverser.lpa(sourceLabel(parameters), + edgeLabel(parameters), + direction(parameters), + degree(parameters), + times(parameters), + precision(parameters)); + } + } catch (Throwable e) { + job.graph().tx().rollback(); + throw e; + } + } + + public static class Traverser extends AlgoTraverser { + + public static final String C_LABEL = "c_label"; + private static final long LIMIT = MAX_QUERY_LIMIT; + + private final Random R = new Random(); + + public Traverser(Job job) { + super(job); + } + + public Object lpa(String sourceLabel, String edgeLabel, + Directions dir, long degree, + int maxTimes, double precision) { + assert maxTimes > 0; + assert precision > 0d; + + this.initSchema(); + + int times = maxTimes; + double changedPercent = 0d; + + /* + * Iterate until: + * 1.it has stabilized + * 2.or the maximum number of times is reached + */ + for (int i = 0; i < maxTimes; i++) { + changedPercent = this.detectCommunities(sourceLabel, edgeLabel, + dir, degree); + if (changedPercent <= precision) { + times = i + 1; + break; + } + } + + long communities = this.graph().traversal().V().limit(10000L) + .groupCount().by(C_LABEL) + .count(Scope.local).next(); + return ImmutableMap.of("iteration_times", times, + "last_precision", changedPercent, + "times", maxTimes, + "communities", communities); + } + + public Object showCommunity(String clabel) { + // all vertices with specified c-label + Iterator vertices = this.vertices(LIMIT); + vertices = filter(vertices, C_LABEL, clabel); + + JsonMap json = new JsonMap(); + json.startList(); + while (vertices.hasNext()) { + this.updateProgress(++this.progress); + json.append(vertices.next().id().toString()); + } + json.endList(); + + return json.asJson(); + } + + private double detectCommunities(String sourceLabel, String edgeLabel, + Directions dir, long degree) { + // shuffle: r.order().by(shuffle) + // r = this.graph().traversal().V().sample((int) LIMIT); + + // all vertices + Iterator vertices = this.vertices(sourceLabel, LIMIT); + + long total = 0L; + long changed = 0L; + while (vertices.hasNext()) { + this.updateProgress(++this.progress); + total++; + Vertex v = vertices.next(); + String label = this.voteCommunityOfVertex(v, edgeLabel, + dir, degree); + // update label if it's absent or changed + if 
(!labelPresent(v) || !label.equals(this.labelOfVertex(v))) { + changed++; + this.updateLabelOfVertex(v, label); + } + } + this.graph().tx().commit(); + + return total == 0L ? 0d : (double) changed / total; + } + + private String voteCommunityOfVertex(Vertex vertex, String edgeLabel, + Directions dir, long degree) { + // neighbors of source vertex v + Id source = (Id) vertex.id(); + Id labelId = this.getEdgeLabelId(edgeLabel); + Iterator neighbors = this.adjacentVertices(source, dir, + labelId, degree); + + // whether or not include vertex itself, greatly affects the result. + // get a larger number of small communities if include itself + //neighbors.inject(v); + + // calculate label frequency + Map labels = new HashMap<>(); + while (neighbors.hasNext()) { + String label = this.labelOfVertex(neighbors.next()); + if (label == null) { + // ignore invalid or not-exist vertex + continue; + } + MutableInt labelCount = labels.get(label); + if (labelCount != null) { + labelCount.increment(); + } else { + labels.put(label, new MutableInt(1)); + } + } + + // isolated vertex + if (labels.size() == 0) { + return this.labelOfVertex(vertex); + } + + // get the labels with maximum frequency + List maxLabels = new ArrayList<>(); + int maxFreq = 1; + for (Map.Entry e : labels.entrySet()) { + int value = e.getValue().intValue(); + if (value > maxFreq) { + maxFreq = value; + maxLabels.clear(); + } + if (value == maxFreq) { + maxLabels.add(e.getKey()); + } + } + + /* + * TODO: + * keep origin label with probability to prevent monster communities + */ + + // random choice + int selected = this.R.nextInt(maxLabels.size()); + return maxLabels.get(selected); + } + + private boolean labelPresent(Vertex vertex) { + return vertex.property(C_LABEL).isPresent(); + } + + private String labelOfVertex(Vertex vertex) { + if (!labelPresent(vertex)) { + return vertex.id().toString(); + } + return vertex.value(C_LABEL); + } + + private String labelOfVertex(Id vid) { + // TODO: cache with Map + Iterator iter = this.graph().vertices(vid); + if (!iter.hasNext()) { + return null; + } + Vertex vertex = iter.next(); + return this.labelOfVertex(vertex); + } + + private void updateLabelOfVertex(Vertex v, String label) { + // TODO: cache with Map + v.property(C_LABEL, label); + this.commitIfNeeded(); + } + + private void initSchema() { + String cl = C_LABEL; + SchemaManager schema = this.graph().schema(); + schema.propertyKey(cl).asText().ifNotExist().create(); + for (VertexLabel vl : schema.getVertexLabels()) { + schema.vertexLabel(vl.name()) + .properties(cl).nullableKeys(cl) + .append(); + } + } + } +} diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/TriangleCountAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/TriangleCountAlgorithm.java new file mode 100644 index 0000000000..c47d19f655 --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/TriangleCountAlgorithm.java @@ -0,0 +1,153 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. 
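The vote above reduces to a frequency count over neighbor labels with a uniform random tie break: an isolated vertex keeps its own label, otherwise the most common neighbor label wins and ties are resolved at random so the propagation does not get stuck in a fixed scan order. A self-contained sketch of that core step, leaving out the graph access and the vertex-id fallback label:

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.Random;

    public class LpaVoteSketch {

        private static final Random R = new Random();

        // Most frequent label among the neighbors; null for an isolated vertex
        // (the caller then keeps the vertex's current label).
        public static String vote(List<String> neighborLabels) {
            Map<String, Integer> freq = new HashMap<>();
            for (String label : neighborLabels) {
                freq.merge(label, 1, Integer::sum);
            }
            if (freq.isEmpty()) {
                return null;
            }
            int maxFreq = 0;
            List<String> best = new ArrayList<>();
            for (Map.Entry<String, Integer> e : freq.entrySet()) {
                if (e.getValue() > maxFreq) {
                    maxFreq = e.getValue();
                    best.clear();
                }
                if (e.getValue() == maxFreq) {
                    best.add(e.getKey());
                }
            }
            return best.get(R.nextInt(best.size()));
        }
    }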
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +package com.baidu.hugegraph.job.algorithm.comm; + +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; + +import org.apache.tinkerpop.gremlin.structure.Edge; + +import com.baidu.hugegraph.backend.id.Id; +import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.structure.HugeEdge; +import com.baidu.hugegraph.type.define.Directions; +import com.baidu.hugegraph.util.InsertionOrderUtil; +import com.google.common.collect.ImmutableMap; + +public class TriangleCountAlgorithm extends AbstractCommAlgorithm { + + @Override + public String name() { + return "triangle_count"; + } + + @Override + public void checkParameters(Map parameters) { + direction(parameters); + degree(parameters); + } + + @Override + public Object call(Job job, Map parameters) { + Traverser traverser = new Traverser(job); + return traverser.triangleCount(direction(parameters), + degree(parameters)); + } + + protected static class Traverser extends AlgoTraverser { + + protected static final String KEY_TRIANGLES = "triangles"; + protected static final String KEY_TRIADS = "triads"; + + public Traverser(Job job) { + super(job); + } + + public Object triangleCount(Directions direction, long degree) { + Map results = triangles( direction, degree); + results = InsertionOrderUtil.newMap(results); + results.remove(KEY_TRIADS); + return results; + } + + protected Map triangles(Directions direction, + long degree) { + if (direction == null || direction == Directions.BOTH) { + throw new IllegalArgumentException("Direction must be OUT/IN"); + } + assert direction == Directions.OUT || direction == Directions.IN; + + Iterator edges = this.edges(direction); + + long triangles = 0L; + long triads = 0L; + long total = 0L; + long totalVertices = 0L; + Id vertex = null; + + Set adjVertices = new HashSet<>(); + while (edges.hasNext()) { + HugeEdge edge = (HugeEdge) edges.next(); + this.updateProgress(++total); + + Id source = edge.ownerVertex().id(); + Id target = edge.otherVertex().id(); + if (vertex == source) { + // Ignore and skip the target vertex if exceed degree + if (adjVertices.size() < degree || degree == NO_LIMIT) { + adjVertices.add(target); + } + continue; + } + + if (vertex != null) { + assert vertex != source; + /* + * Find graph mode like this: + * A -> [B,C,D,E,F] + * B -> [D,F] + * E -> [B,C,F] + */ + triangles += this.intersect(direction, degree, adjVertices); + triads += this.localTriads(adjVertices.size()); + totalVertices++; + // Reset for the next source + adjVertices = new HashSet<>(); + } + vertex = source; + adjVertices.add(target); + } + + if (vertex != null) { + triangles += this.intersect(direction, degree, adjVertices); + triads += this.localTriads(adjVertices.size()); + totalVertices++; + } + + String suffix = "_" + direction.string(); + return ImmutableMap.of("edges" + suffix, total, + "vertices" + suffix, totalVertices, + KEY_TRIANGLES, triangles, + KEY_TRIADS, triads); + } + + protected long intersect(Directions dir, long degree, + Set adjVertices) { + long count = 0L; + Iterator vertices; + for (Id v : adjVertices) { + vertices = 
this.adjacentVertices(v, dir, null, degree); + while (vertices.hasNext()) { + Id vertex = vertices.next(); + if (adjVertices.contains(vertex)) { + count++; + } + } + } + return count; + } + + protected long localTriads(int size) { + return size * (size - 1L) / 2L; + } + } +} From aae07a756089cbfc1a73d5cfdecf3d80e0aab44d Mon Sep 17 00:00:00 2001 From: zhoney Date: Fri, 10 Apr 2020 00:06:02 +0800 Subject: [PATCH 02/33] add fusiform_similarity,rings_detect and kcore ap algorithm (#5) * improve * move c_label to lower layer and add appendRow(value) * add community limit 100w for louvain * improve louvain log * fix louvain bug Change-Id: I886ac3e7a3f0dfd49e66fdf544f97f6f7db615df --- .../job/algorithm/AbstractAlgorithm.java | 31 +- .../job/algorithm/AlgorithmPool.java | 7 + .../algorithm/cent/AbstractCentAlgorithm.java | 3 - .../job/algorithm/comm/KCoreAlgorithm.java | 286 ++++++++++++++++++ .../job/algorithm/comm/LouvainTraverser.java | 21 +- .../job/algorithm/comm/LpaAlgorithm.java | 1 - .../algorithm/path/RingsDetectAlgorithm.java | 112 +++++++ .../FusiformSimilarityAlgorithm.java | 171 +++++++++++ 8 files changed, 623 insertions(+), 9 deletions(-) create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java index 660ef9f8f8..8db652d0d0 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java @@ -84,6 +84,7 @@ public abstract class AbstractAlgorithm implements Algorithm { public static final String KEY_CLEAR = "clear"; public static final String KEY_CAPACITY = "capacity"; public static final String KEY_LIMIT = "limit"; + public static final String KEY_ALPHA = "alpha"; public static final long DEFAULT_CAPACITY = 10000000L; public static final long DEFAULT_LIMIT = 100L; @@ -92,6 +93,9 @@ public abstract class AbstractAlgorithm implements Algorithm { public static final long DEFAULT_TIMES = 20L; public static final long DEFAULT_STABLE_TIMES= 3L; public static final double DEFAULT_PRECISION = 1.0 / 1000; + public static final double DEFAULT_ALPHA = 0.5D; + + public static final String C_LABEL = "c_label"; @Override public void checkParameters(Map parameters) { @@ -119,6 +123,21 @@ protected static Directions direction(Map parameters) { return parseDirection(direction); } + protected static double alpha(Map parameters) { + if (!parameters.containsKey(KEY_ALPHA)) { + return DEFAULT_ALPHA; + } + double alpha = parameterDouble(parameters, KEY_ALPHA); + checkAlpha(alpha); + return alpha; + } + + public static void checkAlpha(double alpha) { + E.checkArgument(alpha > 0 && alpha <= 1.0, + "The alpha of must be in range (0, 1], but got %s", + alpha); + } + protected static long top(Map parameters) { if (!parameters.containsKey(KEY_TOP)) { return 0L; @@ -281,10 +300,15 @@ protected Iterator vertices(long limit) { return this.graph().vertices(query); } + protected Iterator vertices(Object label, Object clabel, + long limit) { + return vertices(label, C_LABEL, clabel, limit); + } + protected Iterator 
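To make the triangle/triad bookkeeping in TriangleCountAlgorithm concrete: a vertex with n neighbors in the chosen direction contributes n(n-1)/2 triads, and a triad closes into a triangle exactly when the two neighbors are themselves adjacent, which is what the intersect() pass checks. For example, a vertex with neighbors {B, C, D} yields 3 triads, and if only B and C are connected then 1 of them is a triangle. A small in-memory sketch of the same per-vertex intersection, assuming the whole out-adjacency fits in a map (the real traverser streams sorted edges instead):

    import java.util.Collections;
    import java.util.Map;
    import java.util.Set;

    public class TriangleSketch {

        // Returns {triangles, triads} computed from an out-adjacency map.
        public static long[] count(Map<String, Set<String>> adjacency) {
            long triangles = 0L;
            long triads = 0L;
            for (Set<String> neighbors : adjacency.values()) {
                int n = neighbors.size();
                triads += n * (n - 1L) / 2L;                 // connected triples at this vertex
                for (String v : neighbors) {
                    Set<String> others = adjacency.getOrDefault(
                            v, Collections.<String>emptySet());
                    for (String w : others) {
                        if (neighbors.contains(w)) {
                            triangles++;                     // the pair (v, w) is adjacent
                        }
                    }
                }
            }
            return new long[] { triangles, triads };
        }
    }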
vertices(Object label, String key, Object value, long limit) { Iterator vertices = this.vertices(label, limit); - if (key != null) { + if (value != null) { vertices = filter(vertices, key, value); } return vertices; @@ -490,6 +514,11 @@ public void appendRaw(String key, String rawJson) { this.checkSizeLimit(); } + public void appendRaw(String rawJson) { + this.json.append(rawJson).append(','); + this.checkSizeLimit(); + } + public void append(Set> kvs) { for (Map.Entry top : kvs) { this.append(top.getKey(), top.getValue()); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java index 98f7c89dc6..f1e35fd581 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java @@ -27,9 +27,12 @@ import com.baidu.hugegraph.job.algorithm.cent.DegreeCentralityAlgorithm; import com.baidu.hugegraph.job.algorithm.cent.EigenvectorCentralityAlgorithm; import com.baidu.hugegraph.job.algorithm.comm.ClusterCoeffcientAlgorithm; +import com.baidu.hugegraph.job.algorithm.comm.KCoreAlgorithm; import com.baidu.hugegraph.job.algorithm.comm.LouvainAlgorithm; import com.baidu.hugegraph.job.algorithm.comm.LpaAlgorithm; import com.baidu.hugegraph.job.algorithm.comm.TriangleCountAlgorithm; +import com.baidu.hugegraph.job.algorithm.path.RingsDetectAlgorithm; +import com.baidu.hugegraph.job.algorithm.similarity.FusiformSimilarityAlgorithm; public class AlgorithmPool { @@ -48,6 +51,10 @@ public class AlgorithmPool { INSTANCE.register(new ClusterCoeffcientAlgorithm()); INSTANCE.register(new LpaAlgorithm()); INSTANCE.register(new LouvainAlgorithm()); + + INSTANCE.register(new FusiformSimilarityAlgorithm()); + INSTANCE.register(new RingsDetectAlgorithm()); + INSTANCE.register(new KCoreAlgorithm()); } private final Map algorithms; diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java index 14841043a3..fba7a8de7e 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java @@ -27,12 +27,9 @@ import com.baidu.hugegraph.job.Job; import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm; -import com.baidu.hugegraph.job.algorithm.comm.LpaAlgorithm; public abstract class AbstractCentAlgorithm extends AbstractAlgorithm { - protected static final String C_LABEL = LpaAlgorithm.Traverser.C_LABEL; - @Override public String category() { return CATEGORY_CENT; diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java new file mode 100644 index 0000000000..80f10da77e --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java @@ -0,0 +1,286 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. 
The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +package com.baidu.hugegraph.job.algorithm.comm; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang3.mutable.MutableInt; +import org.apache.tinkerpop.gremlin.structure.Vertex; +import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils; + +import com.baidu.hugegraph.HugeGraph; +import com.baidu.hugegraph.backend.id.Id; +import com.baidu.hugegraph.backend.query.Query; +import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.schema.EdgeLabel; +import com.baidu.hugegraph.traversal.algorithm.FusiformSimilarityTraverser; +import com.baidu.hugegraph.type.define.Directions; +import com.baidu.hugegraph.util.CollectionUtil; +import com.baidu.hugegraph.util.E; +import com.baidu.hugegraph.util.JsonUtil; +import com.google.common.collect.ImmutableSet; + +public class KCoreAlgorithm extends AbstractCommAlgorithm { + + public static final String KEY_K = "k"; + public static final String KEY_MERGED = "merged"; + + public static final int DEFAULT_K = 3; + + @Override + public String name() { + return "k_core"; + } + + @Override + public void checkParameters(Map parameters) { + k(parameters); + alpha(parameters); + merged(parameters); + degree(parameters); + sourceLabel(parameters); + sourceCLabel(parameters); + direction(parameters); + edgeLabel(parameters); + } + + @Override + public Object call(Job job, Map parameters) { + Traverser traverser = new Traverser(job); + return traverser.kcore(sourceLabel(parameters), + sourceCLabel(parameters), + direction(parameters), edgeLabel(parameters), + k(parameters), alpha(parameters), + degree(parameters), merged(parameters)); + } + + protected static int k(Map parameters) { + if (!parameters.containsKey(KEY_K)) { + return DEFAULT_K; + } + int k = parameterInt(parameters, KEY_K); + E.checkArgument(k > 1, "The k of kcore must be > 1, but got %s", k); + return k; + } + + protected static boolean merged(Map parameters) { + if (!parameters.containsKey(KEY_MERGED)) { + return false; + } + return parameterBoolean(parameters, KEY_MERGED); + } + + public static class Traverser extends AlgoTraverser { + + public Traverser(Job job) { + super(job); + } + + public Object kcore(String sourceLabel, String sourceCLabel, + Directions dir, String label, int k, double alpha, + long degree, boolean merged) { + HugeGraph graph = this.graph(); + Iterator vertices = this.vertices(sourceLabel, sourceCLabel, + Query.NO_LIMIT); + EdgeLabel edgeLabel = label == null ? 
null : graph.edgeLabel(label); + + KcoreTraverser traverser = new KcoreTraverser(graph); + JsonMap kcoresJson = new JsonMap(); + kcoresJson.startObject(); + kcoresJson.appendKey("kcores"); + kcoresJson.startList(); + Set> kcoreSet = new HashSet<>(); + while(vertices.hasNext()) { + this.updateProgress(++this.progress); + Vertex vertex = vertices.next(); + Set kcore = traverser.kcore(IteratorUtils.of(vertex), + dir, edgeLabel, k, alpha, + degree); + if (kcore.isEmpty()) { + continue; + } + if (merged) { + mergeKcores(kcoreSet, kcore); + } else { + kcoresJson.appendRaw(JsonUtil.toJson(kcore)); + } + } + if (merged) { + for (Set kcore : kcoreSet) { + kcoresJson.appendRaw(JsonUtil.toJson(kcore)); + } + } + kcoresJson.endList(); + kcoresJson.endObject(); + + return kcoresJson.asJson(); + } + + @SuppressWarnings("unchecked") + private static void mergeKcores(Set> kcores, Set kcore) { + boolean merged = false; + /* + * Iterate to collect merging kcores firstly, because merging + * kcores will be removed from all kcores. + * Besides one new kcore may connect to multiple existing kcores. + */ + Set> mergingKcores = new HashSet<>(); + for (Set existedKcore : kcores) { + if (CollectionUtil.hasIntersection(existedKcore, kcore)) { + mergingKcores.add(existedKcore); + merged = true; + } + } + if (merged) { + for (Set mergingKcore : mergingKcores) { + kcores.remove(mergingKcore); + kcore.addAll(mergingKcore); + } + } + kcores.add(kcore); + } + } + + public static class KcoreTraverser extends FusiformSimilarityTraverser { + + public KcoreTraverser(HugeGraph graph) { + super(graph); + } + + public Set kcore(Iterator vertices, Directions direction, + EdgeLabel label, int k, double alpha, + long degree) { + int minNeighbors = (int) Math.floor(1 / alpha * k); + SimilarsMap map = fusiformSimilarity(vertices, direction, label, + minNeighbors, alpha, k - 1, + 0, null, 1, degree, + NO_LIMIT, NO_LIMIT, true); + if (map.isEmpty()) { + return ImmutableSet.of(); + } + return extractKcore(map, k); + } + + + @SuppressWarnings("unchecked") + private static Set extractKcore(SimilarsMap similarsMap, int k) { + assert similarsMap.size() == 1; + Map.Entry> entry = similarsMap.entrySet() + .iterator().next(); + Id source = entry.getKey(); + Set similars = new HashSet<>(); + for (Similar similar: entry.getValue()) { + similars.add(new KcoreSimilar(similar)); + } + + boolean stop; + do { + stop = true; + // Do statistics + Map counts = new HashMap<>(); + for (KcoreSimilar similar : similars) { + for (Id id : similar.ids()) { + MutableInt count = counts.get(id); + if (count == null) { + count = new MutableInt(0); + counts.put(id, count); + } + count.increment(); + } + } + /* + * Iterate similars to: + * 1. delete failed similar + * 2. delete failed intermediaries in survive similar + * 3. 
update statistics + */ + Set failedSimilars = new HashSet<>(); + for (KcoreSimilar similar : similars) { + Set failedIds = new HashSet<>(); + for (Id id : similar.ids()) { + MutableInt count = counts.get(id); + if (count.getValue() < k - 1) { + count.decrement(); + failedIds.add(id); + stop = false; + } + } + + Set survivedIds = new HashSet<>(CollectionUtils + .subtract(similar.ids(), failedIds)); + if (survivedIds.size() < k) { + for (Id id : survivedIds) { + counts.get(id).decrement(); + } + failedSimilars.add(similar); + } else { + similar.ids(survivedIds); + } + } + similars = new HashSet<>(CollectionUtils.subtract( + similars, failedSimilars)); + } while (!stop); + + if (similars.isEmpty()) { + return ImmutableSet.of(); + } + Set kcores = new HashSet<>(); + kcores.add(source); + for (KcoreSimilar similar : similars) { + kcores.add(similar.id()); + kcores.addAll(similar.ids()); + } + return kcores; + } + } + + private static class KcoreSimilar extends + FusiformSimilarityTraverser.Similar { + + private Set ids; + + public KcoreSimilar(Id id, double score, List intermediaries) { + super(id, score, intermediaries); + this.ids = null; + } + + public KcoreSimilar(FusiformSimilarityTraverser.Similar similar) { + super(similar.id(), similar.score(), similar.intermediaries()); + this.ids = new HashSet<>(this.intermediaries()); + } + + public Set ids() { + if (this.ids == null) { + this.ids = new HashSet<>(this.intermediaries()); + } + return this.ids; + } + + public void ids(Set ids) { + this.ids = ids; + } + } +} diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java index 0b3d674aaf..9c4f80f645 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java @@ -19,6 +19,8 @@ package com.baidu.hugegraph.job.algorithm.comm; +import static com.baidu.hugegraph.job.algorithm.AbstractAlgorithm.C_LABEL; + import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -62,29 +64,28 @@ public class LouvainTraverser extends AlgoTraverser { public static final String C_WEIGHT = "c_weight"; public static final String C_MEMBERS = "c_members"; - public static final String C_LABEL = LpaAlgorithm.Traverser.C_LABEL; - private static final long LIMIT = AbstractAlgorithm.MAX_QUERY_LIMIT; + private static final int MAX_COMM_SIZE = 1000000; private static final Logger LOG = Log.logger(LouvainTraverser.class); private final GraphTraversalSource g; - private final long m; private final String sourceLabel; private final String sourceCLabel; private final long degree; private final Cache cache; + private long m; private String passLabel; public LouvainTraverser(Job job, long degree, String sourceLabel, String sourceCLabel) { super(job); this.g = this.graph().traversal(); - this.m = this.g.E().count().next(); this.sourceLabel = sourceLabel; this.sourceCLabel = sourceCLabel; this.degree = degree; + this.m = 1L; this.passLabel = ""; this.cache = new Cache(); @@ -122,6 +123,8 @@ private void defineSchemaOfPk() { .ifNotExist().create(); schema.propertyKey(C_WEIGHT).asFloat() .ifNotExist().create(); + + this.m = this.g.E().count().next(); } private void defineSchemaOfPassN(int pass) { @@ -369,6 +372,11 @@ private double moveCommunities(int pass) { for (Pair nbc : nbCommunities(pass, nbs)) { // △Q = (Ki_in - Ki * Etot / m) / 2m 
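Putting numbers on the k-core bound used in KCoreAlgorithm: a seed can only sit inside a k-core if it has at least floor((1 / alpha) * k) qualifying neighbors, so with k = 3 and alpha = 0.5 the fusiform similarity search demands at least floor(2 * 3) = 6 neighbors before it even looks for similars; extractKcore then repeatedly drops intermediaries supported by fewer than k - 1 similars until the surviving set is stable. A one-line helper mirroring that bound (illustrative only):

    public class KcoreBound {

        // Minimum neighbor count a seed needs before a k-core found via
        // fusiform similarity is possible; mirrors KcoreTraverser.kcore().
        public static int minNeighbors(int k, double alpha) {
            return (int) Math.floor(1.0 / alpha * k);
        }
    }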
Community otherC = nbc.getLeft(); + if (otherC.size() > MAX_COMM_SIZE) { + LOG.info("Skip community {} due to its size > {}", + otherC, MAX_COMM_SIZE); + continue; + } // weight between c and otherC double kiin = nbc.getRight().floatValue(); // weight of otherC @@ -415,6 +423,7 @@ private void mergeCommunities(int pass) { List members = new ArrayList<>(vertices.size()); Map cedges = new HashMap<>(vertices.size()); for (Id v : vertices) { + this.updateProgress(++this.progress); members.add(v.toString()); // collect edges between this community and other communities List neighbors = neighbors(v); @@ -620,6 +629,10 @@ public boolean empty() { return this.size <= 0; } + public int size() { + return this.size; + } + public void add(LouvainTraverser t, Vertex v, List nbs) { this.size++; this.kin += t.kinOfVertex(v); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java index af7b299ae7..361e9b9a9c 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java @@ -79,7 +79,6 @@ public Object call(Job job, Map parameters) { public static class Traverser extends AlgoTraverser { - public static final String C_LABEL = "c_label"; private static final long LIMIT = MAX_QUERY_LIMIT; private final Random R = new Random(); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java new file mode 100644 index 0000000000..6a1a0add7a --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java @@ -0,0 +1,112 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ + +package com.baidu.hugegraph.job.algorithm.path; + +import java.util.Iterator; +import java.util.Map; + +import org.apache.tinkerpop.gremlin.structure.Vertex; + +import com.baidu.hugegraph.HugeGraph; +import com.baidu.hugegraph.backend.id.Id; +import com.baidu.hugegraph.backend.query.Query; +import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm; +import com.baidu.hugegraph.structure.HugeVertex; +import com.baidu.hugegraph.traversal.algorithm.SubGraphTraverser; +import com.baidu.hugegraph.type.define.Directions; +import com.baidu.hugegraph.util.JsonUtil; + +public class RingsDetectAlgorithm extends AbstractAlgorithm { + + @Override + public String name() { + return "rings_detect"; + } + + @Override + public String category() { + return CATEGORY_PATH; + } + + @Override + public void checkParameters(Map parameters) { + depth(parameters); + degree(parameters); + capacity(parameters); + limit(parameters); + sourceLabel(parameters); + sourceCLabel(parameters); + direction(parameters); + edgeLabel(parameters); + } + + @Override + public Object call(Job job, Map parameters) { + Traverser traverser = new Traverser(job); + return traverser.rings(sourceLabel(parameters), + sourceCLabel(parameters), + direction(parameters), edgeLabel(parameters), + depth(parameters), degree(parameters), + capacity(parameters), limit(parameters)); + } + + public static class Traverser extends AlgoTraverser { + + public Traverser(Job job) { + super(job); + } + + public Object rings(String sourceLabel, String sourceCLabel, + Directions dir, String label, int depth, + long degree, long capacity, long limit) { + HugeGraph graph = this.graph(); + Iterator vertices = this.vertices(sourceLabel, sourceCLabel, + Query.NO_LIMIT); + JsonMap ringsJson = new JsonMap(); + ringsJson.startObject(); + ringsJson.appendKey("rings"); + ringsJson.startList(); + SubGraphTraverser traverser = new SubGraphTraverser(graph); + while(vertices.hasNext()) { + this.updateProgress(++this.progress); + Id source = ((HugeVertex) vertices.next()).id(); + PathSet rings = traverser.rings(source, dir, label, depth, + true, degree, + capacity, limit); + for (Path ring : rings) { + Id min = null; + for (Id id : ring.vertices()) { + if (min == null || id.compareTo(min) < 0) { + min = id; + } + } + if (source.equals(min)) { + ringsJson.appendRaw(JsonUtil.toJson(ring.vertices())); + } + } + } + ringsJson.endList(); + ringsJson.endObject(); + + return ringsJson.asJson(); + } + } +} diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java new file mode 100644 index 0000000000..26ee4e25e5 --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java @@ -0,0 +1,171 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. 
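A detail worth calling out in the rings traversal above: the same ring is reachable from every one of its member vertices, so the job only emits a ring when the current source is the smallest vertex id on it, which deduplicates the output without keeping a global seen-set. A tiny sketch of that filter, using plain strings in place of backend ids:

    import java.util.List;

    public class RingDedupSketch {

        // Emit a ring only from its minimum member, so each ring appears once.
        public static boolean shouldEmit(String source, List<String> ringVertices) {
            String min = null;
            for (String id : ringVertices) {
                if (min == null || id.compareTo(min) < 0) {
                    min = id;
                }
            }
            return source.equals(min);
        }
    }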
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +package com.baidu.hugegraph.job.algorithm.similarity; + +import java.util.Iterator; +import java.util.Map; + +import org.apache.tinkerpop.gremlin.structure.Vertex; +import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils; + +import com.baidu.hugegraph.HugeGraph; +import com.baidu.hugegraph.backend.query.Query; +import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm; +import com.baidu.hugegraph.schema.EdgeLabel; +import com.baidu.hugegraph.traversal.algorithm.FusiformSimilarityTraverser; +import com.baidu.hugegraph.traversal.algorithm.FusiformSimilarityTraverser.SimilarsMap; +import com.baidu.hugegraph.traversal.algorithm.HugeTraverser; +import com.baidu.hugegraph.type.define.Directions; +import com.baidu.hugegraph.util.JsonUtil; + +public class FusiformSimilarityAlgorithm extends AbstractAlgorithm { + + public static final String KEY_MIN_NEIGHBORS = "min_neighbors"; + public static final String KEY_MIN_SIMILARS = "min_similars"; + public static final String KEY_GROUP_PROPERTY = "group_property"; + public static final String KEY_MIN_GROUPS = "min_groups"; + + public static final int DEFAULT_MIN_NEIGHBORS = 10; + public static final int DEFAULT_MIN_SIMILARS = 6; + public static final int DEFAULT_MIN_GROUPS = 1; + + @Override + public String name() { + return "fusiform_similarity"; + } + + @Override + public String category() { + return CATEGORY_SIMI; + } + + @Override + public void checkParameters(Map parameters) { + minNeighbors(parameters); + alpha(parameters); + minSimilars(parameters); + top(parameters); + groupProperty(parameters); + minGroups(parameters); + degree(parameters); + capacity(parameters); + limit(parameters); + sourceLabel(parameters); + sourceCLabel(parameters); + direction(parameters); + edgeLabel(parameters); + } + + @Override + public Object call(Job job, Map parameters) { + Traverser traverser = new Traverser(job); + return traverser.fusiformSimilars(sourceLabel(parameters), + sourceCLabel(parameters), + direction(parameters), + edgeLabel(parameters), + minNeighbors(parameters), + alpha(parameters), + minSimilars(parameters), + top(parameters), + groupProperty(parameters), + minGroups(parameters), + degree(parameters), + capacity(parameters), + limit(parameters)); + } + + protected static int minNeighbors(Map parameters) { + if (!parameters.containsKey(KEY_MIN_NEIGHBORS)) { + return DEFAULT_MIN_NEIGHBORS; + } + int minNeighbors = parameterInt(parameters, KEY_MIN_NEIGHBORS); + HugeTraverser.checkPositive(minNeighbors, "min neighbors"); + return minNeighbors; + } + + protected static int minSimilars(Map parameters) { + if (!parameters.containsKey(KEY_MIN_SIMILARS)) { + return DEFAULT_MIN_SIMILARS; + } + int minSimilars = parameterInt(parameters, KEY_MIN_SIMILARS); + HugeTraverser.checkPositive(minSimilars, "min similars"); + return minSimilars; + } + + protected static String groupProperty(Map parameters) { + if (!parameters.containsKey(KEY_GROUP_PROPERTY)) { + return null; + } + return parameterString(parameters, KEY_GROUP_PROPERTY); + } + + protected static int minGroups(Map parameters) { + if 
(!parameters.containsKey(KEY_MIN_GROUPS)) { + return DEFAULT_MIN_GROUPS; + } + int minGroups = parameterInt(parameters, KEY_MIN_GROUPS); + HugeTraverser.checkPositive(minGroups, "min groups"); + return minGroups; + } + + protected static class Traverser extends AlgoTraverser { + + public Traverser(Job job) { + super(job); + } + + public Object fusiformSimilars(String sourceLabel, String sourceCLabel, + Directions direction, String label, + int minNeighbors, double alpha, + int minSimilars, long topSimilars, + String groupProperty, int minGroups, + long degree, long capacity, long limit) { + Iterator vertices = this.vertices(sourceLabel, sourceCLabel, + Query.NO_LIMIT); + HugeGraph graph = this.graph(); + EdgeLabel edgeLabel = label == null ? null : graph.edgeLabel(label); + + FusiformSimilarityTraverser traverser = new + FusiformSimilarityTraverser(graph); + JsonMap similarsJson = new JsonMap(); + similarsJson.startObject(); + while(vertices.hasNext()) { + this.updateProgress(++this.progress); + Vertex vertex = vertices.next(); + SimilarsMap similars = traverser.fusiformSimilarity( + IteratorUtils.of(vertex), direction, + edgeLabel, minNeighbors, alpha, + minSimilars, (int) topSimilars, + groupProperty, minGroups, degree, + capacity, limit, true); + if (similars.isEmpty()) { + continue; + } + String result = JsonUtil.toJson(similars.toMap()); + result = result.substring(1, result.length() - 1); + similarsJson.appendRaw(result); + } + similarsJson.endObject(); + + return similarsJson.asJson(); + } + } +} From ae7a9de3f8098a9c5c7afc9f8dc8b39bcab85d51 Mon Sep 17 00:00:00 2001 From: Jermy Li Date: Fri, 10 Apr 2020 17:35:22 +0800 Subject: [PATCH 03/33] set louvain max community members limit 10w (#9) Change-Id: I5cc3be846ff4536ebfba1f9bf54a0adda7409036 --- .../hugegraph/job/algorithm/comm/KCoreAlgorithm.java | 12 +++++++----- .../job/algorithm/comm/LouvainTraverser.java | 8 ++++---- .../job/algorithm/path/RingsDetectAlgorithm.java | 12 +++++++----- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java index 80f10da77e..4cc6a88ba5 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java @@ -72,9 +72,12 @@ public Object call(Job job, Map parameters) { Traverser traverser = new Traverser(job); return traverser.kcore(sourceLabel(parameters), sourceCLabel(parameters), - direction(parameters), edgeLabel(parameters), - k(parameters), alpha(parameters), - degree(parameters), merged(parameters)); + direction(parameters), + edgeLabel(parameters), + k(parameters), + alpha(parameters), + degree(parameters), + merged(parameters)); } protected static int k(Map parameters) { @@ -139,7 +142,6 @@ public Object kcore(String sourceLabel, String sourceCLabel, return kcoresJson.asJson(); } - @SuppressWarnings("unchecked") private static void mergeKcores(Set> kcores, Set kcore) { boolean merged = false; /* @@ -173,7 +175,7 @@ public KcoreTraverser(HugeGraph graph) { public Set kcore(Iterator vertices, Directions direction, EdgeLabel label, int k, double alpha, long degree) { - int minNeighbors = (int) Math.floor(1 / alpha * k); + int minNeighbors = (int) Math.floor(1.0 / alpha * k); SimilarsMap map = fusiformSimilarity(vertices, direction, label, minNeighbors, alpha, k - 1, 0, null, 1, degree, 
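A note on the k-core hunk above: the starting threshold minNeighbors = (int) Math.floor(1.0 / alpha * k) just divides k by the similarity ratio alpha before the fusiform-similarity scan. For example, with k = 3 and alpha = 0.5 the scan only starts from vertices with at least floor(1.0 / 0.5 * 3) = 6 neighbors, and for any alpha in (0, 1] the threshold is at least k. Since alpha is already a double, the 1 -> 1.0 change is a readability fix rather than a behavioral one. In the LouvainTraverser hunk that follows, MAX_COMM_SIZE drops to 100000; "10w" in the comment is the Chinese shorthand 10万, i.e. 100,000, and the guard now also skips communities exactly at the limit and logs the vertex being moved alongside the community id.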
diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java index 9c4f80f645..ecb500a7d6 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java @@ -65,7 +65,7 @@ public class LouvainTraverser extends AlgoTraverser { public static final String C_MEMBERS = "c_members"; private static final long LIMIT = AbstractAlgorithm.MAX_QUERY_LIMIT; - private static final int MAX_COMM_SIZE = 1000000; + private static final int MAX_COMM_SIZE = 100000; // 10w private static final Logger LOG = Log.logger(LouvainTraverser.class); @@ -372,9 +372,9 @@ private double moveCommunities(int pass) { for (Pair nbc : nbCommunities(pass, nbs)) { // △Q = (Ki_in - Ki * Etot / m) / 2m Community otherC = nbc.getLeft(); - if (otherC.size() > MAX_COMM_SIZE) { - LOG.info("Skip community {} due to its size > {}", - otherC, MAX_COMM_SIZE); + if (otherC.size() >= MAX_COMM_SIZE) { + LOG.info("Skip community {} for {} due to its size >= {}", + otherC.cid, v, MAX_COMM_SIZE); continue; } // weight between c and otherC diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java index 6a1a0add7a..b3ce1ec992 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java @@ -63,9 +63,12 @@ public Object call(Job job, Map parameters) { Traverser traverser = new Traverser(job); return traverser.rings(sourceLabel(parameters), sourceCLabel(parameters), - direction(parameters), edgeLabel(parameters), - depth(parameters), degree(parameters), - capacity(parameters), limit(parameters)); + direction(parameters), + edgeLabel(parameters), + depth(parameters), + degree(parameters), + capacity(parameters), + limit(parameters)); } public static class Traverser extends AlgoTraverser { @@ -89,8 +92,7 @@ public Object rings(String sourceLabel, String sourceCLabel, this.updateProgress(++this.progress); Id source = ((HugeVertex) vertices.next()).id(); PathSet rings = traverser.rings(source, dir, label, depth, - true, degree, - capacity, limit); + true, degree, capacity, limit); for (Path ring : rings) { Id min = null; for (Id id : ring.vertices()) { From 529e4b20ccc7a77d68721ac4c7e70332a5dd4a8d Mon Sep 17 00:00:00 2001 From: Jermy Li Date: Sat, 11 Apr 2020 10:46:04 +0800 Subject: [PATCH 04/33] fix inconsistent error messages with clabel (#10) also improve error message cause reason Change-Id: I15ea8dd651e01ff678a32f19efd3584cd20ffc10 --- .../hugegraph/job/algorithm/AbstractAlgorithm.java | 12 ++++++++++-- .../job/algorithm/cent/AbstractCentAlgorithm.java | 2 +- .../job/algorithm/comm/LouvainTraverser.java | 5 +---- .../hugegraph/job/algorithm/comm/LpaAlgorithm.java | 5 +++-- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java index 8db652d0d0..8387a69f92 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java +++ 
b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java @@ -344,7 +344,14 @@ protected Iterator filter(Iterator vertices, }); } - protected static boolean match(Element elem, String key, Object value) { + protected boolean match(Element elem, Object clabel) { + return match(elem, C_LABEL, clabel); + } + + protected boolean match(Element elem, String key, Object value) { + // check property key exists + this.graph().propertyKey(key); + // return true if property value exists & equals to specified value Property p = elem.property(key); return p.isPresent() && Objects.equal(p.value(), value); } @@ -375,7 +382,8 @@ protected V execute(GraphTraversal traversal, try { return callback.call(); } catch (Exception e) { - throw new HugeException("Failed to execute algorithm", e); + throw new HugeException("Failed to execute algorithm: %s", + e, e.getMessage()); } finally { Query.defaultCapacity(capacity); try { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java index fba7a8de7e..37492e456f 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java @@ -66,7 +66,7 @@ protected GraphTraversal constructSource( t = t.filter(it -> { this.updateProgress(++this.progress); return sourceCLabel == null ? true : - match(it.get(), C_LABEL, sourceCLabel); + match(it.get(), sourceCLabel); }); if (sourceSample > 0L) { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java index ecb500a7d6..0177d8f2d7 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java @@ -19,8 +19,6 @@ package com.baidu.hugegraph.job.algorithm.comm; -import static com.baidu.hugegraph.job.algorithm.AbstractAlgorithm.C_LABEL; - import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -237,8 +235,7 @@ private boolean needSkipVertex(int pass, Vertex v) { } } // skip the vertex with unmatched clabel - if (this.sourceCLabel != null && - !match(v, C_LABEL, this.sourceCLabel)) { + if (this.sourceCLabel != null && !match(v, this.sourceCLabel)) { return true; } return false; diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java index 361e9b9a9c..abcdb938cc 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java @@ -35,6 +35,7 @@ import com.baidu.hugegraph.schema.SchemaManager; import com.baidu.hugegraph.schema.VertexLabel; import com.baidu.hugegraph.type.define.Directions; +import com.baidu.hugegraph.util.E; import com.google.common.collect.ImmutableMap; public class LpaAlgorithm extends AbstractCommAlgorithm { @@ -122,9 +123,9 @@ public Object lpa(String sourceLabel, String edgeLabel, } public Object showCommunity(String clabel) { + E.checkNotNull(clabel, "clabel"); // all vertices with specified c-label - Iterator vertices = this.vertices(LIMIT); - 
vertices = filter(vertices, C_LABEL, clabel); + Iterator vertices = this.vertices(null, clabel, LIMIT); JsonMap json = new JsonMap(); json.startList(); From 18f1de4d6ac152c0b25ada85ccc9f7bcc75ac78e Mon Sep 17 00:00:00 2001 From: Jermy Li Date: Wed, 15 Apr 2020 11:03:59 +0800 Subject: [PATCH 05/33] optimize filterNonShortestPath() for betweeness_centrality (#12) network10000 dataset test: before after (depth=4 sample=1) 395s 25s (depth=3 sample=2) 4300s 35s same as the closeness_centrality Change-Id: Ia0c557434bf25f9d13a0b1dc19f66024e08c89df --- .../job/algorithm/AbstractAlgorithm.java | 21 ++++++++++---- .../algorithm/cent/AbstractCentAlgorithm.java | 29 +++++++++++++++++++ .../cent/BetweenessCentralityAlgorithm.java | 23 ++++----------- .../cent/ClosenessCentralityAlgorithm.java | 23 ++++----------- .../cent/DegreeCentralityAlgorithm.java | 4 +-- 5 files changed, 56 insertions(+), 44 deletions(-) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java index 8387a69f92..e77473668c 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java @@ -401,10 +401,10 @@ protected void commitIfNeeded() { } } - public static final class TopMap { + public static final class TopMap { private final long topN; - private Map tops; + private Map tops; public TopMap(long topN) { this.topN = topN; @@ -415,11 +415,20 @@ public int size() { return this.tops.size(); } - public void put(Id key, long value) { - this.put(key, Long.valueOf(value)); + public MutableLong get(K key) { + return this.tops.get(key); } - public void put(Id key, Long value) { + public void add(K key, long value) { + MutableLong mlong = this.tops.get(key); + if (mlong == null) { + mlong = new MutableLong(value); + this.tops.put(key, mlong); + } + mlong.add(value); + } + + public void put(K key, long value) { this.tops.put(key, new MutableLong(value)); // keep 2x buffer if (this.tops.size() > this.topN * 2) { @@ -427,7 +436,7 @@ public void put(Id key, Long value) { } } - public Set> entrySet() { + public Set> entrySet() { this.shrinkIfNeeded(this.topN); return this.tops.entrySet(); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java index 37492e456f..c36743176c 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java @@ -19,14 +19,20 @@ package com.baidu.hugegraph.job.algorithm.cent; +import java.util.HashMap; +import java.util.List; import java.util.Map; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.tinkerpop.gremlin.process.traversal.Pop; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; import org.apache.tinkerpop.gremlin.structure.Vertex; +import com.baidu.hugegraph.backend.id.Id; import com.baidu.hugegraph.job.Job; import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm; +import com.baidu.hugegraph.structure.HugeElement; public abstract class AbstractCentAlgorithm extends AbstractAlgorithm { @@ -106,5 +112,28 @@ protected GraphTraversal constructPathUnit( } 
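The TopMap changes above add a get()/add() pair that accumulates counts in place through MutableLong, so repeated contributions to the same key mutate one counter object instead of re-inserting a new Long each time. Note that, as transcribed here, add() seeds a missing counter with new MutableLong(value) and then still calls mlong.add(value), which would apply the first contribution twice; the more common form of the idiom seeds the counter with zero so every contribution is counted exactly once. A minimal sketch of that zero-seeded variant (string keys, illustrative only):

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.commons.lang3.mutable.MutableLong;

    public class CounterSketch {

        private final Map<String, MutableLong> counters = new HashMap<>();

        // Seed with 0 so the first add() is not applied twice.
        public void add(String key, long value) {
            this.counters.computeIfAbsent(key, k -> new MutableLong(0L))
                         .add(value);
        }
    }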
return unit; } + + protected GraphTraversal filterNonShortestPath( + GraphTraversal + t) { + long size = this.graph().traversal().V().limit(MAX_QUERY_LIMIT) + .count().next(); + Map, Integer> triples = new HashMap<>((int) size); + return t.filter(it -> { + Id start = it.path(Pop.first, "v").id(); + Id end = it.path(Pop.last, "v").id(); + int len = it.>path(Pop.all, "v").size(); + Pair key = Pair.of(start, end); + Integer shortest = triples.get(key); + if (shortest != null && shortest != len) { + // ignore non shortest path + return false; + } + if (shortest == null) { + triples.put(key, len); + } + return true; + }); + } } } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java index ae1b8bb743..9a72d2f626 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java @@ -73,25 +73,12 @@ public Object betweenessCentrality(int depth, sourceCLabel); t = constructPath(t, degree, sample, sourceLabel, sourceCLabel); t = t.emit().until(__.loops().is(P.gte(depth))); + t = filterNonShortestPath(t); - @SuppressWarnings({ "unchecked", "deprecation" }) - GraphTraversal tf = t.filter( - __.project("x","y","z") - .by(__.select(Pop.first, "v").id()) - .by(__.select(Pop.last, "v").id()) - .by(__.select(Pop.all, "v").count(Scope.local)) - .as("triple") - .coalesce(__.select("x","y").as("a") - .select("triples").unfold().as("t") - .select("x","y").where(P.eq("a")).select("t"), - __.store("triples")) - .select("z").as("length") - .select("triple").select("z").where(P.eq("length"))); - - GraphTraversal tg = tf.select(Pop.all, "v") - .unfold().id() - .groupCount().order(Scope.local) - .by(Column.values, Order.desc); + GraphTraversal tg = t.select(Pop.all, "v") + .unfold().id() + .groupCount().order(Scope.local) + .by(Column.values, Order.desc); GraphTraversal tLimit = topN <= 0L ? 
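The filterNonShortestPath() helper above replaces the earlier pure-Gremlin filter (the project/coalesce/store("triples") construction removed from BetweenessCentralityAlgorithm and ClosenessCentralityAlgorithm in this patch) with a plain Java lambda backed by a HashMap keyed by (start, end) vertex pairs. Each emitted path then costs one map lookup instead of a re-scan of an ever-growing side-effect collection, which is presumably where the 395s -> 25s and 4300s -> 35s numbers in the commit message come from. It records whichever length it sees first for a pair and drops paths of any other length, so it effectively assumes shorter paths for a pair are emitted before longer ones. A self-contained sketch of the memo idea (string ids, not the project's types):

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.commons.lang3.tuple.Pair;

    public class ShortestPathMemo {

        private final Map<Pair<String, String>, Integer> lengths = new HashMap<>();

        // Keep a path only if no length was recorded yet for (start, end),
        // or if it matches the recorded length.
        public boolean keep(String start, String end, int length) {
            Integer recorded = this.lengths.putIfAbsent(Pair.of(start, end), length);
            return recorded == null || recorded == length;
        }
    }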
tg : tg.limit(Scope.local, topN); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java index d890db8087..96e9709fef 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java @@ -82,26 +82,13 @@ public Object closenessCentrality(int depth, sourceCLabel); t = constructPath(t, degree, sample, sourceLabel, sourceCLabel); t = t.emit().until(__.loops().is(P.gte(depth))); - - @SuppressWarnings({ "unchecked", "deprecation" }) - GraphTraversal tf = t.filter( - __.project("x","y","z") - .by(__.select(Pop.first, "v").id()) - .by(__.select(Pop.last, "v").id()) - .by(__.select(Pop.all, "v").count(Scope.local)) - .as("triple") - .coalesce(__.select("x","y").as("a") - .select("triples").unfold().as("t") - .select("x","y").where(P.eq("a")).select("t"), - __.store("triples")) - .select("z").as("length") - .select("triple").select("z").where(P.eq("length"))); + t = filterNonShortestPath(t); GraphTraversal tg; - tg = tf.group().by(__.select(Pop.first, "v").id()) - .by(__.select(Pop.all, "v").count(Scope.local) - .sack(Operator.div).sack().sum()) - .order(Scope.local).by(Column.values, Order.desc); + tg = t.group().by(__.select(Pop.first, "v").id()) + .by(__.select(Pop.all, "v").count(Scope.local) + .sack(Operator.div).sack().sum()) + .order(Scope.local).by(Column.values, Order.desc); GraphTraversal tLimit = topN <= 0L ? tg : tg.limit(Scope.local, topN); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java index 81bd336729..5f6781b21b 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java @@ -67,7 +67,7 @@ public Object degreeCentrality(Directions direction, long topN) { Iterator edges = this.edges(direction); JsonMap degrees = new JsonMap(); - TopMap tops = new TopMap(topN); + TopMap tops = new TopMap<>(topN); Id vertex = null; long degree = 0L; long total = 0L; @@ -111,7 +111,7 @@ protected Object degreeCentrality(long topN) { assert topN >= 0L; long total = 0L; JsonMap degrees = new JsonMap(); - TopMap tops = new TopMap(topN); + TopMap tops = new TopMap<>(topN); GraphTraversalSource traversal = this.graph().traversal(); Iterator vertices = this.vertices(); From 263b14eb2a2457d22894607be7a54337b952ef3c Mon Sep 17 00:00:00 2001 From: Jermy Li Date: Wed, 15 Apr 2020 16:02:44 +0800 Subject: [PATCH 06/33] add direction and label parameter for centrality algorithms (#13) Change-Id: I20b72ea0da673359e2bd21888010290efca81441 --- .../job/algorithm/AbstractAlgorithm.java | 10 ++++- .../algorithm/cent/AbstractCentAlgorithm.java | 25 +++++++++-- .../cent/BetweenessCentralityAlgorithm.java | 12 ++++-- .../cent/ClosenessCentralityAlgorithm.java | 12 ++++-- .../cent/DegreeCentralityAlgorithm.java | 43 ++++++++++++++----- .../cent/EigenvectorCentralityAlgorithm.java | 12 ++++-- 6 files changed, 89 insertions(+), 25 deletions(-) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java 
b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java index e77473668c..248a92bdb1 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java @@ -49,6 +49,7 @@ import com.baidu.hugegraph.type.define.Directions; import com.baidu.hugegraph.type.define.HugeKeys; import com.baidu.hugegraph.util.Bytes; +import com.baidu.hugegraph.util.CollectionUtil; import com.baidu.hugegraph.util.E; import com.baidu.hugegraph.util.JsonUtil; @@ -119,6 +120,9 @@ protected static String edgeLabel(Map parameters) { } protected static Directions direction(Map parameters) { + if (!parameters.containsKey(KEY_DIRECTION)) { + return Directions.BOTH; + } Object direction = parameter(parameters, KEY_DIRECTION); return parseDirection(direction); } @@ -437,7 +441,11 @@ public void put(K key, long value) { } public Set> entrySet() { - this.shrinkIfNeeded(this.topN); + if (this.tops.size() <= this.topN) { + this.tops = CollectionUtil.sortByValue(this.tops, false); + } else { + this.shrinkIfNeeded(this.topN); + } return this.tops.entrySet(); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java index c36743176c..fb0c33d503 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java @@ -27,12 +27,14 @@ import org.apache.tinkerpop.gremlin.process.traversal.Pop; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; +import org.apache.tinkerpop.gremlin.structure.Direction; import org.apache.tinkerpop.gremlin.structure.Vertex; import com.baidu.hugegraph.backend.id.Id; import com.baidu.hugegraph.job.Job; import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm; import com.baidu.hugegraph.structure.HugeElement; +import com.baidu.hugegraph.type.define.Directions; public abstract class AbstractCentAlgorithm extends AbstractAlgorithm { @@ -46,6 +48,8 @@ public void checkParameters(Map parameters) { depth(parameters); degree(parameters); sample(parameters); + direction(parameters); + edgeLabel(parameters); sourceSample(parameters); sourceLabel(parameters); sourceCLabel(parameters); @@ -83,9 +87,11 @@ protected GraphTraversal constructSource( } protected GraphTraversal constructPath( - GraphTraversal t, long degree, - long sample, String sourceLabel, String sourceCLabel) { - GraphTraversal unit = constructPathUnit(degree, sample, + GraphTraversal t, Directions dir, + String label, long degree, long sample, + String sourceLabel, String sourceCLabel) { + GraphTraversal unit = constructPathUnit(dir, label, + degree, sample, sourceLabel, sourceCLabel); t = t.as("v").repeat(__.local(unit).simplePath().as("v")); @@ -94,10 +100,21 @@ protected GraphTraversal constructPath( } protected GraphTraversal constructPathUnit( + Directions dir, String label, long degree, long sample, String sourceLabel, String sourceCLabel) { - GraphTraversal unit = __.both(); + if (dir == null) { + dir = Directions.BOTH; + } + Direction direction = dir.direction(); + + String[] labels = {}; + if (label != null) { + labels = new String[]{label}; + } + + GraphTraversal unit = __.to(direction, labels); if (sourceLabel 
!= null) { unit = unit.hasLabel(sourceLabel); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java index 9a72d2f626..12e3acba02 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java @@ -31,6 +31,7 @@ import org.apache.tinkerpop.gremlin.structure.Vertex; import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.type.define.Directions; public class BetweenessCentralityAlgorithm extends AbstractCentAlgorithm { @@ -42,7 +43,9 @@ public String name() { @Override public Object call(Job job, Map parameters) { Traverser traverser = new Traverser(job); - return traverser.betweenessCentrality(depth(parameters), + return traverser.betweenessCentrality(direction(parameters), + edgeLabel(parameters), + depth(parameters), degree(parameters), sample(parameters), sourceLabel(parameters), @@ -57,7 +60,9 @@ public Traverser(Job job) { super(job); } - public Object betweenessCentrality(int depth, + public Object betweenessCentrality(Directions direction, + String label, + int depth, long degree, long sample, String sourceLabel, @@ -71,7 +76,8 @@ public Object betweenessCentrality(int depth, GraphTraversal t = constructSource(sourceLabel, sourceSample, sourceCLabel); - t = constructPath(t, degree, sample, sourceLabel, sourceCLabel); + t = constructPath(t, direction, label, degree, sample, + sourceLabel, sourceCLabel); t = t.emit().until(__.loops().is(P.gte(depth))); t = filterNonShortestPath(t); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java index 96e9709fef..cb64bd8bc5 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java @@ -32,6 +32,7 @@ import org.apache.tinkerpop.gremlin.structure.Vertex; import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.type.define.Directions; public class ClosenessCentralityAlgorithm extends AbstractCentAlgorithm { @@ -51,7 +52,9 @@ public void checkParameters(Map parameters) { @Override public Object call(Job job, Map parameters) { Traverser traverser = new Traverser(job); - return traverser.closenessCentrality(depth(parameters), + return traverser.closenessCentrality(direction(parameters), + edgeLabel(parameters), + depth(parameters), degree(parameters), sample(parameters), sourceLabel(parameters), @@ -66,7 +69,9 @@ public Traverser(Job job) { super(job); } - public Object closenessCentrality(int depth, + public Object closenessCentrality(Directions direction, + String label, + int depth, long degree, long sample, String sourceLabel, @@ -80,7 +85,8 @@ public Object closenessCentrality(int depth, GraphTraversal t = constructSource(sourceLabel, sourceSample, sourceCLabel); - t = constructPath(t, degree, sample, sourceLabel, sourceCLabel); + t = constructPath(t, direction, label, degree, sample, + sourceLabel, sourceCLabel); t = t.emit().until(__.loops().is(P.gte(depth))); t = filterNonShortestPath(t); diff --git 
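With the change above, constructPathUnit() expands each step of the path walk with __.to(direction, labels) instead of a hard-coded __.both(). Since direction() now defaults to Directions.BOTH when the parameter is absent, and to(Direction.BOTH) with no labels visits the same adjacent vertices as both(), the default behaviour of the centrality algorithms is unchanged; supplying direction or edgeLabel narrows every hop of the repeated traversal. A compilable fragment of the idea, using the TinkerPop anonymous-traversal step the patch itself uses (names here are illustrative):

    import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal;
    import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__;
    import org.apache.tinkerpop.gremlin.structure.Direction;
    import org.apache.tinkerpop.gremlin.structure.Vertex;

    public class PathUnitSketch {

        // No direction and no labels behaves like __.both(); OUT/IN plus an
        // edge label restricts every hop of the repeated path expansion.
        static GraphTraversal<Vertex, Vertex> unit(Direction dir, String... labels) {
            return __.to(dir, labels);
        }
    }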
a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java index 5f6781b21b..a19c098229 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java @@ -22,9 +22,9 @@ import java.util.Iterator; import java.util.Map; -import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource; import org.apache.tinkerpop.gremlin.structure.Edge; import org.apache.tinkerpop.gremlin.structure.Vertex; +import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils; import com.baidu.hugegraph.backend.id.Id; import com.baidu.hugegraph.job.Job; @@ -41,6 +41,7 @@ public String name() { @Override public void checkParameters(Map parameters) { direction(parameters); + edgeLabel(parameters); top(parameters); } @@ -48,6 +49,7 @@ public void checkParameters(Map parameters) { public Object call(Job job, Map parameters) { Traverser traverser = new Traverser(job); return traverser.degreeCentrality(direction(parameters), + edgeLabel(parameters), top(parameters)); } @@ -57,9 +59,11 @@ public Traverser(Job job) { super(job); } - public Object degreeCentrality(Directions direction, long topN) { + public Object degreeCentrality(Directions direction, + String label, + long topN) { if (direction == null || direction == Directions.BOTH) { - return degreeCentrality(topN); + return degreeCentrality(label, topN); } assert direction == Directions.OUT || direction == Directions.IN; assert topN >= 0L; @@ -69,6 +73,7 @@ public Object degreeCentrality(Directions direction, long topN) { JsonMap degrees = new JsonMap(); TopMap tops = new TopMap<>(topN); Id vertex = null; + Id labelId = this.getEdgeLabelId(label); long degree = 0L; long total = 0L; @@ -77,12 +82,20 @@ public Object degreeCentrality(Directions direction, long topN) { HugeEdge edge = (HugeEdge) edges.next(); this.updateProgress(++total); + Id schemaLabel = edge.schemaLabel().id(); + if (labelId != null && !labelId.equals(schemaLabel)) { + continue; + } + Id source = edge.ownerVertex().id(); if (source.equals(vertex)) { + // edges belong to same source vertex degree++; continue; } + if (vertex != null) { + // next vertex found if (topN <= 0L) { degrees.append(vertex, degree); } else { @@ -107,25 +120,26 @@ public Object degreeCentrality(Directions direction, long topN) { return degrees.asJson(); } - protected Object degreeCentrality(long topN) { + protected Object degreeCentrality(String label, long topN) { assert topN >= 0L; long total = 0L; JsonMap degrees = new JsonMap(); TopMap tops = new TopMap<>(topN); - GraphTraversalSource traversal = this.graph().traversal(); Iterator vertices = this.vertices(); degrees.startObject(); while (vertices.hasNext()) { - Vertex source = vertices.next(); + Id source = (Id) vertices.next().id(); this.updateProgress(++total); - Long degree = traversal.V(source).bothE().count().next(); - if (topN <= 0L) { - degrees.append(source.id(), degree); - } else { - tops.put((Id) source.id(), degree); + long degree = this.degree(source, label); + if (degree > 0L) { + if (topN <= 0L) { + degrees.append(source, degree); + } else { + tops.put(source, degree); + } } } @@ -136,5 +150,12 @@ protected Object degreeCentrality(long topN) { return degrees.asJson(); } + + private long degree(Id source, String label) { + Id labelId = 
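The directed degreeCentrality() loop above counts degrees in a single scan over the edge table: it relies on the backend returning edges grouped by owner vertex, accumulates a running counter while the owner id stays the same, and flushes the counter whenever a new owner appears. Edges whose label does not match the optional edgeLabel are skipped before they touch the counter. A standalone sketch of this run-length counting over a stream of owner ids (illustrative names, assuming the grouped ordering):

    import java.util.Arrays;
    import java.util.Iterator;
    import java.util.LinkedHashMap;
    import java.util.Map;

    public class StreamingDegree {

        // Assumes ownerIds yields all edges of a vertex consecutively,
        // the way a backend scan over adjacency lists does.
        static Map<String, Long> degrees(Iterator<String> ownerIds) {
            Map<String, Long> result = new LinkedHashMap<>();
            String current = null;
            long degree = 0L;
            while (ownerIds.hasNext()) {
                String owner = ownerIds.next();
                if (owner.equals(current)) {
                    degree++;
                    continue;
                }
                if (current != null) {
                    result.put(current, degree);   // flush previous vertex
                }
                current = owner;
                degree = 1L;
            }
            if (current != null) {
                result.put(current, degree);       // flush the last vertex
            }
            return result;
        }

        public static void main(String[] args) {
            Iterator<String> owners =
                    Arrays.asList("a", "a", "a", "b", "c", "c").iterator();
            System.out.println(degrees(owners)); // {a=3, b=1, c=2}
        }
    }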
this.getEdgeLabelId(label); + Iterator edges = this.edgesOfVertex(source, Directions.BOTH, + labelId, NO_LIMIT); + return IteratorUtils.count(edges); + } } } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java index d87fc79316..ce47417c4d 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java @@ -30,6 +30,7 @@ import org.apache.tinkerpop.gremlin.structure.Vertex; import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.type.define.Directions; public class EigenvectorCentralityAlgorithm extends AbstractCentAlgorithm { @@ -44,7 +45,9 @@ public String name() { @Override public Object call(Job job, Map parameters) { Traverser traverser = new Traverser(job); - return traverser.eigenvectorCentrality(depth(parameters), + return traverser.eigenvectorCentrality(direction(parameters), + edgeLabel(parameters), + depth(parameters), degree(parameters), sample(parameters), sourceLabel(parameters), @@ -59,7 +62,9 @@ public Traverser(Job job) { super(job); } - public Object eigenvectorCentrality(int depth, + public Object eigenvectorCentrality(Directions direction, + String label, + int depth, long degree, long sample, String sourceLabel, @@ -83,7 +88,8 @@ public Object eigenvectorCentrality(int depth, GraphTraversal t = constructSource(sourceLabel, sourceSample, sourceCLabel); - GraphTraversal unit = constructPathUnit(degree, sample, + GraphTraversal unit = constructPathUnit(direction, label, + degree, sample, sourceLabel, sourceCLabel); t = t.repeat(__.groupCount("m").by(T.id) From 7151c80c4d446e6e228793ca5396a3a3ff9dbd84 Mon Sep 17 00:00:00 2001 From: Jermy Li Date: Wed, 6 May 2020 15:15:53 +0800 Subject: [PATCH 07/33] louvain: add modularity parameter and fix isolated community lost (#14) * add modularity parameter for louvain * fix: louvain lost isolated community from one to next pass Change-Id: I6a7dadc80635429aa2898939aa337aae01bc8d12 --- .../job/algorithm/AbstractAlgorithm.java | 3 +- .../job/algorithm/comm/LouvainAlgorithm.java | 20 +- .../job/algorithm/comm/LouvainTraverser.java | 187 ++++++++++++------ 3 files changed, 145 insertions(+), 65 deletions(-) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java index 248a92bdb1..969bda1d8d 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java @@ -59,7 +59,7 @@ public abstract class AbstractAlgorithm implements Algorithm { public static final long MAX_RESULT_SIZE = 100L * Bytes.MB; - public static final long MAX_QUERY_LIMIT = 10000000L; // about 10GB + public static final long MAX_QUERY_LIMIT = 100000000L; // about 100GB public static final int BATCH = 500; public static final String CATEGORY_AGGR = "aggregate"; @@ -81,6 +81,7 @@ public abstract class AbstractAlgorithm implements Algorithm { public static final String KEY_TIMES = "times"; public static final String KEY_STABLE_TIMES = "stable_times"; public static final String KEY_PRECISION = "precision"; + public static final String KEY_SHOW_MOD= "show_modularity"; public static final String 
KEY_SHOW_COMM = "show_community"; public static final String KEY_CLEAR = "clear"; public static final String KEY_CAPACITY = "capacity"; diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java index 3f6de63e8c..c0c05f9a22 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java @@ -22,7 +22,7 @@ import java.util.Map; import com.baidu.hugegraph.job.Job; -import com.baidu.hugegraph.util.E; +import com.baidu.hugegraph.traversal.algorithm.HugeTraverser; public class LouvainAlgorithm extends AbstractCommAlgorithm { @@ -39,6 +39,7 @@ public void checkParameters(Map parameters) { degree(parameters); sourceLabel(parameters); sourceCLabel(parameters); + showModularity(parameters); showCommunity(parameters); clearPass(parameters); } @@ -52,10 +53,13 @@ public Object call(Job job, Map parameters) { LouvainTraverser traverser = new LouvainTraverser(job, degree, label, clabel); Long clearPass = clearPass(parameters); + Long modPass = showModularity(parameters); String showComm = showCommunity(parameters); try { if (clearPass != null) { return traverser.clearPass(clearPass.intValue()); + } else if (modPass != null) { + return traverser.modularity(modPass.intValue()); } else if (showComm != null) { return traverser.showCommunity(showComm); } else { @@ -74,10 +78,16 @@ protected static Long clearPass(Map parameters) { return null; } long pass = parameterLong(parameters, KEY_CLEAR); - // TODO: change to checkNonNegative() - E.checkArgument(pass >= 0 || pass == -1, - "The %s parameter must be >= 0 or == -1, but got %s", - KEY_CLEAR, pass); + HugeTraverser.checkNonNegativeOrNoLimit(pass, KEY_CLEAR); + return pass; + } + + protected static Long showModularity(Map parameters) { + if (!parameters.containsKey(KEY_SHOW_MOD)) { + return null; + } + long pass = parameterLong(parameters, KEY_SHOW_MOD); + HugeTraverser.checkNonNegative(pass, KEY_SHOW_MOD); return pass; } } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java index 0177d8f2d7..a63a1259dc 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java @@ -28,6 +28,7 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.NoSuchElementException; import java.util.Set; import org.apache.commons.lang3.mutable.MutableInt; @@ -52,6 +53,7 @@ import com.baidu.hugegraph.structure.HugeEdge; import com.baidu.hugegraph.structure.HugeVertex; import com.baidu.hugegraph.type.define.Directions; +import com.baidu.hugegraph.util.InsertionOrderUtil; import com.baidu.hugegraph.util.Log; import com.google.common.collect.ImmutableMap; @@ -89,23 +91,6 @@ public LouvainTraverser(Job job, long degree, this.cache = new Cache(); } - @SuppressWarnings("unused") - private Id genId2(int pass, Id cid) { - // gen id for merge-community vertex - String id = cid.toString(); - if (pass == 0) { - // conncat pass with cid - id = pass + "~" + id; - } else { - // replace last pass with current pass - String lastPass = String.valueOf(pass - 1); - assert id.startsWith(lastPass); - id = 
id.substring(lastPass.length()); - id = pass + id; - } - return IdGenerator.of(id); - } - private void defineSchemaOfPk() { String label = this.labelOfPassN(0); if (this.graph().existsVertexLabel(label) || @@ -131,8 +116,7 @@ private void defineSchemaOfPassN(int pass) { SchemaManager schema = this.graph().schema(); try { schema.vertexLabel(this.passLabel).useCustomizeStringId() - .properties(C_KIN, C_MEMBERS) - .nullableKeys(C_KIN, C_MEMBERS) + .properties(C_KIN, C_MEMBERS, C_WEIGHT) .create(); schema.edgeLabel(this.passLabel) .sourceLabel(this.passLabel) @@ -189,9 +173,16 @@ private float weightOfEdges(List edges) { return weight; } - private Vertex newCommunityNode(Id cid, int kin, List members) { + private Vertex newCommunityNode(Id cid, float cweight, + int kin, List members) { assert !members.isEmpty() : members; - return this.graph().addVertex(T.label, this.passLabel, T.id, cid, + /* + * cweight: members size(all pass) of the community, just for debug + * kin: edges weight in the community + * members: members id of the community of last pass + */ + return this.graph().addVertex(T.label, this.passLabel, + T.id, cid, C_WEIGHT, cweight, C_KIN, kin, C_MEMBERS, members); } @@ -204,12 +195,12 @@ private Edge newCommunityEdge(Vertex source, Vertex target, float weight) { return source.addEdge(this.passLabel, target, C_WEIGHT, weight); } - private void insertNewCommunity(int pass, Id cid, int kin, - List members, + private void insertNewCommunity(int pass, Id cid, float cweight, + int kin, List members, Map cedges) { // create backend vertex if it's the first time Id vid = this.cache.genId(pass, cid); - Vertex node = this.newCommunityNode(vid, kin, members); + Vertex node = this.newCommunityNode(vid, cweight, kin, members); commitIfNeeded(); // update backend vertex edges for (Map.Entry e : cedges.entrySet()) { @@ -262,6 +253,7 @@ private List neighbors(Id vid) { } private float weightOfVertex(Vertex v, List edges) { + // degree/weight of vertex Float value = this.cache.vertexWeight((Id) v.id()); if (value != null) { return value; @@ -281,9 +273,21 @@ private int kinOfVertex(Vertex v) { return 0; } - private Id cidOfVertex(Vertex v) { + private float cweightOfVertex(Vertex v) { + if (v.label().startsWith(C_PASS) && v.property(C_WEIGHT).isPresent()) { + return v.value(C_WEIGHT); + } + return 1f; + } + + private Id cidOfVertex(Vertex v, List nbs) { Id vid = (Id) v.id(); Community c = this.cache.vertex2Community(vid); + // ensure source vertex exist in cache + if (c == null) { + c = this.wrapCommunity(v, nbs); + assert c != null; + } return c != null ? c.cid : vid; } @@ -292,15 +296,15 @@ private Id cidOfVertex(Vertex v) { // and save as community vertex when merge() // 3: wrap community vertex as community node, // and repeat step 2 and step 3. 
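The hunks above spell out the layered contraction that Louvain performs here: every pass materializes one backend vertex per community under that pass's label, carrying c_kin (twice the weight of the edges inside the community), c_members (the ids of the previous-pass nodes it absorbed) and, from this patch on, c_weight (the accumulated count of original vertices, kept for debugging); weighted edges of the same label connect the community vertices, and the next pass reruns the local-move phase on this coarser graph. Two different notions of size follow from that: Community.size() counts last-pass members and is what the MAX_COMM_SIZE guard checks, while weight() tracks original vertices across all passes. For example, a pass-1 community built from two pass-0 communities of 3 and 2 original vertices has size() == 2 but weight() == 5.0.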
- private Community wrapCommunity(Vertex otherV) { - Id vid = (Id) otherV.id(); + private Community wrapCommunity(Vertex v, List nbs) { + Id vid = (Id) v.id(); Community comm = this.cache.vertex2Community(vid); if (comm != null) { return comm; } comm = new Community(vid); - comm.add(this, otherV, null); // will traverse the neighbors of otherV + comm.add(this, v, nbs); this.cache.vertex2Community(vid, comm); return comm; } @@ -316,7 +320,8 @@ private Collection> nbCommunities( // skip the old intermediate data, or filter clabel continue; } - Community c = wrapCommunity(otherV); + // will traverse the neighbors of otherV + Community c = this.wrapCommunity(otherV, null); if (!comms.containsKey(c.cid)) { comms.put(c.cid, Pair.of(c, new MutableInt(0))); } @@ -359,8 +364,8 @@ private double moveCommunities(int pass) { continue; } total++; - Id cid = cidOfVertex(v); List nbs = neighbors((Id) v.id()); + Id cid = cidOfVertex(v, nbs); double ki = kinOfVertex(v) + weightOfVertex(v, nbs); // update community of v if △Q changed double maxDeltaQ = 0d; @@ -377,13 +382,13 @@ private double moveCommunities(int pass) { // weight between c and otherC double kiin = nbc.getRight().floatValue(); // weight of otherC - int tot = otherC.kin() + otherC.kout(); + double tot = otherC.kin() + otherC.kout(); if (cid.equals(otherC.cid)) { tot -= ki; - assert tot >= 0; + assert tot >= 0d; // expect tot >= 0, but may be something wrong? - if (tot < 0) { - tot = 0; + if (tot < 0d) { + tot = 0d; } } double deltaQ = kiin - ki * tot / this.m; @@ -407,6 +412,7 @@ private double moveCommunities(int pass) { private void mergeCommunities(int pass) { // merge each community as a vertex Collection>> comms = this.cache.communities(); + assert this.allMembersExist(comms, pass -1); this.cache.resetVertexWeight(); for (Pair> pair : comms) { Community c = pair.getKey(); @@ -417,6 +423,7 @@ private void mergeCommunities(int pass) { int kin = c.kin(); Set vertices = pair.getRight(); assert !vertices.isEmpty(); + assert vertices.size() == c.size(); List members = new ArrayList<>(vertices.size()); Map cedges = new HashMap<>(vertices.size()); for (Id v : vertices) { @@ -432,7 +439,8 @@ private void mergeCommunities(int pass) { kin += weightOfEdge(edge); continue; } - Id otherCid = cidOfVertex(otherV); + assert this.cache.vertex2Community(otherV.id()) != null; + Id otherCid = cidOfVertex(otherV, null); if (otherCid.compareTo(c.cid) < 0) { // skip if it should be collected by otherC continue; @@ -440,17 +448,33 @@ private void mergeCommunities(int pass) { if (!cedges.containsKey(otherCid)) { cedges.put(otherCid, new MutableInt(0)); } + // update edge weight cedges.get(otherCid).add(weightOfEdge(edge)); } } // insert new community vertex and edges into storage - this.insertNewCommunity(pass, c.cid, kin, members, cedges); + this.insertNewCommunity(pass, c.cid, c.weight(), kin, members, cedges); } this.graph().tx().commit(); // reset communities this.cache.reset(); } + private boolean allMembersExist(Collection>> comms, + int pass) { + String lastLabel = labelOfPassN(pass); + GraphTraversal t = pass < 0 ? 
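A note on the gain computation in moveCommunities() above: the comment gives the Louvain gain as △Q = (Ki_in - Ki * Etot / m) / 2m, but the code only evaluates kiin - ki * tot / m. Dropping the positive constant factor 1/(2m) is safe because the value is only compared against 0 and against the best gain seen so far, so its sign and ordering are unchanged. Here ki is the vertex's weighted degree (its stored kin plus the weight of its incident edges), kiin is the weight between the vertex and the candidate community, and tot is the candidate community's total degree, with the vertex's own ki subtracted when it already belongs to that community. A small worked case: with m = 10, ki = 3, kiin = 2 and tot = 4 the expression is 2 - 3 * 4 / 10 = 0.8 > 0, so moving the vertex into that community increases modularity.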
this.g.V().id() : + this.g.V().hasLabel(lastLabel).id(); + Set all = this.execute(t, t::toSet); + for (Pair> comm : comms) { + all.removeAll(comm.getRight()); + } + if (all.size() > 0) { + LOG.warn("Lost members of last pass: {}", all); + } + return all.isEmpty(); + } + public Object louvain(int maxTimes, int stableTimes, double precision) { assert maxTimes > 0; assert precision > 0d; @@ -496,31 +520,40 @@ public Object louvain(int maxTimes, int stableTimes, double precision) { } } - long communities = 0L; + Map results = InsertionOrderUtil.newMap(); + results.putAll(ImmutableMap.of("pass_times", times, + "phase1_times", movedTimes, + "last_precision", movedPercent, + "times", maxTimes)); + Number communities = 0L; + Number modularity = -1L; String commLabel = this.passLabel; if (!commLabel.isEmpty()) { - GraphTraversal t = this.g.V().hasLabel(commLabel).count(); - communities = this.execute(t, t::next); + communities = tryNext(this.g.V().hasLabel(commLabel).count()); + modularity = this.modularity(commLabel); } - return ImmutableMap.of("pass_times", times, - "phase1_times", movedTimes, - "last_precision", movedPercent, - "times", maxTimes, - "communities", communities); + results.putAll(ImmutableMap.of("communities", communities, + "modularity", modularity)); + return results; } public double modularity(int pass) { - // pass: label the last pass + // community vertex label of one pass String label = labelOfPassN(pass); - Number kin = this.g.V().hasLabel(label).values(C_KIN).sum().next(); - Number weight = this.g.E().hasLabel(label).values(C_WEIGHT).sum().next(); + return this.modularity(label); + } + + private double modularity(String label) { + // label: community vertex label of one pass + Number kin = tryNext(this.g.V().hasLabel(label).values(C_KIN).sum()); + Number weight = tryNext(this.g.E().hasLabel(label).values(C_WEIGHT).sum()); double m = kin.intValue() + weight.floatValue() * 2.0d; double q = 0.0d; - Iterator coms = this.g.V().hasLabel(label); - while (coms.hasNext()) { - Vertex com = coms.next(); - int cin = com.value(C_KIN); - Number cout = this.g.V(com).bothE().values(C_WEIGHT).sum().next(); + Iterator comms = this.vertices(label, LIMIT); + while (comms.hasNext()) { + Vertex comm = comms.next(); + int cin = comm.value(C_KIN); + Number cout = tryNext(this.g.V(comm).bothE().values(C_WEIGHT).sum()); double cdegree = cin + cout.floatValue(); // Q = ∑(I/M - ((2I+O)/2M)^2) q += cin / m - Math.pow(cdegree / m, 2); @@ -528,6 +561,16 @@ public double modularity(int pass) { return q; } + private Number tryNext(GraphTraversal iter) { + return this.execute(iter, () -> { + try { + return iter.next(); + } catch (NoSuchElementException e) { + return 0; + } + }); + } + public Collection showCommunity(String community) { final String C_PASS0 = labelOfPassN(0); Collection comms = Arrays.asList(community); @@ -604,8 +647,10 @@ private static class Community { // community id (stored as a backend vertex) private final Id cid; - // community members size + // community members size of last pass [just for skip large community] private int size = 0; + // community members size of origin vertex [just for debug members lost] + private float weight = 0f; /* * weight of all edges in community(2X), sum of kin of new members * [each is from the last pass, stored in backend vertex] @@ -615,8 +660,7 @@ private static class Community { * weight of all edges between communities, sum of kout of new members * [each is last pass, calculated in real time by neighbors] */ - // - private int kout = 0; + private 
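The modularity() computation above matches the usual definition Q = Σ_c (I_c / M - ((2 * I_c + O_c) / (2 * M))^2), where I_c is the weight of edges inside community c, O_c the weight of edges leaving it, and M the total edge weight, once you recall that c_kin already stores twice the internal weight: m in the code is Σ c_kin + 2 * Σ edge weights, i.e. 2 * M, cin is 2 * I_c, and cdegree = cin + cout is the community's total degree 2 * I_c + O_c, so cin / m - (cdegree / m)^2 is exactly the summand above. Two sanity checks this passes: a graph collapsed into a single community gives Q = 1 - 1 = 0, and a graph split into two equal halves with no edges between them gives Q = 2 * (1/2 - 1/4) = 0.5.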
float kout = 0f; public Community(Id cid) { this.cid = cid; @@ -630,14 +674,20 @@ public int size() { return this.size; } + public float weight() { + return this.weight; + } + public void add(LouvainTraverser t, Vertex v, List nbs) { this.size++; + this.weight += t.cweightOfVertex(v); this.kin += t.kinOfVertex(v); this.kout += t.weightOfVertex(v, nbs); } public void remove(LouvainTraverser t, Vertex v, List nbs) { this.size--; + this.weight -= t.cweightOfVertex(v); this.kin -= t.kinOfVertex(v); this.kout -= t.weightOfVertex(v, nbs); } @@ -646,14 +696,15 @@ public int kin() { return this.kin; } - public int kout() { + public float kout() { return this.kout; } @Override public String toString() { - return String.format("[%s](size=%s kin=%s kout=%s)", - this.cid , this.size, this.kin, this.kout); + return String.format("[%s](size=%s weight=%s kin=%s kout=%s)", + this.cid , this.size, this.weight, + this.kin, this.kout); } } @@ -669,7 +720,8 @@ public Cache() { this.genIds = new HashMap<>(); } - public Community vertex2Community(Id id) { + public Community vertex2Community(Object id) { + assert id instanceof Id; return this.vertex2Community.get(id); } @@ -703,6 +755,23 @@ public Id genId(int pass, Id cid) { return IdGenerator.of(id); } + @SuppressWarnings("unused") + public Id genId2(int pass, Id cid) { + // gen id for merge-community vertex + String id = cid.toString(); + if (pass == 0) { + // conncat pass with cid + id = pass + "~" + id; + } else { + // replace last pass with current pass + String lastPass = String.valueOf(pass - 1); + assert id.startsWith(lastPass); + id = id.substring(lastPass.length()); + id = pass + id; + } + return IdGenerator.of(id); + } + public Collection>> communities(){ // TODO: get communities from backend store instead of ram Map>> comms = new HashMap<>(); From 366fe37f43714c4a2c6c7cea5541012abc3c1d61 Mon Sep 17 00:00:00 2001 From: Jermy Li Date: Tue, 12 May 2020 20:59:35 +0800 Subject: [PATCH 08/33] support parallel: Louvain,LPA,Rings,K-Core,Fusiform (#15) * optimize louvain by multi threads * implement louvain threads * fix race condition * implement merge community by multi threads * remove debug info * fix genId race condition * compatible with serial and parallel computing * support parallel lpa * support parallel: Louvain,LPA,Rings,K-Core,Fusiform Change-Id: I2425d1da58581ea7a61dce72a88355ae3d2dd610 --- .../job/algorithm/AbstractAlgorithm.java | 53 ++- .../hugegraph/job/algorithm/Consumers.java | 161 +++++++++ .../job/algorithm/CountEdgeAlgorithm.java | 5 +- .../job/algorithm/CountVertexAlgorithm.java | 5 +- .../cent/BetweenessCentralityAlgorithm.java | 21 +- .../cent/ClosenessCentralityAlgorithm.java | 21 +- .../cent/DegreeCentralityAlgorithm.java | 9 +- .../cent/EigenvectorCentralityAlgorithm.java | 21 +- .../comm/ClusterCoeffcientAlgorithm.java | 7 +- .../job/algorithm/comm/KCoreAlgorithm.java | 55 ++-- .../job/algorithm/comm/LouvainAlgorithm.java | 9 +- .../job/algorithm/comm/LouvainTraverser.java | 311 ++++++++++++------ .../job/algorithm/comm/LpaAlgorithm.java | 50 +-- .../comm/TriangleCountAlgorithm.java | 7 +- .../algorithm/path/RingsDetectAlgorithm.java | 50 ++- .../FusiformSimilarityAlgorithm.java | 59 ++-- 16 files changed, 585 insertions(+), 259 deletions(-) create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Consumers.java diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java index 
969bda1d8d..c36a704056 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java @@ -26,6 +26,8 @@ import java.util.Map.Entry; import java.util.Set; import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.function.Consumer; import org.apache.commons.lang3.StringEscapeUtils; import org.apache.commons.lang3.mutable.MutableLong; @@ -87,6 +89,7 @@ public abstract class AbstractAlgorithm implements Algorithm { public static final String KEY_CAPACITY = "capacity"; public static final String KEY_LIMIT = "limit"; public static final String KEY_ALPHA = "alpha"; + public static final String KEY_WORKERS = "workers"; public static final long DEFAULT_CAPACITY = 10000000L; public static final long DEFAULT_LIMIT = 100L; @@ -213,6 +216,15 @@ protected static String sourceCLabel(Map parameters) { return parameterString(parameters, KEY_SOURCE_CLABEL); } + protected static int workers(Map parameters) { + if (!parameters.containsKey(KEY_WORKERS)) { + return -1; + } + int workers = parameterInt(parameters, KEY_WORKERS); + HugeTraverser.checkNonNegativeOrNoLimit(workers, KEY_WORKERS); + return workers; + } + public static Object parameter(Map parameters, String key) { Object value = parameters.get(key); E.checkArgument(value != null, @@ -280,20 +292,59 @@ public static Directions parseDirection(Object direction) { } } - public static class AlgoTraverser extends HugeTraverser { + public static class AlgoTraverser extends HugeTraverser + implements AutoCloseable { private final Job job; + protected final ExecutorService executor; protected long progress; public AlgoTraverser(Job job) { super(job.graph()); this.job = job; + this.executor = null; + } + + protected AlgoTraverser(Job job, String name, int workers) { + super(job.graph()); + this.job = job; + String prefix = name + "-" + job.task().id(); + this.executor = Consumers.newThreadPool(prefix, workers); } public void updateProgress(long progress) { this.job.updateProgress((int) progress); } + @Override + public void close() { + if (this.executor != null) { + this.executor.shutdown(); + } + } + + protected long traverse(String sourceLabel, String sourceCLabel, + Consumer consumer) { + Iterator vertices = this.vertices(sourceLabel, sourceLabel, + Query.NO_LIMIT); + + Consumers consumers = new Consumers<>(this.executor, + consumer); + consumers.start(); + + long total = 0L; + while (vertices.hasNext()) { + this.updateProgress(++this.progress); + total++; + Vertex v = vertices.next(); + consumers.provide(v); + } + + consumers.await(); + + return total; + } + protected Iterator vertices() { return this.vertices(Query.NO_LIMIT); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Consumers.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Consumers.java new file mode 100644 index 0000000000..795e0d7127 --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Consumers.java @@ -0,0 +1,161 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. 
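The traverse() helper above fixes the parallel shape shared by the algorithms in this patch: the single thread that iterates vertices is the producer, each vertex is handed to a Consumers instance, and the per-vertex work runs on the job's own fixed thread pool. The queue is bounded at QUEUE_WORKER_SIZE * workers, so provide() applies back-pressure (ArrayBlockingQueue.put blocks) instead of letting the iterator race ahead of the workers. With the defaults shown, THREADS = 4 + CPUS / 4, an 8-core machine gets 6 workers and a 6,000-element queue; passing workers = 0 disables the pool entirely and the consumer runs inline on the producer thread.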
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +package com.baidu.hugegraph.job.algorithm; + +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; + +import org.slf4j.Logger; + +import com.baidu.hugegraph.util.ExecutorUtil; +import com.baidu.hugegraph.util.Log; + +public class Consumers { + + public static final int CPUS = Runtime.getRuntime().availableProcessors(); + public static final int THREADS = 4 + CPUS / 4; + public static final int QUEUE_WORKER_SIZE = 1000; + + private static final Logger LOG = Log.logger(Consumers.class); + + private final ExecutorService executor; + private final Consumer consumer; + private final Runnable done; + + private final int workers; + private final int queueSize; + private final CountDownLatch latch; + private final BlockingQueue queue; + + private volatile boolean ending = false; + + public Consumers(ExecutorService executor, Consumer consumer) { + this(executor, consumer, null); + } + + public Consumers(ExecutorService executor, + Consumer consumer, Runnable done) { + this.executor = executor; + this.consumer = consumer; + this.done = done; + + int workers = THREADS; + if (this.executor instanceof ThreadPoolExecutor) { + workers = ((ThreadPoolExecutor) this.executor).getCorePoolSize(); + } + this.workers = workers; + this.queueSize = QUEUE_WORKER_SIZE * workers; + this.latch = new CountDownLatch(workers); + this.queue = new ArrayBlockingQueue<>(this.queueSize); + } + + public void start() { + if (this.executor == null) { + return; + } + LOG.info("Starting {} workers with queue size {}...", + this.workers, this.queueSize); + for (int i = 0; i < this.workers; i++) { + this.executor.submit(() -> { + try { + this.run(); + if (this.done != null) { + this.done.run(); + } + } catch (Throwable e) { + LOG.error("Error when running task", e); + } finally { + this.latch.countDown(); + } + }); + } + } + + private void run() { + LOG.debug("Start to work..."); + while (!this.ending) { + this.consume(); + } + assert this.ending; + while (this.consume()); + + LOG.debug("Worker finished"); + } + + private boolean consume() { + V elem; + try { + elem = this.queue.poll(1, TimeUnit.SECONDS); + } catch (InterruptedException e) { + // ignore + return true; + } + if (elem == null) { + return false; + } + // do job + this.consumer.accept(elem); + return true; + } + + public void provide(V v) { + if (this.executor == null) { + // do job directly + this.consumer.accept(v); + } else { + try { + this.queue.put(v); + } catch (InterruptedException e) { + LOG.warn("Interrupted", e);; + } + } + } + + public void await() { + this.ending = true; + if (this.executor != null) { + try { + this.latch.await(); + } catch (InterruptedException e) { + LOG.warn("Interrupted", e);; + } + } + } + + public static ExecutorService newThreadPool(String prefix, int workers) { + if (workers == 0) { + return null; + } else { + if (workers < 0) { + assert workers == -1; + 
workers = Consumers.THREADS; + } else if (workers > Consumers.CPUS * 2) { + workers = Consumers.CPUS * 2; + } + String name = prefix + "-worker-%d"; + return ExecutorUtil.newFixedThreadPool(workers, name); + } + } +} diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/CountEdgeAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/CountEdgeAlgorithm.java index 9fb1223483..670f544719 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/CountEdgeAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/CountEdgeAlgorithm.java @@ -43,8 +43,9 @@ public String category() { @Override public Object call(Job job, Map parameters) { - Traverser traverser = new Traverser(job); - return traverser.count(); + try (Traverser traverser = new Traverser(job)) { + return traverser.count(); + } } private static class Traverser extends AlgoTraverser { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/CountVertexAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/CountVertexAlgorithm.java index 582e0bb691..68a59a363c 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/CountVertexAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/CountVertexAlgorithm.java @@ -43,8 +43,9 @@ public String category() { @Override public Object call(Job job, Map parameters) { - Traverser traverser = new Traverser(job); - return traverser.count(); + try (Traverser traverser = new Traverser(job)) { + return traverser.count(); + } } private static class Traverser extends AlgoTraverser { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java index 12e3acba02..4f3415a15a 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java @@ -42,16 +42,17 @@ public String name() { @Override public Object call(Job job, Map parameters) { - Traverser traverser = new Traverser(job); - return traverser.betweenessCentrality(direction(parameters), - edgeLabel(parameters), - depth(parameters), - degree(parameters), - sample(parameters), - sourceLabel(parameters), - sourceSample(parameters), - sourceCLabel(parameters), - top(parameters)); + try (Traverser traverser = new Traverser(job)) { + return traverser.betweenessCentrality(direction(parameters), + edgeLabel(parameters), + depth(parameters), + degree(parameters), + sample(parameters), + sourceLabel(parameters), + sourceSample(parameters), + sourceCLabel(parameters), + top(parameters)); + } } private static class Traverser extends AbstractCentAlgorithm.Traverser { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java index cb64bd8bc5..6719eee1e6 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java @@ -51,16 +51,17 @@ public void checkParameters(Map parameters) { @Override public Object call(Job job, Map parameters) { - Traverser traverser = new 
Traverser(job); - return traverser.closenessCentrality(direction(parameters), - edgeLabel(parameters), - depth(parameters), - degree(parameters), - sample(parameters), - sourceLabel(parameters), - sourceSample(parameters), - sourceCLabel(parameters), - top(parameters)); + try (Traverser traverser = new Traverser(job)) { + return traverser.closenessCentrality(direction(parameters), + edgeLabel(parameters), + depth(parameters), + degree(parameters), + sample(parameters), + sourceLabel(parameters), + sourceSample(parameters), + sourceCLabel(parameters), + top(parameters)); + } } private static class Traverser extends AbstractCentAlgorithm.Traverser { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java index a19c098229..f29a6301df 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java @@ -47,10 +47,11 @@ public void checkParameters(Map parameters) { @Override public Object call(Job job, Map parameters) { - Traverser traverser = new Traverser(job); - return traverser.degreeCentrality(direction(parameters), - edgeLabel(parameters), - top(parameters)); + try (Traverser traverser = new Traverser(job)) { + return traverser.degreeCentrality(direction(parameters), + edgeLabel(parameters), + top(parameters)); + } } private static class Traverser extends AlgoTraverser { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java index ce47417c4d..39cec64cde 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java @@ -44,16 +44,17 @@ public String name() { @Override public Object call(Job job, Map parameters) { - Traverser traverser = new Traverser(job); - return traverser.eigenvectorCentrality(direction(parameters), - edgeLabel(parameters), - depth(parameters), - degree(parameters), - sample(parameters), - sourceLabel(parameters), - sourceSample(parameters), - sourceCLabel(parameters), - top(parameters)); + try (Traverser traverser = new Traverser(job)) { + return traverser.eigenvectorCentrality(direction(parameters), + edgeLabel(parameters), + depth(parameters), + degree(parameters), + sample(parameters), + sourceLabel(parameters), + sourceSample(parameters), + sourceCLabel(parameters), + top(parameters)); + } } private static class Traverser extends AbstractCentAlgorithm.Traverser { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java index cc893fc1f0..52f0b07a79 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java @@ -40,9 +40,10 @@ public void checkParameters(Map parameters) { @Override public Object call(Job job, Map parameters) { - Traverser traverser = new Traverser(job); - return traverser.clusterCoeffcient(direction(parameters), 
- degree(parameters)); + try (Traverser traverser = new Traverser(job)) { + return traverser.clusterCoeffcient(direction(parameters), + degree(parameters)); + } } private static class Traverser extends TriangleCountAlgorithm.Traverser { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java index 4cc6a88ba5..6a721258a3 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java @@ -33,7 +33,6 @@ import com.baidu.hugegraph.HugeGraph; import com.baidu.hugegraph.backend.id.Id; -import com.baidu.hugegraph.backend.query.Query; import com.baidu.hugegraph.job.Job; import com.baidu.hugegraph.schema.EdgeLabel; import com.baidu.hugegraph.traversal.algorithm.FusiformSimilarityTraverser; @@ -65,19 +64,22 @@ public void checkParameters(Map parameters) { sourceCLabel(parameters); direction(parameters); edgeLabel(parameters); + workers(parameters); } @Override public Object call(Job job, Map parameters) { - Traverser traverser = new Traverser(job); - return traverser.kcore(sourceLabel(parameters), - sourceCLabel(parameters), - direction(parameters), - edgeLabel(parameters), - k(parameters), - alpha(parameters), - degree(parameters), - merged(parameters)); + int workers = workers(parameters); + try (Traverser traverser = new Traverser(job, workers)) { + return traverser.kcore(sourceLabel(parameters), + sourceCLabel(parameters), + direction(parameters), + edgeLabel(parameters), + k(parameters), + alpha(parameters), + degree(parameters), + merged(parameters)); + } } protected static int k(Map parameters) { @@ -98,16 +100,14 @@ protected static boolean merged(Map parameters) { public static class Traverser extends AlgoTraverser { - public Traverser(Job job) { - super(job); + public Traverser(Job job, int workers) { + super(job, "kcore", workers); } public Object kcore(String sourceLabel, String sourceCLabel, Directions dir, String label, int k, double alpha, long degree, boolean merged) { HugeGraph graph = this.graph(); - Iterator vertices = this.vertices(sourceLabel, sourceCLabel, - Query.NO_LIMIT); EdgeLabel edgeLabel = label == null ? 
null : graph.edgeLabel(label); KcoreTraverser traverser = new KcoreTraverser(graph); @@ -115,27 +115,34 @@ public Object kcore(String sourceLabel, String sourceCLabel, kcoresJson.startObject(); kcoresJson.appendKey("kcores"); kcoresJson.startList(); - Set> kcoreSet = new HashSet<>(); - while(vertices.hasNext()) { - this.updateProgress(++this.progress); - Vertex vertex = vertices.next(); - Set kcore = traverser.kcore(IteratorUtils.of(vertex), + + Set> kcores = new HashSet<>(); + + this.traverse(sourceLabel, sourceCLabel, v -> { + Set kcore = traverser.kcore(IteratorUtils.of(v), dir, edgeLabel, k, alpha, degree); if (kcore.isEmpty()) { - continue; + return; } if (merged) { - mergeKcores(kcoreSet, kcore); + synchronized (kcores) { + mergeKcores(kcores, kcore); + } } else { - kcoresJson.appendRaw(JsonUtil.toJson(kcore)); + String json = JsonUtil.toJson(kcore); + synchronized (kcoresJson) { + kcoresJson.appendRaw(json); + } } - } + }); + if (merged) { - for (Set kcore : kcoreSet) { + for (Set kcore : kcores) { kcoresJson.appendRaw(JsonUtil.toJson(kcore)); } } + kcoresJson.endList(); kcoresJson.endObject(); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java index c0c05f9a22..446ab2686e 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java @@ -42,6 +42,7 @@ public void checkParameters(Map parameters) { showModularity(parameters); showCommunity(parameters); clearPass(parameters); + workers(parameters); } @Override @@ -49,13 +50,15 @@ public Object call(Job job, Map parameters) { String label = sourceLabel(parameters); String clabel = sourceCLabel(parameters); long degree = degree(parameters); + int workers = workers(parameters); - LouvainTraverser traverser = new LouvainTraverser(job, degree, - label, clabel); Long clearPass = clearPass(parameters); Long modPass = showModularity(parameters); String showComm = showCommunity(parameters); - try { + + try (LouvainTraverser traverser = new LouvainTraverser( + job, workers, degree, + label, clabel)) { if (clearPass != null) { return traverser.clearPass(clearPass.intValue()); } else if (modPass != null) { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java index a63a1259dc..e55152b10b 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java @@ -29,8 +29,12 @@ import java.util.Map; import java.util.Map.Entry; import java.util.NoSuchElementException; +import java.util.Objects; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; +import org.apache.commons.lang.mutable.MutableFloat; import org.apache.commons.lang3.mutable.MutableInt; import org.apache.commons.lang3.tuple.Pair; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; @@ -47,6 +51,7 @@ import com.baidu.hugegraph.job.Job; import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm; import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm.AlgoTraverser; +import com.baidu.hugegraph.job.algorithm.Consumers; import 
com.baidu.hugegraph.schema.SchemaLabel; import com.baidu.hugegraph.schema.SchemaManager; import com.baidu.hugegraph.schema.VertexLabel; @@ -78,9 +83,9 @@ public class LouvainTraverser extends AlgoTraverser { private long m; private String passLabel; - public LouvainTraverser(Job job, long degree, + public LouvainTraverser(Job job, int workers, long degree, String sourceLabel, String sourceCLabel) { - super(job); + super(job, "louvain", workers); this.g = this.graph().traversal(); this.sourceLabel = sourceLabel; this.sourceCLabel = sourceCLabel; @@ -197,13 +202,13 @@ private Edge newCommunityEdge(Vertex source, Vertex target, float weight) { private void insertNewCommunity(int pass, Id cid, float cweight, int kin, List members, - Map cedges) { + Map cedges) { // create backend vertex if it's the first time Id vid = this.cache.genId(pass, cid); Vertex node = this.newCommunityNode(vid, cweight, kin, members); commitIfNeeded(); // update backend vertex edges - for (Map.Entry e : cedges.entrySet()) { + for (Map.Entry e : cedges.entrySet()) { float weight = e.getValue().floatValue(); vid = this.cache.genId(pass, e.getKey()); Vertex targetV = this.makeCommunityNode(vid); @@ -280,7 +285,7 @@ private float cweightOfVertex(Vertex v) { return 1f; } - private Id cidOfVertex(Vertex v, List nbs) { + private Community communityOfVertex(Vertex v, List nbs) { Id vid = (Id) v.id(); Community c = this.cache.vertex2Community(vid); // ensure source vertex exist in cache @@ -288,7 +293,7 @@ private Id cidOfVertex(Vertex v, List nbs) { c = this.wrapCommunity(v, nbs); assert c != null; } - return c != null ? c.cid : vid; + return c; } // 1: wrap original vertex as community node @@ -305,7 +310,7 @@ private Community wrapCommunity(Vertex v, List nbs) { comm = new Community(vid); comm.add(this, v, nbs); - this.cache.vertex2Community(vid, comm); + comm = this.cache.vertex2CommunityIfAbsent(vid, comm); return comm; } @@ -331,31 +336,93 @@ private Collection> nbCommunities( return comms.values(); } - private void moveCommunity(Vertex v, List nbs, Community newC) { + private void doMoveCommunity(Vertex v, List nbs, Community newC) { Id vid = (Id) v.id(); - // remove v from old community - Community oldC = this.cache.vertex2Community(vid); + // update community of v (return the origin one) + Community oldC = this.cache.vertex2Community(vid, newC); + + // remove v from old community. should synchronized (vid)? 
if (oldC != null) { oldC.remove(this, v, nbs); } // add v to new community newC.add(this, v, nbs); - LOG.debug("Move {} to comm: {}", v, newC); - - // update community of v - this.cache.vertex2Community(vid, newC); + LOG.debug("Move {} to community: {}", v, newC); + } + + private boolean moveCommunity(Vertex v, int pass) { + // move vertex to neighbor community if needed + List nbs = neighbors((Id) v.id()); + Community c = communityOfVertex(v, nbs); + double ki = kinOfVertex(v) + weightOfVertex(v, nbs); + // update community of v if △Q changed + double maxDeltaQ = 0d; + Community bestComm = null; + // list all neighbor communities of v + for (Pair nbc : nbCommunities(pass, nbs)) { + // △Q = (Ki_in - Ki * Etot / m) / 2m + Community otherC = nbc.getLeft(); + if (otherC.size() >= MAX_COMM_SIZE) { + LOG.info("Skip community {} for {} due to its size >= {}", + otherC.cid, v, MAX_COMM_SIZE); + continue; + } + // weight between c and otherC + double kiin = nbc.getRight().floatValue(); + // weight of otherC + double tot = otherC.kin() + otherC.kout(); + if (c.equals(otherC)) { + assert c == otherC; + if (tot < ki) { + /* + * expect tot >= ki, but multi-threads may + * cause tot < ki due to concurrent update otherC + */ + LOG.warn("Changing vertex: {}(ki={}, kiin={}, pass={}), otherC: {}", + v, ki, kiin, pass, otherC); + } + tot -= ki; + // assert tot >= 0d : otherC + ", tot=" + tot + ", ki=" + ki; + // expect tot >= 0, but may be something wrong? + if (tot < 0d) { + tot = 0d; + } + } + double deltaQ = kiin - ki * tot / this.m; + if (deltaQ > maxDeltaQ) { + // TODO: cache otherC for neighbors the same community + maxDeltaQ = deltaQ; + bestComm = otherC; + } + } + if (maxDeltaQ > 0d && !c.equals(bestComm)) { + // move v to the community of maxQ neighbor + doMoveCommunity(v, nbs, bestComm); + return true; + } + return false; } private double moveCommunities(int pass) { + LOG.info("Detect community for pass {}", pass); Iterator vertices = this.sourceVertices(pass); // shuffle //r = r.order().by(shuffle); long total = 0L; - long moved = 0L; + AtomicLong moved = new AtomicLong(0L); + + Consumers consumers = new Consumers<>(this.executor, v -> { + // called by multi-threads + if (this.moveCommunity(v, pass)) { + moved.incrementAndGet(); + } + }); + consumers.start(); + while (vertices.hasNext()) { this.updateProgress(++this.progress); Vertex v = vertices.next(); @@ -364,106 +431,93 @@ private double moveCommunities(int pass) { continue; } total++; - List nbs = neighbors((Id) v.id()); - Id cid = cidOfVertex(v, nbs); - double ki = kinOfVertex(v) + weightOfVertex(v, nbs); - // update community of v if △Q changed - double maxDeltaQ = 0d; - Community bestComm = null; - // list all neighbor communities of v - for (Pair nbc : nbCommunities(pass, nbs)) { - // △Q = (Ki_in - Ki * Etot / m) / 2m - Community otherC = nbc.getLeft(); - if (otherC.size() >= MAX_COMM_SIZE) { - LOG.info("Skip community {} for {} due to its size >= {}", - otherC.cid, v, MAX_COMM_SIZE); - continue; - } - // weight between c and otherC - double kiin = nbc.getRight().floatValue(); - // weight of otherC - double tot = otherC.kin() + otherC.kout(); - if (cid.equals(otherC.cid)) { - tot -= ki; - assert tot >= 0d; - // expect tot >= 0, but may be something wrong? 
- if (tot < 0d) { - tot = 0d; - } - } - double deltaQ = kiin - ki * tot / this.m; - if (deltaQ > maxDeltaQ) { - // TODO: cache otherC for neighbors the same community - maxDeltaQ = deltaQ; - bestComm = otherC; - } - } - if (maxDeltaQ > 0d && !cid.equals(bestComm.cid)) { - moved++; - // move v to the community of maxQ neighbor - moveCommunity(v, nbs, bestComm); - } + consumers.provide(v); } - // maybe always shocking when set degree limit - return total == 0L ? 0d : (double) moved / total; + consumers.await(); + + // maybe always shocking when set degree limited + return total == 0L ? 0d : moved.doubleValue() / total; } private void mergeCommunities(int pass) { + LOG.info("Merge community for pass {}", pass); // merge each community as a vertex Collection>> comms = this.cache.communities(); - assert this.allMembersExist(comms, pass -1); + assert this.allMembersExist(comms, pass - 1); this.cache.resetVertexWeight(); + + Consumers>> consumers = new Consumers<>( + this.executor, pair -> { + // called by multi-threads + this.mergeCommunity(pass, pair.getLeft(), pair.getRight()); + }, () -> { + // commit when finished + this.graph().tx().commit(); + }); + consumers.start(); + for (Pair> pair : comms) { - Community c = pair.getKey(); + Community c = pair.getLeft(); if (c.empty()) { continue; } - // update kin and edges between communities - int kin = c.kin(); - Set vertices = pair.getRight(); - assert !vertices.isEmpty(); - assert vertices.size() == c.size(); - List members = new ArrayList<>(vertices.size()); - Map cedges = new HashMap<>(vertices.size()); - for (Id v : vertices) { - this.updateProgress(++this.progress); - members.add(v.toString()); - // collect edges between this community and other communities - List neighbors = neighbors(v); - for (Edge edge : neighbors) { - Vertex otherV = ((HugeEdge) edge).otherVertex(); - if (vertices.contains(otherV.id())) { - // inner edges of this community, will be calc twice - // due to both e-in and e-out are in vertices, - kin += weightOfEdge(edge); - continue; - } - assert this.cache.vertex2Community(otherV.id()) != null; - Id otherCid = cidOfVertex(otherV, null); - if (otherCid.compareTo(c.cid) < 0) { - // skip if it should be collected by otherC - continue; - } - if (!cedges.containsKey(otherCid)) { - cedges.put(otherCid, new MutableInt(0)); - } - // update edge weight - cedges.get(otherCid).add(weightOfEdge(edge)); - } - } - // insert new community vertex and edges into storage - this.insertNewCommunity(pass, c.cid, c.weight(), kin, members, cedges); + this.progress += pair.getRight().size(); + this.updateProgress(this.progress); + //this.mergeCommunity(pass, pair.getLeft(), pair.getRight()); + consumers.provide(pair); } + consumers.await(); + this.graph().tx().commit(); + assert this.allMembersExist(pass); + // reset communities this.cache.reset(); } + private void mergeCommunity(int pass, Community c, Set cvertices) { + // update kin and edges between communities + int kin = c.kin(); + int membersSize = cvertices.size(); + assert !cvertices.isEmpty(); + assert membersSize == c.size(); + List members = new ArrayList<>(membersSize); + Map cedges = new HashMap<>(membersSize); + for (Id v : cvertices) { + members.add(v.toString()); + // collect edges between this community and other communities + List neighbors = neighbors(v); + for (Edge edge : neighbors) { + Vertex otherV = ((HugeEdge) edge).otherVertex(); + if (cvertices.contains(otherV.id())) { + // inner edges of this community, will be calc twice + // due to both e-in and e-out are in 
vertices, + kin += weightOfEdge(edge); + continue; + } + assert this.cache.vertex2Community(otherV.id()) != null; + Id otherCid = communityOfVertex(otherV, null).cid; + if (otherCid.compareTo(c.cid) < 0) { + // skip if it should be collected by otherC + continue; + } + if (!cedges.containsKey(otherCid)) { + cedges.putIfAbsent(otherCid, new MutableFloat(0f)); + } + // update edge weight + cedges.get(otherCid).add(weightOfEdge(edge)); + } + } + + // insert new community vertex and edges into storage + this.insertNewCommunity(pass, c.cid, c.weight(), kin, members, cedges); + } + private boolean allMembersExist(Collection>> comms, - int pass) { - String lastLabel = labelOfPassN(pass); - GraphTraversal t = pass < 0 ? this.g.V().id() : + int lastPass) { + String lastLabel = labelOfPassN(lastPass); + GraphTraversal t = lastPass < 0 ? this.g.V().id() : this.g.V().hasLabel(lastLabel).id(); Set all = this.execute(t, t::toSet); for (Pair> comm : comms) { @@ -475,6 +529,24 @@ private boolean allMembersExist(Collection>> comms, return all.isEmpty(); } + private boolean allMembersExist(int pass) { + String label = labelOfPassN(pass); + int lastPass = pass - 1; + Number expected; + if (lastPass < 0) { + expected = tryNext(this.g.V().count()).longValue() - + tryNext(this.g.V().hasLabel(label).count()).longValue(); + } else { + expected = tryNext(this.g.V().hasLabel(labelOfPassN(lastPass)) + .values(C_WEIGHT).sum()); + } + Number actual = tryNext(this.g.V().hasLabel(label) + .values(C_WEIGHT).sum()); + boolean allExist = actual.floatValue() == expected.floatValue(); + assert allExist : actual + "!=" + expected; + return allExist; + } + public Object louvain(int maxTimes, int stableTimes, double precision) { assert maxTimes > 0; assert precision > 0d; @@ -678,28 +750,39 @@ public float weight() { return this.weight; } - public void add(LouvainTraverser t, Vertex v, List nbs) { + public synchronized void add(LouvainTraverser t, + Vertex v, List nbs) { this.size++; this.weight += t.cweightOfVertex(v); this.kin += t.kinOfVertex(v); this.kout += t.weightOfVertex(v, nbs); } - public void remove(LouvainTraverser t, Vertex v, List nbs) { + public synchronized void remove(LouvainTraverser t, + Vertex v, List nbs) { this.size--; this.weight -= t.cweightOfVertex(v); this.kin -= t.kinOfVertex(v); this.kout -= t.weightOfVertex(v, nbs); } - public int kin() { + public synchronized int kin() { return this.kin; } - public float kout() { + public synchronized float kout() { return this.kout; } + @Override + public boolean equals(Object object) { + if (!(object instanceof Community)) { + return false; + } + Community other = (Community) object; + return Objects.equals(this.cid, other.cid); + } + @Override public String toString() { return String.format("[%s](size=%s weight=%s kin=%s kout=%s)", @@ -715,9 +798,9 @@ private static class Cache { private final Map genIds; public Cache() { - this.vertexWeightCache = new HashMap<>(); - this.vertex2Community = new HashMap<>(); - this.genIds = new HashMap<>(); + this.vertexWeightCache = new ConcurrentHashMap<>(); + this.vertex2Community = new ConcurrentHashMap<>(); + this.genIds = new ConcurrentHashMap<>(); } public Community vertex2Community(Object id) { @@ -725,8 +808,16 @@ public Community vertex2Community(Object id) { return this.vertex2Community.get(id); } - public void vertex2Community(Id id, Community c) { - this.vertex2Community.put(id, c); + public Community vertex2Community(Id id, Community c) { + return this.vertex2Community.put(id, c); + } + + public Community 
vertex2CommunityIfAbsent(Id id, Community c) { + Community old = this.vertex2Community.putIfAbsent(id, c); + if (old != null) { + c = old; + } + return c; } public Float vertexWeight(Id id) { @@ -748,11 +839,13 @@ public void resetVertexWeight() { } public Id genId(int pass, Id cid) { - if (!this.genIds.containsKey(cid)) { - this.genIds.put(cid, this.genIds.size() + 1); + synchronized (this.genIds) { + if (!this.genIds.containsKey(cid)) { + this.genIds.putIfAbsent(cid, this.genIds.size() + 1); + } + String id = pass + "~" + this.genIds.get(cid); + return IdGenerator.of(id); } - String id = pass + "~" + this.genIds.get(cid); - return IdGenerator.of(id); } @SuppressWarnings("unused") diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java index abcdb938cc..e98ed8480c 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java @@ -25,6 +25,7 @@ import java.util.List; import java.util.Map; import java.util.Random; +import java.util.concurrent.atomic.AtomicLong; import org.apache.commons.lang3.mutable.MutableInt; import org.apache.tinkerpop.gremlin.process.traversal.Scope; @@ -54,14 +55,15 @@ public void checkParameters(Map parameters) { direction(parameters); degree(parameters); showCommunity(parameters); + workers(parameters); } @Override public Object call(Job job, Map parameters) { - Traverser traverser = new Traverser(job); + int workers = workers(parameters); String showComm = showCommunity(parameters); - try { + try (Traverser traverser = new Traverser(job, workers)) { if (showComm != null) { return traverser.showCommunity(showComm); } else { @@ -84,8 +86,8 @@ public static class Traverser extends AlgoTraverser { private final Random R = new Random(); - public Traverser(Job job) { - super(job); + public Traverser(Job job, int workers) { + super(job, "lpa", workers); } public Object lpa(String sourceLabel, String edgeLabel, @@ -113,7 +115,7 @@ public Object lpa(String sourceLabel, String edgeLabel, } } - long communities = this.graph().traversal().V().limit(10000L) + long communities = this.graph().traversal().V().limit(100000L) .groupCount().by(C_LABEL) .count(Scope.local).next(); return ImmutableMap.of("iteration_times", times, @@ -143,26 +145,30 @@ private double detectCommunities(String sourceLabel, String edgeLabel, // shuffle: r.order().by(shuffle) // r = this.graph().traversal().V().sample((int) LIMIT); - // all vertices - Iterator vertices = this.vertices(sourceLabel, LIMIT); - - long total = 0L; - long changed = 0L; - while (vertices.hasNext()) { - this.updateProgress(++this.progress); - total++; - Vertex v = vertices.next(); - String label = this.voteCommunityOfVertex(v, edgeLabel, - dir, degree); - // update label if it's absent or changed - if (!labelPresent(v) || !label.equals(this.labelOfVertex(v))) { - changed++; - this.updateLabelOfVertex(v, label); + // detect all vertices + AtomicLong changed = new AtomicLong(0L); + long total = this.traverse(sourceLabel, null, v -> { + // called by multi-threads + if (this.voteCommunityAndUpdate(v, edgeLabel, dir, degree)) { + changed.incrementAndGet(); } - } + }); + this.graph().tx().commit(); - return total == 0L ? 0d : (double) changed / total; + return total == 0L ? 
0d : changed.doubleValue() / total; + } + + private boolean voteCommunityAndUpdate(Vertex vertex, String edgeLabel, + Directions dir, long degree) { + String label = this.voteCommunityOfVertex(vertex, edgeLabel, + dir, degree); + // update label if it's absent or changed + if (!labelPresent(vertex) || !label.equals(labelOfVertex(vertex))) { + this.updateLabelOfVertex(vertex, label); + return true; + } + return false; } private String voteCommunityOfVertex(Vertex vertex, String edgeLabel, diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/TriangleCountAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/TriangleCountAlgorithm.java index c47d19f655..34a1a5658a 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/TriangleCountAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/TriangleCountAlgorithm.java @@ -48,9 +48,10 @@ public void checkParameters(Map parameters) { @Override public Object call(Job job, Map parameters) { - Traverser traverser = new Traverser(job); - return traverser.triangleCount(direction(parameters), - degree(parameters)); + try (Traverser traverser = new Traverser(job)) { + return traverser.triangleCount(direction(parameters), + degree(parameters)); + } } protected static class Traverser extends AlgoTraverser { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java index b3ce1ec992..855b7c8177 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java @@ -19,17 +19,11 @@ package com.baidu.hugegraph.job.algorithm.path; -import java.util.Iterator; import java.util.Map; -import org.apache.tinkerpop.gremlin.structure.Vertex; - -import com.baidu.hugegraph.HugeGraph; import com.baidu.hugegraph.backend.id.Id; -import com.baidu.hugegraph.backend.query.Query; import com.baidu.hugegraph.job.Job; import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm; -import com.baidu.hugegraph.structure.HugeVertex; import com.baidu.hugegraph.traversal.algorithm.SubGraphTraverser; import com.baidu.hugegraph.type.define.Directions; import com.baidu.hugegraph.util.JsonUtil; @@ -56,41 +50,42 @@ public void checkParameters(Map parameters) { sourceCLabel(parameters); direction(parameters); edgeLabel(parameters); + workers(parameters); } @Override public Object call(Job job, Map parameters) { - Traverser traverser = new Traverser(job); - return traverser.rings(sourceLabel(parameters), - sourceCLabel(parameters), - direction(parameters), - edgeLabel(parameters), - depth(parameters), - degree(parameters), - capacity(parameters), - limit(parameters)); + int workers = workers(parameters); + try (Traverser traverser = new Traverser(job, workers)) { + return traverser.rings(sourceLabel(parameters), + sourceCLabel(parameters), + direction(parameters), + edgeLabel(parameters), + depth(parameters), + degree(parameters), + capacity(parameters), + limit(parameters)); + } } public static class Traverser extends AlgoTraverser { - public Traverser(Job job) { - super(job); + public Traverser(Job job, int workers) { + super(job, "ring", workers); } public Object rings(String sourceLabel, String sourceCLabel, Directions dir, String label, int depth, long degree, long capacity, long limit) { - 
HugeGraph graph = this.graph(); - Iterator vertices = this.vertices(sourceLabel, sourceCLabel, - Query.NO_LIMIT); JsonMap ringsJson = new JsonMap(); ringsJson.startObject(); ringsJson.appendKey("rings"); ringsJson.startList(); - SubGraphTraverser traverser = new SubGraphTraverser(graph); - while(vertices.hasNext()) { - this.updateProgress(++this.progress); - Id source = ((HugeVertex) vertices.next()).id(); + + SubGraphTraverser traverser = new SubGraphTraverser(this.graph()); + + this.traverse(sourceLabel, sourceCLabel, v -> { + Id source = (Id) v.id(); PathSet rings = traverser.rings(source, dir, label, depth, true, degree, capacity, limit); for (Path ring : rings) { @@ -101,10 +96,13 @@ public Object rings(String sourceLabel, String sourceCLabel, } } if (source.equals(min)) { - ringsJson.appendRaw(JsonUtil.toJson(ring.vertices())); + String ringJson = JsonUtil.toJson(ring.vertices()); + synchronized (ringsJson) { + ringsJson.appendRaw(ringJson); + } } } - } + }); ringsJson.endList(); ringsJson.endObject(); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java index 26ee4e25e5..463526c5d4 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java @@ -19,14 +19,11 @@ package com.baidu.hugegraph.job.algorithm.similarity; -import java.util.Iterator; import java.util.Map; -import org.apache.tinkerpop.gremlin.structure.Vertex; import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils; import com.baidu.hugegraph.HugeGraph; -import com.baidu.hugegraph.backend.query.Query; import com.baidu.hugegraph.job.Job; import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm; import com.baidu.hugegraph.schema.EdgeLabel; @@ -72,24 +69,27 @@ public void checkParameters(Map parameters) { sourceCLabel(parameters); direction(parameters); edgeLabel(parameters); + workers(parameters); } @Override public Object call(Job job, Map parameters) { - Traverser traverser = new Traverser(job); - return traverser.fusiformSimilars(sourceLabel(parameters), - sourceCLabel(parameters), - direction(parameters), - edgeLabel(parameters), - minNeighbors(parameters), - alpha(parameters), - minSimilars(parameters), - top(parameters), - groupProperty(parameters), - minGroups(parameters), - degree(parameters), - capacity(parameters), - limit(parameters)); + int workers = workers(parameters); + try (Traverser traverser = new Traverser(job, workers)) { + return traverser.fusiformSimilars(sourceLabel(parameters), + sourceCLabel(parameters), + direction(parameters), + edgeLabel(parameters), + minNeighbors(parameters), + alpha(parameters), + minSimilars(parameters), + top(parameters), + groupProperty(parameters), + minGroups(parameters), + degree(parameters), + capacity(parameters), + limit(parameters)); + } } protected static int minNeighbors(Map parameters) { @@ -128,8 +128,8 @@ protected static int minGroups(Map parameters) { protected static class Traverser extends AlgoTraverser { - public Traverser(Job job) { - super(job); + public Traverser(Job job, int workers) { + super(job, "fusiform", workers); } public Object fusiformSimilars(String sourceLabel, String sourceCLabel, @@ -138,31 +138,30 @@ public Object fusiformSimilars(String sourceLabel, String sourceCLabel, int minSimilars, long 
topSimilars, String groupProperty, int minGroups, long degree, long capacity, long limit) { - Iterator vertices = this.vertices(sourceLabel, sourceCLabel, - Query.NO_LIMIT); HugeGraph graph = this.graph(); EdgeLabel edgeLabel = label == null ? null : graph.edgeLabel(label); - FusiformSimilarityTraverser traverser = new - FusiformSimilarityTraverser(graph); + FusiformSimilarityTraverser traverser = + new FusiformSimilarityTraverser(graph); JsonMap similarsJson = new JsonMap(); similarsJson.startObject(); - while(vertices.hasNext()) { - this.updateProgress(++this.progress); - Vertex vertex = vertices.next(); + + this.traverse(sourceLabel, sourceCLabel, v -> { SimilarsMap similars = traverser.fusiformSimilarity( - IteratorUtils.of(vertex), direction, + IteratorUtils.of(v), direction, edgeLabel, minNeighbors, alpha, minSimilars, (int) topSimilars, groupProperty, minGroups, degree, capacity, limit, true); if (similars.isEmpty()) { - continue; + return; } String result = JsonUtil.toJson(similars.toMap()); result = result.substring(1, result.length() - 1); - similarsJson.appendRaw(result); - } + synchronized (similarsJson) { + similarsJson.appendRaw(result); + } + }); similarsJson.endObject(); return similarsJson.asJson(); From 5974fd1d44afe6a89f89ba962888401db53e3a98 Mon Sep 17 00:00:00 2001 From: Jermy Li Date: Wed, 20 May 2020 16:54:44 +0800 Subject: [PATCH 09/33] fix parallel LPA not commit by threads (#16) Change-Id: I8eaaeccaa0b23048a9d0f597080186c069b9799b --- .../baidu/hugegraph/job/algorithm/AbstractAlgorithm.java | 7 ++++++- .../com/baidu/hugegraph/job/algorithm/Consumers.java | 9 +++++++-- .../baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java | 5 +++-- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java index c36a704056..5bb3426ff9 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java @@ -325,11 +325,16 @@ public void close() { protected long traverse(String sourceLabel, String sourceCLabel, Consumer consumer) { + return this.traverse(sourceLabel, sourceCLabel, consumer, null); + } + + protected long traverse(String sourceLabel, String sourceCLabel, + Consumer consumer, Runnable done) { Iterator vertices = this.vertices(sourceLabel, sourceLabel, Query.NO_LIMIT); Consumers consumers = new Consumers<>(this.executor, - consumer); + consumer, done); consumers.start(); long total = 0L; diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Consumers.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Consumers.java index 795e0d7127..526419c46c 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Consumers.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Consumers.java @@ -122,7 +122,7 @@ private boolean consume() { public void provide(V v) { if (this.executor == null) { - // do job directly + // do job directly if without thread pool this.consumer.accept(v); } else { try { @@ -135,7 +135,12 @@ public void provide(V v) { public void await() { this.ending = true; - if (this.executor != null) { + if (this.executor == null) { + // call done() directly if without thread pool + if (this.done != null) { + this.done.run(); + } + } else { try { this.latch.await(); } catch (InterruptedException 
e) { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java index e98ed8480c..59c420ae74 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java @@ -152,10 +152,11 @@ private double detectCommunities(String sourceLabel, String edgeLabel, if (this.voteCommunityAndUpdate(v, edgeLabel, dir, degree)) { changed.incrementAndGet(); } + }, () -> { + // commit when finished + this.graph().tx().commit(); }); - this.graph().tx().commit(); - return total == 0L ? 0d : changed.doubleValue() / total; } From 2ee51c4117502bbb618cb94a8031a7f16bfda2b5 Mon Sep 17 00:00:00 2001 From: zhoney Date: Tue, 26 May 2020 20:52:17 +0800 Subject: [PATCH 10/33] Support ring detect count (#17) * support rings count for rings-detect ap algo * fix direction can't be null or Both for triangle and clusterCoeffcient * fix source_clabel error for rings-detect, fusiform and kcore * change limit meaning for fusiform similarity Change-Id: I5a4ccbf46b47c13ea2eafe7f3e335dc6aea4a83c --- .../job/algorithm/AbstractAlgorithm.java | 24 +++++++++++-- .../comm/ClusterCoeffcientAlgorithm.java | 2 +- .../comm/TriangleCountAlgorithm.java | 2 +- .../algorithm/path/RingsDetectAlgorithm.java | 36 ++++++++++++++++--- .../FusiformSimilarityAlgorithm.java | 14 ++++++-- 5 files changed, 66 insertions(+), 12 deletions(-) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java index 5bb3426ff9..d3311772a6 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java @@ -19,7 +19,6 @@ package com.baidu.hugegraph.job.algorithm; - import java.util.HashMap; import java.util.Iterator; import java.util.Map; @@ -131,6 +130,17 @@ protected static Directions direction(Map parameters) { return parseDirection(direction); } + protected static Directions directionOutIn(Map parameters) { + E.checkArgument(parameters.containsKey(KEY_DIRECTION), + "The direction must be set"); + Object direction = parameter(parameters, KEY_DIRECTION); + Directions direct = parseDirection(direction); + E.checkArgument(direct == Directions.OUT || direct == Directions.IN, + "The direction for triangle_count must be " + + "either OUT or IN, but got: %s", direct); + return direct; + } + protected static double alpha(Map parameters) { if (!parameters.containsKey(KEY_ALPHA)) { return DEFAULT_ALPHA; @@ -330,8 +340,16 @@ protected long traverse(String sourceLabel, String sourceCLabel, protected long traverse(String sourceLabel, String sourceCLabel, Consumer consumer, Runnable done) { - Iterator vertices = this.vertices(sourceLabel, sourceLabel, - Query.NO_LIMIT); + return this.traverse(sourceLabel, sourceCLabel, consumer, done, + NO_LIMIT); + } + + protected long traverse(String sourceLabel, String sourceCLabel, + Consumer consumer, Runnable done, + long limit) { + long actualLimit = limit == NO_LIMIT ? 
Query.NO_LIMIT : limit; + Iterator vertices = this.vertices(sourceLabel, sourceCLabel, + actualLimit); Consumers consumers = new Consumers<>(this.executor, consumer, done); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java index 52f0b07a79..0e5760e24d 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java @@ -34,7 +34,7 @@ public String name() { @Override public void checkParameters(Map parameters) { - direction(parameters); + directionOutIn(parameters); degree(parameters); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/TriangleCountAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/TriangleCountAlgorithm.java index 34a1a5658a..6128c6b17f 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/TriangleCountAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/TriangleCountAlgorithm.java @@ -42,7 +42,7 @@ public String name() { @Override public void checkParameters(Map parameters) { - direction(parameters); + directionOutIn(parameters); degree(parameters); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java index 855b7c8177..c7c0c677ab 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java @@ -20,6 +20,7 @@ package com.baidu.hugegraph.job.algorithm.path; import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; import com.baidu.hugegraph.backend.id.Id; import com.baidu.hugegraph.job.Job; @@ -30,6 +31,8 @@ public class RingsDetectAlgorithm extends AbstractAlgorithm { + public static final String KEY_COUNT_ONLY = "count_only"; + @Override public String name() { return "rings_detect"; @@ -50,6 +53,7 @@ public void checkParameters(Map parameters) { sourceCLabel(parameters); direction(parameters); edgeLabel(parameters); + countOnly(parameters); workers(parameters); } @@ -64,10 +68,18 @@ public Object call(Job job, Map parameters) { depth(parameters), degree(parameters), capacity(parameters), - limit(parameters)); + limit(parameters), + countOnly(parameters)); } } + public boolean countOnly(Map parameters) { + if (!parameters.containsKey(KEY_COUNT_ONLY)) { + return false; + } + return parameterBoolean(parameters, KEY_COUNT_ONLY); + } + public static class Traverser extends AlgoTraverser { public Traverser(Job job, int workers) { @@ -76,13 +88,19 @@ public Traverser(Job job, int workers) { public Object rings(String sourceLabel, String sourceCLabel, Directions dir, String label, int depth, - long degree, long capacity, long limit) { + long degree, long capacity, long limit, + boolean countOnly) { JsonMap ringsJson = new JsonMap(); ringsJson.startObject(); - ringsJson.appendKey("rings"); - ringsJson.startList(); + if (countOnly) { + ringsJson.appendKey("rings_count"); + } else { + ringsJson.appendKey("rings"); + ringsJson.startList(); + } SubGraphTraverser traverser = new SubGraphTraverser(this.graph()); + AtomicInteger count = new 
AtomicInteger(0); this.traverse(sourceLabel, sourceCLabel, v -> { Id source = (Id) v.id(); @@ -96,6 +114,10 @@ public Object rings(String sourceLabel, String sourceCLabel, } } if (source.equals(min)) { + if (countOnly) { + count.incrementAndGet(); + continue; + } String ringJson = JsonUtil.toJson(ring.vertices()); synchronized (ringsJson) { ringsJson.appendRaw(ringJson); @@ -103,7 +125,11 @@ public Object rings(String sourceLabel, String sourceCLabel, } } }); - ringsJson.endList(); + if (countOnly) { + ringsJson.append(count.get()); + } else { + ringsJson.endList(); + } ringsJson.endObject(); return ringsJson.asJson(); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java index 463526c5d4..6734306235 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java @@ -43,6 +43,7 @@ public class FusiformSimilarityAlgorithm extends AbstractAlgorithm { public static final int DEFAULT_MIN_NEIGHBORS = 10; public static final int DEFAULT_MIN_SIMILARS = 6; public static final int DEFAULT_MIN_GROUPS = 1; + public static final long DEFAULT_LIMIT = -1L; @Override public String name() { @@ -126,6 +127,15 @@ protected static int minGroups(Map parameters) { return minGroups; } + protected static long limit(Map parameters) { + if (!parameters.containsKey(KEY_LIMIT)) { + return DEFAULT_LIMIT; + } + long limit = parameterLong(parameters, KEY_LIMIT); + HugeTraverser.checkLimit(limit); + return limit; + } + protected static class Traverser extends AlgoTraverser { public Traverser(Job job, int workers) { @@ -152,7 +162,7 @@ public Object fusiformSimilars(String sourceLabel, String sourceCLabel, edgeLabel, minNeighbors, alpha, minSimilars, (int) topSimilars, groupProperty, minGroups, degree, - capacity, limit, true); + capacity, NO_LIMIT, true); if (similars.isEmpty()) { return; } @@ -161,7 +171,7 @@ public Object fusiformSimilars(String sourceLabel, String sourceCLabel, synchronized (similarsJson) { similarsJson.appendRaw(result); } - }); + }, null, limit); similarsJson.endObject(); return similarsJson.asJson(); From ff88f116c44e84556377b51038e2af0afa535460 Mon Sep 17 00:00:00 2001 From: Jermy Li Date: Wed, 3 Jun 2020 22:15:33 +0800 Subject: [PATCH 11/33] fix algorithm can't stop caused by threads exception (#18) Change-Id: I546682b19fb5a84a65dc2a3bd77d62b386722bfa --- .../job/algorithm/AbstractAlgorithm.java | 24 +++++---- .../hugegraph/job/algorithm/Consumers.java | 36 ++++++++++--- .../job/algorithm/comm/LouvainTraverser.java | 51 +++++++++++-------- 3 files changed, 71 insertions(+), 40 deletions(-) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java index d3311772a6..327905ad38 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java @@ -354,18 +354,20 @@ protected long traverse(String sourceLabel, String sourceCLabel, Consumers consumers = new Consumers<>(this.executor, consumer, done); consumers.start(); - - long total = 0L; - while (vertices.hasNext()) { - this.updateProgress(++this.progress); - 
total++; - Vertex v = vertices.next(); - consumers.provide(v); + try { + long total = 0L; + while (vertices.hasNext()) { + this.updateProgress(++this.progress); + total++; + Vertex v = vertices.next(); + consumers.provide(v); + } + return total; + } catch (Throwable e) { + throw Consumers.wrapException(e); + } finally { + consumers.await(); } - - consumers.await(); - - return total; } protected Iterator vertices() { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Consumers.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Consumers.java index 526419c46c..f5d01d9803 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Consumers.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Consumers.java @@ -29,6 +29,7 @@ import org.slf4j.Logger; +import com.baidu.hugegraph.HugeException; import com.baidu.hugegraph.util.ExecutorUtil; import com.baidu.hugegraph.util.Log; @@ -50,6 +51,7 @@ public class Consumers { private final BlockingQueue queue; private volatile boolean ending = false; + private volatile Throwable exception = null; public Consumers(ExecutorService executor, Consumer consumer) { this(executor, consumer, null); @@ -72,6 +74,8 @@ public Consumers(ExecutorService executor, } public void start() { + this.ending = false; + this.exception = null; if (this.executor == null) { return; } @@ -81,11 +85,12 @@ public void start() { this.executor.submit(() -> { try { this.run(); - if (this.done != null) { - this.done.run(); - } + this.done(); } catch (Throwable e) { + // Only the first exception of one thread can be stored + this.exception = e; LOG.error("Error when running task", e); + this.done(); } finally { this.latch.countDown(); } @@ -120,10 +125,19 @@ private boolean consume() { return true; } - public void provide(V v) { + private void done() { + if (this.done != null) { + this.done.run(); + } + } + + public void provide(V v) throws Throwable { if (this.executor == null) { + assert this.exception == null; // do job directly if without thread pool this.consumer.accept(v); + } else if (this.exception != null) { + throw this.exception; } else { try { this.queue.put(v); @@ -137,14 +151,12 @@ public void await() { this.ending = true; if (this.executor == null) { // call done() directly if without thread pool - if (this.done != null) { - this.done.run(); - } + this.done(); } else { try { this.latch.await(); } catch (InterruptedException e) { - LOG.warn("Interrupted", e);; + LOG.warn("Interrupted", e); } } } @@ -163,4 +175,12 @@ public static ExecutorService newThreadPool(String prefix, int workers) { return ExecutorUtil.newFixedThreadPool(workers, name); } } + + public static RuntimeException wrapException(Throwable e) { + if (e instanceof RuntimeException) { + throw (RuntimeException) e; + } + throw new HugeException("Error when running task: %s", + HugeException.rootCause(e).getMessage(), e); + } } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java index e55152b10b..5d7548aa3b 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java @@ -421,21 +421,25 @@ private double moveCommunities(int pass) { moved.incrementAndGet(); } }); - consumers.start(); - while (vertices.hasNext()) { - this.updateProgress(++this.progress); - 
Vertex v = vertices.next(); - if (needSkipVertex(pass, v)) { - // skip the old intermediate data, or filter clabel - continue; + consumers.start(); + try { + while (vertices.hasNext()) { + this.updateProgress(++this.progress); + Vertex v = vertices.next(); + if (needSkipVertex(pass, v)) { + // skip the old intermediate data, or filter clabel + continue; + } + total++; + consumers.provide(v); } - total++; - consumers.provide(v); + } catch (Throwable e) { + throw Consumers.wrapException(e); + } finally { + consumers.await(); } - consumers.await(); - // maybe always shocking when set degree limited return total == 0L ? 0d : moved.doubleValue() / total; } @@ -455,19 +459,24 @@ private void mergeCommunities(int pass) { // commit when finished this.graph().tx().commit(); }); - consumers.start(); - for (Pair> pair : comms) { - Community c = pair.getLeft(); - if (c.empty()) { - continue; + consumers.start(); + try { + for (Pair> pair : comms) { + Community c = pair.getLeft(); + if (c.empty()) { + continue; + } + this.progress += pair.getRight().size(); + this.updateProgress(this.progress); + //this.mergeCommunity(pass, pair.getLeft(), pair.getRight()); + consumers.provide(pair); } - this.progress += pair.getRight().size(); - this.updateProgress(this.progress); - //this.mergeCommunity(pass, pair.getLeft(), pair.getRight()); - consumers.provide(pair); + } catch (Throwable e) { + throw Consumers.wrapException(e); + } finally { + consumers.await(); } - consumers.await(); this.graph().tx().commit(); assert this.allMembersExist(pass); From d4be504b4cf70b27322f9af5114241b4b41856d1 Mon Sep 17 00:00:00 2001 From: houzhizhen Date: Wed, 3 Jun 2020 23:56:42 -0500 Subject: [PATCH 12/33] add page rank algorithm (#19) --- .../job/algorithm/AbstractAlgorithm.java | 1 + .../job/algorithm/AlgorithmPool.java | 3 + .../job/algorithm/rank/PageRankAlgorithm.java | 297 ++++++++++++++++++ 3 files changed, 301 insertions(+) create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/rank/PageRankAlgorithm.java diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java index 327905ad38..aab04db394 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java @@ -100,6 +100,7 @@ public abstract class AbstractAlgorithm implements Algorithm { public static final double DEFAULT_ALPHA = 0.5D; public static final String C_LABEL = "c_label"; + public static final String R_RANK = "r_rank"; @Override public void checkParameters(Map parameters) { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java index f1e35fd581..5725be61ff 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java @@ -32,6 +32,7 @@ import com.baidu.hugegraph.job.algorithm.comm.LpaAlgorithm; import com.baidu.hugegraph.job.algorithm.comm.TriangleCountAlgorithm; import com.baidu.hugegraph.job.algorithm.path.RingsDetectAlgorithm; +import com.baidu.hugegraph.job.algorithm.rank.PageRankAlgorithm; import com.baidu.hugegraph.job.algorithm.similarity.FusiformSimilarityAlgorithm; public class AlgorithmPool { @@ -55,6 +56,8 @@ public class AlgorithmPool 
{ INSTANCE.register(new FusiformSimilarityAlgorithm()); INSTANCE.register(new RingsDetectAlgorithm()); INSTANCE.register(new KCoreAlgorithm()); + + INSTANCE.register(new PageRankAlgorithm()); } private final Map algorithms; diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/rank/PageRankAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/rank/PageRankAlgorithm.java new file mode 100644 index 0000000000..2773a9db00 --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/rank/PageRankAlgorithm.java @@ -0,0 +1,297 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +package com.baidu.hugegraph.job.algorithm.rank; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.apache.tinkerpop.gremlin.structure.Edge; +import org.apache.tinkerpop.gremlin.structure.Vertex; +import org.slf4j.Logger; + +import com.baidu.hugegraph.backend.id.Id; +import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.algorithm.comm.AbstractCommAlgorithm; +import com.baidu.hugegraph.schema.SchemaManager; +import com.baidu.hugegraph.schema.VertexLabel; +import com.baidu.hugegraph.structure.HugeEdge; +import com.baidu.hugegraph.structure.HugeVertex; +import com.baidu.hugegraph.type.define.Directions; +import com.baidu.hugegraph.util.Log; +import com.google.common.collect.ImmutableMap; + +public class PageRankAlgorithm extends AbstractCommAlgorithm { + + protected static final Logger LOG = Log.logger(PageRankAlgorithm.class); + + @Override + public String name() { + return "page_rank"; + } + + @Override + public String category() { + return CATEGORY_RANK; + } + + @Override + public void checkParameters(Map parameters) { + alpha(parameters); + times(parameters); + precision(parameters); + degree(parameters); + directionOutIn(parameters); + } + + @Override + public Object call(Job job, Map parameters) { + Traverser traverser = new Traverser(job); + try { + return traverser.pageRank(alpha(parameters), + times(parameters), + precision(parameters), + degree(parameters), + directionOutIn(parameters)); + } catch (Throwable e) { + job.graph().tx().rollback(); + throw e; + } + } + + protected static class Traverser extends AlgoTraverser { + + // DoublePair.left is rank computed by previous step, DoublePair.right + // is rank computed by current step. 
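In other words, left holds the rank from the previous round and right accumulates the rank of the current round. For intuition, the standalone sketch below runs that same two-slot update on a tiny in-memory graph. It is illustrative only and uses the textbook damping form with assumed names; it is not this traverser's exact parameterization (in the code here, alpha plays the teleport role, roughly 1 - damping in the sketch, and un-emitted rank mass is compensated separately). The toy graph has no dangling vertices, so no compensation step is needed.

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class PageRankSketch {

    public static void main(String[] args) {
        // adjacency lists of a 3-vertex toy graph
        Map<String, List<String>> out = Map.of(
                "A", List.of("B", "C"),
                "B", List.of("C"),
                "C", List.of("A"));
        double damping = 0.85;
        int n = out.size();

        // "left" slot: ranks from the previous round, initialized to 1/N
        Map<String, Double> prev = new HashMap<>();
        out.keySet().forEach(v -> prev.put(v, 1.0 / n));

        for (int i = 0; i < 20; i++) {
            // "right" slot: ranks being accumulated in the current round
            Map<String, Double> next = new HashMap<>();
            out.keySet().forEach(v -> next.put(v, (1 - damping) / n));
            // each vertex splits its previous rank among its out-neighbors
            out.forEach((v, targets) -> {
                double share = prev.get(v) / targets.size();
                targets.forEach(t ->
                        next.merge(t, damping * share, Double::sum));
            });
            prev.putAll(next);
        }
        prev.forEach((v, r) -> System.out.printf("%s -> %.4f%n", v, r));
    }
}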
+ private Map vertexRankMap; + + public Traverser(Job job) { + super(job); + this.vertexRankMap = new HashMap<>(); + } + + public Object pageRank(double alpha, + int maxTimes, + double precision, + long degree, + Directions direction) { + this.initSchema(); + + int times; + double changedRank = 0.0; + long numOfVertices = this.initRankMap(); + + for (times = 0; times <= maxTimes; times++) { + Id currentSourceVertexId = null; + // the edges are ordered by ownerVertex + Iterator edges = this.edges(direction); + List adjacentVertices = new ArrayList<>(); + + while (edges.hasNext()) { + HugeEdge edge = (HugeEdge) edges.next(); + Id sourceVertexId = edge.ownerVertex().id(); + Id targetVertexId = edge.otherVertex().id(); + + if (currentSourceVertexId == null) { + currentSourceVertexId = sourceVertexId; + adjacentVertices.add(targetVertexId); + } else if (currentSourceVertexId.equals(sourceVertexId)) { + if (adjacentVertices.size() < degree) { + adjacentVertices.add(targetVertexId); + } + } else { + this.contributeToAdjacentVertices(currentSourceVertexId, + adjacentVertices); + adjacentVertices = new ArrayList<>(); + currentSourceVertexId = sourceVertexId; + adjacentVertices.add(targetVertexId); + } + } + + // deal with the last vertex + this.contributeToAdjacentVertices(currentSourceVertexId, + adjacentVertices); + + double sumRank = this.computeRank(alpha, numOfVertices); + + double compensatedRank = 1.0 - sumRank; + changedRank = + this.compensateRank(compensatedRank / numOfVertices); + LOG.debug("PageRank execution times:{}, changedRank:{} ", + times, changedRank); + if (changedRank < precision) { + break; + } + } + this.writeBackRankValue(); + + return ImmutableMap.of("alpha", alpha, + "iteration_times", times, + "last_changed_rank", changedRank, + "times", maxTimes); + } + + private long initRankMap() { + long vertexCount = 0; + Iterator vertices = this.vertices(); + while (vertices.hasNext()) { + Id vertex = ((HugeVertex) vertices.next()).id(); + DoublePair pair = new DoublePair(0.0, 0.0); + this.vertexRankMap.put(vertex, pair); + vertexCount++; + } + + double initValue = 1.0 / vertexCount; + for (DoublePair pair : this.vertexRankMap.values()) { + pair.left(initValue); + } + return vertexCount; + } + + private void contributeToAdjacentVertices(Id sourceVertexId, + List adjacentVertices) { + if (adjacentVertices.size() == 0) { + return; + } + DoublePair sourcePair = this.vertexRankMap.get(sourceVertexId); + // sourceVertexId not in vertices. 
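Backing up to the main loop above: it relies on this.edges(direction) returning edges grouped by their owner vertex, so each source's adjacency list can be collected on the fly without materializing the whole edge set. A minimal sketch of that streaming group-by (illustrative only, with plain (source, target) string pairs standing in for HugeEdge and a hypothetical handle() callback):

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class GroupByOwnerSketch {

    public static void main(String[] args) {
        // edges already sorted by source vertex
        Iterator<String[]> edges = List.of(
                new String[]{"A", "B"}, new String[]{"A", "C"},
                new String[]{"B", "C"}, new String[]{"C", "A"}).iterator();
        long degreeLimit = 100L;

        String currentSource = null;
        List<String> adjacent = new ArrayList<>();
        while (edges.hasNext()) {
            String[] edge = edges.next();
            String source = edge[0];
            String target = edge[1];
            if (currentSource == null || currentSource.equals(source)) {
                currentSource = source;
                if (adjacent.size() < degreeLimit) {
                    adjacent.add(target);          // same owner, keep collecting
                }
            } else {
                handle(currentSource, adjacent);   // a full group is ready
                adjacent = new ArrayList<>();
                currentSource = source;
                adjacent.add(target);
            }
        }
        if (currentSource != null) {
            handle(currentSource, adjacent);       // flush the last group
        }
    }

    private static void handle(String source, List<String> targets) {
        System.out.println(source + " -> " + targets);
    }
}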
+ if (sourcePair == null) { + LOG.info("source vertex {} not exists.", sourceVertexId); + return; + } + double distributedValue = sourcePair.left() / + adjacentVertices.size(); + for (Id targetId : adjacentVertices) { + DoublePair targetPair = this.vertexRankMap.get(targetId); + if (targetPair == null) { + // targetId not in vertices + LOG.warn("target vertex {} not exists.", targetId); + continue; + } + targetPair.addRight(distributedValue); + } + } + + private double compensateRank(double compensatedRank) { + double changedRank = 0.0; + for (DoublePair pair : this.vertexRankMap.values()) { + double previousRank = pair.left(); + double currentRank = pair.right() + compensatedRank; + changedRank += Math.abs(previousRank - currentRank); + pair.left(currentRank); + pair.right(0.0); + } + return changedRank; + } + + private void initSchema() { + SchemaManager schema = this.graph().schema(); + schema.propertyKey(R_RANK).asDouble().ifNotExist().create(); + for (VertexLabel vl : schema.getVertexLabels()) { + schema.vertexLabel(vl.name()).properties(R_RANK) + .nullableKeys(R_RANK).append(); + } + } + + private void writeBackRankValue() { + for (Map.Entry entry : + this.vertexRankMap.entrySet()) { + Id vertexId = entry.getKey(); + Iterator vertices = this.graph().vertices(vertexId); + if (vertices.hasNext()) { + Vertex vertex = vertices.next(); + vertex.property(R_RANK, entry.getValue().left()); + this.commitIfNeeded(); + } + } + this.graph().tx().commit(); + } + + private double computeRank(double alpha, long numOfVertices) { + double oneMinusAlpha = 1.0 - alpha; + double sum = 0.0; + double baseRank = alpha / numOfVertices; + for (DoublePair pair : this.vertexRankMap.values()) { + double rankValue = baseRank + pair.right() * oneMinusAlpha; + pair.right(rankValue); + sum += rankValue; + } + return sum; + } + } + + public static class DoublePair { + + private double left; + private double right; + + public DoublePair(double left, double right) { + this.left = left; + this.right = right; + } + + public void addLeft(double value) { + this.left += value; + } + + public void addRight(double value) { + this.right += value; + } + + public double left() { + return left; + } + + public void left(double value) { + this.left = value; + } + + public double right() { + return right; + } + + public void right(double value) { + this.right = value; + } + + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("left:").append(left) + .append(", right: ").append(right); + return sb.toString(); + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof DoublePair)) { + return false; + } + DoublePair other = (DoublePair) obj; + return this.left == other.left && right == other.right; + } + + @Override + public int hashCode() { + return Double.hashCode(left) ^ Double.hashCode(right); + } + } +} From 1449cac5b33e7b78b9f1a1e636f4217b1db54ac7 Mon Sep 17 00:00:00 2001 From: houzhizhen Date: Fri, 5 Jun 2020 06:33:01 -0500 Subject: [PATCH 13/33] add weak connected component analysis (#21) --- .../job/algorithm/AbstractAlgorithm.java | 8 + .../job/algorithm/AlgorithmPool.java | 2 + .../comm/WeakConnectedComponent.java | 221 ++++++++++++++++++ 3 files changed, 231 insertions(+) create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/WeakConnectedComponent.java diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java index 
aab04db394..f94912902b 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java @@ -415,6 +415,14 @@ protected Iterator vertices(Iterator ids) { }); } + protected Vertex vertex(Object id) { + Iterator iter = this.graph().vertices(id); + if (!iter.hasNext()) { + return null; + } + return iter.next(); + } + protected Iterator filter(Iterator vertices, String key, Object value) { return new FilterIterator<>(vertices, vertex -> { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java index 5725be61ff..7a8e4291a9 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java @@ -31,6 +31,7 @@ import com.baidu.hugegraph.job.algorithm.comm.LouvainAlgorithm; import com.baidu.hugegraph.job.algorithm.comm.LpaAlgorithm; import com.baidu.hugegraph.job.algorithm.comm.TriangleCountAlgorithm; +import com.baidu.hugegraph.job.algorithm.comm.WeakConnectedComponent; import com.baidu.hugegraph.job.algorithm.path.RingsDetectAlgorithm; import com.baidu.hugegraph.job.algorithm.rank.PageRankAlgorithm; import com.baidu.hugegraph.job.algorithm.similarity.FusiformSimilarityAlgorithm; @@ -52,6 +53,7 @@ public class AlgorithmPool { INSTANCE.register(new ClusterCoeffcientAlgorithm()); INSTANCE.register(new LpaAlgorithm()); INSTANCE.register(new LouvainAlgorithm()); + INSTANCE.register(new WeakConnectedComponent()); INSTANCE.register(new FusiformSimilarityAlgorithm()); INSTANCE.register(new RingsDetectAlgorithm()); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/WeakConnectedComponent.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/WeakConnectedComponent.java new file mode 100644 index 0000000000..99dee85cdc --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/WeakConnectedComponent.java @@ -0,0 +1,221 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ + +package com.baidu.hugegraph.job.algorithm.comm; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.apache.tinkerpop.gremlin.structure.Edge; +import org.apache.tinkerpop.gremlin.structure.Vertex; +import org.slf4j.Logger; + +import com.baidu.hugegraph.backend.id.Id; +import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.schema.SchemaManager; +import com.baidu.hugegraph.schema.VertexLabel; +import com.baidu.hugegraph.structure.HugeEdge; +import com.baidu.hugegraph.structure.HugeVertex; +import com.baidu.hugegraph.type.define.Directions; +import com.baidu.hugegraph.util.Log; +import com.google.common.collect.ImmutableMap; + +public class WeakConnectedComponent extends AbstractCommAlgorithm { + + protected static final Logger LOG = Log.logger(WeakConnectedComponent.class); + + @Override + public String name() { + return "weak_connected_component"; + } + + @Override + public void checkParameters(Map parameters) { + times(parameters); + directionOutIn(parameters); + degree(parameters); + } + + @Override + public Object call(Job job, Map parameters) { + try (Traverser traverser = new Traverser(job)) { + return traverser.connectedComponent(times(parameters), + directionOutIn(parameters), + degree(parameters)); + } catch (Throwable e) { + job.graph().tx().rollback(); + throw e; + } + } + + protected static class Traverser extends AlgoTraverser { + + private final Map vertexComponentMap = new HashMap<>(); + + public Traverser(Job job) { + super(job); + } + + public Object connectedComponent(int maxTimes, + Directions direction, + long degree) { + this.initSchema(); + this.initVertexComponentMap(); + int times; + + for (times = 0; times < maxTimes; times++) { + long changeCount = 0; + Id currentSourceVertexId = null; + // the edges are ordered by ownerVertex + Iterator edges = this.edges(direction); + List adjacentVertices = new ArrayList<>(); + + while (edges.hasNext()) { + HugeEdge edge = (HugeEdge) edges.next(); + Id sourceVertexId = edge.ownerVertex().id(); + Id targetVertexId = edge.otherVertex().id(); + + if (currentSourceVertexId == null) { + currentSourceVertexId = sourceVertexId; + adjacentVertices.add(targetVertexId); + } else if (currentSourceVertexId.equals(sourceVertexId)) { + if (adjacentVertices.size() < degree) { + adjacentVertices.add(targetVertexId); + } + } else { + changeCount += this.findAndSetMinComponent( + currentSourceVertexId, + adjacentVertices); + adjacentVertices = new ArrayList<>(); + currentSourceVertexId = sourceVertexId; + adjacentVertices.add(targetVertexId); + } + } + changeCount += this.findAndSetMinComponent( + currentSourceVertexId, + adjacentVertices); + LOG.debug("iterationTimes:{}, changeCount:{}", + times, changeCount); + + if (changeCount == 0L) { + break; + } + } + + int compCount = writeBackValue(); + return ImmutableMap.of("components", compCount, + "iteration_times", times, + "times", maxTimes); + } + + private void initSchema() { + String cl = C_LABEL; + SchemaManager schema = this.graph().schema(); + schema.propertyKey(cl).asText().ifNotExist().create(); + for (VertexLabel vl : schema.getVertexLabels()) { + schema.vertexLabel(vl.name()).properties(cl) + .nullableKeys(cl).append(); + } + } + + private void initVertexComponentMap() { + Iterator vertices = this.vertices(); + while (vertices.hasNext()) { + Id id = ((HugeVertex) vertices.next()).id(); + this.vertexComponentMap.put(id, id); + } + } + + /** + * process for a vertex and its 
adjacentVertices + * @param sourceVertexId the source vertex + * @param adjacentVertices the adjacent vertices attached to source + * vertex + * @return the count of vertex that changed Component + */ + private long findAndSetMinComponent(Id sourceVertexId, + List adjacentVertices) { + if (!this.vertexComponentMap.containsKey(sourceVertexId)) { + return 0L; + } + Id min = this.findMinComponent(sourceVertexId, adjacentVertices); + return this.updateComponentIfNeeded(min, + sourceVertexId, + adjacentVertices); + } + + private Id findMinComponent(Id sourceVertexId, + List adjacentVertices) { + Id min = this.vertexComponentMap.get(sourceVertexId); + for (Id vertex : adjacentVertices) { + Id comp = this.vertexComponentMap.get(vertex); + if (comp != null && comp.compareTo(min) < 0) { + min = comp; + } + } + return min; + } + + private long updateComponentIfNeeded(Id min, + Id sourceVertexId, + List adjacentVertices) { + long changedCount = 0; + Id comp = this.vertexComponentMap.get(sourceVertexId); + if (comp.compareTo(min) > 0) { + this.vertexComponentMap.put(sourceVertexId, min); + changedCount++; + } + for (Id vertex : adjacentVertices) { + comp = this.vertexComponentMap.get(vertex); + if (comp != null && comp.compareTo(min) > 0) { + this.vertexComponentMap.put(vertex, min); + changedCount++; + } + } + return changedCount; + } + + /** + * @return the count of components + */ + private int writeBackValue() { + Map componentIndexMap = new HashMap<>(); + int index = 0; + for (Map.Entry entry : this.vertexComponentMap.entrySet()) { + Id comp = entry.getValue(); + Integer componentIndex = componentIndexMap.get(comp); + if (componentIndex == null) { + componentIndex = index; + componentIndexMap.put(comp, componentIndex); + index++; + } + Vertex vertex = this.vertex(entry.getKey()); + if (vertex != null) { + vertex.property(C_LABEL, String.valueOf(componentIndex)); + this.commitIfNeeded(); + } + } + this.graph().tx().commit(); + return index; + } + } +} From 12b6bdf32154368497b9d9c3f00816cf43c5685c Mon Sep 17 00:00:00 2001 From: Jermy Li Date: Tue, 9 Jun 2020 15:32:26 +0800 Subject: [PATCH 14/33] add subgraph_stat algorithm (#23) Change-Id: If4faafc1a952186c17314c599f611f7ab6132b7b --- .../job/algorithm/AbstractAlgorithm.java | 19 +- .../job/algorithm/AlgorithmPool.java | 2 + .../job/algorithm/SubgraphStatAlgorithm.java | 207 ++++++++++++++++++ .../algorithm/cent/AbstractCentAlgorithm.java | 2 +- .../cent/BetweenessCentralityAlgorithm.java | 2 +- .../cent/ClosenessCentralityAlgorithm.java | 2 +- .../cent/EigenvectorCentralityAlgorithm.java | 2 +- .../comm/ClusterCoeffcientAlgorithm.java | 2 +- .../job/algorithm/comm/KCoreAlgorithm.java | 2 +- .../job/algorithm/comm/LpaAlgorithm.java | 7 +- .../comm/TriangleCountAlgorithm.java | 2 +- .../algorithm/path/RingsDetectAlgorithm.java | 2 +- .../job/algorithm/rank/PageRankAlgorithm.java | 49 +++-- .../FusiformSimilarityAlgorithm.java | 2 +- 14 files changed, 256 insertions(+), 46 deletions(-) create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java index f94912902b..67d508a470 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java @@ -132,14 +132,15 @@ protected static Directions 
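Looking back at the WeakConnectedComponent traverser as a whole: it is essentially min-label propagation run over repeated edge scans until nothing changes, with the labels finally written back as c_label values. A standalone toy version (illustrative only, plain strings instead of vertex Ids and a single undirected edge list instead of directed scans) makes the fixpoint loop easier to see:

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class MinLabelPropagationSketch {

    public static void main(String[] args) {
        // undirected edges of a toy graph with two components: {A,B,C} and {D,E}
        List<String[]> edges = List.of(
                new String[]{"A", "B"}, new String[]{"B", "C"},
                new String[]{"D", "E"});

        // every vertex starts in its own component, identified by its own id
        Map<String, String> component = new HashMap<>();
        for (String[] e : edges) {
            component.put(e[0], e[0]);
            component.put(e[1], e[1]);
        }

        long changed;
        do {
            changed = 0;
            for (String[] e : edges) {
                String a = component.get(e[0]);
                String b = component.get(e[1]);
                String min = a.compareTo(b) <= 0 ? a : b;
                // both endpoints adopt the smaller component id
                if (!min.equals(a)) {
                    component.put(e[0], min);
                    changed++;
                }
                if (!min.equals(b)) {
                    component.put(e[1], min);
                    changed++;
                }
            }
        } while (changed > 0);

        // three vertices end up labeled "A", two end up labeled "D"
        System.out.println(component);
    }
}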
direction(Map parameters) { } protected static Directions directionOutIn(Map parameters) { - E.checkArgument(parameters.containsKey(KEY_DIRECTION), - "The direction must be set"); + if (!parameters.containsKey(KEY_DIRECTION)) { + return Directions.OUT; + } Object direction = parameter(parameters, KEY_DIRECTION); - Directions direct = parseDirection(direction); - E.checkArgument(direct == Directions.OUT || direct == Directions.IN, - "The direction for triangle_count must be " + - "either OUT or IN, but got: %s", direct); - return direct; + Directions dir = parseDirection(direction); + E.checkArgument(dir == Directions.OUT || dir == Directions.IN, + "The value of %s must be either OUT or IN, but got: %s", + KEY_DIRECTION, dir); + return dir; } protected static double alpha(Map parameters) { @@ -153,8 +154,8 @@ protected static double alpha(Map parameters) { public static void checkAlpha(double alpha) { E.checkArgument(alpha > 0 && alpha <= 1.0, - "The alpha of must be in range (0, 1], but got %s", - alpha); + "The value of %s must be in range (0, 1], but got %s", + KEY_ALPHA, alpha); } protected static long top(Map parameters) { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java index 7a8e4291a9..9a84120772 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java @@ -60,6 +60,8 @@ public class AlgorithmPool { INSTANCE.register(new KCoreAlgorithm()); INSTANCE.register(new PageRankAlgorithm()); + + INSTANCE.register(new SubgraphStatAlgorithm()); } private final Map algorithms; diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java new file mode 100644 index 0000000000..385fbbb5b5 --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java @@ -0,0 +1,207 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ + +package com.baidu.hugegraph.job.algorithm; + +import java.util.Iterator; +import java.util.Map; + +import org.apache.commons.configuration.PropertiesConfiguration; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource; +import org.apache.tinkerpop.gremlin.structure.Vertex; + +import com.baidu.hugegraph.HugeGraph; +import com.baidu.hugegraph.backend.id.Id; +import com.baidu.hugegraph.config.CoreOptions; +import com.baidu.hugegraph.config.HugeConfig; +import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.algorithm.cent.BetweenessCentralityAlgorithm; +import com.baidu.hugegraph.job.algorithm.cent.ClosenessCentralityAlgorithm; +import com.baidu.hugegraph.job.algorithm.cent.DegreeCentralityAlgorithm; +import com.baidu.hugegraph.job.algorithm.cent.EigenvectorCentralityAlgorithm; +import com.baidu.hugegraph.job.algorithm.comm.ClusterCoeffcientAlgorithm; +import com.baidu.hugegraph.job.algorithm.path.RingsDetectAlgorithm; +import com.baidu.hugegraph.job.algorithm.rank.PageRankAlgorithm; +import com.baidu.hugegraph.task.HugeTask; +import com.baidu.hugegraph.traversal.optimize.HugeScriptTraversal; +import com.baidu.hugegraph.util.E; +import com.baidu.hugegraph.util.InsertionOrderUtil; +import com.google.common.collect.ImmutableMap; + +public class SubgraphStatAlgorithm extends AbstractAlgorithm { + + public static final String KEY_SUBGRAPH = "subgraph"; + public static final String KEY_COPY_SCHEMA = "copy_schema"; + + @Override + public String name() { + return "subgraph_stat"; + } + + @Override + public String category() { + return CATEGORY_AGGR; + } + + @Override + public void checkParameters(Map parameters) { + subgraph(parameters); + } + + @Override + public Object call(Job job, Map parameters) { + HugeGraph graph = this.createTempGraph(job); + try (Traverser traverser = new Traverser(job)) { + this.initGraph(job.graph(), graph, subgraph(parameters), + copySchema(parameters)); + Job tmpJob = new TempJob<>(graph, job, job.task()); + return traverser.subgraphStat(tmpJob); + } finally { + graph.truncateBackend(); + // FIXME: task thread can't call close() here (will hang) + graph.closeTx(); + } + } + + private HugeGraph createTempGraph(Job job) { + Id id = job.task().id(); + PropertiesConfiguration config = new PropertiesConfiguration(); + config.setProperty(CoreOptions.BACKEND.name(), "memory"); + config.setProperty(CoreOptions.STORE.name(), "tmp_" + id); + config.setDelimiterParsingDisabled(true); + return new HugeGraph(new HugeConfig(config)); + } + + @SuppressWarnings("resource") + private void initGraph(HugeGraph parent, HugeGraph graph, + String script, boolean copySchema) { + if (copySchema) { + graph.schema().copyFrom(parent.schema()); + } + new HugeScriptTraversal<>(graph.traversal(), "gremlin-groovy", + script, ImmutableMap.of(), + ImmutableMap.of()).iterate(); + graph.tx().commit(); + } + + protected static String subgraph(Map parameters) { + Object subgraph = parameters.get(KEY_SUBGRAPH); + E.checkArgument(subgraph != null, + "Must pass parameter '%s'", KEY_SUBGRAPH); + E.checkArgument(subgraph instanceof String, + "Invalid parameter '%s', expect a String, but got %s", + KEY_SUBGRAPH, subgraph.getClass().getSimpleName()); + return (String) subgraph; + } + + protected static boolean copySchema(Map parameters) { + if (!parameters.containsKey(KEY_COPY_SCHEMA)) { + return false; + } + return parameterBoolean(parameters, KEY_COPY_SCHEMA); + } + + private static class Traverser extends AlgoTraverser { + + private static Map PARAMS = 
ImmutableMap.of( + "depth", 10L, + "degree", -1L, + "sample", -1L, + "workers", 0); + + public Traverser(Job job) { + super(job); + } + + public Object subgraphStat(Job job) { + Map results = InsertionOrderUtil.newMap(); + + GraphTraversalSource g = job.graph().traversal(); + results.put("vertices_count", g.V().count().next()); + results.put("edges_count", g.E().count().next()); + + Algorithm algo = new DegreeCentralityAlgorithm(); + Map parameters = ImmutableMap.copyOf(PARAMS); + results.put("degrees", algo.call(job, parameters)); + + algo = new BetweenessCentralityAlgorithm(); + results.put("betweeness", algo.call(job, parameters)); + + algo = new EigenvectorCentralityAlgorithm(); + results.put("eigenvectors", algo.call(job, parameters)); + + algo = new ClosenessCentralityAlgorithm(); + results.put("closeness", algo.call(job, parameters)); + + results.put("page_ranks", pageRanks(job)); + + algo = new ClusterCoeffcientAlgorithm(); + results.put("cluster_coeffcient", algo.call(job, parameters)); + + algo = new RingsDetectAlgorithm(); + parameters = ImmutableMap.builder() + .putAll(PARAMS) + .put("count_only", true) + .build(); + results.put("rings", algo.call(job, parameters)); + + return results; + } + + private Map pageRanks(Job job) { + PageRankAlgorithm algo = new PageRankAlgorithm(); + algo.call(job, ImmutableMap.of("alpha", 0.15)); + + // Collect page ranks + Map ranks = InsertionOrderUtil.newMap(); + Iterator vertices = job.graph().vertices(); + while (vertices.hasNext()) { + Vertex vertex = vertices.next(); + ranks.put(vertex.id(), vertex.value(R_RANK)); + } + return ranks; + } + } + + private static class TempJob extends Job { + + private final Job parent; + + public TempJob(HugeGraph graph, Job job, HugeTask task) { + this.scheduler(graph.taskScheduler()); + this.task(task); + this.parent = job; + } + + @Override + public String type() { + return "temp"; + } + + @Override + public V execute() throws Exception { + return null; + } + + @Override + public void updateProgress(int progress) { + this.parent.updateProgress(progress); + } + } +} diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java index fb0c33d503..22372ad097 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java @@ -56,7 +56,7 @@ public void checkParameters(Map parameters) { top(parameters); } - public static class Traverser extends AlgoTraverser { + protected static class Traverser extends AlgoTraverser { public Traverser(Job job) { super(job); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java index 4f3415a15a..3702991632 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java @@ -71,7 +71,7 @@ public Object betweenessCentrality(Directions direction, String sourceCLabel, long topN) { assert depth > 0; - assert degree > 0L; + assert degree > 0L || degree == NO_LIMIT; assert topN >= 0L; GraphTraversal t = constructSource(sourceLabel, diff --git 
a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java index 6719eee1e6..56d61504a7 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java @@ -80,7 +80,7 @@ public Object closenessCentrality(Directions direction, String sourceCLabel, long topN) { assert depth > 0; - assert degree > 0L; + assert degree > 0L || degree == NO_LIMIT; assert topN >= 0L; GraphTraversal t = constructSource(sourceLabel, diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java index 39cec64cde..ec065fa07a 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java @@ -73,7 +73,7 @@ public Object eigenvectorCentrality(Directions direction, String sourceCLabel, long topN) { assert depth > 0; - assert degree > 0L; + assert degree > 0L || degree == NO_LIMIT; assert topN >= 0L; // TODO: support parameters: Directions dir, String label diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java index 0e5760e24d..7ac30cd2da 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java @@ -41,7 +41,7 @@ public void checkParameters(Map parameters) { @Override public Object call(Job job, Map parameters) { try (Traverser traverser = new Traverser(job)) { - return traverser.clusterCoeffcient(direction(parameters), + return traverser.clusterCoeffcient(directionOutIn(parameters), degree(parameters)); } } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java index 6a721258a3..923c1a2a33 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java @@ -98,7 +98,7 @@ protected static boolean merged(Map parameters) { return parameterBoolean(parameters, KEY_MERGED); } - public static class Traverser extends AlgoTraverser { + private static class Traverser extends AlgoTraverser { public Traverser(Job job, int workers) { super(job, "kcore", workers); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java index 59c420ae74..e15665cfc4 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java @@ -80,7 +80,7 @@ public Object call(Job job, Map parameters) { } } - public static class Traverser extends AlgoTraverser { + private static class Traverser extends AlgoTraverser 
{ private static final long LIMIT = MAX_QUERY_LIMIT; @@ -242,11 +242,10 @@ private String labelOfVertex(Vertex vertex) { private String labelOfVertex(Id vid) { // TODO: cache with Map - Iterator iter = this.graph().vertices(vid); - if (!iter.hasNext()) { + Vertex vertex = this.vertex(vid); + if (vertex == null) { return null; } - Vertex vertex = iter.next(); return this.labelOfVertex(vertex); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/TriangleCountAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/TriangleCountAlgorithm.java index 6128c6b17f..0adb4707c5 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/TriangleCountAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/TriangleCountAlgorithm.java @@ -49,7 +49,7 @@ public void checkParameters(Map parameters) { @Override public Object call(Job job, Map parameters) { try (Traverser traverser = new Traverser(job)) { - return traverser.triangleCount(direction(parameters), + return traverser.triangleCount(directionOutIn(parameters), degree(parameters)); } } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java index c7c0c677ab..3d5bd163e3 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java @@ -80,7 +80,7 @@ public boolean countOnly(Map parameters) { return parameterBoolean(parameters, KEY_COUNT_ONLY); } - public static class Traverser extends AlgoTraverser { + private static class Traverser extends AlgoTraverser { public Traverser(Job job, int workers) { super(job, "ring", workers); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/rank/PageRankAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/rank/PageRankAlgorithm.java index 2773a9db00..9f51bf7b60 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/rank/PageRankAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/rank/PageRankAlgorithm.java @@ -65,8 +65,7 @@ public void checkParameters(Map parameters) { @Override public Object call(Job job, Map parameters) { - Traverser traverser = new Traverser(job); - try { + try (Traverser traverser = new Traverser(job)) { return traverser.pageRank(alpha(parameters), times(parameters), precision(parameters), @@ -78,11 +77,13 @@ public Object call(Job job, Map parameters) { } } - protected static class Traverser extends AlgoTraverser { + private static class Traverser extends AlgoTraverser { - // DoublePair.left is rank computed by previous step, DoublePair.right - // is rank computed by current step. - private Map vertexRankMap; + /* + * DoublePair.left is rank computed by previous step, + * DoublePair.right is rank computed by current step. 
+ */ + private final Map vertexRankMap; public Traverser(Job job) { super(job); @@ -100,7 +101,7 @@ public Object pageRank(double alpha, double changedRank = 0.0; long numOfVertices = this.initRankMap(); - for (times = 0; times <= maxTimes; times++) { + for (times = 0; times < maxTimes; times++) { Id currentSourceVertexId = null; // the edges are ordered by ownerVertex Iterator edges = this.edges(direction); @@ -134,15 +135,16 @@ public Object pageRank(double alpha, double sumRank = this.computeRank(alpha, numOfVertices); double compensatedRank = 1.0 - sumRank; - changedRank = - this.compensateRank(compensatedRank / numOfVertices); + changedRank = this.compensateRank(compensatedRank / + numOfVertices); LOG.debug("PageRank execution times:{}, changedRank:{} ", times, changedRank); if (changedRank < precision) { break; } } - this.writeBackRankValue(); + + this.writeBackRankValues(); return ImmutableMap.of("alpha", alpha, "iteration_times", times, @@ -212,14 +214,12 @@ private void initSchema() { } } - private void writeBackRankValue() { - for (Map.Entry entry : - this.vertexRankMap.entrySet()) { - Id vertexId = entry.getKey(); - Iterator vertices = this.graph().vertices(vertexId); - if (vertices.hasNext()) { - Vertex vertex = vertices.next(); - vertex.property(R_RANK, entry.getValue().left()); + private void writeBackRankValues() { + for (Map.Entry e : this.vertexRankMap.entrySet()) { + Id vertexId = e.getKey(); + Vertex vertex = this.vertex(vertexId); + if (vertex != null) { + vertex.property(R_RANK, e.getValue().left()); this.commitIfNeeded(); } } @@ -258,7 +258,7 @@ public void addRight(double value) { } public double left() { - return left; + return this.left; } public void left(double value) { @@ -266,17 +266,18 @@ public void left(double value) { } public double right() { - return right; + return this.right; } public void right(double value) { this.right = value; } + @Override public String toString() { StringBuilder sb = new StringBuilder(); - sb.append("left:").append(left) - .append(", right: ").append(right); + sb.append("left:").append(this.left) + .append(", right: ").append(this.right); return sb.toString(); } @@ -286,12 +287,12 @@ public boolean equals(Object obj) { return false; } DoublePair other = (DoublePair) obj; - return this.left == other.left && right == other.right; + return this.left == other.left && this.right == other.right; } @Override public int hashCode() { - return Double.hashCode(left) ^ Double.hashCode(right); + return Double.hashCode(this.left) ^ Double.hashCode(this.right); } } } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java index 6734306235..82294f48bb 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java @@ -136,7 +136,7 @@ protected static long limit(Map parameters) { return limit; } - protected static class Traverser extends AlgoTraverser { + private static class Traverser extends AlgoTraverser { public Traverser(Job job, int workers) { super(job, "fusiform", workers); From abb5dddb7bb7eb4631820b6ba737446819b2731e Mon Sep 17 00:00:00 2001 From: Jermy Li Date: Wed, 24 Jun 2020 17:58:34 +0800 Subject: [PATCH 15/33] add BOTH direction support for triangle_count/cluster_coeffcient (#24) MIME-Version: 
1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit change log: 1. add BOTH direction support for triangle_count/cluster_coeffcientith. 2. fix extra triangle count with multi edges between tow adjacent vertices. 3. set default value of direction to BOTH for degree_centrality and cluster_coeffcient . 4. add workers for triangle_count and cluster_coeffcient. 5. fix closeness: multi shortest paths cause results illogical. 6. rename rings_detect to rings, rename limit to each_limit which means limit number of rings of each source vertex, and don't do dedup if passed each_limit > 0. 7. unify top for 4 centrality algos: sorted results when top = -1, unsorted results when top = 0. 8. fusiform: rename top to top_similars (expected >= 0). 9. fusiform/rings: add limit param which means max number of results, and remove capacity param and hardcode to 100000000. Change-Id: I9ddf8553e6d86b99adbff8b972890d69d623fa1a --- .../job/algorithm/AbstractAlgorithm.java | 52 ++++++-- .../hugegraph/job/algorithm/Consumers.java | 25 +++- .../job/algorithm/SubgraphStatAlgorithm.java | 5 + .../algorithm/cent/AbstractCentAlgorithm.java | 30 ++++- .../cent/BetweenessCentralityAlgorithm.java | 14 +- .../cent/ClosenessCentralityAlgorithm.java | 12 +- .../cent/DegreeCentralityAlgorithm.java | 22 ++-- .../cent/EigenvectorCentralityAlgorithm.java | 11 +- .../comm/ClusterCoeffcientAlgorithm.java | 27 +++- .../job/algorithm/comm/KCoreAlgorithm.java | 6 +- .../job/algorithm/comm/LouvainAlgorithm.java | 4 +- .../job/algorithm/comm/LouvainTraverser.java | 6 +- .../job/algorithm/comm/LpaAlgorithm.java | 6 +- .../comm/TriangleCountAlgorithm.java | 121 +++++++++++++++--- .../algorithm/path/RingsDetectAlgorithm.java | 55 ++++---- .../FusiformSimilarityAlgorithm.java | 50 +++++--- 16 files changed, 315 insertions(+), 131 deletions(-) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java index 67d508a470..e30ce1e165 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java @@ -36,6 +36,7 @@ import org.apache.tinkerpop.gremlin.structure.Property; import org.apache.tinkerpop.gremlin.structure.Transaction; import org.apache.tinkerpop.gremlin.structure.Vertex; +import org.apache.tinkerpop.gremlin.structure.util.CloseableIterator; import com.baidu.hugegraph.HugeException; import com.baidu.hugegraph.backend.id.Id; @@ -44,6 +45,7 @@ import com.baidu.hugegraph.iterator.FilterIterator; import com.baidu.hugegraph.iterator.FlatMapperIterator; import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.algorithm.Consumers.StopExecution; import com.baidu.hugegraph.testutil.Whitebox; import com.baidu.hugegraph.traversal.algorithm.HugeTraverser; import com.baidu.hugegraph.type.HugeType; @@ -61,6 +63,7 @@ public abstract class AbstractAlgorithm implements Algorithm { public static final long MAX_RESULT_SIZE = 100L * Bytes.MB; public static final long MAX_QUERY_LIMIT = 100000000L; // about 100GB + public static final long MAX_CAPACITY = MAX_QUERY_LIMIT; public static final int BATCH = 500; public static final String CATEGORY_AGGR = "aggregate"; @@ -87,11 +90,13 @@ public abstract class AbstractAlgorithm implements Algorithm { public static final String KEY_CLEAR = "clear"; public static final String KEY_CAPACITY = "capacity"; public static final String 
KEY_LIMIT = "limit"; + public static final String KEY_EACH_LIMIT = "each_limit"; public static final String KEY_ALPHA = "alpha"; public static final String KEY_WORKERS = "workers"; public static final long DEFAULT_CAPACITY = 10000000L; public static final long DEFAULT_LIMIT = 100L; + public static final long DEFAULT_EACH_LIMIT = 1L; public static final long DEFAULT_DEGREE = 100L; public static final long DEFAULT_SAMPLE = 1L; public static final long DEFAULT_TIMES = 20L; @@ -131,6 +136,14 @@ protected static Directions direction(Map parameters) { return parseDirection(direction); } + protected static Directions direction4Out(Map parameters) { + if (!parameters.containsKey(KEY_DIRECTION)) { + return Directions.OUT; + } + Object direction = parameter(parameters, KEY_DIRECTION); + return parseDirection(direction); + } + protected static Directions directionOutIn(Map parameters) { if (!parameters.containsKey(KEY_DIRECTION)) { return Directions.OUT; @@ -148,14 +161,10 @@ protected static double alpha(Map parameters) { return DEFAULT_ALPHA; } double alpha = parameterDouble(parameters, KEY_ALPHA); - checkAlpha(alpha); - return alpha; - } - - public static void checkAlpha(double alpha) { - E.checkArgument(alpha > 0 && alpha <= 1.0, + E.checkArgument(alpha > 0.0 && alpha <= 1.0, "The value of %s must be in range (0, 1], but got %s", KEY_ALPHA, alpha); + return alpha; } protected static long top(Map parameters) { @@ -163,9 +172,7 @@ protected static long top(Map parameters) { return 0L; } long top = parameterLong(parameters, KEY_TOP); - E.checkArgument(top >= 0L, - "The value of %s must be >= 0, but got %s", - KEY_TOP, top); + HugeTraverser.checkNonNegativeOrNoLimit(top, KEY_TOP); return top; } @@ -196,6 +203,15 @@ protected static long limit(Map parameters) { return limit; } + protected static long eachLimit(Map parameters) { + if (!parameters.containsKey(KEY_EACH_LIMIT)) { + return DEFAULT_EACH_LIMIT; + } + long limit = parameterLong(parameters, KEY_EACH_LIMIT); + HugeTraverser.checkPositiveOrNoLimit(limit, KEY_EACH_LIMIT); + return limit; + } + protected static long sample(Map parameters) { if (!parameters.containsKey(KEY_SAMPLE)) { return DEFAULT_SAMPLE; @@ -355,21 +371,24 @@ protected long traverse(String sourceLabel, String sourceCLabel, Consumers consumers = new Consumers<>(this.executor, consumer, done); - consumers.start(); + consumers.start("task-" + this.job.task().id()); + long total = 0L; try { - long total = 0L; while (vertices.hasNext()) { this.updateProgress(++this.progress); total++; Vertex v = vertices.next(); consumers.provide(v); } - return total; + } catch (StopExecution e) { + // pass } catch (Throwable e) { throw Consumers.wrapException(e); } finally { consumers.await(); + CloseableIterator.closeIterator(vertices); } + return total; } protected Iterator vertices() { @@ -520,9 +539,11 @@ public void add(K key, long value) { } public void put(K key, long value) { + assert this.topN != 0L; this.tops.put(key, new MutableLong(value)); // keep 2x buffer - if (this.tops.size() > this.topN * 2) { + if (this.tops.size() > this.topN * 2 && + this.topN != HugeTraverser.NO_LIMIT) { this.shrinkIfNeeded(this.topN); } } @@ -537,7 +558,10 @@ public Set> entrySet() { } private void shrinkIfNeeded(long limit) { - if (this.tops.size() >= limit && limit != HugeTraverser.NO_LIMIT) { + assert limit != 0L; + if (this.tops.size() >= limit && + (limit > 0L || limit == HugeTraverser.NO_LIMIT)) { + // Just do sort if limit=NO_LIMIT, else do sort and shrink this.tops = HugeTraverser.topN(this.tops, true, 
limit); } } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Consumers.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Consumers.java index f5d01d9803..711e95edc8 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Consumers.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Consumers.java @@ -73,14 +73,14 @@ public Consumers(ExecutorService executor, this.queue = new ArrayBlockingQueue<>(this.queueSize); } - public void start() { + public void start(String name) { this.ending = false; this.exception = null; if (this.executor == null) { return; } - LOG.info("Starting {} workers with queue size {}...", - this.workers, this.queueSize); + LOG.info("Starting {} workers[{}] with queue size {}...", + this.workers, name, this.queueSize); for (int i = 0; i < this.workers; i++) { this.executor.submit(() -> { try { @@ -88,8 +88,10 @@ public void start() { this.done(); } catch (Throwable e) { // Only the first exception of one thread can be stored - this.exception = e; - LOG.error("Error when running task", e); + this.exception = e; + if (!(e instanceof StopExecution)) { + LOG.error("Error when running task", e); + } this.done(); } finally { this.latch.countDown(); @@ -183,4 +185,17 @@ public static RuntimeException wrapException(Throwable e) { throw new HugeException("Error when running task: %s", HugeException.rootCause(e).getMessage(), e); } + + public static class StopExecution extends HugeException { + + private static final long serialVersionUID = -371829356182454517L; + + public StopExecution(String message) { + super(message); + } + + public StopExecution(String message, Object... args) { + super(message, args); + } + } } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java index 385fbbb5b5..4ce97d3628 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java @@ -39,6 +39,7 @@ import com.baidu.hugegraph.job.algorithm.path.RingsDetectAlgorithm; import com.baidu.hugegraph.job.algorithm.rank.PageRankAlgorithm; import com.baidu.hugegraph.task.HugeTask; +import com.baidu.hugegraph.traversal.algorithm.HugeTraverser; import com.baidu.hugegraph.traversal.optimize.HugeScriptTraversal; import com.baidu.hugegraph.util.E; import com.baidu.hugegraph.util.InsertionOrderUtil; @@ -123,6 +124,7 @@ private static class Traverser extends AlgoTraverser { "depth", 10L, "degree", -1L, "sample", -1L, + "top", -1L /* sorted */, "workers", 0); public Traverser(Job job) { @@ -158,6 +160,8 @@ public Object subgraphStat(Job job) { parameters = ImmutableMap.builder() .putAll(PARAMS) .put("count_only", true) + .put("each_limit", NO_LIMIT) + .put("limit", NO_LIMIT) .build(); results.put("rings", algo.call(job, parameters)); @@ -175,6 +179,7 @@ private Map pageRanks(Job job) { Vertex vertex = vertices.next(); ranks.put(vertex.id(), vertex.value(R_RANK)); } + ranks = HugeTraverser.topN(ranks, true, NO_LIMIT); return ranks; } } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java index 22372ad097..7b11c134c1 100644 --- 
a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java @@ -24,9 +24,12 @@ import java.util.Map; import org.apache.commons.lang3.tuple.Pair; +import org.apache.tinkerpop.gremlin.process.traversal.Order; import org.apache.tinkerpop.gremlin.process.traversal.Pop; +import org.apache.tinkerpop.gremlin.process.traversal.Scope; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; +import org.apache.tinkerpop.gremlin.structure.Column; import org.apache.tinkerpop.gremlin.structure.Direction; import org.apache.tinkerpop.gremlin.structure.Vertex; @@ -130,10 +133,10 @@ protected GraphTraversal constructPathUnit( return unit; } - protected GraphTraversal filterNonShortestPath( - GraphTraversal - t) { - long size = this.graph().traversal().V().limit(MAX_QUERY_LIMIT) + protected GraphTraversal filterNonShortestPath( + GraphTraversal t, + boolean keepOneShortestPath) { + long size = this.graph().traversal().V().limit(100000L) .count().next(); Map, Integer> triples = new HashMap<>((int) size); return t.filter(it -> { @@ -142,15 +145,32 @@ protected GraphTraversal filterNonShortestPath( int len = it.>path(Pop.all, "v").size(); Pair key = Pair.of(start, end); Integer shortest = triples.get(key); - if (shortest != null && shortest != len) { + if (shortest != null && len > shortest) { // ignore non shortest path return false; } if (shortest == null) { triples.put(key, len); + } else { + assert len == shortest; + if (keepOneShortestPath) { + return false; + } } return true; }); } + + protected GraphTraversal topN(GraphTraversal t, + long topN) { + if (topN > 0L || topN == NO_LIMIT) { + t = t.order(Scope.local).by(Column.values, Order.desc); + if (topN > 0L) { + assert topN != NO_LIMIT; + t = t.limit(Scope.local, topN); + } + } + return t; + } } } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java index 3702991632..40b38f6555 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java @@ -21,13 +21,10 @@ import java.util.Map; -import org.apache.tinkerpop.gremlin.process.traversal.Order; import org.apache.tinkerpop.gremlin.process.traversal.P; import org.apache.tinkerpop.gremlin.process.traversal.Pop; -import org.apache.tinkerpop.gremlin.process.traversal.Scope; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; -import org.apache.tinkerpop.gremlin.structure.Column; import org.apache.tinkerpop.gremlin.structure.Vertex; import com.baidu.hugegraph.job.Job; @@ -72,7 +69,7 @@ public Object betweenessCentrality(Directions direction, long topN) { assert depth > 0; assert degree > 0L || degree == NO_LIMIT; - assert topN >= 0L; + assert topN >= 0L || topN == NO_LIMIT; GraphTraversal t = constructSource(sourceLabel, sourceSample, @@ -80,14 +77,11 @@ public Object betweenessCentrality(Directions direction, t = constructPath(t, direction, label, degree, sample, sourceLabel, sourceCLabel); t = t.emit().until(__.loops().is(P.gte(depth))); - t = 
filterNonShortestPath(t); + t = filterNonShortestPath(t, false); GraphTraversal tg = t.select(Pop.all, "v") - .unfold().id() - .groupCount().order(Scope.local) - .by(Column.values, Order.desc); - GraphTraversal tLimit = topN <= 0L ? tg : - tg.limit(Scope.local, topN); + .unfold().id().groupCount(); + GraphTraversal tLimit = topN(tg, topN); return this.execute(tLimit, () -> tLimit.next()); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java index 56d61504a7..9a25b6394a 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java @@ -22,13 +22,11 @@ import java.util.Map; import org.apache.tinkerpop.gremlin.process.traversal.Operator; -import org.apache.tinkerpop.gremlin.process.traversal.Order; import org.apache.tinkerpop.gremlin.process.traversal.P; import org.apache.tinkerpop.gremlin.process.traversal.Pop; import org.apache.tinkerpop.gremlin.process.traversal.Scope; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; -import org.apache.tinkerpop.gremlin.structure.Column; import org.apache.tinkerpop.gremlin.structure.Vertex; import com.baidu.hugegraph.job.Job; @@ -81,7 +79,7 @@ public Object closenessCentrality(Directions direction, long topN) { assert depth > 0; assert degree > 0L || degree == NO_LIMIT; - assert topN >= 0L; + assert topN >= 0L || topN == NO_LIMIT; GraphTraversal t = constructSource(sourceLabel, sourceSample, @@ -89,15 +87,13 @@ public Object closenessCentrality(Directions direction, t = constructPath(t, direction, label, degree, sample, sourceLabel, sourceCLabel); t = t.emit().until(__.loops().is(P.gte(depth))); - t = filterNonShortestPath(t); + t = filterNonShortestPath(t, true); GraphTraversal tg; tg = t.group().by(__.select(Pop.first, "v").id()) .by(__.select(Pop.all, "v").count(Scope.local) - .sack(Operator.div).sack().sum()) - .order(Scope.local).by(Column.values, Order.desc); - GraphTraversal tLimit = topN <= 0L ? 
tg : - tg.limit(Scope.local, topN); + .sack(Operator.div).sack().sum()); + GraphTraversal tLimit = topN(tg, topN); return this.execute(tLimit, () -> tLimit.next()); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java index f29a6301df..01b3e5c4b9 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java @@ -64,10 +64,10 @@ public Object degreeCentrality(Directions direction, String label, long topN) { if (direction == null || direction == Directions.BOTH) { - return degreeCentrality(label, topN); + return this.degreeCentralityForBothDir(label, topN); } assert direction == Directions.OUT || direction == Directions.IN; - assert topN >= 0L; + assert topN >= 0L || topN == NO_LIMIT; Iterator edges = this.edges(direction); @@ -76,12 +76,12 @@ public Object degreeCentrality(Directions direction, Id vertex = null; Id labelId = this.getEdgeLabelId(label); long degree = 0L; - long total = 0L; + long totalEdges = 0L; degrees.startObject(); while (edges.hasNext()) { HugeEdge edge = (HugeEdge) edges.next(); - this.updateProgress(++total); + this.updateProgress(++totalEdges); Id schemaLabel = edge.schemaLabel().id(); if (labelId != null && !labelId.equals(schemaLabel)) { @@ -97,7 +97,7 @@ public Object degreeCentrality(Directions direction, if (vertex != null) { // next vertex found - if (topN <= 0L) { + if (topN <= 0L && topN != NO_LIMIT) { degrees.append(vertex, degree); } else { tops.put(vertex, degree); @@ -108,7 +108,7 @@ public Object degreeCentrality(Directions direction, } if (vertex != null) { - if (topN <= 0L) { + if (topN <= 0L && topN != NO_LIMIT) { degrees.append(vertex, degree); } else { tops.put(vertex, degree); @@ -121,9 +121,9 @@ public Object degreeCentrality(Directions direction, return degrees.asJson(); } - protected Object degreeCentrality(String label, long topN) { - assert topN >= 0L; - long total = 0L; + protected Object degreeCentralityForBothDir(String label, long topN) { + assert topN >= 0L || topN == NO_LIMIT; + long totalVertices = 0L; JsonMap degrees = new JsonMap(); TopMap tops = new TopMap<>(topN); @@ -132,11 +132,11 @@ protected Object degreeCentrality(String label, long topN) { degrees.startObject(); while (vertices.hasNext()) { Id source = (Id) vertices.next().id(); - this.updateProgress(++total); + this.updateProgress(++totalVertices); long degree = this.degree(source, label); if (degree > 0L) { - if (topN <= 0L) { + if (topN <= 0L && topN != NO_LIMIT) { degrees.append(source, degree); } else { tops.put(source, degree); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java index ec065fa07a..0f695a1fb0 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java @@ -21,11 +21,8 @@ import java.util.Map; -import org.apache.tinkerpop.gremlin.process.traversal.Order; -import org.apache.tinkerpop.gremlin.process.traversal.Scope; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; import 
org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; -import org.apache.tinkerpop.gremlin.structure.Column; import org.apache.tinkerpop.gremlin.structure.T; import org.apache.tinkerpop.gremlin.structure.Vertex; @@ -74,7 +71,7 @@ public Object eigenvectorCentrality(Directions direction, long topN) { assert depth > 0; assert degree > 0L || degree == NO_LIMIT; - assert topN >= 0L; + assert topN >= 0L || topN == NO_LIMIT; // TODO: support parameters: Directions dir, String label /* @@ -96,10 +93,8 @@ public Object eigenvectorCentrality(Directions direction, t = t.repeat(__.groupCount("m").by(T.id) .local(unit).simplePath()).times(depth); - GraphTraversal tCap; - tCap = t.cap("m").order(Scope.local).by(Column.values, Order.desc); - GraphTraversal tLimit = topN <= 0L ? tCap : - tCap.limit(Scope.local, topN); + GraphTraversal tCap = t.cap("m"); + GraphTraversal tLimit = topN(tCap, topN); return this.execute(tLimit, () -> tLimit.next()); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java index 7ac30cd2da..3f3a26c3ca 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java @@ -23,33 +23,48 @@ import com.baidu.hugegraph.job.Job; import com.baidu.hugegraph.type.define.Directions; +import com.baidu.hugegraph.util.E; import com.baidu.hugegraph.util.InsertionOrderUtil; public class ClusterCoeffcientAlgorithm extends AbstractCommAlgorithm { + public static final String ALGO_NAME = "cluster_coeffcient"; + @Override public String name() { - return "cluster_coeffcient"; + return ALGO_NAME; } @Override public void checkParameters(Map parameters) { - directionOutIn(parameters); + direction(parameters); degree(parameters); + workersWhenBoth(parameters); } @Override public Object call(Job job, Map parameters) { - try (Traverser traverser = new Traverser(job)) { - return traverser.clusterCoeffcient(directionOutIn(parameters), + int workers = workersWhenBoth(parameters); + try (Traverser traverser = new Traverser(job, workers)) { + return traverser.clusterCoeffcient(direction(parameters), degree(parameters)); } } + protected static int workersWhenBoth(Map parameters) { + Directions direction = direction(parameters); + int workers = workers(parameters); + E.checkArgument(direction == Directions.BOTH || workers <= 0, + "The workers must be not set when direction!=BOTH, " + + "but got workers=%s and direction=%s", + workers, direction); + return workers; + } + private static class Traverser extends TriangleCountAlgorithm.Traverser { - public Traverser(Job job) { - super(job); + public Traverser(Job job, int workers) { + super(job, ALGO_NAME, workers); } public Object clusterCoeffcient(Directions direction, long degree) { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java index 923c1a2a33..52ddeeb71b 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java @@ -44,6 +44,8 @@ public class KCoreAlgorithm extends AbstractCommAlgorithm { + public static final String ALGO_NAME = "k_core"; + public static final String 
KEY_K = "k"; public static final String KEY_MERGED = "merged"; @@ -51,7 +53,7 @@ public class KCoreAlgorithm extends AbstractCommAlgorithm { @Override public String name() { - return "k_core"; + return ALGO_NAME; } @Override @@ -101,7 +103,7 @@ protected static boolean merged(Map parameters) { private static class Traverser extends AlgoTraverser { public Traverser(Job job, int workers) { - super(job, "kcore", workers); + super(job, ALGO_NAME, workers); } public Object kcore(String sourceLabel, String sourceCLabel, diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java index 446ab2686e..8eee9f43e5 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java @@ -26,9 +26,11 @@ public class LouvainAlgorithm extends AbstractCommAlgorithm { + public static final String ALGO_NAME = "louvain"; + @Override public String name() { - return "louvain"; + return ALGO_NAME; } @Override diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java index 5d7548aa3b..6135d1d402 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java @@ -85,7 +85,7 @@ public class LouvainTraverser extends AlgoTraverser { public LouvainTraverser(Job job, int workers, long degree, String sourceLabel, String sourceCLabel) { - super(job, "louvain", workers); + super(job, LouvainAlgorithm.ALGO_NAME, workers); this.g = this.graph().traversal(); this.sourceLabel = sourceLabel; this.sourceCLabel = sourceCLabel; @@ -422,7 +422,7 @@ private double moveCommunities(int pass) { } }); - consumers.start(); + consumers.start("louvain-move-pass-" + pass); try { while (vertices.hasNext()) { this.updateProgress(++this.progress); @@ -460,7 +460,7 @@ private void mergeCommunities(int pass) { this.graph().tx().commit(); }); - consumers.start(); + consumers.start("louvain-merge-pass-" + pass); try { for (Pair> pair : comms) { Community c = pair.getLeft(); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java index e15665cfc4..0f3506a154 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java @@ -41,9 +41,11 @@ public class LpaAlgorithm extends AbstractCommAlgorithm { + public static final String ALGO_NAME = "lpa"; + @Override public String name() { - return "lpa"; + return ALGO_NAME; } @Override @@ -87,7 +89,7 @@ private static class Traverser extends AlgoTraverser { private final Random R = new Random(); public Traverser(Job job, int workers) { - super(job, "lpa", workers); + super(job, ALGO_NAME, workers); } public Object lpa(String sourceLabel, String edgeLabel, diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/TriangleCountAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/TriangleCountAlgorithm.java index 0adb4707c5..d8a17653c5 100644 --- 
a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/TriangleCountAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/TriangleCountAlgorithm.java @@ -19,48 +19,70 @@ package com.baidu.hugegraph.job.algorithm.comm; -import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.Set; +import java.util.TreeSet; +import java.util.concurrent.atomic.AtomicLong; +import org.apache.commons.lang.mutable.MutableLong; import org.apache.tinkerpop.gremlin.structure.Edge; import com.baidu.hugegraph.backend.id.Id; +import com.baidu.hugegraph.backend.id.IdGenerator; import com.baidu.hugegraph.job.Job; import com.baidu.hugegraph.structure.HugeEdge; import com.baidu.hugegraph.type.define.Directions; +import com.baidu.hugegraph.util.E; import com.baidu.hugegraph.util.InsertionOrderUtil; import com.google.common.collect.ImmutableMap; public class TriangleCountAlgorithm extends AbstractCommAlgorithm { + public static final String ALGO_NAME = "triangle_count"; + @Override public String name() { - return "triangle_count"; + return ALGO_NAME; } @Override public void checkParameters(Map parameters) { - directionOutIn(parameters); + direction4Out(parameters); degree(parameters); + workersWhenBoth(parameters); } @Override public Object call(Job job, Map parameters) { - try (Traverser traverser = new Traverser(job)) { - return traverser.triangleCount(directionOutIn(parameters), + int workers = workersWhenBoth(parameters); + try (Traverser traverser = new Traverser(job, workers)) { + return traverser.triangleCount(direction4Out(parameters), degree(parameters)); } } + protected static int workersWhenBoth(Map parameters) { + Directions direction = direction4Out(parameters); + int workers = workers(parameters); + E.checkArgument(direction == Directions.BOTH || workers <= 0, + "The workers must be not set when direction!=BOTH, " + + "but got workers=%s and direction=%s", + workers, direction); + return workers; + } + protected static class Traverser extends AlgoTraverser { protected static final String KEY_TRIANGLES = "triangles"; protected static final String KEY_TRIADS = "triads"; - public Traverser(Job job) { - super(job); + public Traverser(Job job, int workers) { + super(job, ALGO_NAME, workers); + } + + protected Traverser(Job job, String name, int workers) { + super(job, name, workers); } public Object triangleCount(Directions direction, long degree) { @@ -73,22 +95,23 @@ public Object triangleCount(Directions direction, long degree) { protected Map triangles(Directions direction, long degree) { if (direction == null || direction == Directions.BOTH) { - throw new IllegalArgumentException("Direction must be OUT/IN"); + return this.trianglesForBothDir(degree); } + assert direction == Directions.OUT || direction == Directions.IN; Iterator edges = this.edges(direction); long triangles = 0L; long triads = 0L; - long total = 0L; + long totalEdges = 0L; long totalVertices = 0L; Id vertex = null; - Set adjVertices = new HashSet<>(); + Set adjVertices = newOrderedSet(); while (edges.hasNext()) { HugeEdge edge = (HugeEdge) edges.next(); - this.updateProgress(++total); + this.updateProgress(++totalEdges); Id source = edge.ownerVertex().id(); Id target = edge.otherVertex().id(); @@ -108,37 +131,97 @@ protected Map triangles(Directions direction, * B -> [D,F] * E -> [B,C,F] */ - triangles += this.intersect(direction, degree, adjVertices); + triangles += this.intersect(degree, adjVertices); triads += this.localTriads(adjVertices.size()); 
totalVertices++; // Reset for the next source - adjVertices = new HashSet<>(); + adjVertices = newOrderedSet(); } vertex = source; adjVertices.add(target); } if (vertex != null) { - triangles += this.intersect(direction, degree, adjVertices); + triangles += this.intersect(degree, adjVertices); triads += this.localTriads(adjVertices.size()); totalVertices++; } String suffix = "_" + direction.string(); - return ImmutableMap.of("edges" + suffix, total, + return ImmutableMap.of("edges" + suffix, totalEdges, "vertices" + suffix, totalVertices, KEY_TRIANGLES, triangles, KEY_TRIADS, triads); } - protected long intersect(Directions dir, long degree, - Set adjVertices) { + protected Map trianglesForBothDir(long degree) { + AtomicLong triangles = new AtomicLong(0L); + AtomicLong triads = new AtomicLong(0L); + AtomicLong totalEdges = new AtomicLong(0L); + AtomicLong totalVertices = new AtomicLong(0L); + + this.traverse(null, null, v -> { + Id source = (Id) v.id(); + + MutableLong edgesCount = new MutableLong(0L); + Set adjVertices = this.adjacentVertices(source, degree, + edgesCount); + + triangles.addAndGet(this.intersect(degree, adjVertices)); + triads.addAndGet(this.localTriads(adjVertices.size())); + + totalVertices.incrementAndGet(); + totalEdges.addAndGet(edgesCount.longValue()); + }); + + assert totalEdges.get() % 2L == 0L; + assert triangles.get() % 3L == 0L; + // totalEdges /= 2L + totalEdges.getAndAccumulate(2L, (l, w) -> l / w); + // triangles /= 3L + triangles.getAndAccumulate(3L, (l, w) -> l / w); + // triads -= triangles * 2L + triads.addAndGet(triangles.get() * -2L); + + return ImmutableMap.of("edges", totalEdges.get(), + "vertices", totalVertices.get(), + KEY_TRIANGLES, triangles.get(), + KEY_TRIADS, triads.get()); + } + + private Set adjacentVertices(Id source, long degree, + MutableLong edgesCount) { + Iterator adjVertices = this.adjacentVertices(source, + Directions.BOTH, + null, degree); + Set set = newOrderedSet(); + while (adjVertices.hasNext()) { + edgesCount.increment(); + set.add(adjVertices.next()); + } + return set; + } + + protected long intersect(long degree, Set adjVertices) { long count = 0L; + Directions dir = Directions.OUT; + Id empty = IdGenerator.of(0); Iterator vertices; for (Id v : adjVertices) { vertices = this.adjacentVertices(v, dir, null, degree); + Id lastVertex = empty; while (vertices.hasNext()) { Id vertex = vertices.next(); + if (lastVertex.equals(vertex)) { + // Skip duplicated target vertex (through sortkeys) + continue; + } + lastVertex = vertex; + + /* + * FIXME: deduplicate two edges with opposite directions + * between two specified adjacent vertices + */ if (adjVertices.contains(vertex)) { count++; } @@ -150,5 +233,9 @@ protected long intersect(Directions dir, long degree, protected long localTriads(int size) { return size * (size - 1L) / 2L; } + + protected static Set newOrderedSet() { + return new TreeSet<>(); + } } } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java index 3d5bd163e3..bbb028efc1 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java @@ -20,34 +20,37 @@ package com.baidu.hugegraph.job.algorithm.path; import java.util.Map; -import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; import 
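In the both-direction path above, every undirected edge is visited from each endpoint and every triangle is counted once per corner, so the accumulated totals are divided down afterwards with AtomicLong.getAndAccumulate. A tiny sketch of that in-place division trick (the sample numbers are made up):

import java.util.concurrent.atomic.AtomicLong;

public class AccumulateSketch {
    public static void main(String[] args) {
        AtomicLong totalEdges = new AtomicLong(12L); // 6 edges, each seen twice
        AtomicLong triangles = new AtomicLong(9L);   // 3 triangles, each seen 3 times

        // getAndAccumulate atomically sets the value to f(current, arg);
        // with (current, divisor) -> current / divisor it divides in place
        totalEdges.getAndAccumulate(2L, (current, divisor) -> current / divisor);
        triangles.getAndAccumulate(3L, (current, divisor) -> current / divisor);

        System.out.println(totalEdges.get() + " edges, " + triangles.get() + " triangles");
    }
}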
com.baidu.hugegraph.backend.id.Id; import com.baidu.hugegraph.job.Job; import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm; +import com.baidu.hugegraph.job.algorithm.Consumers.StopExecution; import com.baidu.hugegraph.traversal.algorithm.SubGraphTraverser; import com.baidu.hugegraph.type.define.Directions; import com.baidu.hugegraph.util.JsonUtil; public class RingsDetectAlgorithm extends AbstractAlgorithm { + public static final String ALGO_NAME = "rings"; + public static final String KEY_COUNT_ONLY = "count_only"; @Override - public String name() { - return "rings_detect"; + public String category() { + return CATEGORY_PATH; } @Override - public String category() { - return CATEGORY_PATH; + public String name() { + return ALGO_NAME; } @Override public void checkParameters(Map parameters) { depth(parameters); degree(parameters); - capacity(parameters); + eachLimit(parameters); limit(parameters); sourceLabel(parameters); sourceCLabel(parameters); @@ -67,13 +70,13 @@ public Object call(Job job, Map parameters) { edgeLabel(parameters), depth(parameters), degree(parameters), - capacity(parameters), + eachLimit(parameters), limit(parameters), countOnly(parameters)); } } - public boolean countOnly(Map parameters) { + protected boolean countOnly(Map parameters) { if (!parameters.containsKey(KEY_COUNT_ONLY)) { return false; } @@ -83,12 +86,12 @@ public boolean countOnly(Map parameters) { private static class Traverser extends AlgoTraverser { public Traverser(Job job, int workers) { - super(job, "ring", workers); + super(job, ALGO_NAME, workers); } public Object rings(String sourceLabel, String sourceCLabel, Directions dir, String label, int depth, - long degree, long capacity, long limit, + long degree, long eachLimit, long limit, boolean countOnly) { JsonMap ringsJson = new JsonMap(); ringsJson.startObject(); @@ -100,24 +103,24 @@ public Object rings(String sourceLabel, String sourceCLabel, } SubGraphTraverser traverser = new SubGraphTraverser(this.graph()); - AtomicInteger count = new AtomicInteger(0); + AtomicLong count = new AtomicLong(0L); this.traverse(sourceLabel, sourceCLabel, v -> { Id source = (Id) v.id(); PathSet rings = traverser.rings(source, dir, label, depth, - true, degree, capacity, limit); + true, degree, MAX_CAPACITY, + eachLimit); + assert eachLimit == NO_LIMIT || rings.size() <= eachLimit; for (Path ring : rings) { - Id min = null; - for (Id id : ring.vertices()) { - if (min == null || id.compareTo(min) < 0) { - min = id; - } + if (eachLimit == NO_LIMIT && !ring.ownedBy(source)) { + // Only dedup rings when each_limit!=NO_LIMIT + continue; } - if (source.equals(min)) { - if (countOnly) { - count.incrementAndGet(); - continue; - } + + if (count.incrementAndGet() > limit && limit != NO_LIMIT) { + throw new StopExecution("exceed limit %s", limit); + } + if (!countOnly) { String ringJson = JsonUtil.toJson(ring.vertices()); synchronized (ringsJson) { ringsJson.appendRaw(ringJson); @@ -125,8 +128,14 @@ public Object rings(String sourceLabel, String sourceCLabel, } } }); + if (countOnly) { - ringsJson.append(count.get()); + long counted = count.get(); + if (limit != NO_LIMIT && counted > limit) { + // The count increased by multi threads and exceed limit + counted = limit; + } + ringsJson.append(counted); } else { ringsJson.endList(); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java index 
82294f48bb..fbaca4960e 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java @@ -20,12 +20,14 @@ package com.baidu.hugegraph.job.algorithm.similarity; import java.util.Map; +import java.util.concurrent.atomic.AtomicLong; import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils; import com.baidu.hugegraph.HugeGraph; import com.baidu.hugegraph.job.Job; import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm; +import com.baidu.hugegraph.job.algorithm.Consumers.StopExecution; import com.baidu.hugegraph.schema.EdgeLabel; import com.baidu.hugegraph.traversal.algorithm.FusiformSimilarityTraverser; import com.baidu.hugegraph.traversal.algorithm.FusiformSimilarityTraverser.SimilarsMap; @@ -35,24 +37,27 @@ public class FusiformSimilarityAlgorithm extends AbstractAlgorithm { + public static final String ALGO_NAME = "fusiform_similarity"; + public static final String KEY_MIN_NEIGHBORS = "min_neighbors"; public static final String KEY_MIN_SIMILARS = "min_similars"; + public static final String KEY_TOP_SIMILARS = "top_similars"; public static final String KEY_GROUP_PROPERTY = "group_property"; public static final String KEY_MIN_GROUPS = "min_groups"; public static final int DEFAULT_MIN_NEIGHBORS = 10; public static final int DEFAULT_MIN_SIMILARS = 6; + public static final int DEFAULT_TOP_SIMILARS = 0; public static final int DEFAULT_MIN_GROUPS = 1; - public static final long DEFAULT_LIMIT = -1L; @Override - public String name() { - return "fusiform_similarity"; + public String category() { + return CATEGORY_SIMI; } @Override - public String category() { - return CATEGORY_SIMI; + public String name() { + return ALGO_NAME; } @Override @@ -60,11 +65,10 @@ public void checkParameters(Map parameters) { minNeighbors(parameters); alpha(parameters); minSimilars(parameters); - top(parameters); + topSimilars(parameters); groupProperty(parameters); minGroups(parameters); degree(parameters); - capacity(parameters); limit(parameters); sourceLabel(parameters); sourceCLabel(parameters); @@ -84,11 +88,10 @@ public Object call(Job job, Map parameters) { minNeighbors(parameters), alpha(parameters), minSimilars(parameters), - top(parameters), + topSimilars(parameters), groupProperty(parameters), minGroups(parameters), degree(parameters), - capacity(parameters), limit(parameters)); } } @@ -98,7 +101,7 @@ protected static int minNeighbors(Map parameters) { return DEFAULT_MIN_NEIGHBORS; } int minNeighbors = parameterInt(parameters, KEY_MIN_NEIGHBORS); - HugeTraverser.checkPositive(minNeighbors, "min neighbors"); + HugeTraverser.checkPositive(minNeighbors, KEY_MIN_NEIGHBORS); return minNeighbors; } @@ -107,7 +110,16 @@ protected static int minSimilars(Map parameters) { return DEFAULT_MIN_SIMILARS; } int minSimilars = parameterInt(parameters, KEY_MIN_SIMILARS); - HugeTraverser.checkPositive(minSimilars, "min similars"); + HugeTraverser.checkPositive(minSimilars, KEY_MIN_SIMILARS); + return minSimilars; + } + + protected static int topSimilars(Map parameters) { + if (!parameters.containsKey(KEY_TOP_SIMILARS)) { + return DEFAULT_TOP_SIMILARS; + } + int minSimilars = parameterInt(parameters, KEY_TOP_SIMILARS); + HugeTraverser.checkNonNegative(minSimilars, KEY_TOP_SIMILARS); return minSimilars; } @@ -123,7 +135,7 @@ protected static int minGroups(Map parameters) { return DEFAULT_MIN_GROUPS; } int minGroups = parameterInt(parameters, 
KEY_MIN_GROUPS); - HugeTraverser.checkPositive(minGroups, "min groups"); + HugeTraverser.checkPositive(minGroups, KEY_MIN_GROUPS); return minGroups; } @@ -139,7 +151,7 @@ protected static long limit(Map parameters) { private static class Traverser extends AlgoTraverser { public Traverser(Job job, int workers) { - super(job, "fusiform", workers); + super(job, ALGO_NAME, workers); } public Object fusiformSimilars(String sourceLabel, String sourceCLabel, @@ -147,12 +159,14 @@ public Object fusiformSimilars(String sourceLabel, String sourceCLabel, int minNeighbors, double alpha, int minSimilars, long topSimilars, String groupProperty, int minGroups, - long degree, long capacity, long limit) { + long degree, long limit) { HugeGraph graph = this.graph(); EdgeLabel edgeLabel = label == null ? null : graph.edgeLabel(label); FusiformSimilarityTraverser traverser = new FusiformSimilarityTraverser(graph); + + AtomicLong count = new AtomicLong(0L); JsonMap similarsJson = new JsonMap(); similarsJson.startObject(); @@ -162,16 +176,20 @@ public Object fusiformSimilars(String sourceLabel, String sourceCLabel, edgeLabel, minNeighbors, alpha, minSimilars, (int) topSimilars, groupProperty, minGroups, degree, - capacity, NO_LIMIT, true); + MAX_CAPACITY, NO_LIMIT, true); if (similars.isEmpty()) { return; } String result = JsonUtil.toJson(similars.toMap()); result = result.substring(1, result.length() - 1); synchronized (similarsJson) { + if (count.incrementAndGet() > limit && limit != NO_LIMIT) { + throw new StopExecution("exceed limit %s", limit); + } similarsJson.appendRaw(result); } - }, null, limit); + }); + similarsJson.endObject(); return similarsJson.asJson(); From 69ebdfd74dee2bc104b7dc853c43d2b9a0b019f1 Mon Sep 17 00:00:00 2001 From: Zhangmei Li Date: Fri, 5 Jun 2020 17:38:26 +0800 Subject: [PATCH 16/33] rebase master(0.11.1) Change-Id: I565602945a26c2a575baaa3f17d084b65399a009 --- .../com/baidu/hugegraph/job/AlgorithmJob.java | 2 +- .../job/algorithm/AbstractAlgorithm.java | 8 ++--- .../hugegraph/job/algorithm/Algorithm.java | 4 +-- .../job/algorithm/CountEdgeAlgorithm.java | 6 ++-- .../job/algorithm/CountVertexAlgorithm.java | 6 ++-- .../job/algorithm/SubgraphStatAlgorithm.java | 34 ++++++++++--------- .../algorithm/cent/AbstractCentAlgorithm.java | 4 +-- .../cent/BetweenessCentralityAlgorithm.java | 6 ++-- .../cent/ClosenessCentralityAlgorithm.java | 6 ++-- .../cent/DegreeCentralityAlgorithm.java | 6 ++-- .../cent/EigenvectorCentralityAlgorithm.java | 6 ++-- .../comm/ClusterCoeffcientAlgorithm.java | 6 ++-- .../job/algorithm/comm/KCoreAlgorithm.java | 6 ++-- .../job/algorithm/comm/LouvainAlgorithm.java | 4 +-- .../job/algorithm/comm/LouvainTraverser.java | 4 +-- .../job/algorithm/comm/LpaAlgorithm.java | 6 ++-- .../comm/TriangleCountAlgorithm.java | 10 +++--- .../comm/WeakConnectedComponent.java | 8 ++--- .../algorithm/path/RingsDetectAlgorithm.java | 6 ++-- .../job/algorithm/rank/PageRankAlgorithm.java | 6 ++-- .../FusiformSimilarityAlgorithm.java | 6 ++-- 21 files changed, 76 insertions(+), 74 deletions(-) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/AlgorithmJob.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/AlgorithmJob.java index 7e752ac429..f25c1b2503 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/AlgorithmJob.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/AlgorithmJob.java @@ -26,7 +26,7 @@ import com.baidu.hugegraph.util.E; import com.baidu.hugegraph.util.JsonUtil; -public class AlgorithmJob extends Job { +public class 
AlgorithmJob extends UserJob { public static final String TASK_TYPE = "algorithm"; diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java index e30ce1e165..cc245f6f9d 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java @@ -44,7 +44,7 @@ import com.baidu.hugegraph.backend.query.Query; import com.baidu.hugegraph.iterator.FilterIterator; import com.baidu.hugegraph.iterator.FlatMapperIterator; -import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.UserJob; import com.baidu.hugegraph.job.algorithm.Consumers.StopExecution; import com.baidu.hugegraph.testutil.Whitebox; import com.baidu.hugegraph.traversal.algorithm.HugeTraverser; @@ -323,17 +323,17 @@ public static Directions parseDirection(Object direction) { public static class AlgoTraverser extends HugeTraverser implements AutoCloseable { - private final Job job; + private final UserJob job; protected final ExecutorService executor; protected long progress; - public AlgoTraverser(Job job) { + public AlgoTraverser(UserJob job) { super(job.graph()); this.job = job; this.executor = null; } - protected AlgoTraverser(Job job, String name, int workers) { + protected AlgoTraverser(UserJob job, String name, int workers) { super(job.graph()); this.job = job; String prefix = name + "-" + job.task().id(); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Algorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Algorithm.java index 6ad200157a..b1cb531443 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Algorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Algorithm.java @@ -21,7 +21,7 @@ import java.util.Map; -import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.UserJob; public interface Algorithm { @@ -29,7 +29,7 @@ public interface Algorithm { public String category(); - public Object call(Job job, Map parameters); + public Object call(UserJob job, Map parameters); public void checkParameters(Map parameters); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/CountEdgeAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/CountEdgeAlgorithm.java index 670f544719..9ed617b083 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/CountEdgeAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/CountEdgeAlgorithm.java @@ -26,7 +26,7 @@ import org.apache.commons.lang3.mutable.MutableLong; import org.apache.tinkerpop.gremlin.structure.Edge; -import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.UserJob; import com.baidu.hugegraph.util.JsonUtil; public class CountEdgeAlgorithm extends AbstractAlgorithm { @@ -42,7 +42,7 @@ public String category() { } @Override - public Object call(Job job, Map parameters) { + public Object call(UserJob job, Map parameters) { try (Traverser traverser = new Traverser(job)) { return traverser.count(); } @@ -50,7 +50,7 @@ public Object call(Job job, Map parameters) { private static class Traverser extends AlgoTraverser { - public Traverser(Job job) { + public Traverser(UserJob job) { super(job); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/CountVertexAlgorithm.java 
b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/CountVertexAlgorithm.java index 68a59a363c..721979c352 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/CountVertexAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/CountVertexAlgorithm.java @@ -26,7 +26,7 @@ import org.apache.commons.lang3.mutable.MutableLong; import org.apache.tinkerpop.gremlin.structure.Vertex; -import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.UserJob; import com.baidu.hugegraph.util.JsonUtil; public class CountVertexAlgorithm extends AbstractAlgorithm { @@ -42,7 +42,7 @@ public String category() { } @Override - public Object call(Job job, Map parameters) { + public Object call(UserJob job, Map parameters) { try (Traverser traverser = new Traverser(job)) { return traverser.count(); } @@ -50,7 +50,7 @@ public Object call(Job job, Map parameters) { private static class Traverser extends AlgoTraverser { - public Traverser(Job job) { + public Traverser(UserJob job) { super(job); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java index 4ce97d3628..199d1b020f 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java @@ -27,10 +27,11 @@ import org.apache.tinkerpop.gremlin.structure.Vertex; import com.baidu.hugegraph.HugeGraph; +import com.baidu.hugegraph.StandardHugeGraph; import com.baidu.hugegraph.backend.id.Id; import com.baidu.hugegraph.config.CoreOptions; import com.baidu.hugegraph.config.HugeConfig; -import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.UserJob; import com.baidu.hugegraph.job.algorithm.cent.BetweenessCentralityAlgorithm; import com.baidu.hugegraph.job.algorithm.cent.ClosenessCentralityAlgorithm; import com.baidu.hugegraph.job.algorithm.cent.DegreeCentralityAlgorithm; @@ -39,6 +40,7 @@ import com.baidu.hugegraph.job.algorithm.path.RingsDetectAlgorithm; import com.baidu.hugegraph.job.algorithm.rank.PageRankAlgorithm; import com.baidu.hugegraph.task.HugeTask; +import com.baidu.hugegraph.testutil.Whitebox; import com.baidu.hugegraph.traversal.algorithm.HugeTraverser; import com.baidu.hugegraph.traversal.optimize.HugeScriptTraversal; import com.baidu.hugegraph.util.E; @@ -66,27 +68,27 @@ public void checkParameters(Map parameters) { } @Override - public Object call(Job job, Map parameters) { + public Object call(UserJob job, Map parameters) { HugeGraph graph = this.createTempGraph(job); try (Traverser traverser = new Traverser(job)) { - this.initGraph(job.graph(), graph, subgraph(parameters), - copySchema(parameters)); - Job tmpJob = new TempJob<>(graph, job, job.task()); + this.initGraph(job.graph(), graph, + subgraph(parameters), copySchema(parameters)); + UserJob tmpJob = new TempJob<>(graph, job, job.task()); return traverser.subgraphStat(tmpJob); } finally { graph.truncateBackend(); - // FIXME: task thread can't call close() here (will hang) - graph.closeTx(); + // FIXME: task thread can't call close() (will hang), use closeTx() + Whitebox.invoke(graph.getClass(), "closeTx", graph); } } - private HugeGraph createTempGraph(Job job) { + private HugeGraph createTempGraph(UserJob job) { Id id = job.task().id(); PropertiesConfiguration config = new PropertiesConfiguration(); config.setProperty(CoreOptions.BACKEND.name(), 
"memory"); config.setProperty(CoreOptions.STORE.name(), "tmp_" + id); config.setDelimiterParsingDisabled(true); - return new HugeGraph(new HugeConfig(config)); + return new StandardHugeGraph(new HugeConfig(config)); } @SuppressWarnings("resource") @@ -127,11 +129,11 @@ private static class Traverser extends AlgoTraverser { "top", -1L /* sorted */, "workers", 0); - public Traverser(Job job) { + public Traverser(UserJob job) { super(job); } - public Object subgraphStat(Job job) { + public Object subgraphStat(UserJob job) { Map results = InsertionOrderUtil.newMap(); GraphTraversalSource g = job.graph().traversal(); @@ -168,7 +170,7 @@ public Object subgraphStat(Job job) { return results; } - private Map pageRanks(Job job) { + private Map pageRanks(UserJob job) { PageRankAlgorithm algo = new PageRankAlgorithm(); algo.call(job, ImmutableMap.of("alpha", 0.15)); @@ -184,12 +186,12 @@ private Map pageRanks(Job job) { } } - private static class TempJob extends Job { + private static class TempJob extends UserJob { - private final Job parent; + private final UserJob parent; - public TempJob(HugeGraph graph, Job job, HugeTask task) { - this.scheduler(graph.taskScheduler()); + public TempJob(HugeGraph graph, UserJob job, HugeTask task) { + this.graph(graph); this.task(task); this.parent = job; } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java index 7b11c134c1..19da8e968e 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java @@ -34,7 +34,7 @@ import org.apache.tinkerpop.gremlin.structure.Vertex; import com.baidu.hugegraph.backend.id.Id; -import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.UserJob; import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm; import com.baidu.hugegraph.structure.HugeElement; import com.baidu.hugegraph.type.define.Directions; @@ -61,7 +61,7 @@ public void checkParameters(Map parameters) { protected static class Traverser extends AlgoTraverser { - public Traverser(Job job) { + public Traverser(UserJob job) { super(job); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java index 40b38f6555..465c6f96c9 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java @@ -27,7 +27,7 @@ import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; import org.apache.tinkerpop.gremlin.structure.Vertex; -import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.UserJob; import com.baidu.hugegraph.type.define.Directions; public class BetweenessCentralityAlgorithm extends AbstractCentAlgorithm { @@ -38,7 +38,7 @@ public String name() { } @Override - public Object call(Job job, Map parameters) { + public Object call(UserJob job, Map parameters) { try (Traverser traverser = new Traverser(job)) { return traverser.betweenessCentrality(direction(parameters), edgeLabel(parameters), @@ -54,7 +54,7 @@ public Object call(Job job, Map parameters) { private static class Traverser extends AbstractCentAlgorithm.Traverser { - public 
Traverser(Job job) { + public Traverser(UserJob job) { super(job); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java index 9a25b6394a..3391a191a1 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java @@ -29,7 +29,7 @@ import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; import org.apache.tinkerpop.gremlin.structure.Vertex; -import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.UserJob; import com.baidu.hugegraph.type.define.Directions; public class ClosenessCentralityAlgorithm extends AbstractCentAlgorithm { @@ -48,7 +48,7 @@ public void checkParameters(Map parameters) { } @Override - public Object call(Job job, Map parameters) { + public Object call(UserJob job, Map parameters) { try (Traverser traverser = new Traverser(job)) { return traverser.closenessCentrality(direction(parameters), edgeLabel(parameters), @@ -64,7 +64,7 @@ public Object call(Job job, Map parameters) { private static class Traverser extends AbstractCentAlgorithm.Traverser { - public Traverser(Job job) { + public Traverser(UserJob job) { super(job); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java index 01b3e5c4b9..b2030e8453 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java @@ -27,7 +27,7 @@ import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils; import com.baidu.hugegraph.backend.id.Id; -import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.UserJob; import com.baidu.hugegraph.structure.HugeEdge; import com.baidu.hugegraph.type.define.Directions; @@ -46,7 +46,7 @@ public void checkParameters(Map parameters) { } @Override - public Object call(Job job, Map parameters) { + public Object call(UserJob job, Map parameters) { try (Traverser traverser = new Traverser(job)) { return traverser.degreeCentrality(direction(parameters), edgeLabel(parameters), @@ -56,7 +56,7 @@ public Object call(Job job, Map parameters) { private static class Traverser extends AlgoTraverser { - public Traverser(Job job) { + public Traverser(UserJob job) { super(job); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java index 0f695a1fb0..15748ec726 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java @@ -26,7 +26,7 @@ import org.apache.tinkerpop.gremlin.structure.T; import org.apache.tinkerpop.gremlin.structure.Vertex; -import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.UserJob; import com.baidu.hugegraph.type.define.Directions; public class EigenvectorCentralityAlgorithm extends AbstractCentAlgorithm { @@ -40,7 +40,7 @@ public String name() { } @Override - public Object call(Job 
job, Map parameters) { + public Object call(UserJob job, Map parameters) { try (Traverser traverser = new Traverser(job)) { return traverser.eigenvectorCentrality(direction(parameters), edgeLabel(parameters), @@ -56,7 +56,7 @@ public Object call(Job job, Map parameters) { private static class Traverser extends AbstractCentAlgorithm.Traverser { - public Traverser(Job job) { + public Traverser(UserJob job) { super(job); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java index 3f3a26c3ca..2a0cf1a42e 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java @@ -21,7 +21,7 @@ import java.util.Map; -import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.UserJob; import com.baidu.hugegraph.type.define.Directions; import com.baidu.hugegraph.util.E; import com.baidu.hugegraph.util.InsertionOrderUtil; @@ -43,7 +43,7 @@ public void checkParameters(Map parameters) { } @Override - public Object call(Job job, Map parameters) { + public Object call(UserJob job, Map parameters) { int workers = workersWhenBoth(parameters); try (Traverser traverser = new Traverser(job, workers)) { return traverser.clusterCoeffcient(direction(parameters), @@ -63,7 +63,7 @@ protected static int workersWhenBoth(Map parameters) { private static class Traverser extends TriangleCountAlgorithm.Traverser { - public Traverser(Job job, int workers) { + public Traverser(UserJob job, int workers) { super(job, ALGO_NAME, workers); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java index 52ddeeb71b..5080523549 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java @@ -33,7 +33,7 @@ import com.baidu.hugegraph.HugeGraph; import com.baidu.hugegraph.backend.id.Id; -import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.UserJob; import com.baidu.hugegraph.schema.EdgeLabel; import com.baidu.hugegraph.traversal.algorithm.FusiformSimilarityTraverser; import com.baidu.hugegraph.type.define.Directions; @@ -70,7 +70,7 @@ public void checkParameters(Map parameters) { } @Override - public Object call(Job job, Map parameters) { + public Object call(UserJob job, Map parameters) { int workers = workers(parameters); try (Traverser traverser = new Traverser(job, workers)) { return traverser.kcore(sourceLabel(parameters), @@ -102,7 +102,7 @@ protected static boolean merged(Map parameters) { private static class Traverser extends AlgoTraverser { - public Traverser(Job job, int workers) { + public Traverser(UserJob job, int workers) { super(job, ALGO_NAME, workers); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java index 8eee9f43e5..f05f85e56c 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java @@ -21,7 +21,7 @@ import java.util.Map; -import 
com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.UserJob; import com.baidu.hugegraph.traversal.algorithm.HugeTraverser; public class LouvainAlgorithm extends AbstractCommAlgorithm { @@ -48,7 +48,7 @@ public void checkParameters(Map parameters) { } @Override - public Object call(Job job, Map parameters) { + public Object call(UserJob job, Map parameters) { String label = sourceLabel(parameters); String clabel = sourceCLabel(parameters); long degree = degree(parameters); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java index 6135d1d402..3a8a83f53a 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java @@ -48,7 +48,7 @@ import com.baidu.hugegraph.backend.id.IdGenerator; import com.baidu.hugegraph.exception.ExistedException; import com.baidu.hugegraph.iterator.ListIterator; -import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.UserJob; import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm; import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm.AlgoTraverser; import com.baidu.hugegraph.job.algorithm.Consumers; @@ -83,7 +83,7 @@ public class LouvainTraverser extends AlgoTraverser { private long m; private String passLabel; - public LouvainTraverser(Job job, int workers, long degree, + public LouvainTraverser(UserJob job, int workers, long degree, String sourceLabel, String sourceCLabel) { super(job, LouvainAlgorithm.ALGO_NAME, workers); this.g = this.graph().traversal(); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java index 0f3506a154..8b54241fea 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java @@ -32,7 +32,7 @@ import org.apache.tinkerpop.gremlin.structure.Vertex; import com.baidu.hugegraph.backend.id.Id; -import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.UserJob; import com.baidu.hugegraph.schema.SchemaManager; import com.baidu.hugegraph.schema.VertexLabel; import com.baidu.hugegraph.type.define.Directions; @@ -61,7 +61,7 @@ public void checkParameters(Map parameters) { } @Override - public Object call(Job job, Map parameters) { + public Object call(UserJob job, Map parameters) { int workers = workers(parameters); String showComm = showCommunity(parameters); @@ -88,7 +88,7 @@ private static class Traverser extends AlgoTraverser { private final Random R = new Random(); - public Traverser(Job job, int workers) { + public Traverser(UserJob job, int workers) { super(job, ALGO_NAME, workers); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/TriangleCountAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/TriangleCountAlgorithm.java index d8a17653c5..4cecc623bf 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/TriangleCountAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/TriangleCountAlgorithm.java @@ -30,7 +30,7 @@ import com.baidu.hugegraph.backend.id.Id; import com.baidu.hugegraph.backend.id.IdGenerator; -import com.baidu.hugegraph.job.Job; +import 
com.baidu.hugegraph.job.UserJob; import com.baidu.hugegraph.structure.HugeEdge; import com.baidu.hugegraph.type.define.Directions; import com.baidu.hugegraph.util.E; @@ -54,7 +54,7 @@ public void checkParameters(Map parameters) { } @Override - public Object call(Job job, Map parameters) { + public Object call(UserJob job, Map parameters) { int workers = workersWhenBoth(parameters); try (Traverser traverser = new Traverser(job, workers)) { return traverser.triangleCount(direction4Out(parameters), @@ -77,11 +77,11 @@ protected static class Traverser extends AlgoTraverser { protected static final String KEY_TRIANGLES = "triangles"; protected static final String KEY_TRIADS = "triads"; - public Traverser(Job job, int workers) { + public Traverser(UserJob job, int workers) { super(job, ALGO_NAME, workers); } - protected Traverser(Job job, String name, int workers) { + protected Traverser(UserJob job, String name, int workers) { super(job, name, workers); } @@ -205,7 +205,7 @@ private Set adjacentVertices(Id source, long degree, protected long intersect(long degree, Set adjVertices) { long count = 0L; Directions dir = Directions.OUT; - Id empty = IdGenerator.of(0); + Id empty = IdGenerator.ZERO; Iterator vertices; for (Id v : adjVertices) { vertices = this.adjacentVertices(v, dir, null, degree); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/WeakConnectedComponent.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/WeakConnectedComponent.java index 99dee85cdc..435a6e7778 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/WeakConnectedComponent.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/WeakConnectedComponent.java @@ -30,7 +30,7 @@ import org.slf4j.Logger; import com.baidu.hugegraph.backend.id.Id; -import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.UserJob; import com.baidu.hugegraph.schema.SchemaManager; import com.baidu.hugegraph.schema.VertexLabel; import com.baidu.hugegraph.structure.HugeEdge; @@ -56,7 +56,7 @@ public void checkParameters(Map parameters) { } @Override - public Object call(Job job, Map parameters) { + public Object call(UserJob job, Map parameters) { try (Traverser traverser = new Traverser(job)) { return traverser.connectedComponent(times(parameters), directionOutIn(parameters), @@ -71,7 +71,7 @@ protected static class Traverser extends AlgoTraverser { private final Map vertexComponentMap = new HashMap<>(); - public Traverser(Job job) { + public Traverser(UserJob job) { super(job); } @@ -81,7 +81,7 @@ public Object connectedComponent(int maxTimes, this.initSchema(); this.initVertexComponentMap(); int times; - + for (times = 0; times < maxTimes; times++) { long changeCount = 0; Id currentSourceVertexId = null; diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java index bbb028efc1..d228b70a2f 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java @@ -23,7 +23,7 @@ import java.util.concurrent.atomic.AtomicLong; import com.baidu.hugegraph.backend.id.Id; -import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.UserJob; import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm; import 
com.baidu.hugegraph.job.algorithm.Consumers.StopExecution; import com.baidu.hugegraph.traversal.algorithm.SubGraphTraverser; @@ -61,7 +61,7 @@ public void checkParameters(Map parameters) { } @Override - public Object call(Job job, Map parameters) { + public Object call(UserJob job, Map parameters) { int workers = workers(parameters); try (Traverser traverser = new Traverser(job, workers)) { return traverser.rings(sourceLabel(parameters), @@ -85,7 +85,7 @@ protected boolean countOnly(Map parameters) { private static class Traverser extends AlgoTraverser { - public Traverser(Job job, int workers) { + public Traverser(UserJob job, int workers) { super(job, ALGO_NAME, workers); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/rank/PageRankAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/rank/PageRankAlgorithm.java index 9f51bf7b60..fa552e3997 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/rank/PageRankAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/rank/PageRankAlgorithm.java @@ -30,7 +30,7 @@ import org.slf4j.Logger; import com.baidu.hugegraph.backend.id.Id; -import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.UserJob; import com.baidu.hugegraph.job.algorithm.comm.AbstractCommAlgorithm; import com.baidu.hugegraph.schema.SchemaManager; import com.baidu.hugegraph.schema.VertexLabel; @@ -64,7 +64,7 @@ public void checkParameters(Map parameters) { } @Override - public Object call(Job job, Map parameters) { + public Object call(UserJob job, Map parameters) { try (Traverser traverser = new Traverser(job)) { return traverser.pageRank(alpha(parameters), times(parameters), @@ -85,7 +85,7 @@ private static class Traverser extends AlgoTraverser { */ private final Map vertexRankMap; - public Traverser(Job job) { + public Traverser(UserJob job) { super(job); this.vertexRankMap = new HashMap<>(); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java index fbaca4960e..daf5b09aa2 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java @@ -25,7 +25,7 @@ import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils; import com.baidu.hugegraph.HugeGraph; -import com.baidu.hugegraph.job.Job; +import com.baidu.hugegraph.job.UserJob; import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm; import com.baidu.hugegraph.job.algorithm.Consumers.StopExecution; import com.baidu.hugegraph.schema.EdgeLabel; @@ -78,7 +78,7 @@ public void checkParameters(Map parameters) { } @Override - public Object call(Job job, Map parameters) { + public Object call(UserJob job, Map parameters) { int workers = workers(parameters); try (Traverser traverser = new Traverser(job, workers)) { return traverser.fusiformSimilars(sourceLabel(parameters), @@ -150,7 +150,7 @@ protected static long limit(Map parameters) { private static class Traverser extends AlgoTraverser { - public Traverser(Job job, int workers) { + public Traverser(UserJob job, int workers) { super(job, ALGO_NAME, workers); } From 909a11076d17ae0ce532f654858c16301b54051d Mon Sep 17 00:00:00 2001 From: Jermy Li Date: Tue, 28 Jul 2020 22:10:03 +0800 Subject: [PATCH 17/33] fix no auth with 
worker thread of olap algo (#27) call graph close instead of closeTx Change-Id: I0e329280b067f34daec69c9b1b2b81a6cd3309bf --- .../hugegraph/job/algorithm/Consumers.java | 34 +++++++++++-------- .../job/algorithm/SubgraphStatAlgorithm.java | 13 +++++-- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Consumers.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Consumers.java index 711e95edc8..1c68413fc0 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Consumers.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Consumers.java @@ -30,6 +30,7 @@ import org.slf4j.Logger; import com.baidu.hugegraph.HugeException; +import com.baidu.hugegraph.task.TaskManager.ContextCallable; import com.baidu.hugegraph.util.ExecutorUtil; import com.baidu.hugegraph.util.Log; @@ -82,24 +83,27 @@ public void start(String name) { LOG.info("Starting {} workers[{}] with queue size {}...", this.workers, name, this.queueSize); for (int i = 0; i < this.workers; i++) { - this.executor.submit(() -> { - try { - this.run(); - this.done(); - } catch (Throwable e) { - // Only the first exception of one thread can be stored - this.exception = e; - if (!(e instanceof StopExecution)) { - LOG.error("Error when running task", e); - } - this.done(); - } finally { - this.latch.countDown(); - } - }); + this.executor.submit(new ContextCallable<>(this::runAndDone)); } } + private Void runAndDone() { + try { + this.run(); + this.done(); + } catch (Throwable e) { + // Only the first exception of one thread can be stored + this.exception = e; + if (!(e instanceof StopExecution)) { + LOG.error("Error when running task", e); + } + this.done(); + } finally { + this.latch.countDown(); + } + return null; + } + private void run() { LOG.debug("Start to work..."); while (!this.ending) { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java index 199d1b020f..a098a85826 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java @@ -25,6 +25,7 @@ import org.apache.commons.configuration.PropertiesConfiguration; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource; import org.apache.tinkerpop.gremlin.structure.Vertex; +import org.slf4j.Logger; import com.baidu.hugegraph.HugeGraph; import com.baidu.hugegraph.StandardHugeGraph; @@ -40,11 +41,11 @@ import com.baidu.hugegraph.job.algorithm.path.RingsDetectAlgorithm; import com.baidu.hugegraph.job.algorithm.rank.PageRankAlgorithm; import com.baidu.hugegraph.task.HugeTask; -import com.baidu.hugegraph.testutil.Whitebox; import com.baidu.hugegraph.traversal.algorithm.HugeTraverser; import com.baidu.hugegraph.traversal.optimize.HugeScriptTraversal; import com.baidu.hugegraph.util.E; import com.baidu.hugegraph.util.InsertionOrderUtil; +import com.baidu.hugegraph.util.Log; import com.google.common.collect.ImmutableMap; public class SubgraphStatAlgorithm extends AbstractAlgorithm { @@ -52,6 +53,8 @@ public class SubgraphStatAlgorithm extends AbstractAlgorithm { public static final String KEY_SUBGRAPH = "subgraph"; public static final String KEY_COPY_SCHEMA = "copy_schema"; + private static final Logger LOG = Log.logger(SubgraphStatAlgorithm.class); + @Override 
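Note: the Consumers change in this patch wraps each worker task in TaskManager.ContextCallable so that the task's user/auth context follows the job onto the pooled worker threads (the "no auth with worker thread" symptom in the commit subject). The sketch below shows only the general propagation pattern; the thread-local name and its type are illustrative assumptions, not the actual TaskManager internals.

    import java.util.concurrent.Callable;

    // Sketch only: capture the submitting thread's context and restore it
    // on the worker thread. "CONTEXT" is a hypothetical thread-local.
    public class ContextPropagatingCallable<V> implements Callable<V> {

        private static final ThreadLocal<String> CONTEXT = new ThreadLocal<>();

        private final String context;        // snapshot taken at submit time
        private final Callable<V> delegate;  // the real work

        public ContextPropagatingCallable(Callable<V> delegate) {
            this.context = CONTEXT.get();
            this.delegate = delegate;
        }

        @Override
        public V call() throws Exception {
            String origin = CONTEXT.get();
            CONTEXT.set(this.context);       // run as the submitting user
            try {
                return this.delegate.call();
            } finally {
                CONTEXT.set(origin);         // leave the worker thread clean
            }
        }
    }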
public String name() { return "subgraph_stat"; @@ -77,8 +80,12 @@ public Object call(UserJob job, Map parameters) { return traverser.subgraphStat(tmpJob); } finally { graph.truncateBackend(); - // FIXME: task thread can't call close() (will hang), use closeTx() - Whitebox.invoke(graph.getClass(), "closeTx", graph); + try { + graph.close(); + } catch (Throwable e) { + LOG.warn("Can't close subgraph_stat temp graph {}: {}", + graph, e.getMessage(), e); + } } } From b0f79814f444c7e49fa8a052cae80cec571783de Mon Sep 17 00:00:00 2001 From: Jermy Li Date: Mon, 3 Aug 2020 11:00:33 +0800 Subject: [PATCH 18/33] fix server id/role NPE in initServerInfo when truncate temp graph (#28) Change-Id: Iedbafa9a31ed3f5b3fb35ccb0c583ec0a0cfc6ac --- .../job/algorithm/AbstractAlgorithm.java | 89 ++++--------------- .../job/algorithm/SubgraphStatAlgorithm.java | 13 ++- .../algorithm/comm/AbstractCommAlgorithm.java | 10 ++- .../job/algorithm/comm/KCoreAlgorithm.java | 5 +- .../job/algorithm/comm/LouvainAlgorithm.java | 5 +- .../algorithm/path/RingsDetectAlgorithm.java | 3 +- .../FusiformSimilarityAlgorithm.java | 16 ++-- 7 files changed, 53 insertions(+), 88 deletions(-) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java index cc245f6f9d..0d9c79a9f8 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java @@ -55,6 +55,7 @@ import com.baidu.hugegraph.util.CollectionUtil; import com.baidu.hugegraph.util.E; import com.baidu.hugegraph.util.JsonUtil; +import com.baidu.hugegraph.util.ParameterUtil; import jersey.repackaged.com.google.common.base.Objects; @@ -114,7 +115,7 @@ public void checkParameters(Map parameters) { } protected static int depth(Map parameters) { - int depth = parameterInt(parameters, KEY_DEPTH); + int depth = ParameterUtil.parameterInt(parameters, KEY_DEPTH); E.checkArgument(depth > 0, "The value of %s must be > 0, but got %s", KEY_DEPTH, depth); @@ -125,14 +126,14 @@ protected static String edgeLabel(Map parameters) { if (!parameters.containsKey(KEY_LABEL)) { return null; } - return parameterString(parameters, KEY_LABEL); + return ParameterUtil.parameterString(parameters, KEY_LABEL); } protected static Directions direction(Map parameters) { if (!parameters.containsKey(KEY_DIRECTION)) { return Directions.BOTH; } - Object direction = parameter(parameters, KEY_DIRECTION); + Object direction = ParameterUtil.parameter(parameters, KEY_DIRECTION); return parseDirection(direction); } @@ -140,7 +141,7 @@ protected static Directions direction4Out(Map parameters) { if (!parameters.containsKey(KEY_DIRECTION)) { return Directions.OUT; } - Object direction = parameter(parameters, KEY_DIRECTION); + Object direction = ParameterUtil.parameter(parameters, KEY_DIRECTION); return parseDirection(direction); } @@ -148,7 +149,7 @@ protected static Directions directionOutIn(Map parameters) { if (!parameters.containsKey(KEY_DIRECTION)) { return Directions.OUT; } - Object direction = parameter(parameters, KEY_DIRECTION); + Object direction = ParameterUtil.parameter(parameters, KEY_DIRECTION); Directions dir = parseDirection(direction); E.checkArgument(dir == Directions.OUT || dir == Directions.IN, "The value of %s must be either OUT or IN, but got: %s", @@ -160,7 +161,7 @@ protected static double alpha(Map parameters) { if (!parameters.containsKey(KEY_ALPHA)) 
{ return DEFAULT_ALPHA; } - double alpha = parameterDouble(parameters, KEY_ALPHA); + double alpha = ParameterUtil.parameterDouble(parameters, KEY_ALPHA); E.checkArgument(alpha > 0.0 && alpha <= 1.0, "The value of %s must be in range (0, 1], but got %s", KEY_ALPHA, alpha); @@ -171,7 +172,7 @@ protected static long top(Map parameters) { if (!parameters.containsKey(KEY_TOP)) { return 0L; } - long top = parameterLong(parameters, KEY_TOP); + long top = ParameterUtil.parameterLong(parameters, KEY_TOP); HugeTraverser.checkNonNegativeOrNoLimit(top, KEY_TOP); return top; } @@ -180,7 +181,7 @@ protected static long degree(Map parameters) { if (!parameters.containsKey(KEY_DEGREE)) { return DEFAULT_DEGREE; } - long degree = parameterLong(parameters, KEY_DEGREE); + long degree = ParameterUtil.parameterLong(parameters, KEY_DEGREE); HugeTraverser.checkDegree(degree); return degree; } @@ -189,7 +190,7 @@ protected static long capacity(Map parameters) { if (!parameters.containsKey(KEY_CAPACITY)) { return DEFAULT_CAPACITY; } - long capacity = parameterLong(parameters, KEY_CAPACITY); + long capacity = ParameterUtil.parameterLong(parameters, KEY_CAPACITY); HugeTraverser.checkCapacity(capacity); return capacity; } @@ -198,7 +199,7 @@ protected static long limit(Map parameters) { if (!parameters.containsKey(KEY_LIMIT)) { return DEFAULT_LIMIT; } - long limit = parameterLong(parameters, KEY_LIMIT); + long limit = ParameterUtil.parameterLong(parameters, KEY_LIMIT); HugeTraverser.checkLimit(limit); return limit; } @@ -207,7 +208,7 @@ protected static long eachLimit(Map parameters) { if (!parameters.containsKey(KEY_EACH_LIMIT)) { return DEFAULT_EACH_LIMIT; } - long limit = parameterLong(parameters, KEY_EACH_LIMIT); + long limit = ParameterUtil.parameterLong(parameters, KEY_EACH_LIMIT); HugeTraverser.checkPositiveOrNoLimit(limit, KEY_EACH_LIMIT); return limit; } @@ -216,7 +217,7 @@ protected static long sample(Map parameters) { if (!parameters.containsKey(KEY_SAMPLE)) { return DEFAULT_SAMPLE; } - long sample = parameterLong(parameters, KEY_SAMPLE); + long sample = ParameterUtil.parameterLong(parameters, KEY_SAMPLE); HugeTraverser.checkPositiveOrNoLimit(sample, KEY_SAMPLE); return sample; } @@ -225,7 +226,8 @@ protected static long sourceSample(Map parameters) { if (!parameters.containsKey(KEY_SOURCE_SAMPLE)) { return HugeTraverser.NO_LIMIT; } - long sample = parameterLong(parameters, KEY_SOURCE_SAMPLE); + long sample = ParameterUtil.parameterLong(parameters, + KEY_SOURCE_SAMPLE); HugeTraverser.checkPositiveOrNoLimit(sample, KEY_SOURCE_SAMPLE); return sample; } @@ -234,79 +236,26 @@ protected static String sourceLabel(Map parameters) { if (!parameters.containsKey(KEY_SOURCE_LABEL)) { return null; } - return parameterString(parameters, KEY_SOURCE_LABEL); + return ParameterUtil.parameterString(parameters, KEY_SOURCE_LABEL); } protected static String sourceCLabel(Map parameters) { if (!parameters.containsKey(KEY_SOURCE_CLABEL)) { return null; } - return parameterString(parameters, KEY_SOURCE_CLABEL); + return ParameterUtil.parameterString(parameters, KEY_SOURCE_CLABEL); } protected static int workers(Map parameters) { if (!parameters.containsKey(KEY_WORKERS)) { return -1; } - int workers = parameterInt(parameters, KEY_WORKERS); + int workers = ParameterUtil.parameterInt(parameters, KEY_WORKERS); HugeTraverser.checkNonNegativeOrNoLimit(workers, KEY_WORKERS); return workers; } - public static Object parameter(Map parameters, String key) { - Object value = parameters.get(key); - E.checkArgument(value != null, - "Expect '%s' 
in parameters: %s", - key, parameters); - return value; - } - - public static String parameterString(Map parameters, - String key) { - Object value = parameter(parameters, key); - E.checkArgument(value instanceof String, - "Expect string value for parameter '%s': '%s'", - key, value); - return (String) value; - } - - public static int parameterInt(Map parameters, - String key) { - Object value = parameter(parameters, key); - E.checkArgument(value instanceof Number, - "Expect int value for parameter '%s': '%s'", - key, value); - return ((Number) value).intValue(); - } - - public static long parameterLong(Map parameters, - String key) { - Object value = parameter(parameters, key); - E.checkArgument(value instanceof Number, - "Expect long value for parameter '%s': '%s'", - key, value); - return ((Number) value).longValue(); - } - - public static double parameterDouble(Map parameters, - String key) { - Object value = parameter(parameters, key); - E.checkArgument(value instanceof Number, - "Expect double value for parameter '%s': '%s'", - key, value); - return ((Number) value).doubleValue(); - } - - public static boolean parameterBoolean(Map parameters, - String key) { - Object value = parameter(parameters, key); - E.checkArgument(value instanceof Boolean, - "Expect boolean value for parameter '%s': '%s'", - key, value); - return ((Boolean) value); - } - - public static Directions parseDirection(Object direction) { + protected static Directions parseDirection(Object direction) { if (direction.equals(Directions.BOTH.toString())) { return Directions.BOTH; } else if (direction.equals(Directions.OUT.toString())) { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java index a098a85826..bf35097346 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java @@ -46,6 +46,7 @@ import com.baidu.hugegraph.util.E; import com.baidu.hugegraph.util.InsertionOrderUtil; import com.baidu.hugegraph.util.Log; +import com.baidu.hugegraph.util.ParameterUtil; import com.google.common.collect.ImmutableMap; public class SubgraphStatAlgorithm extends AbstractAlgorithm { @@ -79,7 +80,8 @@ public Object call(UserJob job, Map parameters) { UserJob tmpJob = new TempJob<>(graph, job, job.task()); return traverser.subgraphStat(tmpJob); } finally { - graph.truncateBackend(); + // Use clearBackend instead of truncateBackend due to no server-id + graph.clearBackend(); try { graph.close(); } catch (Throwable e) { @@ -91,10 +93,15 @@ public Object call(UserJob job, Map parameters) { private HugeGraph createTempGraph(UserJob job) { Id id = job.task().id(); + String name = "tmp_" + id; PropertiesConfiguration config = new PropertiesConfiguration(); config.setProperty(CoreOptions.BACKEND.name(), "memory"); - config.setProperty(CoreOptions.STORE.name(), "tmp_" + id); + config.setProperty(CoreOptions.STORE.name(), name); config.setDelimiterParsingDisabled(true); + /* + * NOTE: this temp graph don't need to init backend because no task info + * required, also not set started because no task to be scheduled. 
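Note: the parameter/parameterString/parameterInt/parameterLong/parameterDouble/parameterBoolean helpers removed from AbstractAlgorithm in this patch now live in com.baidu.hugegraph.util.ParameterUtil, and all call sites are redirected there. The condensed sketch below mirrors the removed checks but uses plain IllegalArgumentException instead of E.checkArgument so it compiles standalone; the real ParameterUtil may differ in details.

    import java.util.Map;

    // Sketch of the typed-parameter helpers that moved to ParameterUtil.
    public final class ParameterUtilSketch {

        public static Object parameter(Map<String, Object> parameters, String key) {
            Object value = parameters.get(key);
            if (value == null) {
                throw new IllegalArgumentException(String.format(
                          "Expect '%s' in parameters: %s", key, parameters));
            }
            return value;
        }

        public static int parameterInt(Map<String, Object> parameters, String key) {
            Object value = parameter(parameters, key);
            if (!(value instanceof Number)) {
                throw new IllegalArgumentException(String.format(
                          "Expect int value for parameter '%s': '%s'", key, value));
            }
            return ((Number) value).intValue();
        }

        public static boolean parameterBoolean(Map<String, Object> parameters,
                                               String key) {
            Object value = parameter(parameters, key);
            if (!(value instanceof Boolean)) {
                throw new IllegalArgumentException(String.format(
                          "Expect boolean value for parameter '%s': '%s'", key, value));
            }
            return (Boolean) value;
        }

        private ParameterUtilSketch() {
        }
    }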
+ */ return new StandardHugeGraph(new HugeConfig(config)); } @@ -124,7 +131,7 @@ protected static boolean copySchema(Map parameters) { if (!parameters.containsKey(KEY_COPY_SCHEMA)) { return false; } - return parameterBoolean(parameters, KEY_COPY_SCHEMA); + return ParameterUtil.parameterBoolean(parameters, KEY_COPY_SCHEMA); } private static class Traverser extends AlgoTraverser { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/AbstractCommAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/AbstractCommAlgorithm.java index 74b884a063..82bba9bc3d 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/AbstractCommAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/AbstractCommAlgorithm.java @@ -24,6 +24,7 @@ import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm; import com.baidu.hugegraph.traversal.algorithm.HugeTraverser; import com.baidu.hugegraph.util.E; +import com.baidu.hugegraph.util.ParameterUtil; public abstract class AbstractCommAlgorithm extends AbstractAlgorithm { @@ -38,7 +39,7 @@ protected static int times(Map parameters) { if (!parameters.containsKey(KEY_TIMES)) { return (int) DEFAULT_TIMES; } - int times = parameterInt(parameters, KEY_TIMES); + int times = ParameterUtil.parameterInt(parameters, KEY_TIMES); HugeTraverser.checkPositiveOrNoLimit(times, KEY_TIMES); E.checkArgument(times <= MAX_TIMES, "The maximum number of iterations is %s, but got %s", @@ -50,7 +51,7 @@ protected static int stableTimes(Map parameters) { if (!parameters.containsKey(KEY_STABLE_TIMES)) { return (int) DEFAULT_STABLE_TIMES; } - int times = parameterInt(parameters, KEY_STABLE_TIMES); + int times = ParameterUtil.parameterInt(parameters, KEY_STABLE_TIMES); HugeTraverser.checkPositiveOrNoLimit(times, KEY_STABLE_TIMES); E.checkArgument(times <= MAX_TIMES, "The maximum number of stable iterations is %s, " + @@ -62,7 +63,8 @@ protected static double precision(Map parameters) { if (!parameters.containsKey(KEY_PRECISION)) { return DEFAULT_PRECISION; } - double precision = parameterDouble(parameters, KEY_PRECISION); + double precision = ParameterUtil.parameterDouble(parameters, + KEY_PRECISION); E.checkArgument(0d < precision && precision < 1d, "The %s parameter must be in range(0,1), but got: %s", KEY_PRECISION, precision); @@ -73,6 +75,6 @@ protected static String showCommunity(Map parameters) { if (!parameters.containsKey(KEY_SHOW_COMM)) { return null; } - return parameterString(parameters, KEY_SHOW_COMM); + return ParameterUtil.parameterString(parameters, KEY_SHOW_COMM); } } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java index 5080523549..f03db565ed 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java @@ -40,6 +40,7 @@ import com.baidu.hugegraph.util.CollectionUtil; import com.baidu.hugegraph.util.E; import com.baidu.hugegraph.util.JsonUtil; +import com.baidu.hugegraph.util.ParameterUtil; import com.google.common.collect.ImmutableSet; public class KCoreAlgorithm extends AbstractCommAlgorithm { @@ -88,7 +89,7 @@ protected static int k(Map parameters) { if (!parameters.containsKey(KEY_K)) { return DEFAULT_K; } - int k = parameterInt(parameters, KEY_K); + int k = 
ParameterUtil.parameterInt(parameters, KEY_K); E.checkArgument(k > 1, "The k of kcore must be > 1, but got %s", k); return k; } @@ -97,7 +98,7 @@ protected static boolean merged(Map parameters) { if (!parameters.containsKey(KEY_MERGED)) { return false; } - return parameterBoolean(parameters, KEY_MERGED); + return ParameterUtil.parameterBoolean(parameters, KEY_MERGED); } private static class Traverser extends AlgoTraverser { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java index f05f85e56c..ab6e0f2143 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java @@ -23,6 +23,7 @@ import com.baidu.hugegraph.job.UserJob; import com.baidu.hugegraph.traversal.algorithm.HugeTraverser; +import com.baidu.hugegraph.util.ParameterUtil; public class LouvainAlgorithm extends AbstractCommAlgorithm { @@ -82,7 +83,7 @@ protected static Long clearPass(Map parameters) { if (!parameters.containsKey(KEY_CLEAR)) { return null; } - long pass = parameterLong(parameters, KEY_CLEAR); + long pass = ParameterUtil.parameterLong(parameters, KEY_CLEAR); HugeTraverser.checkNonNegativeOrNoLimit(pass, KEY_CLEAR); return pass; } @@ -91,7 +92,7 @@ protected static Long showModularity(Map parameters) { if (!parameters.containsKey(KEY_SHOW_MOD)) { return null; } - long pass = parameterLong(parameters, KEY_SHOW_MOD); + long pass = ParameterUtil.parameterLong(parameters, KEY_SHOW_MOD); HugeTraverser.checkNonNegative(pass, KEY_SHOW_MOD); return pass; } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java index d228b70a2f..e6c6435c42 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/path/RingsDetectAlgorithm.java @@ -29,6 +29,7 @@ import com.baidu.hugegraph.traversal.algorithm.SubGraphTraverser; import com.baidu.hugegraph.type.define.Directions; import com.baidu.hugegraph.util.JsonUtil; +import com.baidu.hugegraph.util.ParameterUtil; public class RingsDetectAlgorithm extends AbstractAlgorithm { @@ -80,7 +81,7 @@ protected boolean countOnly(Map parameters) { if (!parameters.containsKey(KEY_COUNT_ONLY)) { return false; } - return parameterBoolean(parameters, KEY_COUNT_ONLY); + return ParameterUtil.parameterBoolean(parameters, KEY_COUNT_ONLY); } private static class Traverser extends AlgoTraverser { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java index daf5b09aa2..0a1679fcfd 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java @@ -34,6 +34,7 @@ import com.baidu.hugegraph.traversal.algorithm.HugeTraverser; import com.baidu.hugegraph.type.define.Directions; import com.baidu.hugegraph.util.JsonUtil; +import com.baidu.hugegraph.util.ParameterUtil; public class FusiformSimilarityAlgorithm extends AbstractAlgorithm { @@ -100,7 +101,8 
@@ protected static int minNeighbors(Map parameters) { if (!parameters.containsKey(KEY_MIN_NEIGHBORS)) { return DEFAULT_MIN_NEIGHBORS; } - int minNeighbors = parameterInt(parameters, KEY_MIN_NEIGHBORS); + int minNeighbors = ParameterUtil.parameterInt(parameters, + KEY_MIN_NEIGHBORS); HugeTraverser.checkPositive(minNeighbors, KEY_MIN_NEIGHBORS); return minNeighbors; } @@ -109,7 +111,8 @@ protected static int minSimilars(Map parameters) { if (!parameters.containsKey(KEY_MIN_SIMILARS)) { return DEFAULT_MIN_SIMILARS; } - int minSimilars = parameterInt(parameters, KEY_MIN_SIMILARS); + int minSimilars = ParameterUtil.parameterInt(parameters, + KEY_MIN_SIMILARS); HugeTraverser.checkPositive(minSimilars, KEY_MIN_SIMILARS); return minSimilars; } @@ -118,7 +121,8 @@ protected static int topSimilars(Map parameters) { if (!parameters.containsKey(KEY_TOP_SIMILARS)) { return DEFAULT_TOP_SIMILARS; } - int minSimilars = parameterInt(parameters, KEY_TOP_SIMILARS); + int minSimilars = ParameterUtil.parameterInt(parameters, + KEY_TOP_SIMILARS); HugeTraverser.checkNonNegative(minSimilars, KEY_TOP_SIMILARS); return minSimilars; } @@ -127,14 +131,14 @@ protected static String groupProperty(Map parameters) { if (!parameters.containsKey(KEY_GROUP_PROPERTY)) { return null; } - return parameterString(parameters, KEY_GROUP_PROPERTY); + return ParameterUtil.parameterString(parameters, KEY_GROUP_PROPERTY); } protected static int minGroups(Map parameters) { if (!parameters.containsKey(KEY_MIN_GROUPS)) { return DEFAULT_MIN_GROUPS; } - int minGroups = parameterInt(parameters, KEY_MIN_GROUPS); + int minGroups = ParameterUtil.parameterInt(parameters, KEY_MIN_GROUPS); HugeTraverser.checkPositive(minGroups, KEY_MIN_GROUPS); return minGroups; } @@ -143,7 +147,7 @@ protected static long limit(Map parameters) { if (!parameters.containsKey(KEY_LIMIT)) { return DEFAULT_LIMIT; } - long limit = parameterLong(parameters, KEY_LIMIT); + long limit = ParameterUtil.parameterLong(parameters, KEY_LIMIT); HugeTraverser.checkLimit(limit); return limit; } From 320402a2209b38762c301c9ede06150c92fc679a Mon Sep 17 00:00:00 2001 From: houzhizhen Date: Tue, 11 Aug 2020 08:54:18 +0800 Subject: [PATCH 19/33] add parameter top to print the top result in job result (#32) * add parameter top to print the top result in job result --- .../job/algorithm/rank/PageRankAlgorithm.java | 50 +++++++++++++++++-- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/rank/PageRankAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/rank/PageRankAlgorithm.java index fa552e3997..651aa91259 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/rank/PageRankAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/rank/PageRankAlgorithm.java @@ -19,6 +19,8 @@ package com.baidu.hugegraph.job.algorithm.rank; +import com.baidu.hugegraph.traversal.algorithm.HugeTraverser; + import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; @@ -61,6 +63,7 @@ public void checkParameters(Map parameters) { precision(parameters); degree(parameters); directionOutIn(parameters); + top(parameters); } @Override @@ -70,7 +73,8 @@ public Object call(UserJob job, Map parameters) { times(parameters), precision(parameters), degree(parameters), - directionOutIn(parameters)); + directionOutIn(parameters), + top(parameters)); } catch (Throwable e) { job.graph().tx().rollback(); throw e; @@ -90,11 +94,15 @@ public Traverser(UserJob 
job) { this.vertexRankMap = new HashMap<>(); } - public Object pageRank(double alpha, + /** + * If topN > 0, then return topN elements with rank value in json. + */ + private Object pageRank(double alpha, int maxTimes, double precision, long degree, - Directions direction) { + Directions direction, + long topN) { this.initSchema(); int times; @@ -146,12 +154,32 @@ public Object pageRank(double alpha, this.writeBackRankValues(); + if (topN > 0) { + Object topNJson = this.getTopRank(topN); + return ImmutableMap.of("alpha", alpha, + "iteration_times", times, + "last_changed_rank", changedRank, + "times", maxTimes, + "top", topNJson); + } return ImmutableMap.of("alpha", alpha, "iteration_times", times, "last_changed_rank", changedRank, "times", maxTimes); } + private Object getTopRank(long topN) { + JsonMap jsonMap = new JsonMap(); + jsonMap.startObject(); + Map topNMap = + HugeTraverser.topN(this.vertexRankMap, true, topN); + for (Map.Entry e : topNMap.entrySet()) { + jsonMap.append(e.getKey().toString(), e.getValue().left); + } + jsonMap.endObject(); + return jsonMap.asJson(); + } + private long initRankMap() { long vertexCount = 0; Iterator vertices = this.vertices(); @@ -239,12 +267,12 @@ private double computeRank(double alpha, long numOfVertices) { } } - public static class DoublePair { + public static class DoublePair implements Comparable { private double left; private double right; - public DoublePair(double left, double right) { + private DoublePair(double left, double right) { this.left = left; this.right = right; } @@ -294,5 +322,17 @@ public boolean equals(Object obj) { public int hashCode() { return Double.hashCode(this.left) ^ Double.hashCode(this.right); } + + // only left saves the rank value. + @Override + public int compareTo(DoublePair o) { + double result = this.left - o.left; + if (result > 0.0) { + return 1; + } else if (result < 0.0) { + return -1; + } + return 0; + } } } From 5def931f346a5d61aea604e6a5faf8de1f54f9b2 Mon Sep 17 00:00:00 2001 From: Jermy Li Date: Fri, 14 Aug 2020 18:02:11 +0800 Subject: [PATCH 20/33] add skipi_isolated param for louvain (#37) Change-Id: Ib24ede9c20bb2c23a3f06fe72c53be2342295fd4 --- .../hugegraph/job/algorithm/AbstractAlgorithm.java | 1 + .../job/algorithm/comm/LouvainAlgorithm.java | 11 ++++++++++- .../job/algorithm/comm/LouvainTraverser.java | 13 ++++++++++--- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java index 0d9c79a9f8..ffe55e8f6f 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java @@ -88,6 +88,7 @@ public abstract class AbstractAlgorithm implements Algorithm { public static final String KEY_PRECISION = "precision"; public static final String KEY_SHOW_MOD= "show_modularity"; public static final String KEY_SHOW_COMM = "show_community"; + public static final String KEY_SKIP_ISOLATED = "skip_isolated"; public static final String KEY_CLEAR = "clear"; public static final String KEY_CAPACITY = "capacity"; public static final String KEY_LIMIT = "limit"; diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java index ab6e0f2143..3b3b0a6b8f 100644 --- 
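Note: PATCH 19 adds the top parameter so the PageRank job result can include the N highest-ranked vertices, selected via HugeTraverser.topN over the rank map with a Comparable DoublePair (only the left value, the rank, is compared). The JDK-only sketch below shows an equivalent top-N-by-value selection; it is illustrative and not the HugeTraverser implementation.

    import java.util.Comparator;
    import java.util.LinkedHashMap;
    import java.util.Map;
    import java.util.stream.Collectors;

    // Sketch: pick the N highest ranks, preserving descending order.
    public class TopRankSketch {

        public static Map<String, Double> topN(Map<String, Double> ranks, long n) {
            return ranks.entrySet().stream()
                        .sorted(Map.Entry.<String, Double>comparingByValue(
                                Comparator.reverseOrder()))
                        .limit(n)
                        .collect(Collectors.toMap(Map.Entry::getKey,
                                                  Map.Entry::getValue,
                                                  (a, b) -> a,
                                                  LinkedHashMap::new));
        }

        public static void main(String[] args) {
            Map<String, Double> ranks = Map.of("A", 0.38, "B", 0.12, "C", 0.25);
            System.out.println(topN(ranks, 2)); // {A=0.38, C=0.25}
        }
    }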
a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java @@ -44,6 +44,7 @@ public void checkParameters(Map parameters) { sourceCLabel(parameters); showModularity(parameters); showCommunity(parameters); + skipIsolated(parameters); clearPass(parameters); workers(parameters); } @@ -53,6 +54,7 @@ public Object call(UserJob job, Map parameters) { String label = sourceLabel(parameters); String clabel = sourceCLabel(parameters); long degree = degree(parameters); + boolean skipIsolated = skipIsolated(parameters); int workers = workers(parameters); Long clearPass = clearPass(parameters); @@ -61,7 +63,7 @@ public Object call(UserJob job, Map parameters) { try (LouvainTraverser traverser = new LouvainTraverser( job, workers, degree, - label, clabel)) { + label, clabel, skipIsolated)) { if (clearPass != null) { return traverser.clearPass(clearPass.intValue()); } else if (modPass != null) { @@ -96,4 +98,11 @@ protected static Long showModularity(Map parameters) { HugeTraverser.checkNonNegative(pass, KEY_SHOW_MOD); return pass; } + + protected static boolean skipIsolated(Map parameters) { + if (!parameters.containsKey(KEY_SKIP_ISOLATED)) { + return true; + } + return ParameterUtil.parameterBoolean(parameters, KEY_SKIP_ISOLATED); + } } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java index 3a8a83f53a..c6cd24fab6 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java @@ -78,18 +78,22 @@ public class LouvainTraverser extends AlgoTraverser { private final String sourceLabel; private final String sourceCLabel; private final long degree; + private final boolean skipIsolated; + private final Cache cache; private long m; private String passLabel; public LouvainTraverser(UserJob job, int workers, long degree, - String sourceLabel, String sourceCLabel) { + String sourceLabel, String sourceCLabel, + boolean skipIsolated) { super(job, LouvainAlgorithm.ALGO_NAME, workers); this.g = this.graph().traversal(); this.sourceLabel = sourceLabel; this.sourceCLabel = sourceCLabel; this.degree = degree; + this.skipIsolated = skipIsolated; this.m = 1L; this.passLabel = ""; @@ -355,6 +359,9 @@ private void doMoveCommunity(Vertex v, List nbs, Community newC) { private boolean moveCommunity(Vertex v, int pass) { // move vertex to neighbor community if needed List nbs = neighbors((Id) v.id()); + if (this.skipIsolated && pass == 0 && nbs.isEmpty()) { + return false; + } Community c = communityOfVertex(v, nbs); double ki = kinOfVertex(v) + weightOfVertex(v, nbs); // update community of v if △Q changed @@ -448,7 +455,7 @@ private void mergeCommunities(int pass) { LOG.info("Merge community for pass {}", pass); // merge each community as a vertex Collection>> comms = this.cache.communities(); - assert this.allMembersExist(comms, pass - 1); + assert this.skipIsolated || this.allMembersExist(comms, pass - 1); this.cache.resetVertexWeight(); Consumers>> consumers = new Consumers<>( @@ -479,7 +486,7 @@ private void mergeCommunities(int pass) { } this.graph().tx().commit(); - assert this.allMembersExist(pass); + assert this.skipIsolated || this.allMembersExist(pass); // reset communities this.cache.reset(); From 
4cd6eb390613f4932a7129762c8350673eccab2a Mon Sep 17 00:00:00 2001 From: Jermy Li Date: Sun, 16 Aug 2020 19:30:16 +0800 Subject: [PATCH 21/33] fix DegreeCentrality degree count limit OOL (#39) Change-Id: I1b055130c75edce039a97822d9154e1214bc80c2 --- .../algorithm/cent/DegreeCentralityAlgorithm.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java index b2030e8453..54b72c2bff 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java @@ -19,16 +19,18 @@ package com.baidu.hugegraph.job.algorithm.cent; +import java.util.Arrays; import java.util.Iterator; +import java.util.List; import java.util.Map; import org.apache.tinkerpop.gremlin.structure.Edge; import org.apache.tinkerpop.gremlin.structure.Vertex; -import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils; import com.baidu.hugegraph.backend.id.Id; import com.baidu.hugegraph.job.UserJob; import com.baidu.hugegraph.structure.HugeEdge; +import com.baidu.hugegraph.traversal.algorithm.EdgeStep; import com.baidu.hugegraph.type.define.Directions; public class DegreeCentralityAlgorithm extends AbstractCentAlgorithm { @@ -153,10 +155,10 @@ protected Object degreeCentralityForBothDir(String label, long topN) { } private long degree(Id source, String label) { - Id labelId = this.getEdgeLabelId(label); - Iterator edges = this.edgesOfVertex(source, Directions.BOTH, - labelId, NO_LIMIT); - return IteratorUtils.count(edges); + List labels = label == null ? 
null : Arrays.asList(label); + EdgeStep step = new EdgeStep(this.graph(), Directions.BOTH, + labels, null, NO_LIMIT, 0); + return this.edgesCount(source, step); } } } From 52fa08b22034be8bab31e56c6353862f2033f440 Mon Sep 17 00:00:00 2001 From: Jermy Li Date: Wed, 19 Aug 2020 12:58:50 +0800 Subject: [PATCH 22/33] add with_boundary parameter for betweeness (#42) * improve betweeness by remove boundary vertex of path Change-Id: I76924daf8d9da113ab7a1aeac536c6080eccb296 * add with_boundary parameter for betweeness also fix lpa count comms with limit Change-Id: Iaf675cd87a8dc0b5ef75476144bc8141f2dd4385 --- .../job/algorithm/AbstractAlgorithm.java | 11 ++++++ .../algorithm/cent/AbstractCentAlgorithm.java | 35 +++++++++++++++++++ .../cent/BetweenessCentralityAlgorithm.java | 23 ++++++++++-- .../job/algorithm/comm/LouvainTraverser.java | 11 ------ .../job/algorithm/comm/LpaAlgorithm.java | 6 ++-- 5 files changed, 69 insertions(+), 17 deletions(-) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java index ffe55e8f6f..064b1e344c 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java @@ -23,6 +23,7 @@ import java.util.Iterator; import java.util.Map; import java.util.Map.Entry; +import java.util.NoSuchElementException; import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.ExecutorService; @@ -454,6 +455,16 @@ protected V execute(GraphTraversal traversal, } } + protected Number tryNext(GraphTraversal iter) { + return this.execute(iter, () -> { + try { + return iter.next(); + } catch (NoSuchElementException e) { + return 0; + } + }); + } + protected void commitIfNeeded() { // commit if needed Transaction tx = this.graph().tx(); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java index 19da8e968e..fd8453fff3 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java @@ -19,7 +19,9 @@ package com.baidu.hugegraph.job.algorithm.cent; +import java.util.Collections; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; @@ -32,15 +34,21 @@ import org.apache.tinkerpop.gremlin.structure.Column; import org.apache.tinkerpop.gremlin.structure.Direction; import org.apache.tinkerpop.gremlin.structure.Vertex; +import org.slf4j.Logger; import com.baidu.hugegraph.backend.id.Id; +import com.baidu.hugegraph.iterator.MapperIterator; import com.baidu.hugegraph.job.UserJob; import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm; import com.baidu.hugegraph.structure.HugeElement; +import com.baidu.hugegraph.structure.HugeVertex; import com.baidu.hugegraph.type.define.Directions; +import com.baidu.hugegraph.util.Log; public abstract class AbstractCentAlgorithm extends AbstractAlgorithm { + private static final Logger LOG = Log.logger(AbstractCentAlgorithm.class); + @Override public String category() { return CATEGORY_CENT; @@ -161,6 +169,33 @@ protected GraphTraversal filterNonShortestPath( }); } + protected GraphTraversal substractPath( + GraphTraversal t, + boolean withBoundary) { + 
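Note: the DegreeCentrality fix in PATCH 21 replaces IteratorUtils.count over a vertex's edge iterator with edgesCount over an EdgeStep, so the degree comes from an aggregated count rather than from an iterator subject to the per-query limit (the "degree count limit" issue in the commit subject). The plain-TinkerPop contrast below illustrates the difference only; it does not use the internal HugeGraph traverser API.

    import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource;
    import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils;

    public class DegreeCountSketch {

        // Undercounts any vertex whose degree exceeds the limit
        static long cappedDegree(GraphTraversalSource g, Object vertexId, long limit) {
            return IteratorUtils.count(g.V(vertexId).bothE().limit(limit));
        }

        // Lets the backend aggregate the full count instead
        static long degree(GraphTraversalSource g, Object vertexId) {
            return g.V(vertexId).bothE().count().next();
        }
    }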
// t.select(Pop.all, "v").unfold().id() + return t.select(Pop.all, "v").flatMap(it -> { + List path = (List) it.get(); + if (withBoundary) { + @SuppressWarnings("unchecked") + Iterator items = (Iterator) + path.iterator(); + return new MapperIterator<>(items, v -> v.id()); + } + int len = path.size(); + if (len < 3) { + return Collections.emptyIterator(); + } + + LOG.debug("CentAlgorithm substract path: {}", path); + path.remove(path.size() -1); + path.remove(0); + @SuppressWarnings("unchecked") + Iterator items = (Iterator) + path.iterator(); + return new MapperIterator<>(items, v -> v.id()); + }); + } + protected GraphTraversal topN(GraphTraversal t, long topN) { if (topN > 0L || topN == NO_LIMIT) { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java index 465c6f96c9..968f636bae 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java @@ -22,21 +22,29 @@ import java.util.Map; import org.apache.tinkerpop.gremlin.process.traversal.P; -import org.apache.tinkerpop.gremlin.process.traversal.Pop; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; import org.apache.tinkerpop.gremlin.structure.Vertex; import com.baidu.hugegraph.job.UserJob; import com.baidu.hugegraph.type.define.Directions; +import com.baidu.hugegraph.util.ParameterUtil; public class BetweenessCentralityAlgorithm extends AbstractCentAlgorithm { + public static final String KEY_WITH_BOUNDARY = "with_boundary"; + @Override public String name() { return "betweeness_centrality"; } + @Override + public void checkParameters(Map parameters) { + super.checkParameters(parameters); + withBoundary(parameters); + } + @Override public Object call(UserJob job, Map parameters) { try (Traverser traverser = new Traverser(job)) { @@ -45,6 +53,7 @@ public Object call(UserJob job, Map parameters) { depth(parameters), degree(parameters), sample(parameters), + withBoundary(parameters), sourceLabel(parameters), sourceSample(parameters), sourceCLabel(parameters), @@ -52,6 +61,13 @@ public Object call(UserJob job, Map parameters) { } } + protected static boolean withBoundary(Map parameters) { + if (!parameters.containsKey(KEY_WITH_BOUNDARY)) { + return false; + } + return ParameterUtil.parameterBoolean(parameters, KEY_WITH_BOUNDARY); + } + private static class Traverser extends AbstractCentAlgorithm.Traverser { public Traverser(UserJob job) { @@ -63,6 +79,7 @@ public Object betweenessCentrality(Directions direction, int depth, long degree, long sample, + boolean withBoundary, String sourceLabel, long sourceSample, String sourceCLabel, @@ -79,8 +96,8 @@ public Object betweenessCentrality(Directions direction, t = t.emit().until(__.loops().is(P.gte(depth))); t = filterNonShortestPath(t, false); - GraphTraversal tg = t.select(Pop.all, "v") - .unfold().id().groupCount(); + GraphTraversal tg = this.substractPath(t, withBoundary) + .groupCount(); GraphTraversal tLimit = topN(tg, topN); return this.execute(tLimit, () -> tLimit.next()); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java index 
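Note: substractPath drops the first and last vertex of each shortest path before the groupCount, so with with_boundary=false (the default) only interior vertices accumulate centrality, and paths with fewer than three vertices contribute nothing. A small standalone sketch of that idea, using plain String ids instead of HugeGraph Ids:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    // Sketch: count vertex occurrences on a path, optionally excluding
    // the two path endpoints.
    public class PathBoundarySketch {

        static void countPath(List<String> path, boolean withBoundary,
                              Map<String, Long> counts) {
            List<String> vertices = new ArrayList<>(path);
            if (!withBoundary) {
                if (vertices.size() < 3) {
                    return;              // nothing left after dropping endpoints
                }
                vertices.remove(vertices.size() - 1);
                vertices.remove(0);
            }
            for (String v : vertices) {
                counts.merge(v, 1L, Long::sum);
            }
        }

        public static void main(String[] args) {
            Map<String, Long> counts = new HashMap<>();
            countPath(Arrays.asList("A", "B", "C", "D"), false, counts);
            System.out.println(counts); // {B=1, C=1}: endpoints A and D excluded
        }
    }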
c6cd24fab6..42ac3e9906 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java @@ -28,7 +28,6 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; -import java.util.NoSuchElementException; import java.util.Objects; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; @@ -649,16 +648,6 @@ private double modularity(String label) { return q; } - private Number tryNext(GraphTraversal iter) { - return this.execute(iter, () -> { - try { - return iter.next(); - } catch (NoSuchElementException e) { - return 0; - } - }); - } - public Collection showCommunity(String community) { final String C_PASS0 = labelOfPassN(0); Collection comms = Arrays.asList(community); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java index 8b54241fea..973e93f7b2 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java @@ -117,9 +117,9 @@ public Object lpa(String sourceLabel, String edgeLabel, } } - long communities = this.graph().traversal().V().limit(100000L) - .groupCount().by(C_LABEL) - .count(Scope.local).next(); + Number communities = tryNext(this.graph().traversal().V() + .groupCount().by(C_LABEL) + .count(Scope.local)); return ImmutableMap.of("iteration_times", times, "last_precision", changedPercent, "times", maxTimes, From 029f66c7ff8a5301f86c564a1f9df31d1f339d92 Mon Sep 17 00:00:00 2001 From: Jermy Li Date: Thu, 20 Aug 2020 18:09:56 +0800 Subject: [PATCH 23/33] add export_community for lounvain (#43) Change-Id: I01e402fc99669f53544279c752f81d886c6ce28f --- .../job/algorithm/AbstractAlgorithm.java | 10 ++++- .../job/algorithm/comm/LouvainAlgorithm.java | 15 +++++++ .../job/algorithm/comm/LouvainTraverser.java | 39 ++++++++++++++++++- 3 files changed, 62 insertions(+), 2 deletions(-) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java index 064b1e344c..943debb4b8 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java @@ -68,6 +68,9 @@ public abstract class AbstractAlgorithm implements Algorithm { public static final long MAX_CAPACITY = MAX_QUERY_LIMIT; public static final int BATCH = 500; + public static final String USER_DIR = System.getProperty("user.dir"); + public static final String EXPORT_PATH = USER_DIR + "/export"; + public static final String CATEGORY_AGGR = "aggregate"; public static final String CATEGORY_PATH = "path"; public static final String CATEGORY_RANK = "rank"; @@ -89,6 +92,7 @@ public abstract class AbstractAlgorithm implements Algorithm { public static final String KEY_PRECISION = "precision"; public static final String KEY_SHOW_MOD= "show_modularity"; public static final String KEY_SHOW_COMM = "show_community"; + public static final String KEY_EXPORT_COMM = "export_community"; public static final String KEY_SKIP_ISOLATED = "skip_isolated"; public static final String KEY_CLEAR = "clear"; public static final String KEY_CAPACITY = "capacity"; @@ -287,7 +291,7 @@ public 
AlgoTraverser(UserJob job) { protected AlgoTraverser(UserJob job, String name, int workers) { super(job.graph()); this.job = job; - String prefix = name + "-" + job.task().id(); + String prefix = name + "-" + this.jobId(); this.executor = Consumers.newThreadPool(prefix, workers); } @@ -295,6 +299,10 @@ public void updateProgress(long progress) { this.job.updateProgress((int) progress); } + public Id jobId() { + return this.job.task().id(); + } + @Override public void close() { if (this.executor != null) { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java index 3b3b0a6b8f..3789d6a19f 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainAlgorithm.java @@ -44,6 +44,7 @@ public void checkParameters(Map parameters) { sourceCLabel(parameters); showModularity(parameters); showCommunity(parameters); + exportCommunity(parameters); skipIsolated(parameters); clearPass(parameters); workers(parameters); @@ -60,6 +61,7 @@ public Object call(UserJob job, Map parameters) { Long clearPass = clearPass(parameters); Long modPass = showModularity(parameters); String showComm = showCommunity(parameters); + Long exportPass = exportCommunity(parameters); try (LouvainTraverser traverser = new LouvainTraverser( job, workers, degree, @@ -68,6 +70,10 @@ public Object call(UserJob job, Map parameters) { return traverser.clearPass(clearPass.intValue()); } else if (modPass != null) { return traverser.modularity(modPass.intValue()); + } else if (exportPass != null) { + boolean vertexFirst = showComm == null; + int pass = exportPass.intValue(); + return traverser.exportCommunity(pass, vertexFirst); } else if (showComm != null) { return traverser.showCommunity(showComm); } else { @@ -99,6 +105,15 @@ protected static Long showModularity(Map parameters) { return pass; } + protected static Long exportCommunity(Map parameters) { + if (!parameters.containsKey(KEY_EXPORT_COMM)) { + return null; + } + long pass = ParameterUtil.parameterLong(parameters, KEY_EXPORT_COMM); + HugeTraverser.checkNonNegative(pass, KEY_EXPORT_COMM); + return pass; + } + protected static boolean skipIsolated(Map parameters) { if (!parameters.containsKey(KEY_SKIP_ISOLATED)) { return true; diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java index 42ac3e9906..4359d46b80 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java @@ -19,6 +19,9 @@ package com.baidu.hugegraph.job.algorithm.comm; +import java.io.BufferedOutputStream; +import java.io.FileOutputStream; +import java.io.OutputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -59,6 +62,7 @@ import com.baidu.hugegraph.type.define.Directions; import com.baidu.hugegraph.util.InsertionOrderUtil; import com.baidu.hugegraph.util.Log; +import com.baidu.hugegraph.util.StringEncoding; import com.google.common.collect.ImmutableMap; public class LouvainTraverser extends AlgoTraverser { @@ -660,7 +664,7 @@ public Collection showCommunity(String community) { Vertex sub = subComms.next(); if 
(sub.property(C_MEMBERS).isPresent()) { Set members = sub.value(C_MEMBERS); - reachPass0 = sub.label().equals(C_PASS0); + reachPass0 = sub.label().equals(C_PASS0); comms.addAll(members); } } @@ -668,6 +672,39 @@ public Collection showCommunity(String community) { return comms; } + public long exportCommunity(int pass, boolean vertexFirst) { + String exportFile = String.format("%s/louvain-%s.txt", + LouvainAlgorithm.EXPORT_PATH, + this.jobId()); + String label = labelOfPassN(pass); + GraphTraversal t = this.g.V().hasLabel(label); + this.execute(t, () -> { + try (OutputStream os = new FileOutputStream(exportFile); + BufferedOutputStream bos = new BufferedOutputStream(os)) { + while (t.hasNext()) { + String comm = t.next().id().toString(); + Collection members = this.showCommunity(comm); + if (vertexFirst) { + for (Object member : members) { + bos.write(StringEncoding.encode(member.toString())); + bos.write(StringEncoding.encode("\t")); + bos.write(StringEncoding.encode(comm)); + bos.write(StringEncoding.encode("\n")); + } + } else { + bos.write(StringEncoding.encode(comm)); + bos.write(StringEncoding.encode(": ")); + bos.write(StringEncoding.encode(members.toString())); + bos.write(StringEncoding.encode("\n")); + } + } + } + return null; + }); + + return this.progress; + } + public long clearPass(int pass) { GraphTraversal te = this.g.E(); if (pass < 0) { From 65c37984e07be7a0f30f0c74f1307a60fb0e7be6 Mon Sep 17 00:00:00 2001 From: houzhizhen Date: Mon, 21 Sep 2020 14:03:22 +0800 Subject: [PATCH 24/33] update Betweenness with Stressness (#60) --- .../job/algorithm/AlgorithmPool.java | 4 +- .../job/algorithm/SubgraphStatAlgorithm.java | 6 +-- .../algorithm/cent/AbstractCentAlgorithm.java | 1 + ...hm.java => StressCentralityAlgorithm.java} | 44 +++++++++---------- 4 files changed, 28 insertions(+), 27 deletions(-) rename hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/{BetweenessCentralityAlgorithm.java => StressCentralityAlgorithm.java} (68%) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java index 9a84120772..3252936699 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java @@ -22,7 +22,7 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; -import com.baidu.hugegraph.job.algorithm.cent.BetweenessCentralityAlgorithm; +import com.baidu.hugegraph.job.algorithm.cent.StressCentralityAlgorithm; import com.baidu.hugegraph.job.algorithm.cent.ClosenessCentralityAlgorithm; import com.baidu.hugegraph.job.algorithm.cent.DegreeCentralityAlgorithm; import com.baidu.hugegraph.job.algorithm.cent.EigenvectorCentralityAlgorithm; @@ -45,7 +45,7 @@ public class AlgorithmPool { INSTANCE.register(new CountEdgeAlgorithm()); INSTANCE.register(new DegreeCentralityAlgorithm()); - INSTANCE.register(new BetweenessCentralityAlgorithm()); + INSTANCE.register(new StressCentralityAlgorithm()); INSTANCE.register(new ClosenessCentralityAlgorithm()); INSTANCE.register(new EigenvectorCentralityAlgorithm()); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java index bf35097346..09f994d14e 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java +++ 
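Note: exportCommunity in PATCH 23 writes its result to ${user.dir}/export/louvain-<task id>.txt (see EXPORT_PATH in AbstractAlgorithm). When export runs in vertex-first mode (i.e. show_community is not supplied), each output line appears to be "<member>\t<community>"; otherwise each line is "<community>: [members]". Nothing in the diff creates the export directory, so it presumably must already exist.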
b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java @@ -33,7 +33,7 @@ import com.baidu.hugegraph.config.CoreOptions; import com.baidu.hugegraph.config.HugeConfig; import com.baidu.hugegraph.job.UserJob; -import com.baidu.hugegraph.job.algorithm.cent.BetweenessCentralityAlgorithm; +import com.baidu.hugegraph.job.algorithm.cent.StressCentralityAlgorithm; import com.baidu.hugegraph.job.algorithm.cent.ClosenessCentralityAlgorithm; import com.baidu.hugegraph.job.algorithm.cent.DegreeCentralityAlgorithm; import com.baidu.hugegraph.job.algorithm.cent.EigenvectorCentralityAlgorithm; @@ -158,8 +158,8 @@ public Object subgraphStat(UserJob job) { Map parameters = ImmutableMap.copyOf(PARAMS); results.put("degrees", algo.call(job, parameters)); - algo = new BetweenessCentralityAlgorithm(); - results.put("betweeness", algo.call(job, parameters)); + algo = new StressCentralityAlgorithm(); + results.put("stress", algo.call(job, parameters)); algo = new EigenvectorCentralityAlgorithm(); results.put("eigenvectors", algo.call(job, parameters)); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java index fd8453fff3..752dc74ba5 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java @@ -157,6 +157,7 @@ protected GraphTraversal filterNonShortestPath( // ignore non shortest path return false; } + // TODO: len may be smaller than shortest if (shortest == null) { triples.put(key, len); } else { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/StressCentralityAlgorithm.java similarity index 68% rename from hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java rename to hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/StressCentralityAlgorithm.java index 968f636bae..87f1471d4b 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweenessCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/StressCentralityAlgorithm.java @@ -30,13 +30,13 @@ import com.baidu.hugegraph.type.define.Directions; import com.baidu.hugegraph.util.ParameterUtil; -public class BetweenessCentralityAlgorithm extends AbstractCentAlgorithm { +public class StressCentralityAlgorithm extends AbstractCentAlgorithm { public static final String KEY_WITH_BOUNDARY = "with_boundary"; @Override public String name() { - return "betweeness_centrality"; + return "stress_centrality"; } @Override @@ -48,16 +48,16 @@ public void checkParameters(Map parameters) { @Override public Object call(UserJob job, Map parameters) { try (Traverser traverser = new Traverser(job)) { - return traverser.betweenessCentrality(direction(parameters), - edgeLabel(parameters), - depth(parameters), - degree(parameters), - sample(parameters), - withBoundary(parameters), - sourceLabel(parameters), - sourceSample(parameters), - sourceCLabel(parameters), - top(parameters)); + return traverser.stressCentrality(direction(parameters), + edgeLabel(parameters), + depth(parameters), + degree(parameters), + sample(parameters), + withBoundary(parameters), + sourceLabel(parameters), + 
sourceSample(parameters), + sourceCLabel(parameters), + top(parameters)); } } @@ -74,16 +74,16 @@ public Traverser(UserJob job) { super(job); } - public Object betweenessCentrality(Directions direction, - String label, - int depth, - long degree, - long sample, - boolean withBoundary, - String sourceLabel, - long sourceSample, - String sourceCLabel, - long topN) { + public Object stressCentrality(Directions direction, + String label, + int depth, + long degree, + long sample, + boolean withBoundary, + String sourceLabel, + long sourceSample, + String sourceCLabel, + long topN) { assert depth > 0; assert degree > 0L || degree == NO_LIMIT; assert topN >= 0L || topN == NO_LIMIT; From a6fbb7c2ef5579a5fb38b9ebe555d2b6d2dfe65e Mon Sep 17 00:00:00 2001 From: houzhizhen Date: Sun, 27 Sep 2020 09:06:14 +0800 Subject: [PATCH 25/33] add betweenness algorithm (#63) * add betweenness BetweennessCentralityAlgorithmV2 --- .../job/algorithm/AlgorithmPool.java | 2 + .../job/algorithm/SubgraphStatAlgorithm.java | 4 + .../BetweennessCentralityAlgorithmV2.java | 238 ++++++++++++++++++ 3 files changed, 244 insertions(+) create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweennessCentralityAlgorithmV2.java diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java index 3252936699..0299cf501d 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java @@ -22,6 +22,7 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import com.baidu.hugegraph.job.algorithm.cent.BetweennessCentralityAlgorithmV2; import com.baidu.hugegraph.job.algorithm.cent.StressCentralityAlgorithm; import com.baidu.hugegraph.job.algorithm.cent.ClosenessCentralityAlgorithm; import com.baidu.hugegraph.job.algorithm.cent.DegreeCentralityAlgorithm; @@ -46,6 +47,7 @@ public class AlgorithmPool { INSTANCE.register(new DegreeCentralityAlgorithm()); INSTANCE.register(new StressCentralityAlgorithm()); + INSTANCE.register(new BetweennessCentralityAlgorithmV2()); INSTANCE.register(new ClosenessCentralityAlgorithm()); INSTANCE.register(new EigenvectorCentralityAlgorithm()); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java index 09f994d14e..46aa797822 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java @@ -33,6 +33,7 @@ import com.baidu.hugegraph.config.CoreOptions; import com.baidu.hugegraph.config.HugeConfig; import com.baidu.hugegraph.job.UserJob; +import com.baidu.hugegraph.job.algorithm.cent.BetweennessCentralityAlgorithmV2; import com.baidu.hugegraph.job.algorithm.cent.StressCentralityAlgorithm; import com.baidu.hugegraph.job.algorithm.cent.ClosenessCentralityAlgorithm; import com.baidu.hugegraph.job.algorithm.cent.DegreeCentralityAlgorithm; @@ -161,6 +162,9 @@ public Object subgraphStat(UserJob job) { algo = new StressCentralityAlgorithm(); results.put("stress", algo.call(job, parameters)); + algo = new BetweennessCentralityAlgorithmV2(); + results.put("betweenness", algo.call(job, parameters)); + algo = new EigenvectorCentralityAlgorithm(); 
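Note: after PATCH 24 and PATCH 25 the algorithm pool offers both metrics, and the distinction behind the rename appears to be the standard one: stress_centrality counts the absolute number of shortest paths passing through a vertex,

    stress(v) = sum over s != v != t of sigma_st(v)

while betweenness_centrality (the V2 implementation) weights each source/target pair by the fraction of its shortest paths that pass through v,

    betweenness(v) = sum over s != v != t of sigma_st(v) / sigma_st

where sigma_st is the number of shortest paths between s and t and sigma_st(v) is the number of those passing through v. That is also why SubgraphStatAlgorithm now reports both a "stress" and a "betweenness" entry.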
results.put("eigenvectors", algo.call(job, parameters)); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweennessCentralityAlgorithmV2.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweennessCentralityAlgorithmV2.java new file mode 100644 index 0000000000..f5f7e4db68 --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweennessCentralityAlgorithmV2.java @@ -0,0 +1,238 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +package com.baidu.hugegraph.job.algorithm.cent; + +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.Map; +import java.util.Stack; + +import org.apache.tinkerpop.gremlin.structure.Vertex; + +import com.baidu.hugegraph.backend.id.Id; +import com.baidu.hugegraph.backend.query.Query; +import com.baidu.hugegraph.job.UserJob; +import com.baidu.hugegraph.structure.HugeEdge; +import com.baidu.hugegraph.structure.HugeVertex; +import com.baidu.hugegraph.traversal.algorithm.HugeTraverser; +import com.baidu.hugegraph.type.define.Directions; + + +public class BetweennessCentralityAlgorithmV2 extends AbstractCentAlgorithm { + + @Override + public String name() { + return "betweenness_centrality"; + } + + @Override + public void checkParameters(Map parameters) { + super.checkParameters(parameters); + } + + @Override + public Object call(UserJob job, Map parameters) { + try (Traverser traverser = new Traverser(job)) { + return traverser.betweenessCentrality(direction(parameters), + edgeLabel(parameters), + depth(parameters), + degree(parameters), + sample(parameters), + sourceLabel(parameters), + sourceSample(parameters), + sourceCLabel(parameters), + top(parameters)); + } + } + + private static class Traverser extends AbstractCentAlgorithm.Traverser { + + private Traverser(UserJob job) { + super(job); + } + + private Object betweenessCentrality(Directions direction, + String label, + int depth, + long degree, + long sample, + String sourceLabel, + long sourceSample, + String sourceCLabel, + long topN) { + assert depth > 0; + assert degree > 0L || degree == NO_LIMIT; + assert topN >= 0L || topN == NO_LIMIT; + + Map globalBetweennesses = new HashMap<>(); + Id edgeLabelId = null; + if (label != null) { + edgeLabelId = graph().edgeLabel(label).id(); + } + + // TODO: sample the startVertices + Iterator startVertices = this.vertices(sourceLabel, + sourceCLabel, + Query.NO_LIMIT); + while (startVertices.hasNext()) { + Id startVertex = ((HugeVertex) startVertices.next()).id(); + globalBetweennesses.putIfAbsent(startVertex, 0.0f); + Stack traversedVertices = new Stack<>(); + Map localBetweennesses = new HashMap<>(); + BetweennessNode startNode = new 
BetweennessNode(1, 0); + localBetweennesses.put(startVertex, startNode); + this.computeDistance(startVertex, localBetweennesses, + traversedVertices, direction, + edgeLabelId, depth, degree); + this.computeBetweenness(startVertex, traversedVertices, + globalBetweennesses, + localBetweennesses); + } + if (topN > 0) { + return HugeTraverser.topN(globalBetweennesses, true, topN); + } else { + return globalBetweennesses; + } + } + + private void computeDistance(Id startVertex, + Map betweennesses, + Stack traversedVertices, Directions direction, + Id edgeLabelId, long degree, long depth) { + LinkedList traversingVertices = new LinkedList<>(); + traversingVertices.add(startVertex); + + while (!traversingVertices.isEmpty()) { + Id source = traversingVertices.removeFirst(); + traversedVertices.push(source); + BetweennessNode sourceNode = betweennesses.get(source); + if (sourceNode == null) { + sourceNode = new BetweennessNode(); + betweennesses.put(source, sourceNode); + } + // TODO: sample the edges + Iterator edges = (Iterator) this.edgesOfVertex( + source, direction, edgeLabelId, + degree); + while (edges.hasNext()) { + HugeEdge edge = edges.next(); + Id targetId = edge.otherVertex().id(); + BetweennessNode targetNode = betweennesses.get(targetId); + // edge's targetNode is arrived at first time + if (targetNode == null) { + targetNode = new BetweennessNode(sourceNode); + betweennesses.put(targetId, targetNode); + if (depth == NO_LIMIT || + targetNode.distance() <= depth) { + traversingVertices.addLast(targetId); + } + } + targetNode.addParentNodeIfNeeded(sourceNode, source); + } + } + } + + private void computeBetweenness( + Id startVertex, + Stack traversedVertices, + Map globalBetweennesses, + Map localBetweennesses) { + while (!traversedVertices.empty()) { + Id currentId = traversedVertices.pop(); + BetweennessNode currentNode = + localBetweennesses.get(currentId); + if (currentId.equals(startVertex)) { + continue; + } + // add to globalBetweennesses + float betweenness = globalBetweennesses.getOrDefault(currentId, + 0.0f); + betweenness += currentNode.betweenness(); + globalBetweennesses.put(currentId, betweenness); + + // contribute to parent + for (Id v : currentNode.parents()) { + BetweennessNode parentNode = localBetweennesses.get(v); + parentNode.increaseBetweenness(currentNode); + } + } + } + } + + /** + * the temp data structure for a vertex used in computing process. 
+ */ + private static class BetweennessNode { + + private Id[] parents; + private int pathCount; + private int distance; + private float betweenness; + + public BetweennessNode() { + this(0, -1); + } + + public BetweennessNode(BetweennessNode parentNode) { + this(0, parentNode.distance + 1); + } + + public BetweennessNode(int pathCount, int distance) { + this.pathCount = pathCount; + this.distance = distance; + this.parents = new Id[0]; + this.betweenness = 0.0f; + } + + public int distance() { + return this.distance; + } + + public Id[] parents() { + return this.parents; + } + + public void addParent(Id parentId) { + Id[] newParents = new Id[this.parents.length + 1]; + System.arraycopy(this.parents, 0, newParents, 0, + this.parents.length); + newParents[newParents.length - 1] = parentId; + this.parents = newParents; + } + + public void increaseBetweenness(BetweennessNode childNode) { + float increase = (float) this.pathCount / childNode.pathCount * + (1 + childNode.betweenness); + this.betweenness += increase; + } + + public void addParentNodeIfNeeded(BetweennessNode node, Id parentId) { + if (this.distance == node.distance + 1) { + this.pathCount += node.pathCount; + this.addParent(parentId); + } + } + + public float betweenness() { + return this.betweenness; + } + } +} From 6d19a9e57cd3185f9d46c00841b1ea60e5d1c322 Mon Sep 17 00:00:00 2001 From: Jermy Li Date: Tue, 13 Oct 2020 14:51:51 +0800 Subject: [PATCH 26/33] fix closeness distance: 1 extra length is calculated (#67) Change-Id: I35bc20698bf93002a530b95688cd0359a9154fdf --- .../job/algorithm/cent/ClosenessCentralityAlgorithm.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java index 3391a191a1..6a95794a0a 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java @@ -89,10 +89,17 @@ public Object closenessCentrality(Directions direction, t = t.emit().until(__.loops().is(P.gte(depth))); t = filterNonShortestPath(t, true); + /* + * We use Marchiori's algorithm(sum of reciprocal of distances): + * .math("_-1").sack(Operator.div).sack().sum() + * for Bavelas's algorithm: + * .math("_-1").sum().sack(Operator.div).sack() + * see https://en.wikipedia.org/wiki/Closeness_centrality + */ GraphTraversal tg; tg = t.group().by(__.select(Pop.first, "v").id()) .by(__.select(Pop.all, "v").count(Scope.local) - .sack(Operator.div).sack().sum()); + .math("_-1").sack(Operator.div).sack().sum()); GraphTraversal tLimit = topN(tg, topN); return this.execute(tLimit, () -> tLimit.next()); From 9158f61145b195908625412f589a61dcbe9c2891 Mon Sep 17 00:00:00 2001 From: Jermy Li Date: Tue, 13 Oct 2020 15:29:19 +0800 Subject: [PATCH 27/33] add depth-first version betweennes centrality (#68) Change-Id: Id4fc070268d8957ef2764bffbb3c762d9c034a0d --- .../job/algorithm/AlgorithmPool.java | 14 +- .../job/algorithm/SubgraphStatAlgorithm.java | 25 ++- .../algorithm/cent/AbstractCentAlgorithm.java | 4 +- .../cent/BetweennessCentralityAlgorithm.java | 150 ++++++++++++++++++ 4 files changed, 173 insertions(+), 20 deletions(-) create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweennessCentralityAlgorithm.java diff --git 
a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java index 0299cf501d..9ded512ef4 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java @@ -22,11 +22,11 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; -import com.baidu.hugegraph.job.algorithm.cent.BetweennessCentralityAlgorithmV2; -import com.baidu.hugegraph.job.algorithm.cent.StressCentralityAlgorithm; +import com.baidu.hugegraph.job.algorithm.cent.BetweennessCentralityAlgorithm; import com.baidu.hugegraph.job.algorithm.cent.ClosenessCentralityAlgorithm; import com.baidu.hugegraph.job.algorithm.cent.DegreeCentralityAlgorithm; import com.baidu.hugegraph.job.algorithm.cent.EigenvectorCentralityAlgorithm; +import com.baidu.hugegraph.job.algorithm.cent.StressCentralityAlgorithm; import com.baidu.hugegraph.job.algorithm.comm.ClusterCoeffcientAlgorithm; import com.baidu.hugegraph.job.algorithm.comm.KCoreAlgorithm; import com.baidu.hugegraph.job.algorithm.comm.LouvainAlgorithm; @@ -36,6 +36,7 @@ import com.baidu.hugegraph.job.algorithm.path.RingsDetectAlgorithm; import com.baidu.hugegraph.job.algorithm.rank.PageRankAlgorithm; import com.baidu.hugegraph.job.algorithm.similarity.FusiformSimilarityAlgorithm; +import com.baidu.hugegraph.util.E; public class AlgorithmPool { @@ -47,7 +48,7 @@ public class AlgorithmPool { INSTANCE.register(new DegreeCentralityAlgorithm()); INSTANCE.register(new StressCentralityAlgorithm()); - INSTANCE.register(new BetweennessCentralityAlgorithmV2()); + INSTANCE.register(new BetweennessCentralityAlgorithm()); INSTANCE.register(new ClosenessCentralityAlgorithm()); INSTANCE.register(new EigenvectorCentralityAlgorithm()); @@ -81,6 +82,13 @@ public Algorithm find(String name) { return this.algorithms.get(name); } + public Algorithm get(String name) { + Algorithm algorithm = this.algorithms.get(name); + E.checkArgument(algorithm != null, + "Not found algorithm '%s'", name); + return algorithm; + } + public static AlgorithmPool instance() { return INSTANCE; } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java index 46aa797822..d91748e41e 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java @@ -33,14 +33,6 @@ import com.baidu.hugegraph.config.CoreOptions; import com.baidu.hugegraph.config.HugeConfig; import com.baidu.hugegraph.job.UserJob; -import com.baidu.hugegraph.job.algorithm.cent.BetweennessCentralityAlgorithmV2; -import com.baidu.hugegraph.job.algorithm.cent.StressCentralityAlgorithm; -import com.baidu.hugegraph.job.algorithm.cent.ClosenessCentralityAlgorithm; -import com.baidu.hugegraph.job.algorithm.cent.DegreeCentralityAlgorithm; -import com.baidu.hugegraph.job.algorithm.cent.EigenvectorCentralityAlgorithm; -import com.baidu.hugegraph.job.algorithm.comm.ClusterCoeffcientAlgorithm; -import com.baidu.hugegraph.job.algorithm.path.RingsDetectAlgorithm; -import com.baidu.hugegraph.job.algorithm.rank.PageRankAlgorithm; import com.baidu.hugegraph.task.HugeTask; import com.baidu.hugegraph.traversal.algorithm.HugeTraverser; import 
com.baidu.hugegraph.traversal.optimize.HugeScriptTraversal; @@ -149,34 +141,35 @@ public Traverser(UserJob job) { } public Object subgraphStat(UserJob job) { + AlgorithmPool pool = AlgorithmPool.instance(); Map results = InsertionOrderUtil.newMap(); GraphTraversalSource g = job.graph().traversal(); results.put("vertices_count", g.V().count().next()); results.put("edges_count", g.E().count().next()); - Algorithm algo = new DegreeCentralityAlgorithm(); + Algorithm algo = pool.get("degree_centrality"); Map parameters = ImmutableMap.copyOf(PARAMS); results.put("degrees", algo.call(job, parameters)); - algo = new StressCentralityAlgorithm(); + algo = pool.get("stress_centrality"); results.put("stress", algo.call(job, parameters)); - algo = new BetweennessCentralityAlgorithmV2(); + algo = pool.get("betweenness_centrality"); results.put("betweenness", algo.call(job, parameters)); - algo = new EigenvectorCentralityAlgorithm(); + algo = pool.get("eigenvector_centrality"); results.put("eigenvectors", algo.call(job, parameters)); - algo = new ClosenessCentralityAlgorithm(); + algo = pool.get("closeness_centrality"); results.put("closeness", algo.call(job, parameters)); results.put("page_ranks", pageRanks(job)); - algo = new ClusterCoeffcientAlgorithm(); + algo = pool.get("cluster_coeffcient"); results.put("cluster_coeffcient", algo.call(job, parameters)); - algo = new RingsDetectAlgorithm(); + algo = pool.get("rings"); parameters = ImmutableMap.builder() .putAll(PARAMS) .put("count_only", true) @@ -189,7 +182,7 @@ public Object subgraphStat(UserJob job) { } private Map pageRanks(UserJob job) { - PageRankAlgorithm algo = new PageRankAlgorithm(); + Algorithm algo = AlgorithmPool.instance().get("page_rank"); algo.call(job, ImmutableMap.of("alpha", 0.15)); // Collect page ranks diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java index 752dc74ba5..066234873b 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java @@ -150,7 +150,9 @@ protected GraphTraversal filterNonShortestPath( return t.filter(it -> { Id start = it.path(Pop.first, "v").id(); Id end = it.path(Pop.last, "v").id(); - int len = it.>path(Pop.all, "v").size(); + int len = it.path().size(); + assert len == it.>path(Pop.all, "v").size(); + Pair key = Pair.of(start, end); Integer shortest = triples.get(key); if (shortest != null && len > shortest) { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweennessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweennessCentralityAlgorithm.java new file mode 100644 index 0000000000..46f4d4a405 --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweennessCentralityAlgorithm.java @@ -0,0 +1,150 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +package com.baidu.hugegraph.job.algorithm.cent; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.commons.lang3.mutable.MutableFloat; +import org.apache.tinkerpop.gremlin.process.traversal.P; +import org.apache.tinkerpop.gremlin.process.traversal.Pop; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversal; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; +import org.apache.tinkerpop.gremlin.structure.Vertex; + +import com.baidu.hugegraph.backend.id.Id; +import com.baidu.hugegraph.backend.id.SplicingIdGenerator; +import com.baidu.hugegraph.job.UserJob; +import com.baidu.hugegraph.structure.HugeElement; +import com.baidu.hugegraph.type.define.Directions; + +public class BetweennessCentralityAlgorithm extends AbstractCentAlgorithm { + + @Override + public String name() { + return "betweenness_centrality"; + } + + @Override + public void checkParameters(Map parameters) { + super.checkParameters(parameters); + } + + @Override + public Object call(UserJob job, Map parameters) { + try (Traverser traverser = new Traverser(job)) { + return traverser.betweennessCentrality(direction(parameters), + edgeLabel(parameters), + depth(parameters), + degree(parameters), + sample(parameters), + sourceLabel(parameters), + sourceSample(parameters), + sourceCLabel(parameters), + top(parameters)); + } + } + + private static class Traverser extends AbstractCentAlgorithm.Traverser { + + public Traverser(UserJob job) { + super(job); + } + + public Object betweennessCentrality(Directions direction, + String label, + int depth, + long degree, + long sample, + String sourceLabel, + long sourceSample, + String sourceCLabel, + long topN) { + assert depth > 0; + assert degree > 0L || degree == NO_LIMIT; + assert topN >= 0L || topN == NO_LIMIT; + + GraphTraversal t = constructSource(sourceLabel, + sourceSample, + sourceCLabel); + t = constructPath(t, direction, label, degree, sample, + sourceLabel, sourceCLabel); + t = t.emit().until(__.loops().is(P.gte(depth))); + t = filterNonShortestPath(t, false); + + GraphTraversal tg = this.groupPathByEndpoints(t); + tg = this.computeBetweenness(tg); + GraphTraversal tLimit = topN(tg, topN); + + return this.execute(tLimit, () -> tLimit.next()); + } + + protected GraphTraversal groupPathByEndpoints( + GraphTraversal t) { + return t.map(it -> { + // t.select(Pop.all, "v").unfold().id() + List path = it.path(Pop.all, "v"); + List pathById = new ArrayList<>(path.size()); + for (HugeElement v : path) { + pathById.add(v.id()); + } + return pathById; + }).group().by(it -> { + // group by the first and last vertex + @SuppressWarnings("unchecked") + List path = (List) it; + assert path.size() >= 2; + String first = path.get(0).toString(); + String last = path.get(path.size() -1).toString(); + return SplicingIdGenerator.concat(first, last); + }).unfold(); + } + + protected GraphTraversal computeBetweenness( + GraphTraversal t) { + return t.fold(new HashMap(), (results, it) -> { + @SuppressWarnings("unchecked") + Map.Entry> entry = (Map.Entry>) it; + 
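For the computeBetweenness() step being assembled here: shortest paths are first grouped by their (first, last) endpoint pair, and each interior vertex of every path in a group is then credited 1 / paths.size(). As a worked example, if three equally short paths survive filterNonShortestPath() between a pair (A, D), an interior vertex lying on exactly one of them receives 1/3 for that pair, while a vertex lying on two of them receives 2/3; two-vertex paths (len <= 2) and the endpoints themselves contribute nothing. Summed over all endpoint pairs this yields the per-vertex betweenness scores that topN() finally ranks.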
@SuppressWarnings("unchecked") + List> paths = (List>) entry.getValue(); + for (List path : paths) { + int len = path.size(); + if (len <= 2) { + // only two vertex, no betweenness vertex + continue; + } + // skip the first and last vertex + for (int i = 1; i < len - 1; i++) { + Id vertex = path.get(i); + MutableFloat value = results.get(vertex); + if (value == null) { + value = new MutableFloat(); + results.put(vertex, value); + } + value.add(1.0f / paths.size()); + } + } + return results; + }); + } + } +} From 3519c27b4182400197004ad856736831fde2ea33 Mon Sep 17 00:00:00 2001 From: houzhizhen Date: Wed, 14 Oct 2020 16:58:55 +0800 Subject: [PATCH 28/33] add StressCentrality v2 (#65) * add StressCentralityAlgorithmV2 * add BfsTraverser and ClosenessCentralityAlgorithmV2 --- .../job/algorithm/AlgorithmPool.java | 7 + .../hugegraph/job/algorithm/BfsTraverser.java | 150 ++++++++++++++ .../BetweennessCentralityAlgorithmV2.java | 192 ++++++------------ .../cent/ClosenessCentralityAlgorithmV2.java | 135 ++++++++++++ .../cent/StressCentralityAlgorithmV2.java | 182 +++++++++++++++++ 5 files changed, 539 insertions(+), 127 deletions(-) create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/BfsTraverser.java create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithmV2.java create mode 100644 hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/StressCentralityAlgorithmV2.java diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java index 9ded512ef4..02ac4c24ea 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java @@ -23,10 +23,13 @@ import java.util.concurrent.ConcurrentHashMap; import com.baidu.hugegraph.job.algorithm.cent.BetweennessCentralityAlgorithm; +import com.baidu.hugegraph.job.algorithm.cent.BetweennessCentralityAlgorithmV2; import com.baidu.hugegraph.job.algorithm.cent.ClosenessCentralityAlgorithm; +import com.baidu.hugegraph.job.algorithm.cent.ClosenessCentralityAlgorithmV2; import com.baidu.hugegraph.job.algorithm.cent.DegreeCentralityAlgorithm; import com.baidu.hugegraph.job.algorithm.cent.EigenvectorCentralityAlgorithm; import com.baidu.hugegraph.job.algorithm.cent.StressCentralityAlgorithm; +import com.baidu.hugegraph.job.algorithm.cent.StressCentralityAlgorithmV2; import com.baidu.hugegraph.job.algorithm.comm.ClusterCoeffcientAlgorithm; import com.baidu.hugegraph.job.algorithm.comm.KCoreAlgorithm; import com.baidu.hugegraph.job.algorithm.comm.LouvainAlgorithm; @@ -65,6 +68,10 @@ public class AlgorithmPool { INSTANCE.register(new PageRankAlgorithm()); INSTANCE.register(new SubgraphStatAlgorithm()); + + INSTANCE.register(new StressCentralityAlgorithmV2()); + INSTANCE.register(new BetweennessCentralityAlgorithmV2()); + INSTANCE.register(new ClosenessCentralityAlgorithmV2()); } private final Map algorithms; diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/BfsTraverser.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/BfsTraverser.java new file mode 100644 index 0000000000..034887f277 --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/BfsTraverser.java @@ -0,0 +1,150 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) 
under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +package com.baidu.hugegraph.job.algorithm; + +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.Map; +import java.util.Stack; + +import org.apache.tinkerpop.gremlin.structure.Edge; + +import com.baidu.hugegraph.backend.id.Id; +import com.baidu.hugegraph.job.UserJob; +import com.baidu.hugegraph.structure.HugeEdge; +import com.baidu.hugegraph.type.define.Directions; + +public abstract class BfsTraverser + extends AbstractAlgorithm.AlgoTraverser + implements AutoCloseable { + + private Stack traversedVertices = new Stack<>(); + + public BfsTraverser(UserJob job) { + super(job); + } + + protected void compute(Id startVertex, Directions direction, + Id edgeLabel, long degree, long depth) { + Map localNodes = this.traverse(startVertex, direction, + edgeLabel, degree, depth); + this.backtrack(startVertex, localNodes); + } + + protected Map traverse(Id startVertex, Directions direction, + Id edgeLabel, long degree, long depth) { + Map localNodes = new HashMap<>(); + localNodes.put(startVertex, this.createStartNode()); + + LinkedList traversingVertices = new LinkedList<>(); + traversingVertices.add(startVertex); + while (!traversingVertices.isEmpty()) { + Id source = traversingVertices.removeFirst(); + this.traversedVertices.push(source); + T sourceNode = localNodes.get(source); + if (depth != NO_LIMIT && sourceNode.distance() >= depth) { + continue; + } + // TODO: sample the edges + Iterator edges = this.edgesOfVertex(source, direction, + edgeLabel, degree); + while (edges.hasNext()) { + HugeEdge edge = (HugeEdge) edges.next(); + Id target = edge.otherVertex().id(); + T targetNode = localNodes.get(target); + boolean firstTime = false; + // Edge's targetNode is arrived at first time + if (targetNode == null) { + firstTime = true; + targetNode = this.createNode(sourceNode); + localNodes.put(target, targetNode); + traversingVertices.addLast(target); + } + this.meetNode(target, targetNode, source, + sourceNode, firstTime); + } + } + return localNodes; + } + + protected void backtrack(Id startVertex, Map localNodes) { + while (!this.traversedVertices.empty()) { + Id currentVertex = this.traversedVertices.pop(); + this.backtrack(startVertex, currentVertex, localNodes); + } + } + + protected abstract T createStartNode(); + + protected abstract T createNode(T parentNode); + + protected abstract void meetNode(Id currentVertex, T currentNode, + Id parentVertex, T parentNode, + boolean firstTime); + + protected abstract void backtrack(Id startVertex, Id currentVertex, + Map localNodes); + + public static class Node { + + private Id[] parents; + private int pathCount; + private int distance; + + public Node(Node parentNode) { + this(0, parentNode.distance + 1); + } + + public Node(int pathCount, int distance) { + 
this.pathCount = pathCount; + this.distance = distance; + this.parents = new Id[0]; + } + + public int distance() { + return this.distance; + } + + public Id[] parents() { + return this.parents; + } + + public void addParent(Id parentId) { + // TODO: test if need to allocate more memory in advance + Id[] newParents = new Id[this.parents.length + 1]; + System.arraycopy(this.parents, 0, newParents, 0, + this.parents.length); + newParents[newParents.length - 1] = parentId; + this.parents = newParents; + } + + public void addParentNodeIfNeeded(Node node, Id parentId) { + if (this.distance == node.distance + 1) { + this.pathCount += node.pathCount; + this.addParent(parentId); + } + } + + protected int pathCount() { + return this.pathCount; + } + } +} diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweennessCentralityAlgorithmV2.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweennessCentralityAlgorithmV2.java index f5f7e4db68..1391021a29 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweennessCentralityAlgorithmV2.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweennessCentralityAlgorithmV2.java @@ -21,21 +21,19 @@ import java.util.HashMap; import java.util.Iterator; -import java.util.LinkedList; import java.util.Map; -import java.util.Stack; +import org.apache.commons.lang3.mutable.MutableFloat; import org.apache.tinkerpop.gremlin.structure.Vertex; import com.baidu.hugegraph.backend.id.Id; import com.baidu.hugegraph.backend.query.Query; import com.baidu.hugegraph.job.UserJob; -import com.baidu.hugegraph.structure.HugeEdge; +import com.baidu.hugegraph.job.algorithm.BfsTraverser; import com.baidu.hugegraph.structure.HugeVertex; import com.baidu.hugegraph.traversal.algorithm.HugeTraverser; import com.baidu.hugegraph.type.define.Directions; - public class BetweennessCentralityAlgorithmV2 extends AbstractCentAlgorithm { @Override @@ -63,29 +61,31 @@ public Object call(UserJob job, Map parameters) { } } - private static class Traverser extends AbstractCentAlgorithm.Traverser { + private static class Traverser extends BfsTraverser { + + private Map globalBetweennesses; private Traverser(UserJob job) { super(job); } private Object betweenessCentrality(Directions direction, - String label, - int depth, - long degree, - long sample, - String sourceLabel, - long sourceSample, - String sourceCLabel, - long topN) { + String label, + int depth, + long degree, + long sample, + String sourceLabel, + long sourceSample, + String sourceCLabel, + long topN) { assert depth > 0; assert degree > 0L || degree == NO_LIMIT; assert topN >= 0L || topN == NO_LIMIT; - Map globalBetweennesses = new HashMap<>(); + this.globalBetweennesses = new HashMap<>(); Id edgeLabelId = null; if (label != null) { - edgeLabelId = graph().edgeLabel(label).id(); + edgeLabelId = this.graph().edgeLabel(label).id(); } // TODO: sample the startVertices @@ -93,144 +93,82 @@ private Object betweenessCentrality(Directions direction, sourceCLabel, Query.NO_LIMIT); while (startVertices.hasNext()) { - Id startVertex = ((HugeVertex) startVertices.next()).id(); - globalBetweennesses.putIfAbsent(startVertex, 0.0f); - Stack traversedVertices = new Stack<>(); - Map localBetweennesses = new HashMap<>(); - BetweennessNode startNode = new BetweennessNode(1, 0); - localBetweennesses.put(startVertex, startNode); - this.computeDistance(startVertex, localBetweennesses, - traversedVertices, direction, - edgeLabelId, depth, 
degree); - this.computeBetweenness(startVertex, traversedVertices, - globalBetweennesses, - localBetweennesses); + Id startVertex = ((HugeVertex) startVertices.next()).id(); + this.globalBetweennesses.putIfAbsent(startVertex, + new MutableFloat()); + this.compute(startVertex, direction, edgeLabelId, + degree, depth); } - if (topN > 0) { - return HugeTraverser.topN(globalBetweennesses, true, topN); + if (topN > 0L || topN == NO_LIMIT) { + return HugeTraverser.topN(this.globalBetweennesses, + true, topN); } else { - return globalBetweennesses; + return this.globalBetweennesses; } } - private void computeDistance(Id startVertex, - Map betweennesses, - Stack traversedVertices, Directions direction, - Id edgeLabelId, long degree, long depth) { - LinkedList traversingVertices = new LinkedList<>(); - traversingVertices.add(startVertex); - - while (!traversingVertices.isEmpty()) { - Id source = traversingVertices.removeFirst(); - traversedVertices.push(source); - BetweennessNode sourceNode = betweennesses.get(source); - if (sourceNode == null) { - sourceNode = new BetweennessNode(); - betweennesses.put(source, sourceNode); - } - // TODO: sample the edges - Iterator edges = (Iterator) this.edgesOfVertex( - source, direction, edgeLabelId, - degree); - while (edges.hasNext()) { - HugeEdge edge = edges.next(); - Id targetId = edge.otherVertex().id(); - BetweennessNode targetNode = betweennesses.get(targetId); - // edge's targetNode is arrived at first time - if (targetNode == null) { - targetNode = new BetweennessNode(sourceNode); - betweennesses.put(targetId, targetNode); - if (depth == NO_LIMIT || - targetNode.distance() <= depth) { - traversingVertices.addLast(targetId); - } - } - targetNode.addParentNodeIfNeeded(sourceNode, source); - } - } + @Override + protected BetweennessNode createNode(BetweennessNode parentNode) { + return new BetweennessNode(parentNode); } - private void computeBetweenness( - Id startVertex, - Stack traversedVertices, - Map globalBetweennesses, - Map localBetweennesses) { - while (!traversedVertices.empty()) { - Id currentId = traversedVertices.pop(); - BetweennessNode currentNode = - localBetweennesses.get(currentId); - if (currentId.equals(startVertex)) { - continue; - } - // add to globalBetweennesses - float betweenness = globalBetweennesses.getOrDefault(currentId, - 0.0f); - betweenness += currentNode.betweenness(); - globalBetweennesses.put(currentId, betweenness); - - // contribute to parent - for (Id v : currentNode.parents()) { - BetweennessNode parentNode = localBetweennesses.get(v); - parentNode.increaseBetweenness(currentNode); - } + @Override + protected void meetNode(Id currentVertex, BetweennessNode currentNode, + Id parentVertex, BetweennessNode parentNode, + boolean firstTime) { + currentNode.addParentNodeIfNeeded(parentNode, parentVertex); + } + + @Override + protected BetweennessNode createStartNode() { + return new BetweennessNode(1, 0); + } + + @Override + protected void backtrack(Id startVertex, Id currentVertex, + Map localNodes) { + if (startVertex.equals(currentVertex)) { + return; + } + MutableFloat betweenness = this.globalBetweennesses.get( + currentVertex); + if (betweenness == null) { + betweenness = new MutableFloat(0.0F); + this.globalBetweennesses.put(currentVertex, betweenness); + } + BetweennessNode node = localNodes.get(currentVertex); + betweenness.add(node.betweenness()); + + // Contribute to parents + for (Id v : node.parents()) { + BetweennessNode parentNode = localNodes.get(v); + parentNode.increaseBetweenness(node); } } } /** - * 
the temp data structure for a vertex used in computing process. + * Temp data structure for a vertex used in computing process. */ - private static class BetweennessNode { + private static class BetweennessNode extends BfsTraverser.Node { - private Id[] parents; - private int pathCount; - private int distance; private float betweenness; - public BetweennessNode() { - this(0, -1); - } - public BetweennessNode(BetweennessNode parentNode) { - this(0, parentNode.distance + 1); + this(0, parentNode.distance() + 1); } public BetweennessNode(int pathCount, int distance) { - this.pathCount = pathCount; - this.distance = distance; - this.parents = new Id[0]; - this.betweenness = 0.0f; - } - - public int distance() { - return this.distance; - } - - public Id[] parents() { - return this.parents; - } - - public void addParent(Id parentId) { - Id[] newParents = new Id[this.parents.length + 1]; - System.arraycopy(this.parents, 0, newParents, 0, - this.parents.length); - newParents[newParents.length - 1] = parentId; - this.parents = newParents; + super(pathCount, distance); + this.betweenness = 0.0F; } public void increaseBetweenness(BetweennessNode childNode) { - float increase = (float) this.pathCount / childNode.pathCount * - (1 + childNode.betweenness); + float increase = (float) this.pathCount() / childNode.pathCount() * + (1.0F + childNode.betweenness); this.betweenness += increase; } - public void addParentNodeIfNeeded(BetweennessNode node, Id parentId) { - if (this.distance == node.distance + 1) { - this.pathCount += node.pathCount; - this.addParent(parentId); - } - } - public float betweenness() { return this.betweenness; } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithmV2.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithmV2.java new file mode 100644 index 0000000000..1651c8943d --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithmV2.java @@ -0,0 +1,135 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +package com.baidu.hugegraph.job.algorithm.cent; + +import com.baidu.hugegraph.backend.id.Id; +import com.baidu.hugegraph.backend.query.Query; +import com.baidu.hugegraph.exception.NotSupportException; +import com.baidu.hugegraph.job.UserJob; +import com.baidu.hugegraph.job.algorithm.BfsTraverser; +import com.baidu.hugegraph.structure.HugeVertex; +import com.baidu.hugegraph.traversal.algorithm.HugeTraverser; +import com.baidu.hugegraph.type.define.Directions; +import org.apache.tinkerpop.gremlin.structure.Vertex; + +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +public class ClosenessCentralityAlgorithmV2 extends AbstractCentAlgorithm { + + @Override + public String name() { + return "closeness_centrality"; + } + + @Override + public void checkParameters(Map parameters) { + super.checkParameters(parameters); + } + + @Override + public Object call(UserJob job, Map parameters) { + try (Traverser traverser = new Traverser(job)) { + return traverser.closenessCentrality(direction(parameters), + edgeLabel(parameters), + depth(parameters), + degree(parameters), + sample(parameters), + sourceLabel(parameters), + sourceSample(parameters), + sourceCLabel(parameters), + top(parameters)); + } + } + + private static class Traverser extends BfsTraverser { + + private Map globalCloseness; + + private float startVertexCloseness; + + private Traverser(UserJob job) { + super(job); + this.globalCloseness = new HashMap<>(); + } + + private Object closenessCentrality(Directions direction, + String label, + int depth, + long degree, + long sample, + String sourceLabel, + long sourceSample, + String sourceCLabel, + long topN) { + assert depth > 0; + assert degree > 0L || degree == NO_LIMIT; + assert topN >= 0L || topN == NO_LIMIT; + + Id edgeLabelId = null; + if (label != null) { + edgeLabelId = this.graph().edgeLabel(label).id(); + } + + // TODO: sample the startVertices + Iterator startVertices = this.vertices(sourceLabel, + sourceCLabel, + Query.NO_LIMIT); + while (startVertices.hasNext()) { + this.startVertexCloseness = 0.0F; + Id startVertex = ((HugeVertex) startVertices.next()).id(); + this.traverse(startVertex, direction, edgeLabelId, + degree, depth); + this.globalCloseness.put(startVertex, + this.startVertexCloseness); + } + if (topN > 0L || topN == NO_LIMIT) { + return HugeTraverser.topN(this.globalCloseness, true, topN); + } else { + return this.globalCloseness; + } + } + + @Override + protected Node createStartNode() { + return new Node(1, 0); + } + + @Override + protected Node createNode(Node parentNode) { + return new Node(parentNode); + } + + @Override + protected void meetNode(Id currentVertex, Node currentNode, + Id parentVertex, Node parentNode, + boolean firstTime) { + if (firstTime) { + this.startVertexCloseness += 1.0F / currentNode.distance(); + } + } + + @Override + protected void backtrack(Id startVertex, Id currentVertex, + Map localNodes) { + throw new NotSupportException("backtrack()"); + } + } +} diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/StressCentralityAlgorithmV2.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/StressCentralityAlgorithmV2.java new file mode 100644 index 0000000000..b01486b7ec --- /dev/null +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/StressCentralityAlgorithmV2.java @@ -0,0 +1,182 @@ +/* + * Copyright 2017 HugeGraph Authors + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +package com.baidu.hugegraph.job.algorithm.cent; + +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +import org.apache.commons.lang3.mutable.MutableLong; +import org.apache.tinkerpop.gremlin.structure.Vertex; + +import com.baidu.hugegraph.backend.id.Id; +import com.baidu.hugegraph.backend.query.Query; +import com.baidu.hugegraph.job.UserJob; +import com.baidu.hugegraph.job.algorithm.BfsTraverser; +import com.baidu.hugegraph.structure.HugeVertex; +import com.baidu.hugegraph.traversal.algorithm.HugeTraverser; +import com.baidu.hugegraph.type.define.Directions; + +public class StressCentralityAlgorithmV2 extends AbstractCentAlgorithm { + + @Override + public String name() { + return "stress_centrality"; + } + + @Override + public void checkParameters(Map parameters) { + super.checkParameters(parameters); + } + + @Override + public Object call(UserJob job, Map parameters) { + try (Traverser traverser = new Traverser(job)) { + return traverser.stressCentrality(direction(parameters), + edgeLabel(parameters), + depth(parameters), + degree(parameters), + sample(parameters), + sourceLabel(parameters), + sourceSample(parameters), + sourceCLabel(parameters), + top(parameters)); + } + } + + private static class Traverser extends BfsTraverser { + + private Map globalStresses; + + private Traverser(UserJob job) { + super(job); + this.globalStresses = new HashMap<>(); + } + + private Object stressCentrality(Directions direction, + String label, + int depth, + long degree, + long sample, + String sourceLabel, + long sourceSample, + String sourceCLabel, + long topN) { + assert depth > 0; + assert degree > 0L || degree == NO_LIMIT; + assert topN >= 0L || topN == NO_LIMIT; + + Id edgeLabelId = null; + if (label != null) { + edgeLabelId = this.graph().edgeLabel(label).id(); + } + + // TODO: sample the startVertices + Iterator startVertices = this.vertices(sourceLabel, + sourceCLabel, + Query.NO_LIMIT); + while (startVertices.hasNext()) { + Id startVertex = ((HugeVertex) startVertices.next()).id(); + this.globalStresses.putIfAbsent(startVertex, new MutableLong(0L)); + this.compute(startVertex, direction, edgeLabelId, + degree, depth); + } + if (topN > 0L || topN == NO_LIMIT) { + return HugeTraverser.topN(this.globalStresses, true, topN); + } else { + return this.globalStresses; + } + } + + @Override + protected StressNode createStartNode() { + return new StressNode(1, 0); + } + + @Override + protected StressNode createNode(StressNode parentNode) { + return new StressNode(parentNode); + } + + @Override + protected void meetNode(Id currentVertex, StressNode currentNode, + Id parentVertex, StressNode parentNode, + boolean firstTime) { + currentNode.addParentNodeIfNeeded(parentNode, parentVertex); + } + + @Override + protected void backtrack(Id startVertex, Id currentVertex, + Map localNodes) { 
+ if (startVertex.equals(currentVertex)) { + return; + } + StressNode currentNode = localNodes.get(currentVertex); + + // Add local stresses to global stresses + MutableLong stress = this.globalStresses.get(currentVertex); + if (stress == null) { + stress = new MutableLong(0L); + this.globalStresses.put(currentVertex, stress); + } + stress.add(currentNode.stress()); + + // Contribute to parents + for (Id v : currentNode.parents()) { + StressNode parentNode = localNodes.get(v); + parentNode.increaseStress(currentNode); + } + } + } + + /** + * Temp data structure for a vertex used in computing process. + */ + private static class StressNode extends BfsTraverser.Node { + + private long stress; + + public StressNode(StressNode parentNode) { + this(0, parentNode.distance() + 1); + } + + public StressNode(int pathCount, int distance) { + super(pathCount, distance); + this.stress = 0L; + } + + public void increaseStress(StressNode childNode) { + /* + * `childNode.stress` is the contribution after child node. + * `childNode.pathCount` is the contribution of the child node. + * The sum of them is contribution to current node, there may be + * multi parents node of the child node, so contribute to current + * node proportionally. + */ + long total = childNode.stress + childNode.pathCount(); + long received = total * this.pathCount() / childNode.pathCount(); + this.stress += received; + } + + public long stress() { + return this.stress; + } + } +} From 2729971b1c54ad76e5ecf6cba03eb576fd834a6e Mon Sep 17 00:00:00 2001 From: houzhizhen Date: Thu, 15 Oct 2020 16:31:02 +0800 Subject: [PATCH 29/33] performance improvement: the meetNode is invoked only when node.distance is parent.distance() + 1 (#69) --- .../hugegraph/job/algorithm/BfsTraverser.java | 18 +++++++++++------- .../cent/BetweennessCentralityAlgorithmV2.java | 2 +- .../cent/StressCentralityAlgorithmV2.java | 2 +- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/BfsTraverser.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/BfsTraverser.java index 034887f277..3b0920855f 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/BfsTraverser.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/BfsTraverser.java @@ -78,8 +78,10 @@ protected Map traverse(Id startVertex, Directions direction, localNodes.put(target, targetNode); traversingVertices.addLast(target); } - this.meetNode(target, targetNode, source, - sourceNode, firstTime); + if (targetNode.distance() == sourceNode.distance() + 1) { + this.meetNode(target, targetNode, source, + sourceNode, firstTime); + } } } return localNodes; @@ -96,6 +98,10 @@ protected void backtrack(Id startVertex, Map localNodes) { protected abstract T createNode(T parentNode); + /** + * This method is invoked when currentVertex.distance() equals + * parentVertex.distance() + 1. 
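With this change the shortest-path condition (currentNode.distance() == sourceNode.distance() + 1) is checked once inside BfsTraverser.traverse(), so meetNode() implementations such as addParentNode() no longer re-check it. For the proportional rule in StressNode.increaseStress() above, a small worked example with illustrative numbers: if a child node w has pathCount 3 and accumulated stress 6, and its parent v accounts for 2 of those 3 shortest paths (pathCount 2), then v receives (6 + 3) * 2 / 3 = 6, i.e. sigma(v) * (1 + stress(w) / sigma(w)). Summing this over all of v's children gives the number of shortest paths from the current source that pass through v as an intermediate vertex, which backtrack() then adds into globalStresses.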
+ */ protected abstract void meetNode(Id currentVertex, T currentNode, Id parentVertex, T parentNode, boolean firstTime); @@ -136,11 +142,9 @@ public void addParent(Id parentId) { this.parents = newParents; } - public void addParentNodeIfNeeded(Node node, Id parentId) { - if (this.distance == node.distance + 1) { - this.pathCount += node.pathCount; - this.addParent(parentId); - } + public void addParentNode(Node node, Id parentId) { + this.pathCount += node.pathCount; + this.addParent(parentId); } protected int pathCount() { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweennessCentralityAlgorithmV2.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweennessCentralityAlgorithmV2.java index 1391021a29..0cb2fef321 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweennessCentralityAlgorithmV2.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweennessCentralityAlgorithmV2.java @@ -116,7 +116,7 @@ protected BetweennessNode createNode(BetweennessNode parentNode) { protected void meetNode(Id currentVertex, BetweennessNode currentNode, Id parentVertex, BetweennessNode parentNode, boolean firstTime) { - currentNode.addParentNodeIfNeeded(parentNode, parentVertex); + currentNode.addParentNode(parentNode, parentVertex); } @Override diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/StressCentralityAlgorithmV2.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/StressCentralityAlgorithmV2.java index b01486b7ec..bbce4d7add 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/StressCentralityAlgorithmV2.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/StressCentralityAlgorithmV2.java @@ -119,7 +119,7 @@ protected StressNode createNode(StressNode parentNode) { protected void meetNode(Id currentVertex, StressNode currentNode, Id parentVertex, StressNode parentNode, boolean firstTime) { - currentNode.addParentNodeIfNeeded(parentNode, parentVertex); + currentNode.addParentNode(parentNode, parentVertex); } @Override From 5ba5b947feff8663521da4d9c58c6c929a42ec56 Mon Sep 17 00:00:00 2001 From: zhoney Date: Wed, 16 Dec 2020 16:04:33 +0800 Subject: [PATCH 30/33] fix ap compile error for edition 0.11 (#72) Change-Id: Ibf0f415ee6bd9cf0ff8b598e2166cc70667c49b7 --- .../job/algorithm/cent/DegreeCentralityAlgorithm.java | 2 +- .../hugegraph/job/algorithm/comm/KCoreAlgorithm.java | 8 ++------ .../algorithm/similarity/FusiformSimilarityAlgorithm.java | 4 +--- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java index 54b72c2bff..6592c119d2 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java @@ -30,7 +30,7 @@ import com.baidu.hugegraph.backend.id.Id; import com.baidu.hugegraph.job.UserJob; import com.baidu.hugegraph.structure.HugeEdge; -import com.baidu.hugegraph.traversal.algorithm.EdgeStep; +import com.baidu.hugegraph.traversal.algorithm.steps.EdgeStep; import com.baidu.hugegraph.type.define.Directions; public class DegreeCentralityAlgorithm extends AbstractCentAlgorithm { diff --git 
a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java index f03db565ed..8ea2877961 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java @@ -34,7 +34,6 @@ import com.baidu.hugegraph.HugeGraph; import com.baidu.hugegraph.backend.id.Id; import com.baidu.hugegraph.job.UserJob; -import com.baidu.hugegraph.schema.EdgeLabel; import com.baidu.hugegraph.traversal.algorithm.FusiformSimilarityTraverser; import com.baidu.hugegraph.type.define.Directions; import com.baidu.hugegraph.util.CollectionUtil; @@ -111,7 +110,6 @@ public Object kcore(String sourceLabel, String sourceCLabel, Directions dir, String label, int k, double alpha, long degree, boolean merged) { HugeGraph graph = this.graph(); - EdgeLabel edgeLabel = label == null ? null : graph.edgeLabel(label); KcoreTraverser traverser = new KcoreTraverser(graph); JsonMap kcoresJson = new JsonMap(); @@ -123,8 +121,7 @@ public Object kcore(String sourceLabel, String sourceCLabel, this.traverse(sourceLabel, sourceCLabel, v -> { Set kcore = traverser.kcore(IteratorUtils.of(v), - dir, edgeLabel, k, alpha, - degree); + dir, label, k, alpha, degree); if (kcore.isEmpty()) { return; } @@ -183,8 +180,7 @@ public KcoreTraverser(HugeGraph graph) { } public Set kcore(Iterator vertices, Directions direction, - EdgeLabel label, int k, double alpha, - long degree) { + String label, int k, double alpha, long degree) { int minNeighbors = (int) Math.floor(1.0 / alpha * k); SimilarsMap map = fusiformSimilarity(vertices, direction, label, minNeighbors, alpha, k - 1, diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java index 0a1679fcfd..68d3ef5485 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java @@ -28,7 +28,6 @@ import com.baidu.hugegraph.job.UserJob; import com.baidu.hugegraph.job.algorithm.AbstractAlgorithm; import com.baidu.hugegraph.job.algorithm.Consumers.StopExecution; -import com.baidu.hugegraph.schema.EdgeLabel; import com.baidu.hugegraph.traversal.algorithm.FusiformSimilarityTraverser; import com.baidu.hugegraph.traversal.algorithm.FusiformSimilarityTraverser.SimilarsMap; import com.baidu.hugegraph.traversal.algorithm.HugeTraverser; @@ -165,7 +164,6 @@ public Object fusiformSimilars(String sourceLabel, String sourceCLabel, String groupProperty, int minGroups, long degree, long limit) { HugeGraph graph = this.graph(); - EdgeLabel edgeLabel = label == null ? 
null : graph.edgeLabel(label); FusiformSimilarityTraverser traverser = new FusiformSimilarityTraverser(graph); @@ -177,7 +175,7 @@ public Object fusiformSimilars(String sourceLabel, String sourceCLabel, this.traverse(sourceLabel, sourceCLabel, v -> { SimilarsMap similars = traverser.fusiformSimilarity( IteratorUtils.of(v), direction, - edgeLabel, minNeighbors, alpha, + label, minNeighbors, alpha, minSimilars, (int) topSimilars, groupProperty, minGroups, degree, MAX_CAPACITY, NO_LIMIT, true); From c2e30822461f1ce708da50c57f270eb0ae8f9c99 Mon Sep 17 00:00:00 2001 From: zhoney Date: Tue, 2 Feb 2021 17:53:40 +0800 Subject: [PATCH 31/33] fix fusiform and kcore min_groups args default value (#79) Change-Id: I843c595172d7b45b894d764bf859dec61a35c8b5 --- .../com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java | 2 +- .../job/algorithm/similarity/FusiformSimilarityAlgorithm.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java index 8ea2877961..de707ff353 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/KCoreAlgorithm.java @@ -184,7 +184,7 @@ public Set kcore(Iterator vertices, Directions direction, int minNeighbors = (int) Math.floor(1.0 / alpha * k); SimilarsMap map = fusiformSimilarity(vertices, direction, label, minNeighbors, alpha, k - 1, - 0, null, 1, degree, + 0, null, 0, degree, NO_LIMIT, NO_LIMIT, true); if (map.isEmpty()) { return ImmutableSet.of(); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java index 68d3ef5485..cb62d71ff8 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/similarity/FusiformSimilarityAlgorithm.java @@ -48,7 +48,7 @@ public class FusiformSimilarityAlgorithm extends AbstractAlgorithm { public static final int DEFAULT_MIN_NEIGHBORS = 10; public static final int DEFAULT_MIN_SIMILARS = 6; public static final int DEFAULT_TOP_SIMILARS = 0; - public static final int DEFAULT_MIN_GROUPS = 1; + public static final int DEFAULT_MIN_GROUPS = 0; @Override public String category() { From 36f7cd0c9746b183cf31464343ad3874495d9f26 Mon Sep 17 00:00:00 2001 From: Jermy Li Date: Tue, 2 Feb 2021 18:12:35 +0800 Subject: [PATCH 32/33] fix lpa not exist c_label property when a request pass source_label (#78) Change-Id: Ib2a9766e29708fa4fcbaec152f5c9e571c96a0a6 --- .../com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java index 973e93f7b2..f25201bea9 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java @@ -29,6 +29,7 @@ import org.apache.commons.lang3.mutable.MutableInt; import org.apache.tinkerpop.gremlin.process.traversal.Scope; +import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__; 
import org.apache.tinkerpop.gremlin.structure.Vertex; import com.baidu.hugegraph.backend.id.Id; @@ -118,6 +119,7 @@ public Object lpa(String sourceLabel, String edgeLabel, } Number communities = tryNext(this.graph().traversal().V() + .filter(__.properties(C_LABEL)) .groupCount().by(C_LABEL) .count(Scope.local)); return ImmutableMap.of("iteration_times", times, From 97748a475a40b014059fc2ceda98371493173d12 Mon Sep 17 00:00:00 2001 From: imbajin Date: Tue, 1 Nov 2022 18:05:04 +0800 Subject: [PATCH 33/33] adapt the latest version & clean code also fix the sec alert --- .../baidu/hugegraph/api/job/AlgorithmAPI.java | 18 ++++----- .../job/algorithm/AbstractAlgorithm.java | 11 ++---- .../hugegraph/job/algorithm/Algorithm.java | 8 ++-- .../job/algorithm/AlgorithmPool.java | 4 +- .../hugegraph/job/algorithm/BfsTraverser.java | 4 +- .../hugegraph/job/algorithm/Consumers.java | 5 ++- .../job/algorithm/SubgraphStatAlgorithm.java | 18 ++++----- .../algorithm/cent/AbstractCentAlgorithm.java | 11 ++---- .../cent/BetweennessCentralityAlgorithm.java | 2 +- .../cent/ClosenessCentralityAlgorithm.java | 2 +- .../cent/ClosenessCentralityAlgorithmV2.java | 2 +- .../cent/DegreeCentralityAlgorithm.java | 4 +- .../cent/EigenvectorCentralityAlgorithm.java | 2 +- .../cent/StressCentralityAlgorithm.java | 2 +- ....java => ClusterCoefficientAlgorithm.java} | 13 +++---- .../job/algorithm/comm/LouvainTraverser.java | 37 ++++++++----------- .../job/algorithm/comm/LpaAlgorithm.java | 2 +- .../job/computer/AbstractComputer.java | 5 +-- .../baidu/hugegraph/job/schema/SchemaJob.java | 4 +- .../job/system/DeleteExpiredIndexJob.java | 2 +- .../hugegraph/job/system/JobCounters.java | 11 +++--- 21 files changed, 75 insertions(+), 92 deletions(-) rename hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/{ClusterCoeffcientAlgorithm.java => ClusterCoefficientAlgorithm.java} (84%) diff --git a/hugegraph-api/src/main/java/com/baidu/hugegraph/api/job/AlgorithmAPI.java b/hugegraph-api/src/main/java/com/baidu/hugegraph/api/job/AlgorithmAPI.java index c965e02a56..b0e0d06925 100644 --- a/hugegraph-api/src/main/java/com/baidu/hugegraph/api/job/AlgorithmAPI.java +++ b/hugegraph-api/src/main/java/com/baidu/hugegraph/api/job/AlgorithmAPI.java @@ -21,15 +21,6 @@ import java.util.Map; -import javax.inject.Singleton; -import javax.ws.rs.Consumes; -import javax.ws.rs.NotFoundException; -import javax.ws.rs.POST; -import javax.ws.rs.Path; -import javax.ws.rs.PathParam; -import javax.ws.rs.Produces; -import javax.ws.rs.core.Context; - import org.slf4j.Logger; import com.baidu.hugegraph.HugeGraph; @@ -46,6 +37,15 @@ import com.codahale.metrics.annotation.Timed; import com.google.common.collect.ImmutableMap; +import jakarta.inject.Singleton; +import jakarta.ws.rs.Consumes; +import jakarta.ws.rs.NotFoundException; +import jakarta.ws.rs.POST; +import jakarta.ws.rs.Path; +import jakarta.ws.rs.PathParam; +import jakarta.ws.rs.Produces; +import jakarta.ws.rs.core.Context; + @Path("graphs/{graph}/jobs/algorithm") @Singleton public class AlgorithmAPI extends API { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java index 943debb4b8..dbe3f7e2b8 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AbstractAlgorithm.java @@ -57,8 +57,7 @@ import com.baidu.hugegraph.util.E; import 
com.baidu.hugegraph.util.JsonUtil; import com.baidu.hugegraph.util.ParameterUtil; - -import jersey.repackaged.com.google.common.base.Objects; +import com.google.common.base.Objects; @SuppressWarnings("deprecation") // StringEscapeUtils public abstract class AbstractAlgorithm implements Algorithm { @@ -382,9 +381,7 @@ protected Iterator vertices(Object label, long limit) { ConditionQuery query = new ConditionQuery(HugeType.VERTEX); query.capacity(Query.NO_CAPACITY); query.limit(limit); - if (label != null) { - query.eq(HugeKeys.LABEL, this.getVertexLabelId(label)); - } + query.eq(HugeKeys.LABEL, this.getVertexLabelId(label)); return this.graph().vertices(query); } @@ -544,8 +541,8 @@ public JsonMap() { this(4 * (int) Bytes.KB); } - public JsonMap(int initCapaticy) { - this.json = new StringBuilder(initCapaticy); + public JsonMap(int initCapacity) { + this.json = new StringBuilder(initCapacity); } public void startObject() { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Algorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Algorithm.java index b1cb531443..856e38dbcb 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Algorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Algorithm.java @@ -25,11 +25,11 @@ public interface Algorithm { - public String name(); + String name(); - public String category(); + String category(); - public Object call(UserJob job, Map parameters); + Object call(UserJob job, Map parameters); - public void checkParameters(Map parameters); + void checkParameters(Map parameters); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java index 02ac4c24ea..7031318acb 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/AlgorithmPool.java @@ -30,7 +30,7 @@ import com.baidu.hugegraph.job.algorithm.cent.EigenvectorCentralityAlgorithm; import com.baidu.hugegraph.job.algorithm.cent.StressCentralityAlgorithm; import com.baidu.hugegraph.job.algorithm.cent.StressCentralityAlgorithmV2; -import com.baidu.hugegraph.job.algorithm.comm.ClusterCoeffcientAlgorithm; +import com.baidu.hugegraph.job.algorithm.comm.ClusterCoefficientAlgorithm; import com.baidu.hugegraph.job.algorithm.comm.KCoreAlgorithm; import com.baidu.hugegraph.job.algorithm.comm.LouvainAlgorithm; import com.baidu.hugegraph.job.algorithm.comm.LpaAlgorithm; @@ -56,7 +56,7 @@ public class AlgorithmPool { INSTANCE.register(new EigenvectorCentralityAlgorithm()); INSTANCE.register(new TriangleCountAlgorithm()); - INSTANCE.register(new ClusterCoeffcientAlgorithm()); + INSTANCE.register(new ClusterCoefficientAlgorithm()); INSTANCE.register(new LpaAlgorithm()); INSTANCE.register(new LouvainAlgorithm()); INSTANCE.register(new WeakConnectedComponent()); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/BfsTraverser.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/BfsTraverser.java index 3b0920855f..a85cef0220 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/BfsTraverser.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/BfsTraverser.java @@ -36,7 +36,7 @@ public abstract class BfsTraverser extends AbstractAlgorithm.AlgoTraverser implements AutoCloseable { - private Stack traversedVertices = new 
Stack<>(); + private final Stack traversedVertices = new Stack<>(); public BfsTraverser(UserJob job) { super(job); @@ -113,7 +113,7 @@ public static class Node { private Id[] parents; private int pathCount; - private int distance; + private final int distance; public Node(Node parentNode) { this(0, parentNode.distance + 1); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Consumers.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Consumers.java index 1c68413fc0..9a60e3031a 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Consumers.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/Consumers.java @@ -92,7 +92,7 @@ private Void runAndDone() { this.run(); this.done(); } catch (Throwable e) { - // Only the first exception of one thread can be stored + // Only the first exception to one thread can be stored this.exception = e; if (!(e instanceof StopExecution)) { LOG.error("Error when running task", e); @@ -110,7 +110,8 @@ private void run() { this.consume(); } assert this.ending; - while (this.consume()); + while (this.consume()) { + } LOG.debug("Worker finished"); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java index d91748e41e..814277b2a1 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/SubgraphStatAlgorithm.java @@ -22,7 +22,7 @@ import java.util.Iterator; import java.util.Map; -import org.apache.commons.configuration.PropertiesConfiguration; +import org.apache.commons.configuration2.PropertiesConfiguration; import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource; import org.apache.tinkerpop.gremlin.structure.Vertex; import org.slf4j.Logger; @@ -90,7 +90,6 @@ private HugeGraph createTempGraph(UserJob job) { PropertiesConfiguration config = new PropertiesConfiguration(); config.setProperty(CoreOptions.BACKEND.name(), "memory"); config.setProperty(CoreOptions.STORE.name(), name); - config.setDelimiterParsingDisabled(true); /* * NOTE: this temp graph don't need to init backend because no task info * required, also not set started because no task to be scheduled. 
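Editorial aside on the hunk above (a hedged sketch, not part of patch 33): moving to org.apache.commons.configuration2 drops setDelimiterParsingDisabled(), because 2.x leaves list-delimiter splitting off unless a ListDelimiterHandler is installed. Built standalone, the in-memory temp-graph config could look roughly like this; the class and method names are illustrative and the CoreOptions import path is assumed from the surrounding code:

import org.apache.commons.configuration2.PropertiesConfiguration;
import org.apache.commons.configuration2.convert.DisabledListDelimiterHandler;

import com.baidu.hugegraph.config.CoreOptions;

public class TempGraphConfigSketch {

    public static PropertiesConfiguration memoryBackendConfig(String storeName) {
        PropertiesConfiguration config = new PropertiesConfiguration();
        // Explicitly keep list-delimiter parsing disabled (the 2.x default),
        // which replaces the removed 1.x setDelimiterParsingDisabled(true) call.
        config.setListDelimiterHandler(DisabledListDelimiterHandler.INSTANCE);
        config.setProperty(CoreOptions.BACKEND.name(), "memory");
        config.setProperty(CoreOptions.STORE.name(), storeName);
        return config;
    }
}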
@@ -129,12 +128,11 @@ protected static boolean copySchema(Map parameters) { private static class Traverser extends AlgoTraverser { - private static Map PARAMS = ImmutableMap.of( - "depth", 10L, - "degree", -1L, - "sample", -1L, - "top", -1L /* sorted */, - "workers", 0); + private static final Map PARAMS = ImmutableMap.of("depth", 10L, + "degree", -1L, + "sample", -1L, + "top", -1L /* sorted */, + "workers", 0); public Traverser(UserJob job) { super(job); @@ -166,8 +164,8 @@ public Object subgraphStat(UserJob job) { results.put("page_ranks", pageRanks(job)); - algo = pool.get("cluster_coeffcient"); - results.put("cluster_coeffcient", algo.call(job, parameters)); + algo = pool.get("cluster_coefficient"); + results.put("cluster_coefficient", algo.call(job, parameters)); algo = pool.get("rings"); parameters = ImmutableMap.builder() diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java index 066234873b..6a7ba396a4 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/AbstractCentAlgorithm.java @@ -86,8 +86,7 @@ protected GraphTraversal constructSource( t = t.filter(it -> { this.updateProgress(++this.progress); - return sourceCLabel == null ? true : - match(it.get(), sourceCLabel); + return sourceCLabel == null || match(it.get(), sourceCLabel); }); if (sourceSample > 0L) { @@ -164,9 +163,7 @@ protected GraphTraversal filterNonShortestPath( triples.put(key, len); } else { assert len == shortest; - if (keepOneShortestPath) { - return false; - } + return !keepOneShortestPath; } return true; }); @@ -182,7 +179,7 @@ protected GraphTraversal substractPath( @SuppressWarnings("unchecked") Iterator items = (Iterator) path.iterator(); - return new MapperIterator<>(items, v -> v.id()); + return new MapperIterator<>(items, HugeVertex::id); } int len = path.size(); if (len < 3) { @@ -195,7 +192,7 @@ protected GraphTraversal substractPath( @SuppressWarnings("unchecked") Iterator items = (Iterator) path.iterator(); - return new MapperIterator<>(items, v -> v.id()); + return new MapperIterator<>(items, HugeVertex::id); }); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweennessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweennessCentralityAlgorithm.java index 46f4d4a405..25e1451cf4 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweennessCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/BetweennessCentralityAlgorithm.java @@ -95,7 +95,7 @@ public Object betweennessCentrality(Directions direction, tg = this.computeBetweenness(tg); GraphTraversal tLimit = topN(tg, topN); - return this.execute(tLimit, () -> tLimit.next()); + return this.execute(tLimit, tLimit::next); } protected GraphTraversal groupPathByEndpoints( diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java index 6a95794a0a..81979dc015 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java +++ 
b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithm.java @@ -102,7 +102,7 @@ public Object closenessCentrality(Directions direction, .math("_-1").sack(Operator.div).sack().sum()); GraphTraversal tLimit = topN(tg, topN); - return this.execute(tLimit, () -> tLimit.next()); + return this.execute(tLimit, tLimit::next); } } } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithmV2.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithmV2.java index 1651c8943d..55dc93ad99 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithmV2.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/ClosenessCentralityAlgorithmV2.java @@ -61,7 +61,7 @@ public Object call(UserJob job, Map parameters) { private static class Traverser extends BfsTraverser { - private Map globalCloseness; + private final Map globalCloseness; private float startVertexCloseness; diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java index 6592c119d2..6032b56e2e 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/DegreeCentralityAlgorithm.java @@ -19,7 +19,7 @@ package com.baidu.hugegraph.job.algorithm.cent; -import java.util.Arrays; +import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -155,7 +155,7 @@ protected Object degreeCentralityForBothDir(String label, long topN) { } private long degree(Id source, String label) { - List labels = label == null ? null : Arrays.asList(label); + List labels = label == null ? 
null : Collections.singletonList(label); EdgeStep step = new EdgeStep(this.graph(), Directions.BOTH, labels, null, NO_LIMIT, 0); return this.edgesCount(source, step); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java index 15748ec726..d396f3cf39 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/EigenvectorCentralityAlgorithm.java @@ -96,7 +96,7 @@ public Object eigenvectorCentrality(Directions direction, GraphTraversal tCap = t.cap("m"); GraphTraversal tLimit = topN(tCap, topN); - return this.execute(tLimit, () -> tLimit.next()); + return this.execute(tLimit, tLimit::next); } } } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/StressCentralityAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/StressCentralityAlgorithm.java index 87f1471d4b..6f41892553 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/StressCentralityAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/cent/StressCentralityAlgorithm.java @@ -100,7 +100,7 @@ public Object stressCentrality(Directions direction, .groupCount(); GraphTraversal tLimit = topN(tg, topN); - return this.execute(tLimit, () -> tLimit.next()); + return this.execute(tLimit, tLimit::next); } } } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoefficientAlgorithm.java similarity index 84% rename from hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java rename to hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoefficientAlgorithm.java index 2a0cf1a42e..b7a3895a31 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoeffcientAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/ClusterCoefficientAlgorithm.java @@ -26,9 +26,9 @@ import com.baidu.hugegraph.util.E; import com.baidu.hugegraph.util.InsertionOrderUtil; -public class ClusterCoeffcientAlgorithm extends AbstractCommAlgorithm { +public class ClusterCoefficientAlgorithm extends AbstractCommAlgorithm { - public static final String ALGO_NAME = "cluster_coeffcient"; + public static final String ALGO_NAME = "cluster_coefficient"; @Override public String name() { @@ -46,8 +46,7 @@ public void checkParameters(Map parameters) { public Object call(UserJob job, Map parameters) { int workers = workersWhenBoth(parameters); try (Traverser traverser = new Traverser(job, workers)) { - return traverser.clusterCoeffcient(direction(parameters), - degree(parameters)); + return traverser.clusterCoefficient(direction(parameters), degree(parameters)); } } @@ -67,18 +66,18 @@ public Traverser(UserJob job, int workers) { super(job, ALGO_NAME, workers); } - public Object clusterCoeffcient(Directions direction, long degree) { + public Object clusterCoefficient(Directions direction, long degree) { Map results = this.triangles(direction, degree); results = InsertionOrderUtil.newMap(results); long triangles = results.remove(KEY_TRIANGLES); long triads = results.remove(KEY_TRIADS); assert triangles <= triads; - 
double coeffcient = triads == 0L ? 0d : 1d * triangles / triads; + double coefficient = triads == 0L ? 0d : 1d * triangles / triads; @SuppressWarnings({ "unchecked", "rawtypes" }) Map converted = (Map) results; - converted.put("cluster_coeffcient", coeffcient); + converted.put("cluster_coefficient", coefficient); return results; } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java index 4359d46b80..d6f647baf5 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LouvainTraverser.java @@ -20,11 +20,12 @@ package com.baidu.hugegraph.job.algorithm.comm; import java.io.BufferedOutputStream; -import java.io.FileOutputStream; import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Paths; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -238,10 +239,7 @@ private boolean needSkipVertex(int pass, Vertex v) { } } // skip the vertex with unmatched clabel - if (this.sourceCLabel != null && !match(v, this.sourceCLabel)) { - return true; - } - return false; + return this.sourceCLabel != null && !match(v, this.sourceCLabel); } private Iterator sourceVertices(int pass) { @@ -321,9 +319,7 @@ private Community wrapCommunity(Vertex v, List nbs) { return comm; } - private Collection> nbCommunities( - int pass, - List edges) { + private Collection> nbCommunities(int pass, List edges) { // comms is a map of cid:[community,weight] Map> comms = new HashMap<>(); for (Edge edge : edges) { @@ -512,7 +508,7 @@ private void mergeCommunity(int pass, Community c, Set cvertices) { if (cvertices.contains(otherV.id())) { // inner edges of this community, will be calc twice // due to both e-in and e-out are in vertices, - kin += weightOfEdge(edge); + kin += (int) weightOfEdge(edge); continue; } assert this.cache.vertex2Community(otherV.id()) != null; @@ -579,11 +575,10 @@ public Object louvain(int maxTimes, int stableTimes, double precision) { int times = maxTimes; int movedTimes = 0; double movedPercent = 0d; - double lastMovedPercent = 0d; + double lastMovedPercent; for (int i = 0; i < maxTimes; i++) { boolean finished = true; - movedPercent = 0d; lastMovedPercent = 1d; int tinyChanges = 0; while ((movedPercent = this.moveCommunities(i)) > 0d) { @@ -654,7 +649,7 @@ private double modularity(String label) { public Collection showCommunity(String community) { final String C_PASS0 = labelOfPassN(0); - Collection comms = Arrays.asList(community); + Collection comms = Collections.singletonList(community); boolean reachPass0 = false; while (comms.size() > 0 && !reachPass0) { Iterator subComms = this.vertices(comms.iterator()); @@ -679,7 +674,7 @@ public long exportCommunity(int pass, boolean vertexFirst) { String label = labelOfPassN(pass); GraphTraversal t = this.g.V().hasLabel(label); this.execute(t, () -> { - try (OutputStream os = new FileOutputStream(exportFile); + try (OutputStream os = Files.newOutputStream(Paths.get(exportFile)); BufferedOutputStream bos = new BufferedOutputStream(os)) { while (t.hasNext()) { String comm = t.next().id().toString(); @@ -712,7 +707,7 @@ public long clearPass(int pass) { List els = this.cpassEdgeLabels(); if (els.size() > 0) { String first = els.remove(0); - 
te = te.hasLabel(first, els.toArray(new String[els.size()])); + te = te.hasLabel(first, els.toArray(new String[0])); this.drop(te); } // drop schema @@ -736,7 +731,7 @@ public long clearPass(int pass) { List vls = this.cpassVertexLabels(); if (vls.size() > 0) { String first = vls.remove(0); - tv = tv.hasLabel(first, vls.toArray(new String[vls.size()])); + tv = tv.hasLabel(first, vls.toArray(new String[0])); this.drop(tv); } // drop schema @@ -895,7 +890,7 @@ public Id genId2(int pass, Id cid) { // gen id for merge-community vertex String id = cid.toString(); if (pass == 0) { - // conncat pass with cid + // concat pass with cid id = pass + "~" + id; } else { // replace last pass with current pass @@ -915,11 +910,9 @@ public Collection>> communities(){ if (c.empty()) { continue; } - Pair> pair = comms.get(c.cid); - if (pair == null) { - pair = Pair.of(c, new HashSet<>()); - comms.put(c.cid, pair); - } + Pair> pair = comms.computeIfAbsent(c.cid, k -> { + return Pair.of(c, new HashSet<>()); + }); // collect members joined to the community [current pass] pair.getRight().add(e.getKey()); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java index f25201bea9..59d53245f8 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/algorithm/comm/LpaAlgorithm.java @@ -184,7 +184,7 @@ private String voteCommunityOfVertex(Vertex vertex, String edgeLabel, Iterator neighbors = this.adjacentVertices(source, dir, labelId, degree); - // whether or not include vertex itself, greatly affects the result. + // whether include vertex itself, greatly affects the result. 
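// (Editor's hedged illustration, not part of the patch: one way to include the
//  vertex itself in the vote would be to prepend it to the neighbor iterator,
//  e.g. with TinkerPop's IteratorUtils and the local variable names assumed:
//      neighbors = IteratorUtils.concat(IteratorUtils.of(vertex), neighbors);
//  so the vertex's current label competes with its neighbors' labels.)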
// get a larger number of small communities if include itself //neighbors.inject(v); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/computer/AbstractComputer.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/computer/AbstractComputer.java index ca08ec9a38..3ef49c751d 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/computer/AbstractComputer.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/computer/AbstractComputer.java @@ -174,7 +174,7 @@ private Map readSubConfig(String sub) { "'%s' must be contained in config '%s'", sub); ImmutableNode root = null; - NodeHandler nodeHandler = null; + NodeHandler nodeHandler; Map results = new HashMap<>(nodes.size()); for (HierarchicalConfiguration node : nodes) { NodeModel nodeModel = node.getNodeModel(); @@ -191,8 +191,7 @@ private Map readSubConfig(String sub) { private String[] constructShellCommands(Map configs) { String hadoopHome = System.getenv(HADOOP_HOME); String commandPrefix = String.format(MAIN_COMMAND, hadoopHome); - List command = new ArrayList<>(); - command.addAll(Arrays.asList(commandPrefix.split(SPACE))); + List command = new ArrayList<>(Arrays.asList(commandPrefix.split(SPACE))); command.add(this.name()); for (Map.Entry entry : configs.entrySet()) { command.add(MINUS_C); diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/schema/SchemaJob.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/schema/SchemaJob.java index afa6c30693..4841ac4843 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/schema/SchemaJob.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/schema/SchemaJob.java @@ -106,8 +106,8 @@ protected static void removeSchema(SchemaTransaction tx, /** * Use reflection to call SchemaTransaction.updateSchema(), * which is protected - * @param tx The update operation actual executer - * @param schema the schema to be update + * @param tx The update operation actual execute + * @param schema the schema to be updated */ protected static void updateSchema(SchemaTransaction tx, SchemaElement schema) { diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/system/DeleteExpiredIndexJob.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/system/DeleteExpiredIndexJob.java index ede9a03bb3..d7889a3be5 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/system/DeleteExpiredIndexJob.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/system/DeleteExpiredIndexJob.java @@ -34,7 +34,7 @@ public class DeleteExpiredIndexJob extends DeleteExpiredJob { private static final String JOB_TYPE = "delete_expired_index"; - private Set indexes; + private final Set indexes; public DeleteExpiredIndexJob(Set indexes) { E.checkArgument(indexes != null && !indexes.isEmpty(), diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/system/JobCounters.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/system/JobCounters.java index afbe890903..9f4bb1cae3 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/job/system/JobCounters.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/job/system/JobCounters.java @@ -30,8 +30,7 @@ public class JobCounters { - private ConcurrentHashMap jobCounters = - new ConcurrentHashMap<>(); + private final ConcurrentHashMap jobCounters = new ConcurrentHashMap<>(); public JobCounter jobCounter(HugeGraph g) { int batch = g.option(CoreOptions.TASK_TTL_DELETE_BATCH); @@ -44,10 +43,10 @@ public JobCounter jobCounter(HugeGraph g) { public static class 
JobCounter { - private AtomicInteger jobs; + private final AtomicInteger jobs; private Set elements; private Set indexes; - private int batchSize; + private final int batchSize; public JobCounter(int batchSize) { this.jobs = new AtomicInteger(0); @@ -94,7 +93,7 @@ public boolean addAndTriggerDelete(Object object) { /** * Try to add element in collection waiting to be deleted * @param element - * @return true if should create a new delete job, false otherwise + * @return true if we should create a new delete job, false otherwise */ public boolean addElementAndTriggerDelete(HugeElement element) { if (this.elements.size() >= this.batchSize) { @@ -107,7 +106,7 @@ public boolean addElementAndTriggerDelete(HugeElement element) { /** * Try to add edge in collection waiting to be deleted * @param index - * @return true if should create a new delete job, false otherwise + * @return true if we should create a new delete job, false otherwise */ public boolean addIndexAndTriggerDelete(HugeIndex index) { if (this.indexes.size() >= this.batchSize) {
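Editorial appendix (a hedged sketch, not part of patch 33): the JobCounter changes above keep the add-and-trigger batching contract, where addElementAndTriggerDelete / addIndexAndTriggerDelete return true once the pending set reaches the TASK_TTL_DELETE_BATCH size, signalling the caller to schedule a fresh delete job. A caller-side illustration, with all surrounding names (jobCounters, graph, expiredIndexes, submitDeleteExpiredIndexJob) assumed for the example:

JobCounters.JobCounter counter = jobCounters.jobCounter(graph);
for (HugeIndex expired : expiredIndexes) {
    if (counter.addIndexAndTriggerDelete(expired)) {
        // The pending set reached TASK_TTL_DELETE_BATCH entries, so flush it
        // into a new delete job before continuing to accumulate indexes.
        submitDeleteExpiredIndexJob(graph, counter);  // hypothetical helper
    }
}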