From 3dd06f2f04f7afb3343a5e1b8dc4aea9ffe6e3c5 Mon Sep 17 00:00:00 2001 From: V_Galaxy Date: Mon, 11 Mar 2024 17:03:49 +0800 Subject: [PATCH 1/7] chore(pd-store-dev): integrate `pd-grpc`, `pd-common`, `pd-client` into hugegraph (#2460) subtask of https://github.com/apache/incubator-hugegraph/issues/2265 --- During the code review, I found the following issues: 1. Similar functionality is implemented in multiple places; for example, the stub connection code is duplicated between `PDClient.StubProxy` and `AbstractClientStubProxy`. 2. Package partitioning: 1. `PDPulse`, `PDPulseImpl` should be in `pulse` 2. `PDWatch`, `PDWatchImpl` should be in `watch` 3. Unused code (see below) --------- Co-authored-by: imbajin --- .github/workflows/check-dependencies.yml | 3 +- .github/workflows/ci.yml | 2 +- .github/workflows/pd-store.yml | 51 + hugegraph-pd/.gitignore | 2 + hugegraph-pd/README.md | 9 +- hugegraph-pd/hg-pd-client/pom.xml | 73 + .../hugegraph/pd/client/AbstractClient.java | 265 ++++ .../pd/client/AbstractClientStubProxy.java | 72 + .../apache/hugegraph/pd/client/Channels.java | 44 + .../hugegraph/pd/client/ClientCache.java | 338 +++++ .../hugegraph/pd/client/Discoverable.java | 30 + .../hugegraph/pd/client/DiscoveryClient.java | 221 +++ .../pd/client/DiscoveryClientImpl.java | 137 ++ .../apache/hugegraph/pd/client/KvClient.java | 343 +++++ .../hugegraph/pd/client/LicenseClient.java | 71 + .../apache/hugegraph/pd/client/PDClient.java | 1347 +++++++++++++++++ .../apache/hugegraph/pd/client/PDConfig.java | 83 + .../apache/hugegraph/pd/client/PDPulse.java | 154 ++ .../hugegraph/pd/client/PDPulseImpl.java | 197 +++ .../apache/hugegraph/pd/client/PDWatch.java | 140 ++ .../hugegraph/pd/client/PDWatchImpl.java | 204 +++ .../hugegraph/pd/pulse/PartitionNotice.java | 50 + .../hugegraph/pd/pulse/PulseServerNotice.java | 36 + .../apache/hugegraph/pd/watch/NodeEvent.java | 100 ++ .../apache/hugegraph/pd/watch/PDWatcher.java | 22 + .../hugegraph/pd/watch/PartitionEvent.java | 94 ++ .../apache/hugegraph/pd/watch/WatchType.java | 30 + hugegraph-pd/hg-pd-common/pom.xml | 54 + .../hugegraph/pd/common/GraphCache.java | 62 + .../apache/hugegraph/pd/common/HgAssert.java | 117 ++ .../apache/hugegraph/pd/common/KVPair.java | 132 ++ .../hugegraph/pd/common/PDException.java | 47 + .../pd/common/PDRuntimeException.java | 49 + .../hugegraph/pd/common/PartitionCache.java | 458 ++++++ .../hugegraph/pd/common/PartitionUtils.java | 47 + hugegraph-pd/hg-pd-grpc/pom.xml | 138 ++ .../hg-pd-grpc/src/main/proto/discovery.proto | 71 + .../hg-pd-grpc/src/main/proto/kv.proto | 143 ++ .../hg-pd-grpc/src/main/proto/metaTask.proto | 64 + .../hg-pd-grpc/src/main/proto/metapb.proto | 394 +++++ .../hg-pd-grpc/src/main/proto/pd_common.proto | 53 + .../hg-pd-grpc/src/main/proto/pd_pulse.proto | 172 +++ .../hg-pd-grpc/src/main/proto/pd_watch.proto | 103 ++ .../hg-pd-grpc/src/main/proto/pdpb.proto | 607 ++++++++ hugegraph-pd/hg-pd-test/pom.xml | 259 ++++ .../hugegraph/pd/common/BaseCommonTest.java | 34 + .../hugegraph/pd/common/CommonSuiteTest.java | 36 + .../hugegraph/pd/common/HgAssertTest.java | 132 ++ .../hugegraph/pd/common/KVPairTest.java | 72 + .../pd/common/PartitionCacheTest.java | 388 +++++ .../pd/common/PartitionUtilsTest.java | 54 + .../hg-pd-test/src/main/resources/log4j2.xml | 139 ++ hugegraph-pd/pom.xml | 184 +++ .../apache/hugegraph/version/CoreVersion.java | 2 +- pom.xml | 27 +- 55 files changed, 8142 insertions(+), 14 deletions(-) create mode 100644 .github/workflows/pd-store.yml create mode 100644 hugegraph-pd/.gitignore create mode
100644 hugegraph-pd/hg-pd-client/pom.xml create mode 100644 hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/AbstractClient.java create mode 100644 hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/AbstractClientStubProxy.java create mode 100644 hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/Channels.java create mode 100644 hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/ClientCache.java create mode 100644 hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/Discoverable.java create mode 100644 hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/DiscoveryClient.java create mode 100644 hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/DiscoveryClientImpl.java create mode 100644 hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/KvClient.java create mode 100644 hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/LicenseClient.java create mode 100644 hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDClient.java create mode 100644 hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDConfig.java create mode 100644 hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDPulse.java create mode 100644 hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDPulseImpl.java create mode 100644 hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDWatch.java create mode 100644 hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDWatchImpl.java create mode 100644 hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/pulse/PartitionNotice.java create mode 100644 hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/pulse/PulseServerNotice.java create mode 100644 hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/watch/NodeEvent.java create mode 100644 hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/watch/PDWatcher.java create mode 100644 hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/watch/PartitionEvent.java create mode 100644 hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/watch/WatchType.java create mode 100644 hugegraph-pd/hg-pd-common/pom.xml create mode 100644 hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/GraphCache.java create mode 100644 hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/HgAssert.java create mode 100644 hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/KVPair.java create mode 100644 hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/PDException.java create mode 100644 hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/PDRuntimeException.java create mode 100644 hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/PartitionCache.java create mode 100644 hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/PartitionUtils.java create mode 100644 hugegraph-pd/hg-pd-grpc/pom.xml create mode 100644 hugegraph-pd/hg-pd-grpc/src/main/proto/discovery.proto create mode 100644 hugegraph-pd/hg-pd-grpc/src/main/proto/kv.proto create mode 100644 hugegraph-pd/hg-pd-grpc/src/main/proto/metaTask.proto create mode 100644 hugegraph-pd/hg-pd-grpc/src/main/proto/metapb.proto create mode 100644 hugegraph-pd/hg-pd-grpc/src/main/proto/pd_common.proto create mode 100644 
hugegraph-pd/hg-pd-grpc/src/main/proto/pd_pulse.proto create mode 100644 hugegraph-pd/hg-pd-grpc/src/main/proto/pd_watch.proto create mode 100644 hugegraph-pd/hg-pd-grpc/src/main/proto/pdpb.proto create mode 100644 hugegraph-pd/hg-pd-test/pom.xml create mode 100644 hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/common/BaseCommonTest.java create mode 100644 hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/common/CommonSuiteTest.java create mode 100644 hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/common/HgAssertTest.java create mode 100644 hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/common/KVPairTest.java create mode 100644 hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/common/PartitionCacheTest.java create mode 100644 hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/common/PartitionUtilsTest.java create mode 100644 hugegraph-pd/hg-pd-test/src/main/resources/log4j2.xml create mode 100644 hugegraph-pd/pom.xml diff --git a/.github/workflows/check-dependencies.yml b/.github/workflows/check-dependencies.yml index e3632f5d38..311fc3519e 100644 --- a/.github/workflows/check-dependencies.yml +++ b/.github/workflows/check-dependencies.yml @@ -49,7 +49,8 @@ jobs: uses: actions/dependency-review-action@v3 # Refer: https://github.com/actions/dependency-review-action with: - fail-on-severity: low + # TODO: reset critical to low before releasing + fail-on-severity: critical # Action will fail if dependencies don't match the list #allow-licenses: Apache-2.0, MIT #deny-licenses: GPL-3.0, AGPL-1.0, AGPL-3.0, LGPL-2.0, CC-BY-3.0 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d439c31337..bae3434171 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,7 +27,7 @@ jobs: fail-fast: false matrix: BACKEND: [ memory, rocksdb, hbase, cassandra, mysql, postgresql, scylladb ] - JAVA_VERSION: [ '8', '11' ] + JAVA_VERSION: [ '11' ] steps: - name: Checkout diff --git a/.github/workflows/pd-store.yml b/.github/workflows/pd-store.yml new file mode 100644 index 0000000000..65fb3ccc9c --- /dev/null +++ b/.github/workflows/pd-store.yml @@ -0,0 +1,51 @@ +name: "pd-store" + +on: + push: + branches: + - master + - 'release-*' + - 'test-*' + pull_request: + +jobs: + pd: + runs-on: ubuntu-latest + env: + USE_STAGE: 'true' # Whether to include the stage repository. 
+ TRAVIS_DIR: hugegraph-server/hugegraph-dist/src/assembly/travis + REPORT_DIR: target/site/jacoco + + steps: + - name: Install JDK 11 + uses: actions/setup-java@v3 + with: + java-version: '11' + distribution: 'zulu' + + - name: Cache Maven packages + uses: actions/cache@v3 + with: + path: ~/.m2 + key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} + restore-keys: ${{ runner.os }}-m2 + + - name: Checkout + uses: actions/checkout@v3 + with: + fetch-depth: 2 + + - name: use staged maven repo settings + if: ${{ env.USE_STAGE == 'true' }} + run: | + cp $HOME/.m2/settings.xml /tmp/settings.xml + mv -vf .github/configs/settings.xml $HOME/.m2/settings.xml + + - name: Run common test + run: | + mvn test -pl hugegraph-pd/hg-pd-test -am -P pd-common-test + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v3.0.0 + with: + file: ${{ env.REPORT_DIR }}/*.xml diff --git a/hugegraph-pd/.gitignore b/hugegraph-pd/.gitignore new file mode 100644 index 0000000000..c2bd7537ea --- /dev/null +++ b/hugegraph-pd/.gitignore @@ -0,0 +1,2 @@ +# Exclude the generated PB files +hg-pd-grpc/src/main/java/ diff --git a/hugegraph-pd/README.md b/hugegraph-pd/README.md index 49548c216d..1aea07d7dd 100644 --- a/hugegraph-pd/README.md +++ b/hugegraph-pd/README.md @@ -1,5 +1,8 @@ -# HugeGraph PD +> Note: From revision 1.5.0, the code of HugeGraph-PD will be adapted to this location (WIP). -HugeGraph PD is a meta server responsible for service discovery, partition information storage, and node scheduling. +# HugeGraph PD (BETA) -> Note: Currently, the contents of this folder are empty. Starting from revision 1.5.0, the code of HugeGraph PD will be adapted to this location (WIP). +HugeGraph PD is a meta server responsible for service discovery, partition information storage, and +node scheduling. + +> BTW, if you run into any problems when using HugeGraph PD, please feel free to contact us for help. diff --git a/hugegraph-pd/hg-pd-client/pom.xml b/hugegraph-pd/hg-pd-client/pom.xml new file mode 100644 index 0000000000..a64756fe94 --- /dev/null +++ b/hugegraph-pd/hg-pd-client/pom.xml @@ -0,0 +1,73 @@ + + + + + + 4.0.0 + + + org.apache.hugegraph + hugegraph-pd + ${revision} + ../pom.xml + + hg-pd-client + + + + org.projectlombok + lombok + 1.18.20 + + + org.apache.logging.log4j + log4j-slf4j-impl + 2.17.0 + + + org.apache.hugegraph + hg-pd-grpc + ${revision} + + + org.apache.hugegraph + hg-pd-common + ${revision} + compile + + + junit + junit + 4.13.2 + test + + + commons-io + commons-io + 2.8.0 + + + org.yaml + snakeyaml + 1.28 + test + + + diff --git a/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/AbstractClient.java b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/AbstractClient.java new file mode 100644 index 0000000000..874ef6f67c --- /dev/null +++ b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/AbstractClient.java @@ -0,0 +1,265 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd.client; + +import java.io.Closeable; +import java.util.LinkedList; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Predicate; +import java.util.stream.Stream; + +import org.apache.hugegraph.pd.common.KVPair; +import org.apache.hugegraph.pd.common.PDException; +import org.apache.hugegraph.pd.grpc.Metapb; +import org.apache.hugegraph.pd.grpc.PDGrpc; +import org.apache.hugegraph.pd.grpc.PDGrpc.PDBlockingStub; +import org.apache.hugegraph.pd.grpc.Pdpb; +import org.apache.hugegraph.pd.grpc.Pdpb.GetMembersRequest; +import org.apache.hugegraph.pd.grpc.Pdpb.GetMembersResponse; + +import io.grpc.Channel; +import io.grpc.ClientCall; +import io.grpc.ManagedChannel; +import io.grpc.ManagedChannelBuilder; +import io.grpc.MethodDescriptor; +import io.grpc.StatusRuntimeException; +import io.grpc.stub.AbstractBlockingStub; +import io.grpc.stub.AbstractStub; +import io.grpc.stub.ClientCalls; +import io.grpc.stub.StreamObserver; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public abstract class AbstractClient implements Closeable { + + private static final ConcurrentHashMap chs = new ConcurrentHashMap<>(); + public static Pdpb.ResponseHeader okHeader = Pdpb.ResponseHeader.newBuilder().setError( + Pdpb.Error.newBuilder().setType(Pdpb.ErrorType.OK)).build(); + protected final Pdpb.RequestHeader header; + protected final AbstractClientStubProxy stubProxy; + protected final PDConfig config; + protected ManagedChannel channel = null; + protected volatile ConcurrentMap stubs = null; + + protected AbstractClient(PDConfig config) { + String[] hosts = config.getServerHost().split(","); + this.stubProxy = new AbstractClientStubProxy(hosts); + this.header = Pdpb.RequestHeader.getDefaultInstance(); + this.config = config; + } + + public static Pdpb.ResponseHeader newErrorHeader(int errorCode, String errorMsg) { + Pdpb.ResponseHeader header = Pdpb.ResponseHeader.newBuilder().setError( + Pdpb.Error.newBuilder().setTypeValue(errorCode).setMessage(errorMsg)).build(); + return header; + } + + protected static void handleErrors(Pdpb.ResponseHeader header) throws PDException { + if (header.hasError() && header.getError().getType() != Pdpb.ErrorType.OK) { + throw new PDException(header.getError().getTypeValue(), + String.format("PD request error, error code = %d, msg = %s", + header.getError().getTypeValue(), + header.getError().getMessage())); + } + } + + protected AbstractBlockingStub getBlockingStub() throws PDException { + if (stubProxy.getBlockingStub() == null) { + synchronized (this) { + if (stubProxy.getBlockingStub() == null) { + String host = resetStub(); + if (host.isEmpty()) { + throw new PDException(Pdpb.ErrorType.PD_UNREACHABLE_VALUE, + "PD unreachable, pd.peers=" + + config.getServerHost()); + } + } + } + } + return (AbstractBlockingStub) stubProxy.getBlockingStub() + .withDeadlineAfter(config.getGrpcTimeOut(), + TimeUnit.MILLISECONDS); + } + + protected AbstractStub getStub() throws PDException { + 
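+ // Double-checked locking: only one caller rebuilds the stub while the others wait and then reuse it; resetStub() probes each configured PD host and reconnects to the leader it reports.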
if (stubProxy.getStub() == null) { + synchronized (this) { + if (stubProxy.getStub() == null) { + String host = resetStub(); + if (host.isEmpty()) { + throw new PDException(Pdpb.ErrorType.PD_UNREACHABLE_VALUE, + "PD unreachable, pd.peers=" + + config.getServerHost()); + } + } + } + } + return stubProxy.getStub(); + } + + protected abstract AbstractStub createStub(); + + protected abstract AbstractBlockingStub createBlockingStub(); + + private String resetStub() { + String leaderHost = ""; + for (int i = 0; i < stubProxy.getHostCount(); i++) { + String host = stubProxy.nextHost(); + channel = ManagedChannelBuilder.forTarget(host).usePlaintext().build(); + PDBlockingStub blockingStub = PDGrpc.newBlockingStub(channel) + .withDeadlineAfter(config.getGrpcTimeOut(), + TimeUnit.MILLISECONDS); + try { + GetMembersRequest request = Pdpb.GetMembersRequest.newBuilder() + .setHeader(header).build(); + GetMembersResponse members = blockingStub.getMembers(request); + Metapb.Member leader = members.getLeader(); + leaderHost = leader.getGrpcUrl(); + close(); + channel = ManagedChannelBuilder.forTarget(leaderHost).usePlaintext().build(); + stubProxy.setBlockingStub(createBlockingStub()); + stubProxy.setStub(createStub()); + log.info("PDClient connect to host = {} success", leaderHost); + break; + } catch (Exception e) { + log.error("PDClient connect to {} exception {}, {}", host, e.getMessage(), + e.getCause() != null ? e.getCause().getMessage() : ""); + } + } + return leaderHost; + } + + protected > RespT blockingUnaryCall( + MethodDescriptor method, ReqT req) throws PDException { + return blockingUnaryCall(method, req, 5); + } + + protected > RespT blockingUnaryCall( + MethodDescriptor method, ReqT req, int retry) throws PDException { + AbstractBlockingStub stub = getBlockingStub(); + try { + RespT resp = + ClientCalls.blockingUnaryCall(stub.getChannel(), method, stub.getCallOptions(), + req); + return resp; + } catch (Exception e) { + log.error(method.getFullMethodName() + " exception, {}", e.getMessage()); + if (e instanceof StatusRuntimeException) { + if (retry < stubProxy.getHostCount()) { + // The network is unreachable: close the previous connection and retry with the next host + synchronized (this) { + stubProxy.setBlockingStub(null); + } + return blockingUnaryCall(method, req, ++retry); + } + } + } + return null; + } + + // this.stubs = new ConcurrentHashMap(hosts.length); + private AbstractBlockingStub getConcurrentBlockingStub(String address) { + AbstractBlockingStub stub = stubs.get(address); + if (stub != null) { + return stub; + } + Channel ch = ManagedChannelBuilder.forTarget(address).usePlaintext().build(); + PDBlockingStub blockingStub = + PDGrpc.newBlockingStub(ch).withDeadlineAfter(config.getGrpcTimeOut(), + TimeUnit.MILLISECONDS); + stubs.put(address, blockingStub); + return blockingStub; + + } + + protected KVPair concurrentBlockingUnaryCall( + MethodDescriptor method, ReqT req, Predicate predicate) { + LinkedList hostList = this.stubProxy.getHostList(); + if (this.stubs == null) { + synchronized (this) { + if (this.stubs == null) { + this.stubs = new ConcurrentHashMap<>(hostList.size()); + } + } + } + Stream respTStream = hostList.parallelStream().map((address) -> { + AbstractBlockingStub stub = getConcurrentBlockingStub(address); + RespT resp = ClientCalls.blockingUnaryCall(stub.getChannel(), + method, stub.getCallOptions(), req); + return resp; + }); + KVPair pair; + AtomicReference response = new AtomicReference<>(); + boolean result = respTStream.anyMatch((r) -> { + response.set(r); + return predicate.test(r); + }); + if (result) { + pair =
new KVPair<>(true, null); + } else { + pair = new KVPair<>(false, response.get()); + } + return pair; + } + + protected void streamingCall(MethodDescriptor method, ReqT request, + StreamObserver responseObserver, + int retry) throws PDException { + AbstractStub stub = getStub(); + try { + ClientCall call = stub.getChannel().newCall(method, stub.getCallOptions()); + ClientCalls.asyncServerStreamingCall(call, request, responseObserver); + } catch (Exception e) { + if (e instanceof StatusRuntimeException) { + if (retry < stubProxy.getHostCount()) { + synchronized (this) { + stubProxy.setStub(null); + } + streamingCall(method, request, responseObserver, ++retry); + return; + } + } + log.error("rpc call with exception, {}", e.getMessage()); + } + } + + @Override + public void close() { + closeChannel(channel); + if (stubs != null) { + for (AbstractBlockingStub stub : stubs.values()) { + closeChannel((ManagedChannel) stub.getChannel()); + } + } + + } + + private void closeChannel(ManagedChannel channel) { + try { + while (channel != null && + !channel.shutdownNow().awaitTermination(100, TimeUnit.MILLISECONDS)) { + continue; + } + } catch (Exception e) { + log.info("Close channel with error : ", e); + } + } +} diff --git a/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/AbstractClientStubProxy.java b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/AbstractClientStubProxy.java new file mode 100644 index 0000000000..6ee3fcb625 --- /dev/null +++ b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/AbstractClientStubProxy.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hugegraph.pd.client; + +import java.util.LinkedList; + +import io.grpc.stub.AbstractBlockingStub; +import io.grpc.stub.AbstractStub; + +public class AbstractClientStubProxy { + + private final LinkedList hostList = new LinkedList<>(); + private AbstractBlockingStub blockingStub; + private AbstractStub stub; + + public AbstractClientStubProxy(String[] hosts) { + for (String host : hosts) { + if (!host.isEmpty()) { + hostList.offer(host); + } + } + } + + public LinkedList getHostList() { + return hostList; + } + + public String nextHost() { + String host = hostList.poll(); + hostList.offer(host); // move it to the tail + return host; + } + + public AbstractBlockingStub getBlockingStub() { + return this.blockingStub; + } + + public void setBlockingStub(AbstractBlockingStub stub) { + this.blockingStub = stub; + } + + public String getHost() { + return hostList.peek(); + } + + public int getHostCount() { + return hostList.size(); + } + + public AbstractStub getStub() { + return stub; + } + + public void setStub(AbstractStub stub) { + this.stub = stub; + } +} diff --git a/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/Channels.java b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/Channels.java new file mode 100644 index 0000000000..34616e6374 --- /dev/null +++ b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/Channels.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd.client; + +import java.util.concurrent.ConcurrentHashMap; + +import io.grpc.ManagedChannel; +import io.grpc.ManagedChannelBuilder; + +public class Channels { + + private static final ConcurrentHashMap chs = new ConcurrentHashMap<>(); + + public static ManagedChannel getChannel(String target) { + + ManagedChannel channel; + if ((channel = chs.get(target)) == null || channel.isShutdown() || channel.isTerminated()) { + synchronized (chs) { + if ((channel = chs.get(target)) == null || channel.isShutdown() || + channel.isTerminated()) { + channel = ManagedChannelBuilder.forTarget(target).usePlaintext().build(); + chs.put(target, channel); + } + } + } + + return channel; + } +} diff --git a/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/ClientCache.java b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/ClientCache.java new file mode 100644 index 0000000000..d4fd50ffe9 --- /dev/null +++ b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/ClientCache.java @@ -0,0 +1,338 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd.client; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Objects; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.apache.hugegraph.pd.common.GraphCache; +import org.apache.hugegraph.pd.common.KVPair; +import org.apache.hugegraph.pd.common.PDException; +import org.apache.hugegraph.pd.common.PartitionUtils; +import org.apache.hugegraph.pd.grpc.Metapb; +import org.apache.hugegraph.pd.grpc.Metapb.Partition; +import org.apache.hugegraph.pd.grpc.Metapb.Shard; +import org.apache.hugegraph.pd.grpc.Metapb.ShardGroup; +import org.apache.hugegraph.pd.grpc.Pdpb.CachePartitionResponse; +import org.apache.hugegraph.pd.grpc.Pdpb.CacheResponse; + +import com.google.common.collect.Range; +import com.google.common.collect.RangeMap; + +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class ClientCache { + + private final AtomicBoolean initialized = new AtomicBoolean(false); + private final org.apache.hugegraph.pd.client.PDClient client; + private volatile Map> groups; + private volatile Map stores; + private volatile Map caches = new ConcurrentHashMap<>(); + + public ClientCache(org.apache.hugegraph.pd.client.PDClient pdClient) { + groups = new ConcurrentHashMap<>(); + stores = new ConcurrentHashMap<>(); + client = pdClient; + } + + private GraphCache getGraphCache(String graphName) { + GraphCache graph; + if ((graph = caches.get(graphName)) == null) { + synchronized (caches) { + if ((graph = caches.get(graphName)) == null) { + graph = new GraphCache(); + caches.put(graphName, graph); + } + } + } + return graph; + } + + public KVPair getPartitionById(String graphName, int partId) { + try { + GraphCache graph = initGraph(graphName); + Partition partition = graph.getPartition(partId); + Shard shard = groups.get(partId).getValue(); + if (partition == null || shard == null) { + return null; + } + return new KVPair<>(partition, shard); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private KVPair getPair(int partId, GraphCache graph) { + Partition p = graph.getPartition(partId); + KVPair pair = groups.get(partId); + if (p != null && pair != null) { + Shard s = pair.getValue(); + if (s == null) { + pair.setValue(getLeader(partId)); + return new KVPair<>(p, pair.getValue()); + } else { + return new KVPair<>(p, s); + } + } + return null; + } + + /** + * Returns the partition info by the hash code of the key + * + * @param graphName + * @param code + * @return + */ + public KVPair getPartitionByCode(String graphName, long code) { + try { + GraphCache graph = initGraph(graphName); + RangeMap range = graph.getRange(); + Integer pId = range.get(code); + if (pId != null) { + return getPair(pId, graph); + } + return null; + } catch (PDException e) { + throw new RuntimeException(e); + } + } + + private GraphCache initGraph(String graphName) throws PDException { + initCache();
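+ // initCache() lazily loads the cluster-wide view (shard groups, stores, graphs) once; the per-graph partition range map is then built below on first access.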
GraphCache graph = getGraphCache(graphName); + if (!graph.getInitialized().get()) { + synchronized (graph) { + if (!graph.getInitialized().get()) { + CachePartitionResponse pc = client.getPartitionCache(graphName); + RangeMap range = graph.getRange(); + List ps = pc.getPartitionsList(); + HashMap gps = new HashMap<>(ps.size(), 1); + for (Partition p : ps) { + gps.put(p.getId(), p); + range.put(Range.closedOpen(p.getStartKey(), p.getEndKey()), p.getId()); + } + graph.setPartitions(gps); + graph.getInitialized().set(true); + } + } + } + return graph; + } + + private void initCache() throws PDException { + if (!initialized.get()) { + synchronized (this) { + if (!initialized.get()) { + CacheResponse cache = client.getClientCache(); + List shardGroups = cache.getShardsList(); + for (ShardGroup s : shardGroups) { + this.groups.put(s.getId(), new KVPair<>(s, getLeader(s.getId()))); + } + List stores = cache.getStoresList(); + for (Metapb.Store store : stores) { + this.stores.put(store.getId(), store); + } + List graphs = cache.getGraphsList(); + for (Metapb.Graph g : graphs) { + GraphCache c = new GraphCache(g); + caches.put(g.getGraphName(), c); + } + initialized.set(true); + } + } + } + } + + /** + * Returns the info of the partition that the key belongs to + * + * @param key + * @return + */ + public KVPair getPartitionByKey(String graphName, byte[] key) { + int code = PartitionUtils.calcHashcode(key); + return getPartitionByCode(graphName, code); + } + + public boolean update(String graphName, int partId, Partition partition) { + GraphCache graph = getGraphCache(graphName); + try { + Partition p = graph.getPartition(partId); + if (p != null && p.equals(partition)) { + return false; + } + RangeMap range = graph.getRange(); + graph.addPartition(partId, partition); + if (p != null) { + // e.g. the old range [1,3) has been covered by [2,3): when [1,3) becomes [1,2), the original [1,3) entry must not be removed. + // Only remove the old range when both its start and end still map to this partition (i.e. it has not been covered yet). + if (Objects.equals(partition.getId(), range.get(partition.getStartKey())) && + Objects.equals(partition.getId(), range.get(partition.getEndKey() - 1))) { + range.remove(range.getEntry(partition.getStartKey()).getKey()); + } + } + range.put(Range.closedOpen(partition.getStartKey(), partition.getEndKey()), partId); + } catch (Exception e) { + throw new RuntimeException(e); + } + return true; + } + + public void removePartition(String graphName, int partId) { + GraphCache graph = getGraphCache(graphName); + Partition p = graph.removePartition(partId); + if (p != null) { + RangeMap range = graph.getRange(); + if (Objects.equals(p.getId(), range.get(p.getStartKey())) && + Objects.equals(p.getId(), range.get(p.getEndKey() - 1))) { + range.remove(range.getEntry(p.getStartKey()).getKey()); + } + } + } + + /** + * remove all partitions + */ + public void removePartitions() { + for (Entry entry : caches.entrySet()) { + removePartitions(entry.getValue()); + } + } + + private void removePartitions(GraphCache graph) { + graph.getState().clear(); + graph.getRange().clear(); + } + + /** + * remove partition cache of graphName + * + * @param graphName + */ + public void removeAll(String graphName) { + GraphCache graph = caches.get(graphName); + if (graph != null) { + removePartitions(graph); + } + } + + public boolean updateShardGroup(ShardGroup shardGroup) { + KVPair old = groups.get(shardGroup.getId()); + Shard leader = getLeader(shardGroup); + if (old != null) { + old.setKey(shardGroup); + old.setValue(leader); + return false; + } + groups.put(shardGroup.getId(), new KVPair<>(shardGroup, leader)); + return true; + } + + public void deleteShardGroup(int shardGroupId) { + groups.remove(shardGroupId); + } + + public ShardGroup getShardGroup(int groupId) { + KVPair pair = groups.get(groupId); + if (pair != null) { + return pair.getKey(); + } + return null; + } + + public boolean addStore(Long storeId, Metapb.Store store) { + Metapb.Store oldStore = stores.get(storeId); + if (oldStore != null && oldStore.equals(store)) { + return false; + } + stores.put(storeId, store); + return true; + } + + public Metapb.Store getStoreById(Long storeId) { + return stores.get(storeId); + } + + public void removeStore(Long storeId) { + stores.remove(storeId); + } + + public void reset() { + groups = new ConcurrentHashMap<>(); + stores = new ConcurrentHashMap<>(); + caches = new ConcurrentHashMap<>(); + } + + public Shard getLeader(int partitionId) { + KVPair pair = groups.get(partitionId); + if (pair != null) { + if (pair.getValue() != null) { + return pair.getValue(); + } + for (Shard shard : pair.getKey().getShardsList()) { + if (shard.getRole() == Metapb.ShardRole.Leader) { + pair.setValue(shard); + return shard; + } + } + } + + return null; + } + + public Shard getLeader(ShardGroup shardGroup) { + if (shardGroup != null) { + for (Shard shard : shardGroup.getShardsList()) { + if (shard.getRole() == Metapb.ShardRole.Leader) { + return shard; + } + } + } + + return null; + } + + public void updateLeader(int partitionId, Shard leader) { + KVPair pair = groups.get(partitionId); + if (pair != null && leader != null) { + Shard l = getLeader(partitionId); + if (l == null || leader.getStoreId() != l.getStoreId()) { + ShardGroup shardGroup = pair.getKey(); + ShardGroup.Builder builder = ShardGroup.newBuilder(shardGroup).clearShards(); + for (var shard : shardGroup.getShardsList()) { + builder.addShards( + Shard.newBuilder() + .setStoreId(shard.getStoreId()) + .setRole(shard.getStoreId() == leader.getStoreId() ?
+ Metapb.ShardRole.Leader : Metapb.ShardRole.Follower) + .build() + ); + } + pair.setKey(builder.build()); + pair.setValue(leader); + } + } + } +} diff --git a/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/Discoverable.java b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/Discoverable.java new file mode 100644 index 0000000000..abdcac414c --- /dev/null +++ b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/Discoverable.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd.client; + +import org.apache.hugegraph.pd.grpc.discovery.NodeInfos; +import org.apache.hugegraph.pd.grpc.discovery.Query; + +public interface Discoverable { + + NodeInfos getNodeInfos(Query query); + + void scheduleTask(); + + void cancelTask(); +} diff --git a/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/DiscoveryClient.java b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/DiscoveryClient.java new file mode 100644 index 0000000000..7a9f28c013 --- /dev/null +++ b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/DiscoveryClient.java @@ -0,0 +1,221 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hugegraph.pd.client; + +import java.io.Closeable; +import java.util.LinkedList; +import java.util.Timer; +import java.util.TimerTask; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.function.Consumer; +import java.util.function.Function; + +import org.apache.hugegraph.pd.common.PDException; +import org.apache.hugegraph.pd.grpc.discovery.DiscoveryServiceGrpc; +import org.apache.hugegraph.pd.grpc.discovery.NodeInfo; +import org.apache.hugegraph.pd.grpc.discovery.NodeInfos; +import org.apache.hugegraph.pd.grpc.discovery.Query; +import org.apache.hugegraph.pd.grpc.discovery.RegisterInfo; + +import io.grpc.ManagedChannel; +import io.grpc.ManagedChannelBuilder; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public abstract class DiscoveryClient implements Closeable, Discoverable { + + private final Timer timer = new Timer("serverHeartbeat", true); + private final AtomicBoolean requireResetStub = new AtomicBoolean(false); + protected int period; // heartbeat period in milliseconds + LinkedList pdAddresses = new LinkedList<>(); + ReentrantReadWriteLock readWriteLock = new ReentrantReadWriteLock(); + private volatile int currentIndex; // index of the PD address currently in use + private int maxTime = 6; + private ManagedChannel channel = null; + private DiscoveryServiceGrpc.DiscoveryServiceBlockingStub registerStub; + private DiscoveryServiceGrpc.DiscoveryServiceBlockingStub blockingStub; + + public DiscoveryClient(String centerAddress, int delay) { + String[] addresses = centerAddress.split(","); + for (int i = 0; i < addresses.length; i++) { + String singleAddress = addresses[i]; + if (singleAddress == null || singleAddress.length() <= 0) { + continue; + } + pdAddresses.add(addresses[i]); + } + this.period = delay; + if (maxTime < addresses.length) { + maxTime = addresses.length; + } + } + + private R tryWithTimes(Function function, V v) { + R r; + Exception ex = null; + for (int i = 0; i < maxTime; i++) { + try { + r = function.apply(v); + return r; + } catch (Exception e) { + requireResetStub.set(true); + resetStub(); + ex = e; + } + } + if (ex != null) { + log.error("Try discovery method with error: {}", ex.getMessage()); + } + return null; + } + + /*** + * Reset the stub by trying the addresses in the PD list one by one + */ + private void resetStub() { + String errLog = null; + for (int i = currentIndex + 1; i <= pdAddresses.size() + currentIndex; i++) { + currentIndex = i % pdAddresses.size(); + String singleAddress = pdAddresses.get(currentIndex); + try { + if (requireResetStub.get()) { + resetChannel(singleAddress); + } + errLog = null; + break; + } catch (Exception e) { + requireResetStub.set(true); + if (errLog == null) { + errLog = e.getMessage(); + } + continue; + } + } + if (errLog != null) { + log.error(errLog); + } + } + + /*** + * Reset the channel and stubs against the given PD address + * @param singleAddress + * @throws PDException + */ + private void resetChannel(String singleAddress) throws PDException { + + readWriteLock.writeLock().lock(); + try { + if (requireResetStub.get()) { + while (channel != null && !channel.shutdownNow().awaitTermination( + 100, TimeUnit.MILLISECONDS)) { + continue; + } + channel = ManagedChannelBuilder.forTarget( + singleAddress).usePlaintext().build(); + this.registerStub = DiscoveryServiceGrpc.newBlockingStub( + channel); + this.blockingStub = DiscoveryServiceGrpc.newBlockingStub( + channel); + requireResetStub.set(false); + } + } catch (Exception e) { + throw new PDException(-1, String.format( + "Reset channel with error : %s.",
e.getMessage())); + } finally { + readWriteLock.writeLock().unlock(); + } + } + + /*** + * Get the info of registered nodes + * @param query + * @return + */ + @Override + public NodeInfos getNodeInfos(Query query) { + return tryWithTimes((q) -> { + this.readWriteLock.readLock().lock(); + NodeInfos nodes; + try { + nodes = this.blockingStub.getNodes(q); + } catch (Exception e) { + throw e; + } finally { + this.readWriteLock.readLock().unlock(); + } + return nodes; + }, query); + } + + /*** + * Start the heartbeat task + */ + @Override + public void scheduleTask() { + timer.schedule(new TimerTask() { + @Override + public void run() { + NodeInfo nodeInfo = getRegisterNode(); + tryWithTimes((t) -> { + RegisterInfo register; + readWriteLock.readLock().lock(); + try { + register = registerStub.register(t); + log.debug("Discovery Client work done."); + Consumer consumer = getRegisterConsumer(); + if (consumer != null) { + consumer.accept(register); + } + } catch (Exception e) { + throw e; + } finally { + readWriteLock.readLock().unlock(); + } + return register; + }, nodeInfo); + } + }, 0, period); + } + + abstract NodeInfo getRegisterNode(); + + abstract Consumer getRegisterConsumer(); + + @Override + public void cancelTask() { + this.timer.cancel(); + } + + @Override + public void close() { + this.timer.cancel(); + readWriteLock.writeLock().lock(); + try { + while (channel != null && !channel.shutdownNow().awaitTermination( + 100, TimeUnit.MILLISECONDS)) { + continue; + } + } catch (Exception e) { + log.info("Close channel with error : ", e); + } finally { + readWriteLock.writeLock().unlock(); + } + } +} diff --git a/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/DiscoveryClientImpl.java b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/DiscoveryClientImpl.java new file mode 100644 index 0000000000..0ded328c17 --- /dev/null +++ b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/DiscoveryClientImpl.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hugegraph.pd.client; + +import java.util.Map; +import java.util.function.Consumer; + +import org.apache.hugegraph.pd.grpc.discovery.NodeInfo; +import org.apache.hugegraph.pd.grpc.discovery.RegisterType; + +public class DiscoveryClientImpl extends DiscoveryClient { + + private final String id; + private final RegisterType type; // heartbeat type (reserved) + private final String version; + private final String appName; + private final int times; // max missed heartbeats before expiry (reserved) + private final String address; + private final Map labels; + private final Consumer registerConsumer; + + private DiscoveryClientImpl(Builder builder) { + super(builder.centerAddress, builder.delay); + period = builder.delay; + id = builder.id; + type = builder.type; + version = builder.version; + appName = builder.appName; + times = builder.times; + address = builder.address; + labels = builder.labels; + registerConsumer = builder.registerConsumer; + } + + public static Builder newBuilder() { + return new Builder(); + } + + @Override + NodeInfo getRegisterNode() { + return NodeInfo.newBuilder().setAddress(this.address) + .setVersion(this.version) + .setAppName(this.appName).setInterval(this.period) + .setId(this.id).putAllLabels(labels).build(); + } + + @Override + Consumer getRegisterConsumer() { + return registerConsumer; + } + + public static final class Builder { + + private int delay; + private String centerAddress; + private String id; + private RegisterType type; + private String address; + private Map labels; + private String version; + private String appName; + private int times; + private Consumer registerConsumer; + + private Builder() { + } + + public Builder setDelay(int val) { + delay = val; + return this; + } + + public Builder setCenterAddress(String val) { + centerAddress = val; + return this; + } + + public Builder setId(String val) { + id = val; + return this; + } + + public Builder setType(RegisterType val) { + type = val; + return this; + } + + public Builder setAddress(String val) { + address = val; + return this; + } + + public Builder setLabels(Map val) { + labels = val; + return this; + } + + public Builder setVersion(String val) { + version = val; + return this; + } + + public Builder setAppName(String val) { + appName = val; + return this; + } + + public Builder setTimes(int val) { + times = val; + return this; + } + + public Builder setRegisterConsumer(Consumer registerConsumer) { + this.registerConsumer = registerConsumer; + return this; + } + + public DiscoveryClientImpl build() { + return new DiscoveryClientImpl(this); + } + } +} diff --git a/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/KvClient.java b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/KvClient.java new file mode 100644 index 0000000000..7e0795b2e4 --- /dev/null +++ b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/KvClient.java @@ -0,0 +1,343 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd.client; + +import java.io.Closeable; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.Semaphore; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.BiConsumer; +import java.util.function.Consumer; + +import org.apache.hugegraph.pd.common.PDException; +import org.apache.hugegraph.pd.grpc.kv.K; +import org.apache.hugegraph.pd.grpc.kv.KResponse; +import org.apache.hugegraph.pd.grpc.kv.Kv; +import org.apache.hugegraph.pd.grpc.kv.KvResponse; +import org.apache.hugegraph.pd.grpc.kv.KvServiceGrpc; +import org.apache.hugegraph.pd.grpc.kv.LockRequest; +import org.apache.hugegraph.pd.grpc.kv.LockResponse; +import org.apache.hugegraph.pd.grpc.kv.ScanPrefixResponse; +import org.apache.hugegraph.pd.grpc.kv.TTLRequest; +import org.apache.hugegraph.pd.grpc.kv.TTLResponse; +import org.apache.hugegraph.pd.grpc.kv.WatchEvent; +import org.apache.hugegraph.pd.grpc.kv.WatchKv; +import org.apache.hugegraph.pd.grpc.kv.WatchRequest; +import org.apache.hugegraph.pd.grpc.kv.WatchResponse; +import org.apache.hugegraph.pd.grpc.kv.WatchType; + +import io.grpc.stub.AbstractBlockingStub; +import io.grpc.stub.AbstractStub; +import io.grpc.stub.StreamObserver; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class KvClient extends AbstractClient implements Closeable { + + private final AtomicLong clientId = new AtomicLong(0); + private final Semaphore semaphore = new Semaphore(1); + private final ConcurrentHashMap observers = new ConcurrentHashMap<>(); + + public KvClient(PDConfig pdConfig) { + super(pdConfig); + } + + @Override + protected AbstractStub createStub() { + return KvServiceGrpc.newStub(channel); + } + + @Override + protected AbstractBlockingStub createBlockingStub() { + return KvServiceGrpc.newBlockingStub(channel); + } + + public KvResponse put(String key, String value) throws PDException { + Kv kv = Kv.newBuilder().setKey(key).setValue(value).build(); + KvResponse response = blockingUnaryCall(KvServiceGrpc.getPutMethod(), kv); + handleErrors(response.getHeader()); + return response; + } + + public KResponse get(String key) throws PDException { + K k = K.newBuilder().setKey(key).build(); + KResponse response = blockingUnaryCall(KvServiceGrpc.getGetMethod(), k); + handleErrors(response.getHeader()); + return response; + } + + public KvResponse delete(String key) throws PDException { + K k = K.newBuilder().setKey(key).build(); + KvResponse response = blockingUnaryCall(KvServiceGrpc.getDeleteMethod(), k); + handleErrors(response.getHeader()); + return response; + } + + public KvResponse deletePrefix(String prefix) throws PDException { + K k = K.newBuilder().setKey(prefix).build(); + KvResponse response = blockingUnaryCall(KvServiceGrpc.getDeletePrefixMethod(), k); + handleErrors(response.getHeader()); + return response; + } + + public ScanPrefixResponse scanPrefix(String prefix) throws PDException { + K k = K.newBuilder().setKey(prefix).build(); + ScanPrefixResponse response = 
blockingUnaryCall(KvServiceGrpc.getScanPrefixMethod(), k); + handleErrors(response.getHeader()); + return response; + } + + public TTLResponse keepTTLAlive(String key) throws PDException { + TTLRequest request = TTLRequest.newBuilder().setKey(key).build(); + TTLResponse response = blockingUnaryCall(KvServiceGrpc.getKeepTTLAliveMethod(), request); + handleErrors(response.getHeader()); + return response; + } + + public TTLResponse putTTL(String key, String value, long ttl) throws PDException { + TTLRequest request = + TTLRequest.newBuilder().setKey(key).setValue(value).setTtl(ttl).build(); + TTLResponse response = blockingUnaryCall(KvServiceGrpc.getPutTTLMethod(), request); + handleErrors(response.getHeader()); + return response; + } + + private void onEvent(WatchResponse value, Consumer consumer) { + log.info("receive message for {},event Count:{}", value, value.getEventsCount()); + clientId.compareAndSet(0L, value.getClientId()); + if (value.getEventsCount() != 0) { + consumer.accept((T) value); + } + } + + private StreamObserver getObserver(String key, Consumer consumer, + BiConsumer listenWrapper, + long client) { + StreamObserver observer; + if ((observer = observers.get(client)) == null) { + synchronized (this) { + if ((observer = observers.get(client)) == null) { + observer = getObserver(key, consumer, listenWrapper); + observers.put(client, observer); + } + } + } + return observer; + } + + private StreamObserver getObserver(String key, Consumer consumer, + BiConsumer listenWrapper) { + return new StreamObserver() { + @Override + public void onNext(WatchResponse value) { + switch (value.getState()) { + case Starting: + boolean b = clientId.compareAndSet(0, value.getClientId()); + if (b) { + observers.put(value.getClientId(), this); + log.info("set watch client id to :{}", value.getClientId()); + } + semaphore.release(); + break; + case Started: + onEvent(value, consumer); + break; + case Leader_Changed: + listenWrapper.accept(key, consumer); + break; + case Alive: + // only for check client is alive, do nothing + break; + default: + break; + } + } + + @Override + public void onError(Throwable t) { + listenWrapper.accept(key, consumer); + } + + @Override + public void onCompleted() { + + } + }; + } + + public void listen(String key, Consumer consumer) throws PDException { + long value = clientId.get(); + StreamObserver observer = getObserver(key, consumer, listenWrapper, value); + acquire(); + WatchRequest k = WatchRequest.newBuilder().setClientId(value).setKey(key).build(); + streamingCall(KvServiceGrpc.getWatchMethod(), k, observer, 1); + } + + public void listenPrefix(String prefix, Consumer consumer) throws PDException { + long value = clientId.get(); + StreamObserver observer = + getObserver(prefix, consumer, prefixListenWrapper, value); + acquire(); + WatchRequest k = + WatchRequest.newBuilder().setClientId(clientId.get()).setKey(prefix).build(); + streamingCall(KvServiceGrpc.getWatchPrefixMethod(), k, observer, 1); + } + + private void acquire() { + if (clientId.get() == 0L) { + try { + semaphore.acquire(); + if (clientId.get() != 0L) { + semaphore.release(); + } + } catch (InterruptedException e) { + log.error("get semaphore with error:", e); + } + } + } + + public List getWatchList(T response) { + List values = new LinkedList<>(); + List eventsList = response.getEventsList(); + for (WatchEvent event : eventsList) { + if (event.getType() != WatchType.Put) { + return null; + } + String value = event.getCurrent().getValue(); + values.add(value); + } + return values; + } + + 
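+ // A minimal usage sketch (illustrative only): subscribe to a key prefix and decode Put events with getWatchMap() below. It assumes PDConfig exposes a factory such as PDConfig.of("localhost:8686"); see PDConfig.java for the actual API.
+ //
+ //   KvClient client = new KvClient(PDConfig.of("localhost:8686"));
+ //   client.listenPrefix("jobs/", response -> {
+ //       Map kvs = client.getWatchMap(response);
+ //       if (kvs != null) {
+ //           kvs.forEach((k, v) -> System.out.println(k + " = " + v));
+ //       }
+ //   });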
public Map getWatchMap(T response) { + Map values = new HashMap<>(); + List eventsList = response.getEventsList(); + for (WatchEvent event : eventsList) { + if (event.getType() != WatchType.Put) { + return null; + } + WatchKv current = event.getCurrent(); + String key = current.getKey(); + String value = current.getValue(); + values.put(key, value); + } + return values; + } + + public LockResponse lock(String key, long ttl) throws PDException { + acquire(); + LockResponse response; + try { + LockRequest k = + LockRequest.newBuilder().setKey(key).setClientId(clientId.get()).setTtl(ttl) + .build(); + response = blockingUnaryCall(KvServiceGrpc.getLockMethod(), k); + handleErrors(response.getHeader()); + if (clientId.compareAndSet(0L, response.getClientId())) { + semaphore.release(); + } + } catch (Exception e) { + if (clientId.get() == 0L) { + semaphore.release(); + } + throw e; + } + return response; + } + + public LockResponse lockWithoutReentrant(String key, long ttl) throws PDException { + acquire(); + LockResponse response; + try { + LockRequest k = + LockRequest.newBuilder().setKey(key).setClientId(clientId.get()).setTtl(ttl) + .build(); + response = blockingUnaryCall(KvServiceGrpc.getLockWithoutReentrantMethod(), k); + handleErrors(response.getHeader()); + if (clientId.compareAndSet(0L, response.getClientId())) { + semaphore.release(); + } + } catch (Exception e) { + if (clientId.get() == 0L) { + semaphore.release(); + } + throw e; + } + return response; + } + + public LockResponse isLocked(String key) throws PDException { + LockRequest k = LockRequest.newBuilder().setKey(key).setClientId(clientId.get()).build(); + LockResponse response = blockingUnaryCall(KvServiceGrpc.getIsLockedMethod(), k); + handleErrors(response.getHeader()); + return response; + } + + public LockResponse unlock(String key) throws PDException { + assert clientId.get() != 0; + LockRequest k = LockRequest.newBuilder().setKey(key).setClientId(clientId.get()).build(); + LockResponse response = blockingUnaryCall(KvServiceGrpc.getUnlockMethod(), k); + handleErrors(response.getHeader()); + clientId.compareAndSet(0L, response.getClientId()); + assert clientId.get() == response.getClientId(); + return response; + } + + public LockResponse keepAlive(String key) throws PDException { + assert clientId.get() != 0; + LockRequest k = LockRequest.newBuilder().setKey(key).setClientId(clientId.get()).build(); + LockResponse response = blockingUnaryCall(KvServiceGrpc.getKeepAliveMethod(), k); + handleErrors(response.getHeader()); + clientId.compareAndSet(0L, response.getClientId()); + assert clientId.get() == response.getClientId(); + return response; + } + + @Override + public void close() { + super.close(); + } + + BiConsumer listenWrapper = (key, consumer) -> { + try { + listen(key, consumer); + } catch (PDException e) { + try { + log.warn("start listen with warning:", e); + Thread.sleep(1000); + } catch (InterruptedException ex) { + } + } + }; + + BiConsumer prefixListenWrapper = (key, consumer) -> { + try { + listenPrefix(key, consumer); + } catch (PDException e) { + try { + log.warn("start listenPrefix with warning:", e); + Thread.sleep(1000); + } catch (InterruptedException ex) { + } + } + }; +} diff --git a/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/LicenseClient.java b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/LicenseClient.java new file mode 100644 index 0000000000..a96185e5af --- /dev/null +++ 
b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/LicenseClient.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd.client; + +import org.apache.hugegraph.pd.common.KVPair; +import org.apache.hugegraph.pd.grpc.PDGrpc; +import org.apache.hugegraph.pd.grpc.Pdpb; + +import com.google.protobuf.ByteString; + +import io.grpc.stub.AbstractBlockingStub; +import io.grpc.stub.AbstractStub; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class LicenseClient extends AbstractClient { + + public LicenseClient(PDConfig config) { + super(config); + } + + @Override + protected AbstractStub createStub() { + return PDGrpc.newStub(channel); + } + + @Override + protected AbstractBlockingStub createBlockingStub() { + return PDGrpc.newBlockingStub(channel); + } + + public Pdpb.PutLicenseResponse putLicense(byte[] content) { + Pdpb.PutLicenseRequest request = Pdpb.PutLicenseRequest.newBuilder() + .setContent( + ByteString.copyFrom(content)) + .build(); + try { + KVPair pair = concurrentBlockingUnaryCall( + PDGrpc.getPutLicenseMethod(), request, + (rs) -> rs.getHeader().getError().getType().equals(Pdpb.ErrorType.OK)); + if (pair.getKey()) { + Pdpb.PutLicenseResponse.Builder builder = Pdpb.PutLicenseResponse.newBuilder(); + builder.setHeader(okHeader); + return builder.build(); + } else { + return pair.getValue(); + } + } catch (Exception e) { + e.printStackTrace(); + log.debug("put license with error:{} ", e); + Pdpb.ResponseHeader rh = + newErrorHeader(Pdpb.ErrorType.LICENSE_ERROR_VALUE, e.getMessage()); + return Pdpb.PutLicenseResponse.newBuilder().setHeader(rh).build(); + } + } +} diff --git a/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDClient.java b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDClient.java new file mode 100644 index 0000000000..6c3eae4251 --- /dev/null +++ b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDClient.java @@ -0,0 +1,1347 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd.client; + +import static org.apache.hugegraph.pd.watch.NodeEvent.EventType.NODE_PD_LEADER_CHANGE; + +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.Objects; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.TimeUnit; + +import org.apache.hugegraph.pd.common.KVPair; +import org.apache.hugegraph.pd.common.PDException; +import org.apache.hugegraph.pd.common.PartitionUtils; +import org.apache.hugegraph.pd.grpc.MetaTask; +import org.apache.hugegraph.pd.grpc.Metapb; +import org.apache.hugegraph.pd.grpc.Metapb.ShardGroup; +import org.apache.hugegraph.pd.grpc.PDGrpc; +import org.apache.hugegraph.pd.grpc.Pdpb; +import org.apache.hugegraph.pd.grpc.Pdpb.CachePartitionResponse; +import org.apache.hugegraph.pd.grpc.Pdpb.CacheResponse; +import org.apache.hugegraph.pd.grpc.Pdpb.GetGraphRequest; +import org.apache.hugegraph.pd.grpc.Pdpb.GetPartitionByCodeRequest; +import org.apache.hugegraph.pd.grpc.Pdpb.GetPartitionRequest; +import org.apache.hugegraph.pd.grpc.Pdpb.GetPartitionResponse; +import org.apache.hugegraph.pd.grpc.watch.WatchResponse; +import org.apache.hugegraph.pd.watch.NodeEvent; +import org.apache.hugegraph.pd.watch.PartitionEvent; + +import com.google.protobuf.ByteString; + +import io.grpc.ManagedChannel; +import io.grpc.MethodDescriptor; +import io.grpc.StatusRuntimeException; +import io.grpc.stub.AbstractBlockingStub; +import lombok.extern.slf4j.Slf4j; + +/** + * PD客户端实现类 + */ +@Slf4j +public class PDClient { + + private final PDConfig config; + private final Pdpb.RequestHeader header; + private final ClientCache cache; + private final StubProxy stubProxy; + private final List eventListeners; + private PDWatch.Watcher partitionWatcher; + private PDWatch.Watcher storeWatcher; + private PDWatch.Watcher graphWatcher; + private PDWatch.Watcher shardGroupWatcher; + private PDWatch pdWatch; + + private PDClient(PDConfig config) { + this.config = config; + this.header = Pdpb.RequestHeader.getDefaultInstance(); + this.stubProxy = new StubProxy(config.getServerHost().split(",")); + this.eventListeners = new CopyOnWriteArrayList<>(); + this.cache = new ClientCache(this); + } + + /** + * 创建PDClient对象,并初始化stub + * + * @param config + * @return + */ + public static PDClient create(PDConfig config) { + return new PDClient(config); + } + + private synchronized void newBlockingStub() throws PDException { + if (stubProxy.get() != null) { + return; + } + + String host = newLeaderStub(); + if (host.isEmpty()) { + throw new PDException(Pdpb.ErrorType.PD_UNREACHABLE_VALUE, + "PD unreachable, pd.peers=" + config.getServerHost()); + } + + log.info("PDClient enable cache, init PDWatch object"); + connectPdWatch(host); + } + + public void connectPdWatch(String leader) { + + if (pdWatch != null && Objects.equals(pdWatch.getCurrentHost(), leader) && + pdWatch.checkChannel()) { + return; + } + + log.info("PDWatch client connect host:{}", leader); + pdWatch = new PDWatchImpl(leader); + + partitionWatcher = pdWatch.watchPartition(new PDWatch.Listener<>() { + @Override + public void onNext(PartitionEvent response) { + // log.info("PDClient receive partition event {}-{} {}", + // response.getGraph(), response.getPartitionId(), response.getChangeType()); + invalidPartitionCache(response.getGraph(), response.getPartitionId()); + + if (response.getChangeType() == PartitionEvent.ChangeType.DEL) { + 
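+                    // A DEL event invalidates the graph's whole partition layout,
+                    // so every cached partition of that graph is dropped below,
+                    // not just the partition named in the event.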
cache.removeAll(response.getGraph()); + } + + eventListeners.forEach(listener -> { + listener.onPartitionChanged(response); + }); + } + + @Override + public void onError(Throwable throwable) { + log.error("watchPartition exception {}", throwable.getMessage()); + closeStub(false); + } + }); + + storeWatcher = pdWatch.watchNode(new PDWatch.Listener<>() { + @Override + public void onNext(NodeEvent response) { + log.info("PDClient receive store event {} {}", + response.getEventType(), Long.toHexString(response.getNodeId())); + + if (response.getEventType() == NODE_PD_LEADER_CHANGE) { + // pd raft change + var leaderIp = response.getGraph(); + log.info("watchNode: pd leader changed to {}, current watch:{}", + leaderIp, pdWatch.getCurrentHost()); + closeStub(!Objects.equals(pdWatch.getCurrentHost(), leaderIp)); + connectPdWatch(leaderIp); + } + + invalidStoreCache(response.getNodeId()); + eventListeners.forEach(listener -> { + listener.onStoreChanged(response); + }); + } + + @Override + public void onError(Throwable throwable) { + log.error("watchNode exception {}", throwable.getMessage()); + closeStub(false); + } + + }); + + graphWatcher = pdWatch.watchGraph(new PDWatch.Listener<>() { + @Override + public void onNext(WatchResponse response) { + eventListeners.forEach(listener -> { + listener.onGraphChanged(response); + }); + } + + @Override + public void onError(Throwable throwable) { + log.warn("graphWatcher exception {}", throwable.getMessage()); + } + }); + + shardGroupWatcher = pdWatch.watchShardGroup(new PDWatch.Listener<>() { + @Override + public void onNext(WatchResponse response) { + var shardResponse = response.getShardGroupResponse(); + // log.info("PDClient receive shard group event: raft {}-{}", shardResponse + // .getShardGroupId(), + // shardResponse.getType()); + if (config.isEnableCache()) { + switch (shardResponse.getType()) { + case WATCH_CHANGE_TYPE_DEL: + cache.deleteShardGroup(shardResponse.getShardGroupId()); + break; + case WATCH_CHANGE_TYPE_ALTER: + cache.updateShardGroup( + response.getShardGroupResponse().getShardGroup()); + break; + default: + break; + } + } + eventListeners.forEach(listener -> listener.onShardGroupChanged(response)); + } + + @Override + public void onError(Throwable throwable) { + log.warn("shardGroupWatcher exception {}", throwable.getMessage()); + } + }); + + } + + private synchronized void closeStub(boolean closeWatcher) { + // TODO ManagedChannel 没有正常关闭 + stubProxy.set(null); + cache.reset(); + + if (closeWatcher) { + if (partitionWatcher != null) { + partitionWatcher.close(); + partitionWatcher = null; + } + if (storeWatcher != null) { + storeWatcher.close(); + storeWatcher = null; + } + if (graphWatcher != null) { + graphWatcher.close(); + graphWatcher = null; + } + + if (shardGroupWatcher != null) { + shardGroupWatcher.close(); + shardGroupWatcher = null; + } + + pdWatch = null; + } + } + + private PDGrpc.PDBlockingStub getStub() throws PDException { + if (stubProxy.get() == null) { + newBlockingStub(); + } + return stubProxy.get().withDeadlineAfter(config.getGrpcTimeOut(), TimeUnit.MILLISECONDS); + } + + private PDGrpc.PDBlockingStub newStub() throws PDException { + if (stubProxy.get() == null) { + newBlockingStub(); + } + return PDGrpc.newBlockingStub(stubProxy.get().getChannel()) + .withDeadlineAfter(config.getGrpcTimeOut(), + TimeUnit.MILLISECONDS); + } + + private String newLeaderStub() { + String leaderHost = ""; + for (int i = 0; i < stubProxy.getHostCount(); i++) { + String host = stubProxy.nextHost(); + ManagedChannel channel = 
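+                    // Round-robin leader discovery: open a channel to the next host,
+                    // ask it for the member list, and pin the stub to whichever
+                    // address the cluster reports as leader (getLeaderIp below).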
Channels.getChannel(host); + + PDGrpc.PDBlockingStub stub = PDGrpc.newBlockingStub(channel) + .withDeadlineAfter(config.getGrpcTimeOut(), + TimeUnit.MILLISECONDS); + try { + var leaderIp = getLeaderIp(stub); + if (!leaderIp.equalsIgnoreCase(host)) { + leaderHost = leaderIp; + stubProxy.set(PDGrpc.newBlockingStub(channel) + .withDeadlineAfter(config.getGrpcTimeOut(), + TimeUnit.MILLISECONDS)); + } else { + stubProxy.set(stub); + leaderHost = host; + } + stubProxy.setLeader(leaderIp); + + log.info("PDClient connect to host = {} success", leaderHost); + break; + } catch (Exception e) { + log.error("PDClient connect to {} exception {}, {}", host, e.getMessage(), + e.getCause() != null ? e.getCause().getMessage() : ""); + } + } + return leaderHost; + } + + public String getLeaderIp() { + + return getLeaderIp(stubProxy.get()); + } + + private String getLeaderIp(PDGrpc.PDBlockingStub stub) { + if (stub == null) { + try { + getStub(); + return stubProxy.getLeader(); + } catch (PDException e) { + throw new RuntimeException(e); + } + } + + Pdpb.GetMembersRequest request = Pdpb.GetMembersRequest.newBuilder() + .setHeader(header) + .build(); + Metapb.Member leader = stub.getMembers(request).getLeader(); + return leader.getGrpcUrl(); + } + + /** + * Store注册,返回storeID,初次注册会返回新ID + * + * @param store + * @return + */ + public long registerStore(Metapb.Store store) throws PDException { + Pdpb.RegisterStoreRequest request = Pdpb.RegisterStoreRequest.newBuilder() + .setHeader(header) + .setStore(store).build(); + + Pdpb.RegisterStoreResponse response = + blockingUnaryCall(PDGrpc.getRegisterStoreMethod(), request); + handleResponseError(response.getHeader()); + return response.getStoreId(); + } + + /** + * 根据storeId返回Store对象 + * + * @param storeId + * @return + * @throws PDException + */ + public Metapb.Store getStore(long storeId) throws PDException { + Metapb.Store store = cache.getStoreById(storeId); + if (store == null) { + Pdpb.GetStoreRequest request = Pdpb.GetStoreRequest.newBuilder() + .setHeader(header) + .setStoreId(storeId).build(); + Pdpb.GetStoreResponse response = getStub().getStore(request); + handleResponseError(response.getHeader()); + store = response.getStore(); + if (config.isEnableCache()) { + cache.addStore(storeId, store); + } + } + return store; + } + + /** + * 更新Store信息,包括上下线等 + * + * @param store + * @return + */ + public Metapb.Store updateStore(Metapb.Store store) throws PDException { + Pdpb.SetStoreRequest request = Pdpb.SetStoreRequest.newBuilder() + .setHeader(header) + .setStore(store).build(); + + Pdpb.SetStoreResponse response = getStub().setStore(request); + handleResponseError(response.getHeader()); + store = response.getStore(); + if (config.isEnableCache()) { + cache.addStore(store.getId(), store); + } + return store; + } + + /** + * 返回活跃的Store + * + * @param graphName + * @return + */ + public List getActiveStores(String graphName) throws PDException { + List stores = new ArrayList<>(); + KVPair ptShard = this.getPartitionByCode(graphName, 0); + while (ptShard != null) { + stores.add(this.getStore(ptShard.getValue().getStoreId())); + if (ptShard.getKey().getEndKey() < PartitionUtils.MAX_VALUE) { + ptShard = this.getPartitionByCode(graphName, ptShard.getKey().getEndKey()); + } else { + ptShard = null; + } + } + return stores; + } + + public List getActiveStores() throws PDException { + Pdpb.GetAllStoresRequest request = Pdpb.GetAllStoresRequest.newBuilder() + .setHeader(header) + .setGraphName("") + .setExcludeOfflineStores(true) + .build(); + Pdpb.GetAllStoresResponse 
response = getStub().getAllStores(request); + handleResponseError(response.getHeader()); + return response.getStoresList(); + + } + + /** + * 返回活跃的Store + * + * @param graphName + * @return + */ + public List getAllStores(String graphName) throws PDException { + Pdpb.GetAllStoresRequest request = Pdpb.GetAllStoresRequest.newBuilder() + .setHeader(header) + .setGraphName(graphName) + .setExcludeOfflineStores(false) + .build(); + Pdpb.GetAllStoresResponse response = getStub().getAllStores(request); + handleResponseError(response.getHeader()); + return response.getStoresList(); + + } + + /** + * Store心跳,定期调用,保持在线状态 + * + * @param stats + * @throws PDException + */ + public Metapb.ClusterStats storeHeartbeat(Metapb.StoreStats stats) throws PDException { + Pdpb.StoreHeartbeatRequest request = Pdpb.StoreHeartbeatRequest.newBuilder() + .setHeader(header) + .setStats(stats).build(); + Pdpb.StoreHeartbeatResponse response = getStub().storeHeartbeat(request); + handleResponseError(response.getHeader()); + return response.getClusterStats(); + } + + private KVPair getKvPair(String graphName, byte[] key, + KVPair partShard) throws + PDException { + if (partShard == null) { + GetPartitionRequest request = GetPartitionRequest.newBuilder() + .setHeader(header) + .setGraphName(graphName) + .setKey(ByteString.copyFrom(key)) + .build(); + GetPartitionResponse response = + blockingUnaryCall(PDGrpc.getGetPartitionMethod(), request); + handleResponseError(response.getHeader()); + partShard = new KVPair<>(response.getPartition(), response.getLeader()); + cache.update(graphName, partShard.getKey().getId(), partShard.getKey()); + } + return partShard; + } + + /** + * 查询Key所属分区信息 + * + * @param graphName + * @param key + * @return + * @throws PDException + */ + public KVPair getPartition(String graphName, byte[] key) throws + PDException { + // 先查cache,cache没有命中,在调用PD + KVPair partShard = cache.getPartitionByKey(graphName, key); + partShard = getKvPair(graphName, key, partShard); + return partShard; + } + + public KVPair getPartition(String graphName, byte[] key, + int code) throws + PDException { + KVPair partShard = + cache.getPartitionByCode(graphName, code); + partShard = getKvPair(graphName, key, partShard); + return partShard; + } + + /** + * 根据hashcode查询所属分区信息 + * + * @param graphName + * @param hashCode + * @return + * @throws PDException + */ + public KVPair getPartitionByCode(String graphName, + long hashCode) + throws PDException { + // 先查cache,cache没有命中,在调用PD + KVPair partShard = + cache.getPartitionByCode(graphName, hashCode); + if (partShard == null) { + GetPartitionByCodeRequest request = GetPartitionByCodeRequest.newBuilder() + .setHeader(header) + .setGraphName(graphName) + .setCode(hashCode).build(); + GetPartitionResponse response = + blockingUnaryCall(PDGrpc.getGetPartitionByCodeMethod(), request); + handleResponseError(response.getHeader()); + partShard = new KVPair<>(response.getPartition(), response.getLeader()); + cache.update(graphName, partShard.getKey().getId(), partShard.getKey()); + cache.updateShardGroup(getShardGroup(partShard.getKey().getId())); + } + + if (partShard.getValue() == null) { + ShardGroup shardGroup = getShardGroup(partShard.getKey().getId()); + if (shardGroup != null) { + for (var shard : shardGroup.getShardsList()) { + if (shard.getRole() == Metapb.ShardRole.Leader) { + partShard.setValue(shard); + } + } + } else { + log.error("getPartitionByCode: get shard group failed, {}", + partShard.getKey().getId()); + } + } + return partShard; + } + + /** + * 获取Key的哈希值 + */ + 
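+    // Note: graphName is currently unused here; the code depends only on the key
+    // bytes via PartitionUtils.calcHashcode(key). A hypothetical routing sketch:
+    //   int code = pdClient.keyToCode("graph1", key);
+    //   KVPair<Metapb.Partition, Metapb.Shard> loc =
+    //           pdClient.getPartitionByCode("graph1", code);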
public int keyToCode(String graphName, byte[] key) { + return PartitionUtils.calcHashcode(key); + } + + /** + * 根据分区id返回分区信息, RPC请求 + * + * @param graphName + * @param partId + * @return + * @throws PDException + */ + public KVPair getPartitionById(String graphName, + int partId) throws PDException { + KVPair partShard = + cache.getPartitionById(graphName, partId); + if (partShard == null) { + Pdpb.GetPartitionByIDRequest request = Pdpb.GetPartitionByIDRequest.newBuilder() + .setHeader(header) + .setGraphName( + graphName) + .setPartitionId( + partId) + .build(); + GetPartitionResponse response = + blockingUnaryCall(PDGrpc.getGetPartitionByIDMethod(), request); + handleResponseError(response.getHeader()); + partShard = new KVPair<>(response.getPartition(), response.getLeader()); + if (config.isEnableCache()) { + cache.update(graphName, partShard.getKey().getId(), partShard.getKey()); + cache.updateShardGroup(getShardGroup(partShard.getKey().getId())); + } + } + if (partShard.getValue() == null) { + var shardGroup = getShardGroup(partShard.getKey().getId()); + if (shardGroup != null) { + for (var shard : shardGroup.getShardsList()) { + if (shard.getRole() == Metapb.ShardRole.Leader) { + partShard.setValue(shard); + } + } + } else { + log.error("getPartitionById: get shard group failed, {}", + partShard.getKey().getId()); + } + } + return partShard; + } + + public ShardGroup getShardGroup(int partId) throws PDException { + ShardGroup group = cache.getShardGroup(partId); + if (group == null) { + Pdpb.GetShardGroupRequest request = Pdpb.GetShardGroupRequest.newBuilder() + .setHeader(header) + .setGroupId(partId) + .build(); + Pdpb.GetShardGroupResponse response = + blockingUnaryCall(PDGrpc.getGetShardGroupMethod(), request); + handleResponseError(response.getHeader()); + group = response.getShardGroup(); + if (config.isEnableCache()) { + cache.updateShardGroup(group); + } + } + return group; + } + + public void updateShardGroup(ShardGroup shardGroup) throws PDException { + Pdpb.UpdateShardGroupRequest request = Pdpb.UpdateShardGroupRequest.newBuilder() + .setHeader(header) + .setShardGroup( + shardGroup) + .build(); + Pdpb.UpdateShardGroupResponse response = + blockingUnaryCall(PDGrpc.getUpdateShardGroupMethod(), request); + handleResponseError(response.getHeader()); + + if (config.isEnableCache()) { + cache.updateShardGroup(shardGroup); + } + } + + /** + * 返回startKey和endKey跨越的所有分区信息 + * + * @param graphName + * @param startKey + * @param endKey + * @return + * @throws PDException + */ + public List> scanPartitions(String graphName, + byte[] startKey, + byte[] endKey) throws + PDException { + List> partitions = new ArrayList<>(); + KVPair startPartShard = getPartition(graphName, startKey); + KVPair endPartShard = getPartition(graphName, endKey); + if (startPartShard == null || endPartShard == null) { + return null; + } + + partitions.add(startPartShard); + while (startPartShard.getKey().getEndKey() < endPartShard.getKey().getEndKey() + && startPartShard.getKey().getEndKey() < + PartitionUtils.MAX_VALUE /*排除最后一个分区*/) { + startPartShard = getPartitionByCode(graphName, startPartShard.getKey().getEndKey()); + partitions.add(startPartShard); + } + return partitions; + } + + /** + * 根据条件查询分区信息 + * + * @return + * @throws PDException + */ + public List getPartitionsByStore(long storeId) throws PDException { + + Metapb.PartitionQuery query = Metapb.PartitionQuery.newBuilder() + .setStoreId(storeId) + .build(); + Pdpb.QueryPartitionsRequest request = Pdpb.QueryPartitionsRequest.newBuilder() + 
.setQuery(query).build(); + Pdpb.QueryPartitionsResponse response = + blockingUnaryCall(PDGrpc.getQueryPartitionsMethod(), request); + + handleResponseError(response.getHeader()); + return response.getPartitionsList(); + } + + /** + * 查找指定store上的指定partitionId + * + * @return + * @throws PDException + */ + public List queryPartitions(long storeId, int partitionId) throws + PDException { + + Metapb.PartitionQuery query = Metapb.PartitionQuery.newBuilder() + .setStoreId(storeId) + .setPartitionId(partitionId) + .build(); + Pdpb.QueryPartitionsRequest request = Pdpb.QueryPartitionsRequest.newBuilder() + .setQuery(query).build(); + Pdpb.QueryPartitionsResponse response = + blockingUnaryCall(PDGrpc.getQueryPartitionsMethod(), request); + + handleResponseError(response.getHeader()); + return response.getPartitionsList(); + } + + public List getPartitions(long storeId, String graphName) throws PDException { + + Metapb.PartitionQuery query = Metapb.PartitionQuery.newBuilder() + .setStoreId(storeId) + .setGraphName(graphName).build(); + Pdpb.QueryPartitionsRequest request = Pdpb.QueryPartitionsRequest.newBuilder() + .setQuery(query).build(); + Pdpb.QueryPartitionsResponse response = + blockingUnaryCall(PDGrpc.getQueryPartitionsMethod(), request); + + handleResponseError(response.getHeader()); + return response.getPartitionsList(); + + } + + public Metapb.Graph setGraph(Metapb.Graph graph) throws PDException { + Pdpb.SetGraphRequest request = Pdpb.SetGraphRequest.newBuilder() + .setGraph(graph) + .build(); + Pdpb.SetGraphResponse response = + blockingUnaryCall(PDGrpc.getSetGraphMethod(), request); + + handleResponseError(response.getHeader()); + return response.getGraph(); + } + + public Metapb.Graph getGraph(String graphName) throws PDException { + GetGraphRequest request = GetGraphRequest.newBuilder() + .setGraphName(graphName) + .build(); + Pdpb.GetGraphResponse response = + blockingUnaryCall(PDGrpc.getGetGraphMethod(), request); + + handleResponseError(response.getHeader()); + return response.getGraph(); + } + + public Metapb.Graph getGraphWithOutException(String graphName) throws + PDException { + GetGraphRequest request = GetGraphRequest.newBuilder() + .setGraphName( + graphName) + .build(); + Pdpb.GetGraphResponse response = blockingUnaryCall( + PDGrpc.getGetGraphMethod(), request); + return response.getGraph(); + } + + public Metapb.Graph delGraph(String graphName) throws PDException { + Pdpb.DelGraphRequest request = Pdpb.DelGraphRequest.newBuilder() + .setGraphName(graphName) + .build(); + Pdpb.DelGraphResponse response = + blockingUnaryCall(PDGrpc.getDelGraphMethod(), request); + + handleResponseError(response.getHeader()); + return response.getGraph(); + } + + public List updatePartition(List partitions) throws + PDException { + + Pdpb.UpdatePartitionRequest request = Pdpb.UpdatePartitionRequest.newBuilder() + .addAllPartition( + partitions) + .build(); + Pdpb.UpdatePartitionResponse response = + blockingUnaryCall(PDGrpc.getUpdatePartitionMethod(), request); + handleResponseError(response.getHeader()); + invalidPartitionCache(); + + return response.getPartitionList(); + } + + public Metapb.Partition delPartition(String graphName, int partitionId) throws PDException { + + Pdpb.DelPartitionRequest request = Pdpb.DelPartitionRequest.newBuilder() + .setGraphName(graphName) + .setPartitionId(partitionId) + .build(); + Pdpb.DelPartitionResponse response = + blockingUnaryCall(PDGrpc.getDelPartitionMethod(), request); + + handleResponseError(response.getHeader()); + 
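+        // Keep the local view consistent: evict the deleted partition from the
+        // client cache before handing the removed Partition back to the caller.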
invalidPartitionCache(graphName, partitionId); + return response.getPartition(); + } + + /** + * 删除分区缓存 + */ + public void invalidPartitionCache(String graphName, int partitionId) { + // 检查是否存在缓存 + if (null != cache.getPartitionById(graphName, partitionId)) { + cache.removePartition(graphName, partitionId); + } + } + + /** + * 删除分区缓存 + */ + public void invalidPartitionCache() { + // 检查是否存在缓存 + cache.removePartitions(); + } + + /** + * 删除分区缓存 + */ + public void invalidStoreCache(long storeId) { + cache.removeStore(storeId); + } + + /** + * Hugegraph server 调用,Leader发生改变,更新缓存 + */ + public void updatePartitionLeader(String graphName, int partId, long leaderStoreId) { + KVPair partShard = null; + try { + partShard = this.getPartitionById(graphName, partId); + + if (partShard != null && partShard.getValue().getStoreId() != leaderStoreId) { + var shardGroup = this.getShardGroup(partId); + Metapb.Shard shard = null; + List shards = new ArrayList<>(); + + for (Metapb.Shard s : shardGroup.getShardsList()) { + if (s.getStoreId() == leaderStoreId) { + shard = s; + shards.add(Metapb.Shard.newBuilder(s) + .setStoreId(s.getStoreId()) + .setRole(Metapb.ShardRole.Leader).build()); + } else { + shards.add(Metapb.Shard.newBuilder(s) + .setStoreId(s.getStoreId()) + .setRole(Metapb.ShardRole.Follower).build()); + } + } + + if (config.isEnableCache()) { + if (shard == null) { + // 分区的shard中未找到leader,说明分区发生了迁移 + cache.removePartition(graphName, partId); + } + } + } + } catch (PDException e) { + log.error("getPartitionException: {}", e.getMessage()); + } + } + + /** + * Hugegraph-store调用,更新缓存 + * + * @param partition + */ + public void updatePartitionCache(Metapb.Partition partition, Metapb.Shard leader) { + if (config.isEnableCache()) { + cache.update(partition.getGraphName(), partition.getId(), partition); + cache.updateLeader(partition.getId(), leader); + } + } + + public Pdpb.GetIdResponse getIdByKey(String key, int delta) throws PDException { + Pdpb.GetIdRequest request = Pdpb.GetIdRequest.newBuilder() + .setHeader(header) + .setKey(key) + .setDelta(delta) + .build(); + Pdpb.GetIdResponse response = blockingUnaryCall(PDGrpc.getGetIdMethod(), request); + handleResponseError(response.getHeader()); + return response; + } + + public Pdpb.ResetIdResponse resetIdByKey(String key) throws PDException { + Pdpb.ResetIdRequest request = Pdpb.ResetIdRequest.newBuilder() + .setHeader(header) + .setKey(key) + .build(); + Pdpb.ResetIdResponse response = blockingUnaryCall(PDGrpc.getResetIdMethod(), request); + handleResponseError(response.getHeader()); + return response; + } + + public Metapb.Member getLeader() throws PDException { + Pdpb.GetMembersRequest request = Pdpb.GetMembersRequest.newBuilder() + .setHeader(header) + .build(); + Pdpb.GetMembersResponse response = blockingUnaryCall(PDGrpc.getGetMembersMethod(), request); + handleResponseError(response.getHeader()); + return response.getLeader(); + } + + public Pdpb.GetMembersResponse getMembers() throws PDException { + Pdpb.GetMembersRequest request = Pdpb.GetMembersRequest.newBuilder() + .setHeader(header) + .build(); + Pdpb.GetMembersResponse response = blockingUnaryCall(PDGrpc.getGetMembersMethod(), request); + handleResponseError(response.getHeader()); + return response; + } + + public Metapb.ClusterStats getClusterStats() throws PDException { + Pdpb.GetClusterStatsRequest request = Pdpb.GetClusterStatsRequest.newBuilder() + .setHeader(header) + .build(); + Pdpb.GetClusterStatsResponse response = + blockingUnaryCall(PDGrpc.getGetClusterStatsMethod(), request); + 
handleResponseError(response.getHeader()); + return response.getCluster(); + } + + private > RespT + blockingUnaryCall(MethodDescriptor method, ReqT req) throws PDException { + return blockingUnaryCall(method, req, 1); + } + + private > RespT + blockingUnaryCall(MethodDescriptor method, ReqT req, int retry) throws + PDException { + io.grpc.stub.AbstractBlockingStub stub = (AbstractBlockingStub) getStub(); + try { + RespT resp = io.grpc.stub.ClientCalls.blockingUnaryCall(stub.getChannel(), method, + stub.getCallOptions(), req); + return resp; + } catch (Exception e) { + log.error(method.getFullMethodName() + " exception, {}", e.getMessage()); + if (e instanceof StatusRuntimeException) { + StatusRuntimeException se = (StatusRuntimeException) e; + //se.getStatus() == Status.UNAVAILABLE && + if (retry < stubProxy.getHostCount()) { + // 网络不通,关掉之前连接,换host重新连接 + closeStub(true); + return blockingUnaryCall(method, req, ++retry); + } + } + } + return null; + } + + private void handleResponseError(Pdpb.ResponseHeader header) throws + PDException { + var errorType = header.getError().getType(); + if (header.hasError() && errorType != Pdpb.ErrorType.OK) { + + throw new PDException(header.getError().getTypeValue(), + String.format( + "PD request error, error code = %d, msg = %s", + header.getError().getTypeValue(), + header.getError().getMessage())); + } + } + + public void addEventListener(PDEventListener listener) { + eventListeners.add(listener); + } + + public PDWatch getWatchClient() { + return new PDWatchImpl(stubProxy.getHost()); + } + + /** + * 返回Store状态信息 + */ + public List getStoreStatus(boolean offlineExcluded) throws PDException { + Pdpb.GetAllStoresRequest request = Pdpb.GetAllStoresRequest.newBuilder() + .setHeader(header) + .setExcludeOfflineStores( + offlineExcluded) + .build(); + Pdpb.GetAllStoresResponse response = getStub().getStoreStatus(request); + handleResponseError(response.getHeader()); + List stores = response.getStoresList(); + return stores; + } + + public void setGraphSpace(String graphSpaceName, long storageLimit) throws PDException { + Metapb.GraphSpace graphSpace = Metapb.GraphSpace.newBuilder().setName(graphSpaceName) + .setStorageLimit(storageLimit) + .setTimestamp(System.currentTimeMillis()) + .build(); + Pdpb.SetGraphSpaceRequest request = Pdpb.SetGraphSpaceRequest.newBuilder() + .setHeader(header) + .setGraphSpace(graphSpace) + .build(); + Pdpb.SetGraphSpaceResponse response = getStub().setGraphSpace(request); + handleResponseError(response.getHeader()); + } + + public List getGraphSpace(String graphSpaceName) throws + PDException { + Pdpb.GetGraphSpaceRequest.Builder builder = Pdpb.GetGraphSpaceRequest.newBuilder(); + Pdpb.GetGraphSpaceRequest request; + builder.setHeader(header); + if (graphSpaceName != null && graphSpaceName.length() > 0) { + builder.setGraphSpaceName(graphSpaceName); + } + request = builder.build(); + Pdpb.GetGraphSpaceResponse response = getStub().getGraphSpace(request); + List graphSpaceList = response.getGraphSpaceList(); + handleResponseError(response.getHeader()); + return graphSpaceList; + } + + public void setPDConfig(int partitionCount, String peerList, int shardCount, + long version) throws PDException { + Metapb.PDConfig pdConfig = Metapb.PDConfig.newBuilder().setPartitionCount(partitionCount) + .setPeersList(peerList).setShardCount(shardCount) + .setVersion(version) + .setTimestamp(System.currentTimeMillis()) + .build(); + Pdpb.SetPDConfigRequest request = Pdpb.SetPDConfigRequest.newBuilder() + .setHeader(header) + 
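+                // Editorial note on blockingUnaryCall above: a failed RPC is retried
+                // on the next configured host (closing the stale stub first), at most
+                // getHostCount() times; once every host has failed it returns null,
+                // so a caller dereferencing the response would hit an NPE.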
.setPdConfig(pdConfig) + .build(); + Pdpb.SetPDConfigResponse response = getStub().setPDConfig(request); + handleResponseError(response.getHeader()); + } + + public Metapb.PDConfig getPDConfig() throws PDException { + Pdpb.GetPDConfigRequest request = Pdpb.GetPDConfigRequest.newBuilder() + .setHeader(header) + .build(); + Pdpb.GetPDConfigResponse response = getStub().getPDConfig(request); + handleResponseError(response.getHeader()); + return response.getPdConfig(); + } + + public void setPDConfig(Metapb.PDConfig pdConfig) throws PDException { + Pdpb.SetPDConfigRequest request = Pdpb.SetPDConfigRequest.newBuilder() + .setHeader(header) + .setPdConfig(pdConfig) + .build(); + Pdpb.SetPDConfigResponse response = getStub().setPDConfig(request); + handleResponseError(response.getHeader()); + } + + public Metapb.PDConfig getPDConfig(long version) throws PDException { + Pdpb.GetPDConfigRequest request = Pdpb.GetPDConfigRequest.newBuilder().setHeader( + header).setVersion(version).build(); + Pdpb.GetPDConfigResponse response = getStub().getPDConfig(request); + handleResponseError(response.getHeader()); + return response.getPdConfig(); + } + + public void changePeerList(String peerList) throws PDException { + Pdpb.ChangePeerListRequest request = Pdpb.ChangePeerListRequest.newBuilder() + .setPeerList(peerList) + .setHeader(header).build(); + Pdpb.getChangePeerListResponse response = + blockingUnaryCall(PDGrpc.getChangePeerListMethod(), request); + handleResponseError(response.getHeader()); + } + + /** + * 工作模式 + * Auto:自动分裂,每个Store上分区数达到最大值 + * + * @throws PDException + */ + public void splitData() throws PDException { + Pdpb.SplitDataRequest request = Pdpb.SplitDataRequest.newBuilder() + .setHeader(header) + .setMode(Pdpb.OperationMode.Auto) + .build(); + Pdpb.SplitDataResponse response = getStub().splitData(request); + handleResponseError(response.getHeader()); + } + + /** + * 工作模式 + * Auto:自动分裂,每个Store上分区数达到最大值 + * Expert:专家模式,需要指定splitParams + * + * @param mode + * @param params + * @throws PDException + */ + public void splitData(Pdpb.OperationMode mode, List params) throws + PDException { + Pdpb.SplitDataRequest request = Pdpb.SplitDataRequest.newBuilder() + .setHeader(header) + .setMode(mode) + .addAllParam(params).build(); + Pdpb.SplitDataResponse response = getStub().splitData(request); + handleResponseError(response.getHeader()); + } + + public void splitGraphData(String graphName, int toCount) throws PDException { + Pdpb.SplitGraphDataRequest request = Pdpb.SplitGraphDataRequest.newBuilder() + .setHeader(header) + .setGraphName(graphName) + .setToCount(toCount) + .build(); + Pdpb.SplitDataResponse response = getStub().splitGraphData(request); + handleResponseError(response.getHeader()); + } + + /** + * 自动转移,达到每个Store上分区数量相同 + * + * @throws PDException + */ + public void balancePartition() throws PDException { + Pdpb.MovePartitionRequest request = Pdpb.MovePartitionRequest.newBuilder() + .setHeader(header) + .setMode( + Pdpb.OperationMode.Auto) + .build(); + Pdpb.MovePartitionResponse response = getStub().movePartition(request); + handleResponseError(response.getHeader()); + } + + /** + * //工作模式 + * // Auto:自动转移,达到每个Store上分区数量相同 + * // Expert:专家模式,需要指定transferParams + * + * @param mode + * @param params + * @throws PDException + */ + public void movePartition(Pdpb.OperationMode mode, List params) throws + PDException { + Pdpb.MovePartitionRequest request = Pdpb.MovePartitionRequest.newBuilder() + .setHeader(header) + .setMode(mode) + .addAllParam(params).build(); + 
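+        // Auto mode lets PD balance partitions across stores by itself; Expert mode
+        // applies exactly the transfer params passed in, mirroring splitData above.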
Pdpb.MovePartitionResponse response = getStub().movePartition(request); + handleResponseError(response.getHeader()); + } + + public void reportTask(MetaTask.Task task) throws PDException { + Pdpb.ReportTaskRequest request = Pdpb.ReportTaskRequest.newBuilder() + .setHeader(header) + .setTask(task).build(); + Pdpb.ReportTaskResponse response = blockingUnaryCall(PDGrpc.getReportTaskMethod(), request); + handleResponseError(response.getHeader()); + } + + public Metapb.PartitionStats getPartitionsStats(String graph, int partId) throws PDException { + Pdpb.GetPartitionStatsRequest request = Pdpb.GetPartitionStatsRequest.newBuilder() + .setHeader(header) + .setGraphName(graph) + .setPartitionId(partId) + .build(); + Pdpb.GetPartitionStatsResponse response = getStub().getPartitionStats(request); + handleResponseError(response.getHeader()); + return response.getPartitionStats(); + } + + /** + * 平衡不同store中leader的数量 + */ + public void balanceLeaders() throws PDException { + Pdpb.BalanceLeadersRequest request = Pdpb.BalanceLeadersRequest.newBuilder() + .setHeader(header) + .build(); + Pdpb.BalanceLeadersResponse response = getStub().balanceLeaders(request); + handleResponseError(response.getHeader()); + } + + /** + * 从pd中删除store + */ + public Metapb.Store delStore(long storeId) throws PDException { + Pdpb.DetStoreRequest request = Pdpb.DetStoreRequest.newBuilder() + .setHeader(header) + .setStoreId(storeId) + .build(); + Pdpb.DetStoreResponse response = getStub().delStore(request); + handleResponseError(response.getHeader()); + return response.getStore(); + } + + /** + * 对rocksdb整体进行compaction + * + * @throws PDException + */ + public void dbCompaction() throws PDException { + Pdpb.DbCompactionRequest request = Pdpb.DbCompactionRequest + .newBuilder() + .setHeader(header) + .build(); + Pdpb.DbCompactionResponse response = getStub().dbCompaction(request); + handleResponseError(response.getHeader()); + } + + /** + * 对rocksdb指定表进行compaction + * + * @param tableName + * @throws PDException + */ + public void dbCompaction(String tableName) throws PDException { + Pdpb.DbCompactionRequest request = Pdpb.DbCompactionRequest + .newBuilder() + .setHeader(header) + .setTableName(tableName) + .build(); + Pdpb.DbCompactionResponse response = getStub().dbCompaction(request); + handleResponseError(response.getHeader()); + } + + /** + * 分区合并,把当前的分区缩容至toCount个 + * + * @param toCount 缩容到分区的个数 + * @throws PDException + */ + public void combineCluster(int toCount) throws PDException { + Pdpb.CombineClusterRequest request = Pdpb.CombineClusterRequest + .newBuilder() + .setHeader(header) + .setToCount(toCount) + .build(); + Pdpb.CombineClusterResponse response = getStub().combineCluster(request); + handleResponseError(response.getHeader()); + } + + /** + * 将单图缩容到 toCount个 + * + * @param graphName graph name + * @param toCount target count + * @throws PDException + */ + public void combineGraph(String graphName, int toCount) throws PDException { + Pdpb.CombineGraphRequest request = Pdpb.CombineGraphRequest + .newBuilder() + .setHeader(header) + .setGraphName(graphName) + .setToCount(toCount) + .build(); + Pdpb.CombineGraphResponse response = getStub().combineGraph(request); + handleResponseError(response.getHeader()); + } + + public void deleteShardGroup(int groupId) throws PDException { + Pdpb.DeleteShardGroupRequest request = Pdpb.DeleteShardGroupRequest + .newBuilder() + .setHeader(header) + .setGroupId(groupId) + .build(); + Pdpb.DeleteShardGroupResponse response = + blockingUnaryCall(PDGrpc.getDeleteShardGroupMethod(), 
request); + + handleResponseError(response.getHeader()); + } + + /** + * 用于 store的 shard list重建 + * + * @param groupId shard group id + * @param shards shard list,delete when shards size is 0 + */ + public void updateShardGroupOp(int groupId, List shards) throws PDException { + Pdpb.ChangeShardRequest request = Pdpb.ChangeShardRequest.newBuilder() + .setHeader(header) + .setGroupId(groupId) + .addAllShards(shards) + .build(); + Pdpb.ChangeShardResponse response = getStub().updateShardGroupOp(request); + handleResponseError(response.getHeader()); + } + + /** + * invoke fireChangeShard command + * + * @param groupId shard group id + * @param shards shard list + */ + public void changeShard(int groupId, List shards) throws PDException { + Pdpb.ChangeShardRequest request = Pdpb.ChangeShardRequest.newBuilder() + .setHeader(header) + .setGroupId(groupId) + .addAllShards(shards) + .build(); + Pdpb.ChangeShardResponse response = getStub().changeShard(request); + handleResponseError(response.getHeader()); + } + + public ClientCache getCache() { + return cache; + } + + public CacheResponse getClientCache() throws PDException { + GetGraphRequest request = GetGraphRequest.newBuilder().setHeader(header).build(); + CacheResponse cache = getStub().getCache(request); + handleResponseError(cache.getHeader()); + return cache; + } + + public CachePartitionResponse getPartitionCache(String graph) throws PDException { + GetGraphRequest request = + GetGraphRequest.newBuilder().setHeader(header).setGraphName(graph).build(); + CachePartitionResponse ps = getStub().getPartitions(request); + handleResponseError(ps.getHeader()); + return ps; + } + + public void updatePdRaft(String raftConfig) throws PDException { + Pdpb.UpdatePdRaftRequest request = Pdpb.UpdatePdRaftRequest.newBuilder() + .setHeader(header) + .setConfig(raftConfig) + .build(); + Pdpb.UpdatePdRaftResponse response = getStub().updatePdRaft(request); + handleResponseError(response.getHeader()); + } + + public interface PDEventListener { + + void onStoreChanged(NodeEvent event); + + void onPartitionChanged(PartitionEvent event); + + void onGraphChanged(WatchResponse event); + + default void onShardGroupChanged(WatchResponse event) { + } + + } + + static class StubProxy { + + private final LinkedList hostList = new LinkedList<>(); + private volatile PDGrpc.PDBlockingStub stub; + private String leader; + + public StubProxy(String[] hosts) { + for (String host : hosts) { + if (!host.isEmpty()) { + hostList.offer(host); + } + } + } + + public String nextHost() { + String host = hostList.poll(); + hostList.offer(host); //移到尾部 + return host; + } + + public void set(PDGrpc.PDBlockingStub stub) { + this.stub = stub; + } + + public PDGrpc.PDBlockingStub get() { + return this.stub; + } + + public String getHost() { + return hostList.peek(); + } + + public int getHostCount() { + return hostList.size(); + } + + public String getLeader() { + return leader; + } + + public void setLeader(String leader) { + this.leader = leader; + } + } +} diff --git a/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDConfig.java b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDConfig.java new file mode 100644 index 0000000000..a1c72a2bcf --- /dev/null +++ b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDConfig.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.client;
+
+public final class PDConfig {
+
+    // TODO: multi-server
+    private String serverHost = "localhost:9000";
+    // gRPC call timeout in milliseconds (60 seconds by default)
+    private long grpcTimeOut = 60000;
+
+    // whether to receive asynchronous notifications from PD
+    private boolean enablePDNotify = false;
+
+    private boolean enableCache = false;
+
+    private PDConfig() {
+    }
+
+    public static PDConfig of() {
+        return new PDConfig();
+    }
+
+    public static PDConfig of(String serverHost) {
+        PDConfig config = new PDConfig();
+        config.serverHost = serverHost;
+        return config;
+    }
+
+    public static PDConfig of(String serverHost, long timeOut) {
+        PDConfig config = new PDConfig();
+        config.serverHost = serverHost;
+        config.grpcTimeOut = timeOut;
+        return config;
+    }
+
+    public String getServerHost() {
+        return serverHost;
+    }
+
+    public long getGrpcTimeOut() {
+        return grpcTimeOut;
+    }
+
+    @Deprecated
+    public PDConfig setEnablePDNotify(boolean enablePDNotify) {
+        this.enablePDNotify = enablePDNotify;
+
+        // TODO: temporary code, remove after hugegraph is updated
+        this.enableCache = enablePDNotify;
+        return this;
+    }
+
+    public boolean isEnableCache() {
+        return enableCache;
+    }
+
+    public PDConfig setEnableCache(boolean enableCache) {
+        this.enableCache = enableCache;
+        return this;
+    }
+
+    @Override
+    public String toString() {
+        return "PDConfig{" +
+               "serverHost='" + serverHost + '\'' +
+               '}';
+    }
+}
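Editorial note: a minimal bootstrap sketch for the config above (not part of the patch; the addresses and timeout are placeholders):

```java
// Point the client at one or more PD peers (PDClient splits serverHost on ',')
// and opt in to the client-side partition/store cache.
PDConfig config = PDConfig.of("10.0.0.1:8686,10.0.0.2:8686", 30_000)
                          .setEnableCache(true);
PDClient client = PDClient.create(config);
```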
diff --git a/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDPulse.java b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDPulse.java
new file mode 100644
index 0000000000..485417b917
--- /dev/null
+++ b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDPulse.java
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.client;
+
+import java.io.Closeable;
+import java.util.function.Consumer;
+
+import org.apache.hugegraph.pd.grpc.pulse.PartitionHeartbeatRequest;
+import org.apache.hugegraph.pd.grpc.pulse.PulseResponse;
+import org.apache.hugegraph.pd.pulse.PulseServerNotice;
+
+/**
+ * Bidirectional communication interface of pd-client and pd-server
+ */
+public interface PDPulse {
+
+    /*** inner static methods ***/
+    static <T> Listener<T> listener(Consumer<T> onNext) {
+        return listener(onNext, t -> {
+        }, () -> {
+        });
+    }
+
+    static <T> Listener<T> listener(Consumer<T> onNext, Consumer<Throwable> onError) {
+        return listener(onNext, onError, () -> {
+        });
+    }
+
+    static <T> Listener<T> listener(Consumer<T> onNext, Runnable onCompleted) {
+        return listener(onNext, t -> {
+        }, onCompleted);
+    }
+
+    static <T> Listener<T> listener(Consumer<T> onNext, Consumer<Throwable> onError,
+                                    Runnable onCompleted) {
+        return new Listener<>() {
+            @Override
+            public void onNext(T response) {
+                onNext.accept(response);
+            }
+
+            @Override
+            public void onNotice(PulseServerNotice<T> notice) {
+
+            }
+
+            @Override
+            public void onError(Throwable throwable) {
+                onError.accept(throwable);
+            }
+
+            @Override
+            public void onCompleted() {
+                onCompleted.run();
+            }
+        };
+    }
+
+    /**
+     * @param listener the listener that receives partition heartbeat responses
+     * @return a notifier used to push heartbeat requests back to the server
+     */
+    Notifier<PartitionHeartbeatRequest.Builder> connectPartition(Listener<PulseResponse> listener);
+
+    /**
+     * Switch to a new host. Checks the channel/host first; if the old stream has
+     * to be closed, close() is invoked on the notifier.
+     *
+     * @param host     new host
+     * @param notifier notifier
+     * @return true if a new stub was created, otherwise false
+     */
+    boolean resetStub(String host, Notifier<PartitionHeartbeatRequest.Builder> notifier);
+
+    /**
+     * Interface of pulse.
+     */
+    interface Listener<T> {
+
+        /**
+         * Invoked on new events.
+         *
+         * @param response the response.
+         */
+        @Deprecated
+        default void onNext(T response) {
+        }
+
+        /**
+         * Invoked on new events.
+         *
+         * @param notice a wrapper of response
+         */
+        default void onNotice(PulseServerNotice<T> notice) {
+            notice.ack();
+        }
+
+        /**
+         * Invoked on errors.
+         *
+         * @param throwable the error.
+         */
+        void onError(Throwable throwable);
+
+        /**
+         * Invoked on completion.
+         */
+        void onCompleted();
+
+    }
+
+    /**
+     * Interface of notifier that can send notice to server.
+     *
+     * @param <T> the type of notice sent to the server
+     */
+    interface Notifier<T> extends Closeable {
+
+        /**
+         * closes this watcher and all its resources.
+         */
+        @Override
+        void close();
+
+        /**
+         * Send notice to pd-server.
+         *
+         * @param t the notice payload
+         */
+        void notifyServer(T t);
+
+        /**
+         * Send an error report to pd-server.
+         *
+         * @param error the error message
+         */
+        void crash(String error);
+
+    }
+}
diff --git a/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDPulseImpl.java b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDPulseImpl.java
new file mode 100644
index 0000000000..0afc10c831
--- /dev/null
+++ b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDPulseImpl.java
@@ -0,0 +1,197 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd.client; + +import java.util.Objects; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +import org.apache.hugegraph.pd.grpc.pulse.HgPdPulseGrpc; +import org.apache.hugegraph.pd.grpc.pulse.PartitionHeartbeatRequest; +import org.apache.hugegraph.pd.grpc.pulse.PulseAckRequest; +import org.apache.hugegraph.pd.grpc.pulse.PulseCreateRequest; +import org.apache.hugegraph.pd.grpc.pulse.PulseNoticeRequest; +import org.apache.hugegraph.pd.grpc.pulse.PulseRequest; +import org.apache.hugegraph.pd.grpc.pulse.PulseResponse; +import org.apache.hugegraph.pd.grpc.pulse.PulseType; +import org.apache.hugegraph.pd.pulse.PartitionNotice; + +import com.google.common.util.concurrent.ThreadFactoryBuilder; + +import io.grpc.ManagedChannel; +import io.grpc.stub.StreamObserver; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public final class PDPulseImpl implements PDPulse { + + private static final ConcurrentHashMap chs = new ConcurrentHashMap<>(); + private final ExecutorService threadPool; + private HgPdPulseGrpc.HgPdPulseStub stub; + private String pdServerAddress; + + // TODO: support several servers. + public PDPulseImpl(String pdServerAddress) { + this.pdServerAddress = pdServerAddress; + this.stub = HgPdPulseGrpc.newStub(Channels.getChannel(pdServerAddress)); + var namedThreadFactory = + new ThreadFactoryBuilder().setNameFormat("ack-notice-pool-%d").build(); + threadPool = Executors.newSingleThreadExecutor(namedThreadFactory); + } + + private String getCurrentHost() { + return this.pdServerAddress; + } + + private boolean checkChannel() { + return stub != null && !((ManagedChannel) stub.getChannel()).isShutdown(); + } + + /* TODO: handle this override problem */ + @Override + public Notifier connectPartition(Listener + listener) { + return new PartitionHeartbeat(listener); + } + + @Override + public boolean resetStub(String host, Notifier notifier) { + log.info("reset stub: current, {}, new: {}, channel state:{}", getCurrentHost(), host, + checkChannel()); + if (Objects.equals(host, getCurrentHost()) && checkChannel()) { + return false; + } + + if (notifier != null) { + notifier.close(); + } + + this.stub = HgPdPulseGrpc.newStub(Channels.getChannel(host)); + log.info("pd pulse connect to {}", host); + this.pdServerAddress = host; + return true; + } + + /*** PartitionHeartbeat's implement ***/ + private class PartitionHeartbeat extends + AbstractConnector { + + private long observerId = -1; + + PartitionHeartbeat(Listener listener) { + super(listener, PulseType.PULSE_TYPE_PARTITION_HEARTBEAT); + } + + private void setObserverId(long observerId) { + if (this.observerId == -1) { + this.observerId = observerId; + } + } + + @Override + public void notifyServer(PartitionHeartbeatRequest.Builder requestBuilder) { + this.reqStream.onNext(PulseRequest.newBuilder() + .setNoticeRequest( + PulseNoticeRequest.newBuilder() + .setPartitionHeartbeatRequest( + requestBuilder.build() + ).build() + ).build() + ); + } + + @Override + public void onNext(PulseResponse pulseResponse) { + 
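+            // Each PulseResponse carries the server-assigned observer id and a notice
+            // id; the listener acks through PartitionNotice, which funnels back into
+            // ackNotice() on the single-threaded ack pool (see AbstractConnector below).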
this.setObserverId(pulseResponse.getObserverId()); + long noticeId = pulseResponse.getNoticeId(); + this.listener.onNext(pulseResponse); + this.listener.onNotice(new PartitionNotice(noticeId, + e -> super.ackNotice(e, observerId), + pulseResponse)); + } + + } + + private abstract class AbstractConnector implements Notifier, + StreamObserver { + + Listener listener; + StreamObserver reqStream; + PulseType pulseType; + PulseRequest.Builder reqBuilder = PulseRequest.newBuilder(); + PulseAckRequest.Builder ackBuilder = PulseAckRequest.newBuilder(); + + private AbstractConnector(Listener listener, PulseType pulseType) { + this.listener = listener; + this.pulseType = pulseType; + this.init(); + } + + void init() { + PulseCreateRequest.Builder builder = PulseCreateRequest.newBuilder() + .setPulseType(this.pulseType); + + this.reqStream = PDPulseImpl.this.stub.pulse(this); + this.reqStream.onNext(reqBuilder.clear().setCreateRequest(builder).build()); + } + + /*** notifier ***/ + @Override + public void close() { + this.reqStream.onCompleted(); + } + + @Override + public abstract void notifyServer(N t); + + @Override + public void crash(String error) { + this.reqStream.onError(new Throwable(error)); + } + + /*** listener ***/ + @Override + public abstract void onNext(PulseResponse pulseResponse); + + @Override + public void onError(Throwable throwable) { + this.listener.onError(throwable); + } + + @Override + public void onCompleted() { + this.listener.onCompleted(); + } + + protected void ackNotice(long noticeId, long observerId) { + threadPool.execute(() -> { + // log.info("send ack: {}, ts: {}", noticeId, System.currentTimeMillis()); + this.reqStream.onNext(reqBuilder.clear() + .setAckRequest( + this.ackBuilder.clear() + .setNoticeId(noticeId) + .setObserverId(observerId) + .build() + ).build() + ); + }); + } + } +} diff --git a/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDWatch.java b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDWatch.java new file mode 100644 index 0000000000..c6c46d03d1 --- /dev/null +++ b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDWatch.java @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd.client; + +import java.io.Closeable; +import java.util.function.Consumer; + +import org.apache.hugegraph.pd.grpc.watch.WatchResponse; +import org.apache.hugegraph.pd.watch.NodeEvent; +import org.apache.hugegraph.pd.watch.PartitionEvent; + +public interface PDWatch { + + /** + * Watch the events of all store-nodes registered in the remote PD-Server. 
+ * + * @param listener + * @return + */ + //PDWatcher watchNode(Listener listener); + + /*** inner static methods ***/ + static Listener listener(Consumer onNext) { + return listener(onNext, t -> { + }, () -> { + }); + } + + static Listener listener(Consumer onNext, Consumer onError) { + return listener(onNext, onError, () -> { + }); + } + + static Listener listener(Consumer onNext, Runnable onCompleted) { + return listener(onNext, t -> { + }, onCompleted); + } + + static Listener listener(Consumer onNext, Consumer onError, + Runnable onCompleted) { + return new Listener() { + @Override + public void onNext(T response) { + onNext.accept(response); + } + + @Override + public void onError(Throwable throwable) { + onError.accept(throwable); + } + + @Override + public void onCompleted() { + onCompleted.run(); + } + }; + } + + /** + * Watch the events of the store-nodes assigned to a specified graph. + * + * @param graph the graph name which you want to watch + * @param listener + * @return + */ + //PDWatcher watchNode(String graph, Listener listener); + + String getCurrentHost(); + + boolean checkChannel(); + + /** + * @param listener + * @return + */ + Watcher watchPartition(Listener listener); + + Watcher watchNode(Listener listener); + + Watcher watchGraph(Listener listener); + + Watcher watchShardGroup(Listener listener); + + /** + * Interface of Watcher. + */ + interface Listener { + + /** + * Invoked on new events. + * + * @param response the response. + */ + void onNext(T response); + + /** + * Invoked on errors. + * + * @param throwable the error. + */ + void onError(Throwable throwable); + + /** + * Invoked on completion. + */ + default void onCompleted() { + } + + } + + interface Watcher extends Closeable { + + /** + * closes this watcher and all its resources. + */ + @Override + void close(); + + /** + * Requests the latest revision processed and propagates it to listeners + */ + // TODO: what's it for? + //void requestProgress(); + } +} diff --git a/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDWatchImpl.java b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDWatchImpl.java new file mode 100644 index 0000000000..9b136bb26a --- /dev/null +++ b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDWatchImpl.java @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
diff --git a/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDWatchImpl.java b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDWatchImpl.java
new file mode 100644
index 0000000000..9b136bb26a
--- /dev/null
+++ b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/client/PDWatchImpl.java
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.client;
+
+import java.util.function.Supplier;
+
+import org.apache.hugegraph.pd.grpc.watch.HgPdWatchGrpc;
+import org.apache.hugegraph.pd.grpc.watch.WatchCreateRequest;
+import org.apache.hugegraph.pd.grpc.watch.WatchNodeResponse;
+import org.apache.hugegraph.pd.grpc.watch.WatchPartitionResponse;
+import org.apache.hugegraph.pd.grpc.watch.WatchRequest;
+import org.apache.hugegraph.pd.grpc.watch.WatchResponse;
+import org.apache.hugegraph.pd.grpc.watch.WatchType;
+import org.apache.hugegraph.pd.watch.NodeEvent;
+import org.apache.hugegraph.pd.watch.PartitionEvent;
+
+import io.grpc.ManagedChannel;
+import io.grpc.stub.StreamObserver;
+
+final class PDWatchImpl implements PDWatch {
+
+    private final HgPdWatchGrpc.HgPdWatchStub stub;
+
+    private final String pdServerAddress;
+
+    // TODO: support several servers.
+    PDWatchImpl(String pdServerAddress) {
+        this.pdServerAddress = pdServerAddress;
+        this.stub = HgPdWatchGrpc.newStub(Channels.getChannel(pdServerAddress));
+    }
+
+    @Override
+    public String getCurrentHost() {
+        return this.pdServerAddress;
+    }
+
+    @Override
+    public boolean checkChannel() {
+        return stub != null && !((ManagedChannel) stub.getChannel()).isShutdown();
+    }
+
+    /**
+     * Get Partition change watcher.
+     *
+     * @param listener the listener to receive partition change events
+     * @return a watcher handle that can be closed to stop watching
+     */
+    @Override
+    public Watcher watchPartition(Listener<PartitionEvent> listener) {
+        return new PartitionWatcher(listener);
+    }
+
+    /**
+     * Get Store-Node change watcher.
+     *
+     * @param listener the listener to receive node change events
+     * @return a watcher handle that can be closed to stop watching
+     */
+    @Override
+    public Watcher watchNode(Listener<NodeEvent> listener) {
+        return new NodeWatcher(listener);
+    }
+
+    @Override
+    public Watcher watchGraph(Listener<WatchResponse> listener) {
+        return new GraphWatcher(listener);
+    }
+
+    @Override
+    public Watcher watchShardGroup(Listener<WatchResponse> listener) {
+        return new ShardGroupWatcher(listener);
+    }
+
+    private class GraphWatcher extends AbstractWatcher<WatchResponse> {
+
+        private GraphWatcher(Listener<WatchResponse> listener) {
+            super(listener,
+                  () -> WatchCreateRequest
+                          .newBuilder()
+                          .setWatchType(WatchType.WATCH_TYPE_GRAPH_CHANGE)
+                          .build()
+            );
+        }
+
+        @Override
+        public void onNext(WatchResponse watchResponse) {
+            this.listener.onNext(watchResponse);
+        }
+    }
+
+    private class ShardGroupWatcher extends AbstractWatcher<WatchResponse> {
+
+        private ShardGroupWatcher(Listener<WatchResponse> listener) {
+            super(listener,
+                  () -> WatchCreateRequest
+                          .newBuilder()
+                          .setWatchType(WatchType.WATCH_TYPE_SHARD_GROUP_CHANGE)
+                          .build()
+            );
+        }
+
+        @Override
+        public void onNext(WatchResponse watchResponse) {
+            this.listener.onNext(watchResponse);
+        }
+    }
+
+    private class PartitionWatcher extends AbstractWatcher<PartitionEvent> {
+
+        private PartitionWatcher(Listener<PartitionEvent> listener) {
+            super(listener,
+                  () -> WatchCreateRequest
+                          .newBuilder()
+                          .setWatchType(WatchType.WATCH_TYPE_PARTITION_CHANGE)
+                          .build()
+            );
+        }
+
+        @Override
+        public void onNext(WatchResponse watchResponse) {
+            WatchPartitionResponse res = watchResponse.getPartitionResponse();
+            PartitionEvent event = new PartitionEvent(res.getGraph(), res.getPartitionId(),
+                                                      PartitionEvent.ChangeType.grpcTypeOf(
+                                                              res.getChangeType()));
+            this.listener.onNext(event);
+        }
+    }
+
+    private class NodeWatcher extends AbstractWatcher<NodeEvent> {
+
+        private NodeWatcher(Listener<NodeEvent> listener) {
+            super(listener,
+                  () -> WatchCreateRequest
+                          .newBuilder()
+                          .setWatchType(WatchType.WATCH_TYPE_STORE_NODE_CHANGE)
+                          .build()
+            );
+        }
+
+        @Override
+        public void onNext(WatchResponse watchResponse) {
+            WatchNodeResponse res = watchResponse.getNodeResponse();
+            NodeEvent event = new NodeEvent(res.getGraph(), res.getNodeId(),
+                                            NodeEvent.EventType.grpcTypeOf(res.getNodeEventType()));
+            this.listener.onNext(event);
+        }
+    }
+
+    private abstract class AbstractWatcher<T> implements Watcher, StreamObserver<WatchResponse> {
+
+        Listener<T> listener;
+        StreamObserver<WatchRequest> reqStream;
+        Supplier<WatchCreateRequest> requestSupplier;
+
+        private AbstractWatcher(Listener<T> listener,
+                                Supplier<WatchCreateRequest> requestSupplier) {
+            this.listener = listener;
+            this.requestSupplier = requestSupplier;
+            this.init();
+        }
+
+        void init() {
+            this.reqStream = PDWatchImpl.this.stub.watch(this);
+            this.reqStream.onNext(WatchRequest.newBuilder().setCreateRequest(
+                    this.requestSupplier.get()
+            ).build());
+        }
+
+        @Override
+        public void close() {
+            this.reqStream.onCompleted();
+        }
+
+        @Override
+        public abstract void onNext(WatchResponse watchResponse);
+
+        @Override
+        public void onError(Throwable throwable) {
+            this.listener.onError(throwable);
+        }
+
+        @Override
+        public void onCompleted() {
+            this.listener.onCompleted();
+        }
+    }
+
+}
diff --git a/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/pulse/PartitionNotice.java b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/pulse/PartitionNotice.java
new file mode 100644
index 0000000000..80aa8951b7
--- /dev/null
+++ b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/pulse/PartitionNotice.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.pulse;
+
+import java.util.function.Consumer;
+
+import org.apache.hugegraph.pd.grpc.pulse.PulseResponse;
+
+public class PartitionNotice implements PulseServerNotice<PulseResponse> {
+
+    private final long noticeId;
+    private final Consumer<Long> ackConsumer;
+    private final PulseResponse content;
+
+    public PartitionNotice(long noticeId, Consumer<Long> ackConsumer, PulseResponse content) {
+        this.noticeId = noticeId;
+        this.ackConsumer = ackConsumer;
+        this.content = content;
+    }
+
+    @Override
+    public void ack() {
+        this.ackConsumer.accept(this.noticeId);
+    }
+
+    @Override
+    public long getNoticeId() {
+        return this.noticeId;
+    }
+
+    @Override
+    public PulseResponse getContent() {
+        return this.content;
+    }
+}
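A sketch of the intended consumption pattern for these notices: process the content first, then `ack()` so the server can consider the notice delivered. Re-delivery of un-acked notices is an assumption about server behaviour, not something this patch shows:

```java
import org.apache.hugegraph.pd.grpc.pulse.PulseResponse;
import org.apache.hugegraph.pd.pulse.PulseServerNotice;

class PulseConsumer {

    void onNotice(PulseServerNotice<PulseResponse> notice) {
        try {
            handle(notice.getContent()); // application-specific processing
            notice.ack();                // replies with this notice's id via the ack consumer
        } catch (Exception e) {
            // Deliberately not acked; presumably the server re-delivers it later.
        }
    }

    private void handle(PulseResponse response) {
        // ...
    }
}
```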
diff --git a/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/pulse/PulseServerNotice.java b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/pulse/PulseServerNotice.java
new file mode 100644
index 0000000000..9a30e2679a
--- /dev/null
+++ b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/pulse/PulseServerNotice.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.pulse;
+
+public interface PulseServerNotice<T> {
+
+    /**
+     * @throws RuntimeException when failed to send an ack-message to the pd-server
+     */
+    void ack();
+
+    long getNoticeId();
+
+    /**
+     * Return a response object of the gRPC stream.
+     *
+     * @return the content carried by this notice
+     */
+    T getContent();
+
+}
diff --git a/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/watch/NodeEvent.java b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/watch/NodeEvent.java
new file mode 100644
index 0000000000..bb68383b83
--- /dev/null
+++ b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/watch/NodeEvent.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hugegraph.pd.watch; + +import java.util.Objects; + +import org.apache.hugegraph.pd.grpc.watch.NodeEventType; + +public class NodeEvent { + + private final String graph; + private final long nodeId; + private final EventType eventType; + + public NodeEvent(String graph, long nodeId, EventType eventType) { + this.graph = graph; + this.nodeId = nodeId; + this.eventType = eventType; + } + + public String getGraph() { + return graph; + } + + public long getNodeId() { + return nodeId; + } + + public EventType getEventType() { + return eventType; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + NodeEvent nodeEvent = (NodeEvent) o; + return nodeId == nodeEvent.nodeId && Objects.equals(graph, + nodeEvent.graph) && + eventType == nodeEvent.eventType; + } + + @Override + public int hashCode() { + return Objects.hash(graph, nodeId, eventType); + } + + @Override + public String toString() { + return "NodeEvent{" + + "graph='" + graph + '\'' + + ", nodeId=" + nodeId + + ", eventType=" + eventType + + '}'; + } + + public enum EventType { + UNKNOWN, + NODE_ONLINE, + NODE_OFFLINE, + NODE_RAFT_CHANGE, + NODE_PD_LEADER_CHANGE; + + public static EventType grpcTypeOf(NodeEventType grpcType) { + switch (grpcType) { + case NODE_EVENT_TYPE_NODE_ONLINE: + return NODE_ONLINE; + case NODE_EVENT_TYPE_NODE_OFFLINE: + return NODE_OFFLINE; + case NODE_EVENT_TYPE_NODE_RAFT_CHANGE: + return NODE_RAFT_CHANGE; + case NODE_EVENT_TYPE_PD_LEADER_CHANGE: + return NODE_PD_LEADER_CHANGE; + default: + return UNKNOWN; + } + + } + + } +} diff --git a/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/watch/PDWatcher.java b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/watch/PDWatcher.java new file mode 100644 index 0000000000..d663f34a3c --- /dev/null +++ b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/watch/PDWatcher.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd.watch; + +public class PDWatcher { + +} diff --git a/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/watch/PartitionEvent.java b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/watch/PartitionEvent.java new file mode 100644 index 0000000000..e5be1b3484 --- /dev/null +++ b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/watch/PartitionEvent.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd.watch; + +import java.util.Objects; + +import org.apache.hugegraph.pd.grpc.watch.WatchChangeType; + +public class PartitionEvent { + + private final String graph; + private final int partitionId; + private final ChangeType changeType; + + public PartitionEvent(String graph, int partitionId, ChangeType changeType) { + this.graph = graph; + this.partitionId = partitionId; + this.changeType = changeType; + } + + public String getGraph() { + return this.graph; + } + + public int getPartitionId() { + return this.partitionId; + } + + public ChangeType getChangeType() { + return this.changeType; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + PartitionEvent that = (PartitionEvent) o; + return partitionId == that.partitionId && Objects.equals(graph, that.graph) && + changeType == that.changeType; + } + + @Override + public int hashCode() { + return Objects.hash(graph, partitionId, changeType); + } + + @Override + public String toString() { + return "PartitionEvent{" + + "graph='" + graph + '\'' + + ", partitionId=" + partitionId + + ", changeType=" + changeType + + '}'; + } + + public enum ChangeType { + UNKNOWN, + ADD, + ALTER, + DEL; + + public static ChangeType grpcTypeOf(WatchChangeType grpcType) { + switch (grpcType) { + case WATCH_CHANGE_TYPE_ADD: + return ADD; + case WATCH_CHANGE_TYPE_ALTER: + return ALTER; + case WATCH_CHANGE_TYPE_DEL: + return DEL; + default: + return UNKNOWN; + } + } + } +} diff --git a/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/watch/WatchType.java b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/watch/WatchType.java new file mode 100644 index 0000000000..e537701936 --- /dev/null +++ b/hugegraph-pd/hg-pd-client/src/main/java/org/apache/hugegraph/pd/watch/WatchType.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/
+
+package org.apache.hugegraph.pd.watch;
+
+enum WatchType {
+
+    PARTITION_CHANGE(10);
+
+    private final int value;
+
+    WatchType(int value) {
+        this.value = value;
+    }
+
+}
diff --git a/hugegraph-pd/hg-pd-common/pom.xml b/hugegraph-pd/hg-pd-common/pom.xml
new file mode 100644
index 0000000000..918c8deab8
--- /dev/null
+++ b/hugegraph-pd/hg-pd-common/pom.xml
@@ -0,0 +1,54 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements. See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License. You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+  -->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.apache.hugegraph</groupId>
+        <artifactId>hugegraph-pd</artifactId>
+        <version>${revision}</version>
+        <relativePath>../pom.xml</relativePath>
+    </parent>
+    <artifactId>hg-pd-common</artifactId>
+
+    <properties>
+        <maven.compiler.source>11</maven.compiler.source>
+        <maven.compiler.target>11</maven.compiler.target>
+    </properties>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.hugegraph</groupId>
+            <artifactId>hg-pd-grpc</artifactId>
+            <version>${revision}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.projectlombok</groupId>
+            <artifactId>lombok</artifactId>
+            <version>1.18.24</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-collections4</artifactId>
+            <version>4.4</version>
+        </dependency>
+    </dependencies>
+</project>
diff --git a/hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/GraphCache.java b/hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/GraphCache.java
new file mode 100644
index 0000000000..07c7c332d9
--- /dev/null
+++ b/hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/GraphCache.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.common;
+
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+
+import org.apache.hugegraph.pd.grpc.Metapb.Graph;
+import org.apache.hugegraph.pd.grpc.Metapb.Partition;
+
+import com.google.common.collect.RangeMap;
+import com.google.common.collect.TreeRangeMap;
+
+import lombok.Data;
+
+@Data
+public class GraphCache {
+
+    private Graph graph;
+    private AtomicBoolean initialized = new AtomicBoolean(false);
+    private AtomicBoolean writing = new AtomicBoolean(false);
+    private ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
+    private Map<Integer, AtomicBoolean> state = new ConcurrentHashMap<>();
+    private Map<Integer, Partition> partitions = new ConcurrentHashMap<>();
+    private RangeMap<Long, Integer> range = TreeRangeMap.create();
+
+    public GraphCache(Graph graph) {
+        this.graph = graph;
+    }
+
+    public GraphCache() {
+    }
+
+    public Partition getPartition(Integer id) {
+        return partitions.get(id);
+    }
+
+    public Partition addPartition(Integer id, Partition p) {
+        return partitions.put(id, p);
+    }
+
+    public Partition removePartition(Integer id) {
+        return partitions.remove(id);
+    }
+}
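To illustrate how `GraphCache` is meant to keep its two indexes in sync, a small sketch; the helper methods below are hypothetical, while `getRange()`/`getPartition()` come from Lombok's `@Data` and the class itself:

```java
import org.apache.hugegraph.pd.common.GraphCache;
import org.apache.hugegraph.pd.grpc.Metapb.Partition;

import com.google.common.collect.Range;

class GraphCacheExample {

    static void index(GraphCache cache, Partition p) {
        // Keep the id -> partition map and the key-range index in sync
        cache.addPartition(p.getId(), p);
        cache.getRange().put(Range.closedOpen(p.getStartKey(), p.getEndKey()), p.getId());
    }

    static Partition lookup(GraphCache cache, long keyHash) {
        Integer partId = cache.getRange().get(keyHash); // range index yields the partition id
        return partId == null ? null : cache.getPartition(partId);
    }
}
```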
diff --git a/hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/HgAssert.java b/hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/HgAssert.java
new file mode 100644
index 0000000000..710f96f28c
--- /dev/null
+++ b/hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/HgAssert.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.common;
+
+import java.util.Collection;
+import java.util.Map;
+
+public final class HgAssert {
+
+    public static void isTrue(boolean expression, String message) {
+        if (message == null) {
+            throw new IllegalArgumentException("message is null");
+        }
+
+        if (!expression) {
+            throw new IllegalArgumentException(message);
+        }
+    }
+
+    public static void isFalse(boolean expression, String message) {
+        isTrue(!expression, message);
+    }
+
+    public static void isArgumentValid(byte[] bytes, String parameter) {
+        isFalse(isInvalid(bytes), "The argument is invalid: " + parameter);
+    }
+
+    public static void isArgumentValid(String str, String parameter) {
+        isFalse(isInvalid(str), "The argument is invalid: " + parameter);
+    }
+
+    public static void isArgumentNotNull(Object obj, String parameter) {
+        isTrue(obj != null, "The argument is null: " + parameter);
+    }
+
+    public static void istValid(byte[] bytes, String msg) {
+        isFalse(isInvalid(bytes), msg);
+    }
+
+    public static void isValid(String str, String msg) {
+        isFalse(isInvalid(str), msg);
+    }
+
+    public static void isNotNull(Object obj, String msg) {
+        isTrue(obj != null, msg);
+    }
+
+    public static boolean isContains(Object[] objs, Object obj) {
+        if (objs == null || objs.length == 0 || obj == null) {
+            return false;
+        }
+        for (Object item : objs) {
+            if (obj.equals(item)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    public static boolean isInvalid(String... strs) {
+        if (strs == null || strs.length == 0) {
+            return true;
+        }
+        for (String item : strs) {
+            if (item == null || "".equals(item.trim())) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    public static boolean isInvalid(byte[] bytes) {
+        return bytes == null || bytes.length == 0;
+    }
+
+    public static boolean isInvalid(Map<?, ?> map) {
+        return map == null || map.isEmpty();
+    }
+
+    public static boolean isInvalid(Collection<?> list) {
+        return list == null || list.isEmpty();
+    }
+
+    public static <T> boolean isContains(Collection<T> list, T item) {
+        if (list == null || item == null) {
+            return false;
+        }
+        return list.contains(item);
+    }
+
+    public static boolean isNull(Object... objs) {
+        if (objs == null) {
+            return true;
+        }
+        for (Object item : objs) {
+            if (item == null) {
+                return true;
+            }
+        }
+        return false;
+    }
+}
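Typical call sites for these assertion helpers would look like the following sketch (the `register` method is hypothetical):

```java
import org.apache.hugegraph.pd.common.HgAssert;

class StoreService {

    void register(String address, byte[] key) {
        HgAssert.isArgumentValid(address, "address"); // rejects null/blank strings
        HgAssert.isArgumentValid(key, "key");         // rejects null/empty byte arrays
        // ... proceed knowing both arguments are non-empty ...
    }
}
```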
diff --git a/hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/KVPair.java b/hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/KVPair.java
new file mode 100644
index 0000000000..b5e916c481
--- /dev/null
+++ b/hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/KVPair.java
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.common;
+
+import java.io.Serializable;
+import java.util.Objects;
+
+public class KVPair<K, V> implements Serializable {
+
+    /**
+     * Key of this Pair.
+     */
+    private K key;
+    /**
+     * Value of this Pair.
+     */
+    private V value;
+
+    /**
+     * Creates a new pair
+     *
+     * @param key   The key for this pair
+     * @param value The value to use for this pair
+     */
+    public KVPair(K key, V value) {
+        this.key = key;
+        this.value = value;
+    }
+
+    /**
+     * Gets the key for this pair.
+     *
+     * @return key for this pair
+     */
+    public K getKey() {
+        return key;
+    }
+
+    public void setKey(K key) {
+        this.key = key;
+    }
+
+    /**
+     * Gets the value for this pair.
+     *
+     * @return value for this pair
+     */
+    public V getValue() {
+        return value;
+    }
+
+    public void setValue(V value) {
+        this.value = value;
+    }
+
+    /**
+     * String representation of this Pair.
+     * <p>
+     * The default name/value delimiter '=' is always used.
+     *
+     * @return String representation of this Pair
+     */
+    @Override
+    public String toString() {
+        return key + "=" + value;
+    }
+
+    /**
+     * Generate a hash code for this Pair.
+     * <p>
+     * The hash code is calculated using both the name and the value of the Pair.
+     *
+     * @return hash code for this Pair
+     */
+    @Override
+    public int hashCode() {
+        // name's hashCode is multiplied by an arbitrary prime number (13)
+        // in order to make sure there is a difference in the hashCode between
+        // these two parameters:
+        //  name: a  value: aa
+        //  name: aa value: a
+        return key.hashCode() * 13 + (value == null ? 0 : value.hashCode());
+    }
+
+    /**
+     * Test this Pair for equality with another Object.
+     * <p>
+     * If the Object to be tested is not a Pair or is null, this method returns false.
+     * <p>
+     * Two Pairs are considered equal if and only if both the names and values are equal.
+     *
+     * @param o the Object to test for equality with this Pair
+     * @return true if the given Object is equal to this Pair, else false
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (o instanceof KVPair) {
+            KVPair<?, ?> pair = (KVPair<?, ?>) o;
+            if (!Objects.equals(key, pair.key)) {
+                return false;
+            }
+            return Objects.equals(value, pair.value);
+        }
+        return false;
+    }
+}
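`KVPair` mostly serves as a lightweight tuple in PD lookups, e.g. a partition together with its leader shard, which is how `PartitionCache` below returns it; a sketch:

```java
import org.apache.hugegraph.pd.common.KVPair;
import org.apache.hugegraph.pd.grpc.Metapb;

class KVPairExample {

    static void use(KVPair<Metapb.Partition, Metapb.Shard> pair) {
        Metapb.Partition partition = pair.getKey();
        Metapb.Shard leader = pair.getValue(); // may be null if no leader is known
        System.out.println(partition.getId() + " -> " + leader);
    }
}
```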
diff --git a/hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/PDException.java b/hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/PDException.java
new file mode 100644
index 0000000000..b398137e82
--- /dev/null
+++ b/hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/PDException.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.common;
+
+public class PDException extends Exception {
+
+    private final int errorCode;
+
+    public PDException(int error) {
+        super(String.format("Error code = %d", error));
+        this.errorCode = error;
+    }
+
+    public PDException(int error, String msg) {
+        super(msg);
+        this.errorCode = error;
+    }
+
+    public PDException(int error, Throwable e) {
+        super(e);
+        this.errorCode = error;
+    }
+
+    public PDException(int error, String msg, Throwable e) {
+        super(msg, e);
+        this.errorCode = error;
+    }
+
+    public int getErrorCode() {
+        return errorCode;
+    }
+}
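Callers are expected to branch on the numeric code; a sketch (the error code `1001` is illustrative only, real codes come from the generated enums elsewhere in this PR):

```java
import org.apache.hugegraph.pd.common.PDException;

class PDExceptionExample {

    static void demo() {
        try {
            throw new PDException(1001, "partition not found");
        } catch (PDException e) {
            // The code survives wrapping, so callers can switch on it
            System.err.println("PD error " + e.getErrorCode() + ": " + e.getMessage());
        }
    }
}
```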
diff --git a/hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/PDRuntimeException.java b/hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/PDRuntimeException.java
new file mode 100644
index 0000000000..0bd90241df
--- /dev/null
+++ b/hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/PDRuntimeException.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.common;
+
+public class PDRuntimeException extends RuntimeException {
+
+    // public static final int LICENSE_ERROR = -11;
+
+    private int errorCode = 0;
+
+    public PDRuntimeException(int error) {
+        super(String.format("Error code = %d", error));
+        this.errorCode = error;
+    }
+
+    public PDRuntimeException(int error, String msg) {
+        super(msg);
+        this.errorCode = error;
+    }
+
+    public PDRuntimeException(int error, Throwable e) {
+        super(e);
+        this.errorCode = error;
+    }
+
+    public PDRuntimeException(int error, String msg, Throwable e) {
+        super(msg, e);
+        this.errorCode = error;
+    }
+
+    public int getErrorCode() {
+        return errorCode;
+    }
+}
diff --git a/hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/PartitionCache.java b/hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/PartitionCache.java
new file mode 100644
index 0000000000..9bd233fd21
--- /dev/null
+++ b/hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/PartitionCache.java
@@ -0,0 +1,458 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.common;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+
+import org.apache.hugegraph.pd.grpc.Metapb;
+
+import com.google.common.collect.Range;
+import com.google.common.collect.RangeMap;
+import com.google.common.collect.TreeRangeMap;
+
+/**
+ * The copy-on-write approach was abandoned:
+ * 1. when the number of graph * partition combinations is very large,
+ *    performance degrades so severely that it becomes unusable
+ */
+public class PartitionCache {
+
+    // Read-write lock
+    private final ReadWriteLock readWriteLock = new ReentrantReadWriteLock();
+    private final Map<String, AtomicBoolean> locks = new HashMap<>();
+    Lock writeLock = readWriteLock.writeLock();
+    // One key-range cache per graph
+    private volatile Map<String, RangeMap<Long, Integer>> keyToPartIdCache;
+    // The key is composed of graphName + PartitionID
+    private volatile Map<String, Map<Integer, Metapb.Partition>> partitionCache;
+    private volatile Map<Integer, Metapb.ShardGroup> shardGroupCache;
+    private volatile Map<Long, Metapb.Store> storeCache;
+    private volatile Map<String, Metapb.Graph> graphCache;
+
+    public PartitionCache() {
+        keyToPartIdCache = new HashMap<>();
+        partitionCache = new HashMap<>();
+        shardGroupCache = new ConcurrentHashMap<>();
+        storeCache = new ConcurrentHashMap<>();
+        graphCache = new ConcurrentHashMap<>();
+    }
+
+    private AtomicBoolean getOrCreateGraphLock(String graphName) {
+        var lock = this.locks.get(graphName);
+        if (lock == null) {
+            try {
+                writeLock.lock();
+                if ((lock = this.locks.get(graphName)) == null) {
+                    lock = new AtomicBoolean();
+                    locks.put(graphName, lock);
+                }
+            } finally {
+                writeLock.unlock();
+            }
+        }
+        return lock;
+    }
+
+    public void waitGraphLock(String graphName) {
+        var lock = getOrCreateGraphLock(graphName);
+        while (lock.get()) {
+            Thread.onSpinWait();
+        }
+    }
+
+    public void lockGraph(String graphName) {
+        var lock = getOrCreateGraphLock(graphName);
+        // Spin until the flag flips from false to true, i.e. the lock is acquired
+        while (!lock.compareAndSet(false, true)) {
+            Thread.onSpinWait();
+        }
+    }
+
+    public void unlockGraph(String graphName) {
+        var lock = getOrCreateGraphLock(graphName);
+        lock.set(false);
+    }
+
+    /**
+     * Return the partition info by partition id.
+     *
+     * @param graphName the graph name
+     * @param partId    the partition id
+     * @return the partition and its leader shard, or null if absent
+     */
+    public KVPair<Metapb.Partition, Metapb.Shard> getPartitionById(String graphName, int partId) {
+        waitGraphLock(graphName);
+        var graphs = partitionCache.get(graphName);
+        if (graphs != null) {
+            var partition = graphs.get(partId);
+            if (partition != null) {
+                return new KVPair<>(partition, getLeaderShard(partId));
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Return the partition that the given key falls into.
+     *
+     * @param graphName the graph name
+     * @param key       the key bytes
+     * @return the partition and its leader shard, or null if absent
+     */
+    public KVPair<Metapb.Partition, Metapb.Shard> getPartitionByKey(String graphName, byte[] key) {
+        int code = PartitionUtils.calcHashcode(key);
+        return getPartitionByCode(graphName, code);
+    }
+
+    /**
+     * Return the partition info by the hash code of the key.
+     *
+     * @param graphName the graph name
+     * @param code      the hash code of the key
+     * @return the partition and its leader shard, or null if absent
+     */
+    public KVPair<Metapb.Partition, Metapb.Shard> getPartitionByCode(String graphName, long code) {
+        waitGraphLock(graphName);
+        RangeMap<Long, Integer> rangeMap = keyToPartIdCache.get(graphName);
+        if (rangeMap != null) {
+            Integer partId = rangeMap.get(code);
+            if (partId != null) {
+                return getPartitionById(graphName, partId);
+            }
+        }
+        return null;
+    }
+
+    public List<Metapb.Partition> getPartitions(String graphName) {
+        waitGraphLock(graphName);
+
+        List<Metapb.Partition> partitions = new ArrayList<>();
+        if (!partitionCache.containsKey(graphName)) {
+            return partitions;
+        }
+        partitionCache.get(graphName).forEach((k, v) -> {
+            partitions.add(v);
+        });
+
+        return partitions;
+    }
+
+    public boolean addPartition(String graphName, int partId, Metapb.Partition partition) {
+        waitGraphLock(graphName);
+        Metapb.Partition old = null;
+
+        if (partitionCache.containsKey(graphName)) {
+            old = partitionCache.get(graphName).get(partId);
+        }
+
+        if (old != null && old.equals(partition)) {
+            return false;
+        }
+        try {
+
+            lockGraph(graphName);
+
+            partitionCache.computeIfAbsent(graphName, k -> new HashMap<>()).put(partId, partition);
+
+            if (old != null) {
+                // The old range [1-3) may have been covered by [2-3); when [1-3) becomes
+                // [1-2), the original [1-3) must not be removed. Only remove the old range
+                // when both its start and end keys still map to this partition
+                // (i.e. it has not been covered yet).
+                var graphRange = keyToPartIdCache.get(graphName);
+                if (Objects.equals(partition.getId(), graphRange.get(partition.getStartKey()))
&& + Objects.equals(partition.getId(), graphRange.get(partition.getEndKey() - 1))) { + graphRange.remove(graphRange.getEntry(partition.getStartKey()).getKey()); + } + } + + keyToPartIdCache.computeIfAbsent(graphName, k -> TreeRangeMap.create()) + .put(Range.closedOpen(partition.getStartKey(), + partition.getEndKey()), partId); + } finally { + unlockGraph(graphName); + } + return true; + } + + public void updatePartition(String graphName, int partId, Metapb.Partition partition) { + try { + lockGraph(graphName); + Metapb.Partition old = null; + var graphs = partitionCache.get(graphName); + if (graphs != null) { + old = graphs.get(partId); + } + + if (old != null) { + var graphRange = keyToPartIdCache.get(graphName); + if (Objects.equals(partition.getId(), graphRange.get(partition.getStartKey())) && + Objects.equals(partition.getId(), graphRange.get(partition.getEndKey() - 1))) { + graphRange.remove(graphRange.getEntry(partition.getStartKey()).getKey()); + } + } + + partitionCache.computeIfAbsent(graphName, k -> new HashMap<>()).put(partId, partition); + keyToPartIdCache.computeIfAbsent(graphName, k -> TreeRangeMap.create()) + .put(Range.closedOpen(partition.getStartKey(), partition.getEndKey()), + partId); + } finally { + unlockGraph(graphName); + } + } + + public boolean updatePartition(Metapb.Partition partition) { + + var graphName = partition.getGraphName(); + var partitionId = partition.getId(); + + var old = getPartitionById(graphName, partitionId); + if (old != null && Objects.equals(partition, old.getKey())) { + return false; + } + + updatePartition(graphName, partitionId, partition); + return true; + } + + public void removePartition(String graphName, int partId) { + try { + lockGraph(graphName); + var partition = partitionCache.get(graphName).remove(partId); + if (partition != null) { + var graphRange = keyToPartIdCache.get(graphName); + + if (Objects.equals(partition.getId(), graphRange.get(partition.getStartKey())) && + Objects.equals(partition.getId(), graphRange.get(partition.getEndKey() - 1))) { + graphRange.remove(graphRange.getEntry(partition.getStartKey()).getKey()); + } + } + } finally { + unlockGraph(graphName); + } + } + + /** + * remove partition id of graph name + * + * @param graphName + * @param id + */ + public void remove(String graphName, int id) { + removePartition(graphName, id); + } + + /** + * remove all partitions + */ + public void removePartitions() { + writeLock.lock(); + try { + partitionCache = new HashMap<>(); + keyToPartIdCache = new HashMap<>(); + locks.clear(); + } finally { + writeLock.unlock(); + } + } + + /** + * remove partition cache of graphName + * + * @param graphName + */ + public void removeAll(String graphName) { + try { + lockGraph(graphName); + partitionCache.remove(graphName); + keyToPartIdCache.remove(graphName); + locks.remove(graphName); + } finally { + unlockGraph(graphName); + } + } + + private String makePartitionKey(String graphName, int partId) { + return graphName + "/" + partId; + } + + public boolean updateShardGroup(Metapb.ShardGroup shardGroup) { + Metapb.ShardGroup oldShardGroup = shardGroupCache.get(shardGroup.getId()); + if (oldShardGroup != null && oldShardGroup.equals(shardGroup)) { + return false; + } + shardGroupCache.put(shardGroup.getId(), shardGroup); + return true; + } + + public void deleteShardGroup(int shardGroupId) { + shardGroupCache.remove(shardGroupId); + } + + public Metapb.ShardGroup getShardGroup(int groupId) { + return shardGroupCache.get(groupId); + } + + public boolean addStore(Long storeId, 
Metapb.Store store) { + Metapb.Store oldStore = storeCache.get(storeId); + if (oldStore != null && oldStore.equals(store)) { + return false; + } + storeCache.put(storeId, store); + return true; + } + + public Metapb.Store getStoreById(Long storeId) { + return storeCache.get(storeId); + } + + public void removeStore(Long storeId) { + storeCache.remove(storeId); + } + + public boolean hasGraph(String graphName) { + return getPartitions(graphName).size() > 0; + } + + public void updateGraph(Metapb.Graph graph) { + if (Objects.equals(graph, getGraph(graph.getGraphName()))) { + return; + } + graphCache.put(graph.getGraphName(), graph); + } + + public Metapb.Graph getGraph(String graphName) { + return graphCache.get(graphName); + } + + public List getGraphs() { + List graphs = new ArrayList<>(); + graphCache.forEach((k, v) -> { + graphs.add(v); + }); + return graphs; + } + + public void reset() { + writeLock.lock(); + try { + partitionCache = new HashMap<>(); + keyToPartIdCache = new HashMap<>(); + shardGroupCache = new ConcurrentHashMap<>(); + storeCache = new ConcurrentHashMap<>(); + graphCache = new ConcurrentHashMap<>(); + locks.clear(); + } finally { + writeLock.unlock(); + } + } + + public void clear() { + reset(); + } + + public String debugCacheByGraphName(String graphName) { + StringBuilder builder = new StringBuilder(); + builder.append("Graph:").append(graphName).append(", cache info: range info: {"); + var rangeMap = keyToPartIdCache.get(graphName); + builder.append(rangeMap == null ? "" : rangeMap).append("}"); + + if (rangeMap != null) { + builder.append(", partition info : {"); + rangeMap.asMapOfRanges().forEach((k, v) -> { + var partition = partitionCache.get(graphName).get(v); + builder.append("[part_id:").append(v); + if (partition != null) { + builder.append(", start_key:").append(partition.getStartKey()) + .append(", end_key:").append(partition.getEndKey()) + .append(", state:").append(partition.getState().name()); + } + builder.append("], "); + }); + builder.append("}"); + } + + builder.append(", graph info:{"); + var graph = graphCache.get(graphName); + if (graph != null) { + builder.append("partition_count:").append(graph.getPartitionCount()) + .append(", state:").append(graph.getState().name()); + } + builder.append("}]"); + return builder.toString(); + } + + public Metapb.Shard getLeaderShard(int partitionId) { + var shardGroup = shardGroupCache.get(partitionId); + if (shardGroup != null) { + for (Metapb.Shard shard : shardGroup.getShardsList()) { + if (shard.getRole() == Metapb.ShardRole.Leader) { + return shard; + } + } + } + + return null; + } + + public void updateShardGroupLeader(int partitionId, Metapb.Shard leader) { + if (shardGroupCache.containsKey(partitionId) && leader != null) { + if (!Objects.equals(getLeaderShard(partitionId), leader)) { + var shardGroup = shardGroupCache.get(partitionId); + var builder = Metapb.ShardGroup.newBuilder(shardGroup).clearShards(); + for (var shard : shardGroup.getShardsList()) { + builder.addShards( + Metapb.Shard.newBuilder() + .setStoreId(shard.getStoreId()) + .setRole(shard.getStoreId() == leader.getStoreId() ? 
+                                                 Metapb.ShardRole.Leader :
+                                                 Metapb.ShardRole.Follower)
+                                .build()
+                );
+            }
+            shardGroupCache.put(partitionId, builder.build());
+        }
+    }
+
+    public String debugShardGroup() {
+        StringBuilder builder = new StringBuilder();
+        builder.append("shard group cache:{");
+        shardGroupCache.forEach((partitionId, shardGroup) -> {
+            builder.append(partitionId).append("::{")
+                   .append("version:").append(shardGroup.getVersion())
+                   .append(", conf_version:").append(shardGroup.getConfVer())
+                   .append(", state:").append(shardGroup.getState().name())
+                   .append(", shards:[");
+
+            for (var shard : shardGroup.getShardsList()) {
+                builder.append("{store_id:").append(shard.getStoreId())
+                       .append(", role:").append(shard.getRole().name())
+                       .append("},");
+            }
+            builder.append("], ");
+        });
+        builder.append("}");
+        return builder.toString();
+    }
+}
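Putting the pieces together, a key lookup through this cache is a two-step affair: hash the key into the code space, then resolve the range index to a partition plus leader. A sketch (the graph name is made up):

```java
import org.apache.hugegraph.pd.common.KVPair;
import org.apache.hugegraph.pd.common.PartitionCache;
import org.apache.hugegraph.pd.common.PartitionUtils;
import org.apache.hugegraph.pd.grpc.Metapb;

class PartitionLookup {

    static KVPair<Metapb.Partition, Metapb.Shard> locate(PartitionCache cache,
                                                         String graph, byte[] key) {
        // calcHashcode maps any key into [0, 0xffff), the code space
        // that partition ranges are defined over (see PartitionUtils below)
        int code = PartitionUtils.calcHashcode(key);
        return cache.getPartitionByCode(graph, code);
    }
}
```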
diff --git a/hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/PartitionUtils.java b/hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/PartitionUtils.java
new file mode 100644
index 0000000000..0e35cc555e
--- /dev/null
+++ b/hugegraph-pd/hg-pd-common/src/main/java/org/apache/hugegraph/pd/common/PartitionUtils.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.common;
+
+public class PartitionUtils {
+
+    public static final int MAX_VALUE = 0xffff;
+
+    /**
+     * Calculate the hash code of the key.
+     *
+     * @param key the key bytes
+     * @return hashcode in the range [0, MAX_VALUE)
+     */
+    public static int calcHashcode(byte[] key) {
+        // FNV-1a style hash with additional bit mixing, clamped to the code space
+        final int p = 16777619;
+        int hash = (int) 2166136261L;
+        for (byte element : key) {
+            hash = (hash ^ element) * p;
+        }
+        hash += hash << 13;
+        hash ^= hash >> 7;
+        hash += hash << 3;
+        hash ^= hash >> 17;
+        hash += hash << 5;
+        hash = hash & PartitionUtils.MAX_VALUE;
+        if (hash == PartitionUtils.MAX_VALUE) {
+            hash = PartitionUtils.MAX_VALUE - 1;
+        }
+        return hash;
+    }
+}
diff --git a/hugegraph-pd/hg-pd-grpc/pom.xml b/hugegraph-pd/hg-pd-grpc/pom.xml
new file mode 100644
index 0000000000..cef49e957d
--- /dev/null
+++ b/hugegraph-pd/hg-pd-grpc/pom.xml
@@ -0,0 +1,138 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements. See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License. You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+  -->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <groupId>org.apache.hugegraph</groupId>
+        <artifactId>hugegraph-pd</artifactId>
+        <version>${revision}</version>
+        <relativePath>../pom.xml</relativePath>
+    </parent>
+    <artifactId>hg-pd-grpc</artifactId>
+
+    <properties>
+        <os.plugin.version>1.6.0</os.plugin.version>
+        <grpc.version>1.39.0</grpc.version>
+        <protoc.version>3.17.2</protoc.version>
+        <protobuf.plugin.version>0.6.1</protobuf.plugin.version>
+    </properties>
+
+    <dependencies>
+        <dependency>
+            <groupId>io.grpc</groupId>
+            <artifactId>grpc-netty-shaded</artifactId>
+            <version>${grpc.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>io.grpc</groupId>
+            <artifactId>grpc-protobuf</artifactId>
+            <version>${grpc.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>io.grpc</groupId>
+            <artifactId>grpc-stub</artifactId>
+            <version>${grpc.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>javax.annotation</groupId>
+            <artifactId>javax.annotation-api</artifactId>
+            <version>1.3.2</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <sourceDirectory>${basedir}/src/main/java</sourceDirectory>
+        <resources>
+            <resource>
+                <directory>src/main/resources</directory>
+            </resource>
+            <resource>
+                <directory>src/main/proto</directory>
+            </resource>
+        </resources>
+        <extensions>
+            <extension>
+                <groupId>kr.motd.maven</groupId>
+                <artifactId>os-maven-plugin</artifactId>
+                <version>${os.plugin.version}</version>
+            </extension>
+        </extensions>
+        <plugins>
+            <plugin>
+                <groupId>org.xolstice.maven.plugins</groupId>
+                <artifactId>protobuf-maven-plugin</artifactId>
+                <version>${protobuf.plugin.version}</version>
+                <extensions>true</extensions>
+                <configuration>
+                    <protocArtifact>
+                        com.google.protobuf:protoc:${protoc.version}:exe:${os.detected.classifier}
+                    </protocArtifact>
+                    <pluginId>grpc-java</pluginId>
+                    <pluginArtifact>
+                        io.grpc:protoc-gen-grpc-java:${grpc.version}:exe:${os.detected.classifier}
+                    </pluginArtifact>
+                    <protoSourceRoot>${project.basedir}/src/main/proto</protoSourceRoot>
+                    <outputDirectory>${project.basedir}/src/main/java</outputDirectory>
+                    <clearOutputDirectory>false</clearOutputDirectory>
+                </configuration>
+                <executions>
+                    <execution>
+                        <phase>generate-sources</phase>
+                        <goals>
+                            <goal>compile</goal>
+                            <goal>compile-custom</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <artifactId>maven-clean-plugin</artifactId>
+                <configuration>
+                    <filesets>
+                        <fileset>
+                            <directory>src/main/java</directory>
+                        </fileset>
+                    </filesets>
+                </configuration>
+                <executions>
+                    <execution>
+                        <phase>initialize</phase>
+                        <goals>
+                            <goal>clean</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
diff --git a/hugegraph-pd/hg-pd-grpc/src/main/proto/discovery.proto b/hugegraph-pd/hg-pd-grpc/src/main/proto/discovery.proto
new file mode 100644
index 0000000000..b434ab0e86
--- /dev/null
+++ b/hugegraph-pd/hg-pd-grpc/src/main/proto/discovery.proto
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +syntax = "proto3"; +package discovery; +import "pdpb.proto"; + +option java_package = "org.apache.hugegraph.pd.grpc.discovery"; +option java_multiple_files = true; + + +service DiscoveryService { + rpc register(NodeInfo) returns (RegisterInfo); + rpc getNodes(Query) returns (NodeInfos); + // rpc getNodesByLabel(Conditions) returns (NodeInfos); +} + +/* requests */ +message NodeInfo { + string id = 1; + string appName = 2; + string version = 3; + string address = 4; + int64 interval = 5; + map labels = 6; +} +message Query { + string appName = 1; + string version = 2; + map labels = 3; +} +message LeaseInfo { + int64 registrationTs = 1; + int64 lastHeartbeatTs = 2; + int64 serverUpTs = 3; +} +message RegisterInfo { + NodeInfo nodeInfo = 1; + LeaseInfo leaseInfo = 2 ; + RegisterType type = 3 ; + pdpb.ResponseHeader header = 4; +} +enum RegisterType { + Register = 0; + Heartbeat = 1; + Dislodge = 2; +} +//message Condition{ +// string label = 1; +//} +//message Conditions{ +// string label = 1; +// string value = 2; +//} +message NodeInfos{ + repeated NodeInfo info = 1; +} diff --git a/hugegraph-pd/hg-pd-grpc/src/main/proto/kv.proto b/hugegraph-pd/hg-pd-grpc/src/main/proto/kv.proto new file mode 100644 index 0000000000..22007cda31 --- /dev/null +++ b/hugegraph-pd/hg-pd-grpc/src/main/proto/kv.proto @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +syntax = "proto3"; +package kv; +import "pdpb.proto"; +import "metapb.proto"; + +option java_package = "org.apache.hugegraph.pd.grpc.kv"; +option java_multiple_files = true; + + +service KvService { + rpc put(Kv) returns (KvResponse); + rpc get(K) returns (KResponse); + rpc delete(K) returns (KvResponse); + rpc deletePrefix(K) returns (KvResponse); + rpc scanPrefix(K) returns (ScanPrefixResponse); + rpc watch(WatchRequest) returns (stream WatchResponse); + rpc watchPrefix(WatchRequest) returns (stream WatchResponse); + rpc lock(LockRequest) returns (LockResponse); + rpc lockWithoutReentrant(LockRequest) returns (LockResponse); + rpc unlock(LockRequest) returns (LockResponse); + rpc keepAlive(LockRequest) returns (LockResponse); + rpc isLocked(LockRequest) returns (LockResponse); + rpc putTTL(TTLRequest) returns (TTLResponse); + rpc keepTTLAlive(TTLRequest) returns (TTLResponse); +} + +/* requests */ +message Kv { + pdpb.RequestHeader header = 1; + string key = 2; + string value = 3; +} +message KvResponse { + pdpb.ResponseHeader header = 1; +} + +message K{ + pdpb.RequestHeader header = 1; + string key = 2; +} + +message KResponse{ + pdpb.ResponseHeader header = 1; + string value = 2; +} + +message ScanPrefixResponse { + pdpb.ResponseHeader header = 1; + map kvs = 2; +} + +message LockRequest{ + pdpb.RequestHeader header = 1; + string key = 2; + int64 ttl = 3; + int64 clientId = 4; +} +message LockResponse{ + pdpb.ResponseHeader header = 1; + string key = 2; + int64 ttl = 3; + int64 clientId = 4; + bool succeed = 5; +} + +message LockAliveResponse{ + pdpb.ResponseHeader header = 1; + int64 clientId = 2; +} + + +message WatchKv { + string key = 1; + string value = 2; +} + +enum WatchType { + Put = 0; + Delete = 1; + Unrecognized = 2; +} + +message WatchEvent { + WatchKv current = 1; + WatchKv prev = 2; + WatchType type = 3; +} + +message WatchResponse { + pdpb.ResponseHeader header = 1; + repeated WatchEvent events = 2; + int64 clientId = 3; + WatchState state = 4; +} + +enum WatchState { + Starting = 0; + Started = 1; + Leader_Changed = 2; + Alive = 3; +} + +message WatchRequest { + pdpb.RequestHeader header = 1; + WatchState state = 2; + string key = 3; + int64 clientId = 4; +} + +message V{ + string value = 1; + int64 ttl = 2; + int64 st = 3; +} + +message TTLRequest{ + pdpb.RequestHeader header = 1; + string key = 2; + string value = 3; + int64 ttl = 4; +} + +message TTLResponse{ + pdpb.ResponseHeader header = 1; + bool succeed = 2; +} diff --git a/hugegraph-pd/hg-pd-grpc/src/main/proto/metaTask.proto b/hugegraph-pd/hg-pd-grpc/src/main/proto/metaTask.proto new file mode 100644 index 0000000000..c4bb8bde10 --- /dev/null +++ b/hugegraph-pd/hg-pd-grpc/src/main/proto/metaTask.proto @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = "proto3"; +package metaTask; +import "metapb.proto"; +import "pd_pulse.proto"; +option java_package = "org.apache.hugegraph.pd.grpc"; + +enum TaskType { + Unknown = 0; + Split_Partition = 1; + Change_Shard = 2; + Move_Partition = 3; + Clean_Partition = 4; + Change_KeyRange = 5; +} + +// 一条任务信息 +message Task { + uint64 id = 1; + TaskType type = 2; + TaskState state = 3; + int64 start_timestamp = 4; + metapb.Partition partition = 5; + string message = 6; + //每个shard执行的任务状态 + repeated ShardTaskState shardState = 7; + ChangeShard changeShard = 9; + SplitPartition splitPartition = 10; + MovePartition movePartition = 11; + CleanPartition cleanPartition = 12; + PartitionKeyRange partitionKeyRange = 13; +} + +enum TaskState{ + Task_Unknown = 0; + Task_Ready = 1; //任务就绪 + Task_Doing = 2; //执行中 + Task_Done = 3; //完成 + Task_Exit = 4; //退出 + Task_Stop = 10; + Task_Success = 11; + Task_Failure = 12; +} + +message ShardTaskState{ + uint64 store_id = 1; + TaskState state = 2; +} diff --git a/hugegraph-pd/hg-pd-grpc/src/main/proto/metapb.proto b/hugegraph-pd/hg-pd-grpc/src/main/proto/metapb.proto new file mode 100644 index 0000000000..a8a695be04 --- /dev/null +++ b/hugegraph-pd/hg-pd-grpc/src/main/proto/metapb.proto @@ -0,0 +1,394 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = "proto3"; +package metapb; +option java_package = "org.apache.hugegraph.pd.grpc"; +import "google/protobuf/any.proto"; + +enum ClusterState{ + // 集群健康 + Cluster_OK = 0; + // 分区警告,存在部分故障节点,短时间不影响读写 + Cluster_Warn = 2; + // 分区下线,可以读,无法写 + Cluster_Offline = 10; + // 分区故障,无法读写,需要尽快修复故障节点。 + Cluster_Fault = 11; + Cluster_Not_Ready = -1; +} +// 集群状态 +message ClusterStats{ + ClusterState state = 1; + string message = 2; + uint64 timestamp = 16; +} + +enum StoreState { + Unknown = 0; + // 未激活 + Pending = 4; + // 在线 + Up = 1; + // 离线 + Offline = 2; + // 下线中 + Exiting = 5; + // 已下线 + Tombstone = 3; +} + +// Store label for Storage grouping. +message StoreLabel { + string key = 1; + string value = 2; +} + +message Store { + uint64 id = 1; + // Address to handle client requests + string address = 2; + string raft_address = 3; + repeated StoreLabel labels = 4; + // Store软件版本号 + string version = 5; + StoreState state = 6; + // The start timestamp of the current store + int64 start_timestamp = 7; + string deploy_path = 8; + // The last heartbeat timestamp of the store. 
+ int64 last_heartbeat = 9; + StoreStats stats = 10; + // 数据格式版本号 + int32 data_version = 11; + int32 cores = 12; + string data_path = 13; +} + +enum ShardRole { + None = 0; + Leader = 1; + Follower = 2; + // Learner/None -> Learner + Learner = 3; +} + +message Shard { + uint64 store_id = 2; + ShardRole role = 3; +} + +message ShardGroup{ + uint32 id = 1; + uint64 version = 2; + uint64 conf_ver = 3; + repeated Shard shards = 6; + PartitionState state = 10; + string message = 11; +} + +message Graph { + string graph_name = 2; + // 分区数量,0表示无效,不能大于raft分组总数 + int32 partition_count = 3; + // 当前工作状态 + PartitionState state = 10; + string message = 11; + GraphState graph_state = 12; +} +// 分区工作状态 +enum PartitionState{ + PState_None = 0; + // + PState_Normal = 1; + // 分区警告,存在部分故障节点,短时间不影响读写 + PState_Warn = 2; + // 分区下线,可以读,无法写 + PState_Offline = 10; + // 分区故障,无法读写,需要尽快修复故障节点。 + PState_Fault = 11; +} + +message PartitionV36 { + uint32 id = 1; + string graph_name = 3; + // 分区范围 [start_key, end_key). + uint64 start_key = 4; + uint64 end_key = 5; + repeated Shard shards = 6; + // Leader任期,leader切换后递增 + uint64 version = 7; + // shards版本号,每次改变后递增 + uint64 conf_ver = 8; + // 当前工作状态 + PartitionState state = 10; + string message = 11; +} + +message Partition { + uint32 id = 1; + string graph_name = 3; + // 分区范围 [start_key, end_key). + uint64 start_key = 4; + uint64 end_key = 5; + // Partition 对象不在保存 shard list(根据对应的shard group 去查询), version 和 conf version不再有实际的意义 + // repeated Shard shards = 6; + // key range 每次改变后递增 + uint64 version = 7; + // shards版本号,每次改变后递增 + // uint64 conf_ver = 8; + // 当前工作状态 + PartitionState state = 10; + string message = 11; +} + +message PartitionShard { + metapb.Partition partition = 1; + metapb.Shard leader = 2; + // 离线的Shard + repeated metapb.Shard offline_shards = 3; +} +// 记录分区所在的存储位置 +message PartitionStore { + uint32 partition_id = 1; + string graph_name = 3; + // 存储位置 + string store_location = 4; +} + +message PartitionRaft { + uint32 partition_id = 1; + string graph_name = 3; + // 存储位置 + string raft_location = 4; +} + +message ShardStats{ + uint64 store_id = 2; + ShardRole role = 3; + ShardState state = 4; + // 安装快照的进度 + uint32 progress = 5; +} +message PartitionStats{ + uint32 id = 1; + // raft分组的任期. + uint64 leader_term = 2; + repeated string graph_name = 3; + metapb.Shard leader = 4; + // 离线 shards + repeated metapb.Shard shard = 5; + repeated metapb.Shard learner = 6; + uint64 conf_ver = 7; + // 分区状态 + PartitionState state = 8; + repeated ShardStats shardStats = 9; + // 分区近似大小 + uint64 approximate_size = 10; + // 分区key的近似数量 + uint64 approximate_keys = 13; + // heartbeat timestamp + int64 timestamp = 16; +} + +message GraphStats{ + // 图名 + string graph_name = 1; + // 分区近似大小 + uint64 approximate_size = 2; + // 分区key的近似数量 + uint64 approximate_keys = 3; + // // committed index + // uint64 committed_index = 4; + uint32 partition_id = 5; + ShardRole role = 6; + // 当前工作状态 + PartitionState work_state = 8; +} + +message RaftStats { + // partition id + uint32 partition_id = 1; + // committed index + uint64 committed_index = 2; +} + +message TimeInterval { + // The unix timestamp in seconds of the start of this period. + uint64 start_timestamp = 1; + // The unix timestamp in seconds of the end of this period. 
+
+message RecordPair {
+    string key = 1;
+    uint64 value = 2;
+}
+
+
+message QueryStats {
+    uint64 GC = 1;
+    uint64 Get = 2;
+    uint64 Scan = 3;
+    uint64 Coprocessor = 4;
+    uint64 Delete = 5;
+    uint64 DeleteRange = 6;
+    uint64 Put = 7;
+}
+
+enum ShardState{
+    SState_None = 0;
+    // Normal
+    SState_Normal = 1;
+    // Installing a snapshot
+    SState_Snapshot = 2;
+    // Offline
+    SState_Offline = 10;
+}
+
+
+message StoreStats {
+    uint64 store_id = 1;
+    // Capacity for the store.
+    uint64 capacity = 2;
+    // Available size for the store.
+    uint64 available = 3;
+    // Total partition count in this store.
+    uint32 partition_count = 4;
+    // Current sending snapshot count.
+    uint32 sending_snap_count = 5;
+    // Current receiving snapshot count.
+    uint32 receiving_snap_count = 6;
+    // When the store is started (unix timestamp in seconds).
+    uint32 start_time = 7;
+    // How many partition is applying snapshot.
+    uint32 applying_snap_count = 8;
+    // If the store is busy
+    bool is_busy = 9;
+    // Actually used space by db
+    uint64 used_size = 10;
+    // Bytes written for the store during this period.
+    uint64 bytes_written = 11;
+    // Keys written for the store during this period.
+    uint64 keys_written = 12;
+    // Bytes read for the store during this period.
+    uint64 bytes_read = 13;
+    // Keys read for the store during this period.
+    uint64 keys_read = 14;
+    // Actually reported time interval
+    TimeInterval interval = 15;
+    // Threads' CPU usages in the store
+    repeated RecordPair cpu_usages = 16;
+    // Threads' read disk I/O rates in the store
+    repeated RecordPair read_io_rates = 17;
+    // Threads' write disk I/O rates in the store
+    repeated RecordPair write_io_rates = 18;
+    // Operations' latencies in the store
+    repeated RecordPair op_latencies = 19;
+    // Store query stats
+    QueryStats query_stats = 21;
+    // graph stats
+    repeated GraphStats graph_stats = 22;
+    // raft stats
+    repeated RaftStats raft_stats = 23;
+    int32 cores = 24;
+    // system metrics
+    repeated RecordPair system_metrics = 25;
+}
+
+// Partition query conditions
+message PartitionQuery{
+    optional uint64 store_id = 1;       // 0 means the query does not filter by store_id
+    optional string graph_name = 2;
+    optional uint32 partition_id = 4;
+}
+
+// PD node info
+message Member {
+    uint64 cluster_id = 1;
+    string raft_url = 3;
+    string grpc_url = 4;
+    string rest_url = 5;
+    string data_path = 6;
+    StoreState state = 7;
+    ShardRole role = 8;
+    string replicator_state = 9;
+}
+
+// Graph space configuration
+message GraphSpace{
+    string name = 1;
+    // Maximum storage quota
+    uint64 storage_limit = 2;
+    // Used space
+    uint64 used_size = 3;
+    // Last modified time
+    uint64 timestamp = 10;
+}
+
+// PD configuration
+message PDConfig{
+    uint64 version = 1;
+    // Number of partitions; computed dynamically from the store count at
+    // initialization, updated after splits
+    int32 partition_count = 2;
+    // Number of replicas per partition
+    int32 shard_count = 3;
+    // List of PD cluster peers
+    string peers_list = 4;
+    // Minimum number of stores in the cluster
+    int32 min_store_count = 6;
+    // Maximum number of shards per store
+    int32 max_Shards_Per_Store = 7;
+    // Last modified time
+    uint64 timestamp = 10;
+}
+
+
+// Persisted message
+message QueueItem{
+    string item_id = 1;
+    string item_class = 2;
+    bytes item_content = 3;
+    int64 timestamp = 10;
+}
+
+message LogRecord{
+    string action = 1;
+    int64 timestamp = 2;
+    map<string, string> labels = 3;
+    google.protobuf.Any object = 4;
+    string message = 5;
+}
+
+message GraphState{
+    GraphMode mode = 1;
+    GraphModeReason reason = 2;
+}
+
+enum GraphMode{
+    ReadWrite = 0;
+    ReadOnly = 1;
+    WriteOnly = 2;
+}
+
+enum GraphModeReason{
+    Empty = 0;       // not set
+    Initiative = 1;  // set proactively by an operator
+    Quota = 2;       // quota limit reached
+}
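Each graph's key space is hashed onto a fixed ring, and the [start_key, end_key) ranges above carve that ring into partitions. A minimal lookup sketch, assuming the PartitionUtils helper from hg-pd-common (exercised by the tests later in this patch) and the generated Metapb classes; the `partitions` list is a hypothetical input:

    import java.util.List;

    import org.apache.hugegraph.pd.common.PartitionUtils;
    import org.apache.hugegraph.pd.grpc.Metapb;

    public class PartitionLookup {

        // Returns the partition whose [start_key, end_key) range covers the
        // hash code of the given key, or null if no range matches.
        public static Metapb.Partition locate(List<Metapb.Partition> partitions, byte[] key) {
            int code = PartitionUtils.calcHashcode(key);
            for (Metapb.Partition p : partitions) {
                if (code >= p.getStartKey() && code < p.getEndKey()) {
                    return p;
                }
            }
            return null;
        }
    }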
diff --git a/hugegraph-pd/hg-pd-grpc/src/main/proto/pd_common.proto b/hugegraph-pd/hg-pd-grpc/src/main/proto/pd_common.proto
new file mode 100644
index 0000000000..c9eec81494
--- /dev/null
+++ b/hugegraph-pd/hg-pd-grpc/src/main/proto/pd_common.proto
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = "proto3";
+
+option java_multiple_files = true;
+option java_package = "org.apache.hugegraph.pd.grpc.common";
+option java_outer_classname = "HgPdCommonProto";
+
+message RequestHeader {
+    // Cluster ID.
+    uint64 cluster_id = 1;
+    // Sender ID.
+    uint64 sender_id = 2;
+}
+
+message ResponseHeader {
+    // cluster_id is the ID of the cluster which sent the response.
+    uint64 cluster_id = 1;
+    Error error = 2;
+}
+
+enum ErrorType {
+    OK = 0;
+    UNKNOWN = 1;
+    STORE_NON_EXIST = 101;
+    STORE_TOMBSTONE = 103;
+    ALREADY_BOOTSTRAPPED = 4;
+    INCOMPATIBLE_VERSION = 5;
+    PARTITION_NOT_FOUND = 6;
+
+    ETCD_READ_ERROR = 1000;
+    ETCD_WRITE_ERROR = 1001;
+}
+
+message Error {
+    ErrorType type = 1;
+    string message = 2;
+}
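Every response carries this ResponseHeader, so a typical client-side guard converts any non-OK error into an exception. A minimal sketch, assuming PDException from hg-pd-common exposes an (int code, String message) constructor:

    import org.apache.hugegraph.pd.common.PDException;
    import org.apache.hugegraph.pd.grpc.common.ErrorType;
    import org.apache.hugegraph.pd.grpc.common.ResponseHeader;

    public class HeaderCheck {

        // Throw when PD signals anything other than ErrorType.OK
        public static void check(ResponseHeader header) throws PDException {
            if (header.getError().getType() != ErrorType.OK) {
                throw new PDException(header.getError().getTypeValue(),
                                      header.getError().getMessage());
            }
        }
    }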
diff --git a/hugegraph-pd/hg-pd-grpc/src/main/proto/pd_pulse.proto b/hugegraph-pd/hg-pd-grpc/src/main/proto/pd_pulse.proto
new file mode 100644
index 0000000000..fb8940df6c
--- /dev/null
+++ b/hugegraph-pd/hg-pd-grpc/src/main/proto/pd_pulse.proto
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = "proto3";
+
+import "metapb.proto";
+import "pd_common.proto";
+
+option java_multiple_files = true;
+option java_package = "org.apache.hugegraph.pd.grpc.pulse";
+option java_outer_classname = "HgPdPulseProto";
+
+service HgPdPulse {
+    rpc Pulse(stream PulseRequest) returns (stream PulseResponse);
+}
+
+/* requests */
+message PulseRequest {
+    PulseCreateRequest create_request = 1;
+    PulseCancelRequest cancel_request = 2;
+    PulseNoticeRequest notice_request = 3;
+    PulseAckRequest ack_request = 4;
+}
+
+message PulseCreateRequest {
+    PulseType pulse_type = 1;
+}
+
+message PulseCancelRequest {
+    int64 observer_id = 1;
+}
+
+message PulseNoticeRequest {
+    int64 observer_id = 1;
+    oneof request_union {
+        PartitionHeartbeatRequest partition_heartbeat_request = 10;
+    }
+}
+
+message PulseAckRequest {
+    int64 observer_id = 1;
+    int64 notice_id = 2;
+}
+
+// Partition heartbeat: when partition events occur (peers added/removed,
+// leader changed, etc.), the leader sends a heartbeat. In turn, PD delivers
+// shard add/remove instructions for the partition to the leader via the Response.
+message PartitionHeartbeatRequest {
+    RequestHeader header = 1;
+    // Leader Peer sending the heartbeat
+    metapb.PartitionStats states = 4;
+}
+
+/* responses */
+message PulseResponse {
+    PulseType pulse_type = 1;
+    int64 observer_id = 2;
+    int32 status = 3;   // 0 = ok, 1 = fail
+    int64 notice_id = 4;
+    oneof response_union {
+        PartitionHeartbeatResponse partition_heartbeat_response = 10;
+        PdInstructionResponse instruction_response = 11;
+    }
+}
+
+message PartitionHeartbeatResponse {
+    ResponseHeader header = 1;
+    uint64 id = 3;
+    metapb.Partition partition = 2;
+    ChangeShard change_shard = 4;
+
+    TransferLeader transfer_leader = 5;
+    // Split into multiple partitions: the first SplitPartition entry is the
+    // original partition, entries from the second onward are new partitions
+    SplitPartition split_partition = 6;
+    // Table for rocksdb compaction; null means all tables
+    DbCompaction db_compaction = 7;
+    // Migrate the partition's data to the target
+    MovePartition move_partition = 8;
+    // Clean up the graph's data in this partition
+    CleanPartition clean_partition = 9;
+    // Partition key-range change
+    PartitionKeyRange key_range = 10;
+}
+
+/* Data model */
+message ChangeShard {
+    repeated metapb.Shard shard = 1;
+    ConfChangeType change_type = 2;
+}
+
+message TransferLeader {
+    metapb.Shard shard = 1;
+}
+
+message SplitPartition {
+    repeated metapb.Partition new_partition = 1;
+}
+
+message DbCompaction {
+    string table_name = 3;
+}
+
+message MovePartition{
+    // The target partition's key range, i.e. the new range after migration
+    metapb.Partition target_partition = 1;
+    // All data between the partition's key_start and key_end
+    // will be migrated to the target partition
+    uint64 key_start = 2;
+    uint64 key_end = 3;
+}
+
+message CleanPartition {
+    uint64 key_start = 1;
+    uint64 key_end = 2;
+    CleanType clean_type = 3;
+    bool delete_partition = 4;  // whether to delete the partition
+}
+
+message PartitionKeyRange{
+    uint32 partition_id = 1;
+    uint64 key_start = 2;
+    uint64 key_end = 3;
+}
+
+message PdInstructionResponse {
+    PdInstructionType instruction_type = 1;
+    string leader_ip = 2;
+}
+
+/* enums */
+enum PulseType {
+    PULSE_TYPE_UNKNOWN = 0;
+    PULSE_TYPE_PARTITION_HEARTBEAT = 1;
+    PULSE_TYPE_PD_INSTRUCTION = 2;
+}
+
+enum PulseChangeType {
+    PULSE_CHANGE_TYPE_UNKNOWN = 0;
+    PULSE_CHANGE_TYPE_ADD = 1;
+    PULSE_CHANGE_TYPE_ALTER = 2;
+    PULSE_CHANGE_TYPE_DEL = 3;
+}
+
+enum ConfChangeType {
+    CONF_CHANGE_TYPE_UNKNOWN = 0;
+    CONF_CHANGE_TYPE_ADD_NODE = 1;
+    CONF_CHANGE_TYPE_REMOVE_NODE = 2;
+    CONF_CHANGE_TYPE_ADD_LEARNER_NODE = 3;
+    // Adjust shards; the leader adds/removes them dynamically per the new configuration
+    CONF_CHANGE_TYPE_ADJUST = 4;
+}
+
+enum CleanType {
+    CLEAN_TYPE_KEEP_RANGE = 0;      // keep only this range
+    CLEAN_TYPE_EXCLUDE_RANGE = 1;   // delete this range
+}
+
+enum PdInstructionType {
+    CHANGE_TO_FOLLOWER = 0;
+}
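Pulse is a bidirectional stream: the client first declares a pulse type, then acks every notice it receives (the PDPulse/PDPulseImpl classes in hg-pd-client wrap this protocol). A minimal hand-rolled sketch against the generated stubs, assuming a ready io.grpc.ManagedChannel:

    import java.util.concurrent.atomic.AtomicReference;

    import io.grpc.ManagedChannel;
    import io.grpc.stub.StreamObserver;

    import org.apache.hugegraph.pd.grpc.pulse.HgPdPulseGrpc;
    import org.apache.hugegraph.pd.grpc.pulse.PulseAckRequest;
    import org.apache.hugegraph.pd.grpc.pulse.PulseCreateRequest;
    import org.apache.hugegraph.pd.grpc.pulse.PulseRequest;
    import org.apache.hugegraph.pd.grpc.pulse.PulseResponse;
    import org.apache.hugegraph.pd.grpc.pulse.PulseType;

    public class PulseExample {

        public static void subscribe(ManagedChannel channel) {
            AtomicReference<StreamObserver<PulseRequest>> requests = new AtomicReference<>();
            StreamObserver<PulseResponse> responses = new StreamObserver<>() {
                @Override
                public void onNext(PulseResponse res) {
                    // Ack each notice so PD can stop re-delivering it
                    requests.get().onNext(PulseRequest.newBuilder()
                            .setAckRequest(PulseAckRequest.newBuilder()
                                    .setObserverId(res.getObserverId())
                                    .setNoticeId(res.getNoticeId()))
                            .build());
                }

                @Override
                public void onError(Throwable t) { /* reconnect / fail over */ }

                @Override
                public void onCompleted() { }
            };
            requests.set(HgPdPulseGrpc.newStub(channel).pulse(responses));
            // Open the stream by declaring which pulse type we observe
            requests.get().onNext(PulseRequest.newBuilder()
                    .setCreateRequest(PulseCreateRequest.newBuilder()
                            .setPulseType(PulseType.PULSE_TYPE_PARTITION_HEARTBEAT))
                    .build());
        }
    }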
diff --git a/hugegraph-pd/hg-pd-grpc/src/main/proto/pd_watch.proto b/hugegraph-pd/hg-pd-grpc/src/main/proto/pd_watch.proto
new file mode 100644
index 0000000000..febc41f522
--- /dev/null
+++ b/hugegraph-pd/hg-pd-grpc/src/main/proto/pd_watch.proto
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = "proto3";
+
+import "metapb.proto";
+
+option java_multiple_files = true;
+option java_package = "org.apache.hugegraph.pd.grpc.watch";
+option java_outer_classname = "HgPdWatchProto";
+
+service HgPdWatch {
+    rpc Watch(stream WatchRequest) returns (stream WatchResponse);
+}
+
+message WatchRequest {
+    WatchCreateRequest create_request = 1;
+    WatchCancelRequest cancel_request = 2;
+}
+
+message WatchCreateRequest {
+    WatchType watch_type = 1;
+}
+
+message WatchCancelRequest {
+    int64 watcher_id = 1;
+}
+
+message WatchResponse {
+    WatchType watch_type = 1;
+    int64 watcher_id = 2;
+    int32 status = 3;   // 0 = ok, 1 = fail
+    int64 notice_id = 4;
+    string msg = 5;
+    oneof response_union {
+        WatchPartitionResponse partition_response = 10;
+        WatchNodeResponse node_response = 11;
+        WatchGraphResponse graph_response = 12;
+        WatchShardGroupResponse shard_group_response = 13;
+    }
+}
+
+message WatchPartitionResponse {
+    string graph = 1;
+    int32 partition_id = 2;
+    WatchChangeType change_type = 3;
+}
+
+message WatchNodeResponse {
+    string graph = 1;
+    uint64 node_id = 2;
+    NodeEventType node_event_type = 3;
+}
+
+message WatchGraphResponse {
+    metapb.Graph graph = 1;
+    WatchType type = 2;
+}
+
+message WatchShardGroupResponse {
+    metapb.ShardGroup shard_group = 1;
+    WatchChangeType type = 2;
+    int32 shard_group_id = 3;
+}
+
+enum WatchType {
+    WATCH_TYPE_UNKNOWN = 0;
+    WATCH_TYPE_PARTITION_CHANGE = 1;
+    WATCH_TYPE_STORE_NODE_CHANGE = 2;
+    WATCH_TYPE_GRAPH_CHANGE = 3;
+    WATCH_TYPE_SHARD_GROUP_CHANGE = 4;
+}
+
+enum WatchChangeType {
+    WATCH_CHANGE_TYPE_UNKNOWN = 0;
+    WATCH_CHANGE_TYPE_ADD = 1;
+    WATCH_CHANGE_TYPE_ALTER = 2;
+    WATCH_CHANGE_TYPE_DEL = 3;
+    WATCH_CHANGE_TYPE_SPECIAL1 = 4;
+}
+
+enum NodeEventType {
+    NODE_EVENT_TYPE_UNKNOWN = 0;
+    NODE_EVENT_TYPE_NODE_ONLINE = 1;
+    NODE_EVENT_TYPE_NODE_OFFLINE = 2;
+    NODE_EVENT_TYPE_NODE_RAFT_CHANGE = 3;
+    // PD leader change
+    NODE_EVENT_TYPE_PD_LEADER_CHANGE = 4;
+}
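Watch mirrors the pulse channel but is notification-only, with no per-notice acks; PDWatch/PDWatchImpl in hg-pd-client build on it. A sketch subscribing to store-node changes, under the same channel assumption as the pulse sketch above:

    import io.grpc.ManagedChannel;
    import io.grpc.stub.StreamObserver;

    import org.apache.hugegraph.pd.grpc.watch.HgPdWatchGrpc;
    import org.apache.hugegraph.pd.grpc.watch.WatchCreateRequest;
    import org.apache.hugegraph.pd.grpc.watch.WatchRequest;
    import org.apache.hugegraph.pd.grpc.watch.WatchResponse;
    import org.apache.hugegraph.pd.grpc.watch.WatchType;

    public class WatchExample {

        public static void watchNodes(ManagedChannel channel) {
            StreamObserver<WatchRequest> stream =
                    HgPdWatchGrpc.newStub(channel).watch(new StreamObserver<>() {
                        @Override
                        public void onNext(WatchResponse res) {
                            // Node events carry the store id and what happened to it
                            System.out.println(res.getNodeResponse().getNodeId() + " -> " +
                                               res.getNodeResponse().getNodeEventType());
                        }

                        @Override
                        public void onError(Throwable t) { }

                        @Override
                        public void onCompleted() { }
                    });
            stream.onNext(WatchRequest.newBuilder()
                    .setCreateRequest(WatchCreateRequest.newBuilder()
                            .setWatchType(WatchType.WATCH_TYPE_STORE_NODE_CHANGE))
                    .build());
        }
    }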
diff --git a/hugegraph-pd/hg-pd-grpc/src/main/proto/pdpb.proto b/hugegraph-pd/hg-pd-grpc/src/main/proto/pdpb.proto
new file mode 100644
index 0000000000..4e293ca08e
--- /dev/null
+++ b/hugegraph-pd/hg-pd-grpc/src/main/proto/pdpb.proto
@@ -0,0 +1,607 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = "proto3";
+package pdpb;
+
+import "metapb.proto";
+import "metaTask.proto";
+
+option java_package = "org.apache.hugegraph.pd.grpc";
+
+service PD {
+    // Register a store; the first registration generates a new store_id,
+    // which is the store's unique identifier
+    rpc RegisterStore(RegisterStoreRequest) returns (RegisterStoreResponse) {}
+    rpc GetStore(GetStoreRequest) returns (GetStoreResponse) {}
+    // Modify the store's state and other info.
+    rpc SetStore(SetStoreRequest) returns (SetStoreResponse) {}
+    // Delete the specified store
+    rpc DelStore(DetStoreRequest) returns (DetStoreResponse) {}
+    rpc GetAllStores(GetAllStoresRequest) returns (GetAllStoresResponse) {}
+    rpc StoreHeartbeat(StoreHeartbeatRequest) returns (StoreHeartbeatResponse) {}
+
+    // Find the partition owning the given key
+    rpc GetPartition(GetPartitionRequest) returns (GetPartitionResponse) {}
+
+    // Find the owning partition by hash code
+    rpc GetPartitionByCode(GetPartitionByCodeRequest) returns (GetPartitionResponse) {}
+    // Return the partition with the given partition ID
+    rpc GetPartitionByID(GetPartitionByIDRequest) returns (GetPartitionResponse) {}
+    rpc ScanPartitions(ScanPartitionsRequest) returns (ScanPartitionsResponse) {}
+    // Update partition info, mainly the partition key range; call this
+    // interface with care, otherwise data loss may occur.
+    rpc UpdatePartition(UpdatePartitionRequest) returns (UpdatePartitionResponse) {}
+    // Delete the partition of the given graph
+    rpc DelPartition(DelPartitionRequest) returns (DelPartitionResponse) {}
+    // Query partitions by conditions such as store, graph, etc.
+    rpc QueryPartitions(QueryPartitionsRequest) returns (QueryPartitionsResponse){}
+    // Read graph info
+    rpc GetGraph(GetGraphRequest) returns (GetGraphResponse){}
+    // Modify graph info
+    rpc SetGraph(SetGraphRequest) returns (SetGraphResponse){}
+    rpc DelGraph(DelGraphRequest) returns (DelGraphResponse){}
+    // Globally unique auto-increment ID
+    rpc GetId(GetIdRequest) returns (GetIdResponse){}
+    rpc ResetId(ResetIdRequest) returns (ResetIdResponse){}
+    // List of PD cluster members
+    rpc GetMembers(GetMembersRequest) returns (GetMembersResponse) {}
+    rpc GetStoreStatus(GetAllStoresRequest) returns (GetAllStoresResponse) {}
+    rpc GetPDConfig(GetPDConfigRequest) returns (GetPDConfigResponse){}
+    rpc SetPDConfig(SetPDConfigRequest) returns (SetPDConfigResponse){}
+    rpc GetGraphSpace(GetGraphSpaceRequest) returns (GetGraphSpaceResponse){}
+    rpc SetGraphSpace(SetGraphSpaceRequest) returns (SetGraphSpaceResponse){}
+    // Get cluster health status
+    rpc GetClusterStats(GetClusterStatsRequest) returns (GetClusterStatsResponse){}
+    // Replace the PD cluster peers
+    rpc ChangePeerList(ChangePeerListRequest) returns (getChangePeerListResponse) {}
+    // Data split
+    rpc SplitData(SplitDataRequest) returns (SplitDataResponse){}
+
+    rpc SplitGraphData(SplitGraphDataRequest) returns (SplitDataResponse) {}
+    // Data migration
+    rpc MovePartition(MovePartitionRequest) returns (MovePartitionResponse){}
+    // Report the results of tasks such as partition splits
+    rpc ReportTask(ReportTaskRequest) returns (ReportTaskResponse){}
+
+    rpc GetPartitionStats(GetPartitionStatsRequest) returns (GetPartitionStatsResponse){}
+    // Balance the number of partition leaders across stores
+    rpc BalanceLeaders(BalanceLeadersRequest) returns (BalanceLeadersResponse){}
+
+    // Replace the license file
+    rpc PutLicense(PutLicenseRequest) returns (PutLicenseResponse){}
+
+    // Trigger rocksdb compaction
+    rpc DbCompaction(DbCompactionRequest) returns (DbCompactionResponse){}
+
+    // Merge partitions cluster-wide
+    rpc CombineCluster(CombineClusterRequest) returns (CombineClusterResponse){}
+    // Scale in a single graph
+    rpc CombineGraph(CombineGraphRequest) returns (CombineGraphResponse) {}
+
+    // shard group
+    rpc GetShardGroup(GetShardGroupRequest) returns (GetShardGroupResponse){}
+    rpc UpdateShardGroup(UpdateShardGroupRequest) returns (UpdateShardGroupResponse){}
+    // Delete a shard group
+    rpc DeleteShardGroup(DeleteShardGroupRequest) returns (DeleteShardGroupResponse) {}
+    // Shard group maintenance operations
+    rpc UpdateShardGroupOp(ChangeShardRequest) returns (ChangeShardResponse){}
+    // change shard
+    rpc ChangeShard(ChangeShardRequest) returns (ChangeShardResponse) {}
+    // Update the PD raft configuration
+    rpc updatePdRaft(UpdatePdRaftRequest) returns (UpdatePdRaftResponse) {}
+
+    rpc getCache(GetGraphRequest) returns (CacheResponse) {}
+    rpc getPartitions(GetGraphRequest) returns (CachePartitionResponse) {}
+}
+
+message RequestHeader {
+    // Cluster ID.
+    uint64 cluster_id = 1;
+    // Sender ID.
+    uint64 sender_id = 2;
+}
+
+message ResponseHeader {
+    // cluster_id is the ID of the cluster which sent the response.
+    uint64 cluster_id = 1;
+    Error error = 2;
+}
+
+enum ErrorType {
+    OK = 0;
+    UNKNOWN = 1;
+
+    NOT_LEADER = 100;
+    STORE_ID_NOT_EXIST = 101;
+    NO_ACTIVE_STORE = 102;
+    NOT_FOUND = 103;
+    PD_UNREACHABLE = 104;
+    LESS_ACTIVE_STORE = 105;
+    STORE_HAS_BEEN_REMOVED = 106;
+    STORE_PROHIBIT_DELETION = 111;
+    SET_CONFIG_SHARD_COUNT_ERROR = 112;
+    UPDATE_STORE_STATE_ERROR = 113;
+    STORE_PROHIBIT_DUPLICATE = 114;
+    ROCKSDB_READ_ERROR = 1002;
+    ROCKSDB_WRITE_ERROR = 1003;
+    ROCKSDB_DEL_ERROR = 1004;
+    ROCKSDB_SAVE_SNAPSHOT_ERROR = 1005;
+    ROCKSDB_LOAD_SNAPSHOT_ERROR = 1006;
+
+    // Splitting is forbidden in the current cluster state
+    Cluster_State_Forbid_Splitting = 1007;
+    // A split is already in progress
+    Split_Partition_Doing = 1008;
+    // The number of partitions on the store exceeds the upper limit
+    Too_Many_Partitions_Per_Store = 1009;
+    // License error
+    LICENSE_ERROR = 107;
+    // License verification error
+    LICENSE_VERIFY_ERROR = 108;
+
+    // A store decommission is in progress
+    Store_Tombstone_Doing = 1010;
+
+    // Invalid number of split partitions
+    Invalid_Split_Partition_Count = 1011;
+}
+
+message Error {
+    ErrorType type = 1;
+    string message = 2;
+}
+message GetStoreRequest {
+    RequestHeader header = 1;
+    uint64 store_id = 2;
+}
+
+message GetStoreResponse {
+    ResponseHeader header = 1;
+
+    metapb.Store store = 2;
+    metapb.StoreStats stats = 3;
+}
+
+message DetStoreRequest {
+    RequestHeader header = 1;
+    uint64 store_id = 2;
+}
+
+message DetStoreResponse {
+    ResponseHeader header = 1;
+    metapb.Store store = 2;
+}
+
+message RegisterStoreRequest {
+    RequestHeader header = 1;
+    metapb.Store store = 2;
+}
+
+
+message RegisterStoreResponse {
+    ResponseHeader header = 1;
+    // For first-time registration, returns the newly assigned store_id
+    uint64 store_id = 2;
+}
+
+message SetStoreRequest {
+    RequestHeader header = 1;
+    metapb.Store store = 2;
+}
+
+message SetStoreResponse {
+    ResponseHeader header = 1;
+    // Returns the modified store
+    metapb.Store store = 2;
+}
+
+
+// Returns all stores hosting graph_name; if graph_name is empty,
+// returns all stores in the system
+message GetAllStoresRequest {
+    RequestHeader header = 1;
+    string graph_name = 2;
+    // Whether to exclude offline stores
+    bool exclude_offline_stores = 3;
+}
+
+message GetAllStoresResponse {
+    ResponseHeader header = 1;
+
+    repeated metapb.Store stores = 2;
+}
+
+
+message StoreHeartbeatRequest {
+    RequestHeader header = 1;
+
+    metapb.StoreStats stats = 2;
+}
+
+message StoreHeartbeatResponse {
+    ResponseHeader header = 1;
+    string cluster_version = 3;
+    metapb.ClusterStats clusterStats = 4;
+}
+
+message GetPartitionRequest {
+    RequestHeader header = 1;
+    string graph_name = 2;
+    bytes key = 3;
+}
+
+
+message GetPartitionByCodeRequest {
+    RequestHeader header = 1;
+    string graph_name = 2;
+    uint64 code = 3;
+}
+
+
+message GetPartitionResponse {
+    ResponseHeader header = 1;
+    metapb.Partition partition = 2;
+    metapb.Shard leader = 3;
+    // Offline shards
+    repeated metapb.Shard offline_shards = 4;
+}
+
+message GetPartitionByIDRequest {
+    RequestHeader header = 1;
+    string graph_name = 2;
+    uint32 partition_id = 3;
+}
+
+message DelPartitionRequest {
+    RequestHeader header = 1;
+    string graph_name = 2;
+    uint32 partition_id = 3;
+}
+message DelPartitionResponse {
+    ResponseHeader header = 1;
+    metapb.Partition partition = 2;
+}
+
+message UpdatePartitionRequest{
+    RequestHeader header = 1;
+    repeated metapb.Partition partition = 2;
+}
+
+message UpdatePartitionResponse{
+    ResponseHeader header = 1;
+    repeated metapb.Partition partition = 2;
+}
+// Use GetPartitionResponse as the response of GetPartitionByIDRequest.
+
+message ScanPartitionsRequest {
+    RequestHeader header = 1;
+    string graph_name = 2;
+    bytes start_key = 3;
+    bytes end_key = 4;  // end_key is +inf when it is empty.
+}
+
+
+
+message ScanPartitionsResponse {
+    ResponseHeader header = 1;
+    repeated metapb.PartitionShard partitions = 4;
+}
+
+
+
+message QueryPartitionsRequest{
+    RequestHeader header = 1;
+    metapb.PartitionQuery query = 2;
+}
+
+message QueryPartitionsResponse {
+    ResponseHeader header = 1;
+    repeated metapb.Partition partitions = 4;
+}
+
+
+
+message GetGraphRequest{
+    RequestHeader header = 1;
+    string graph_name = 2;
+}
+
+message GetGraphResponse{
+    ResponseHeader header = 1;
+    metapb.Graph graph = 2;
+}
+
+message SetGraphRequest{
+    RequestHeader header = 1;
+    metapb.Graph graph = 2;
+}
+
+message SetGraphResponse{
+    ResponseHeader header = 1;
+    metapb.Graph graph = 2;
+}
+
+message DelGraphRequest{
+    RequestHeader header = 1;
+    string graph_name = 2;
+}
+
+message DelGraphResponse{
+    ResponseHeader header = 1;
+    metapb.Graph graph = 2;
+}
+
+message GetIdRequest{
+    RequestHeader header = 1;
+    string key = 2;
+    int32 delta = 3;
+}
+
+message GetIdResponse{
+    ResponseHeader header = 1;
+    int64 id = 2;
+    int32 delta = 3;
+}
+
+message ResetIdRequest{
+    RequestHeader header = 1;
+    string key = 2;
+}
+
+message ResetIdResponse{
+    ResponseHeader header = 1;
+    int32 result = 2;
+}
+
+message GetMembersRequest{
+    RequestHeader header = 1;
+}
+
+message GetMembersResponse{
+    ResponseHeader header = 1;
+    repeated metapb.Member members = 2;
+    metapb.Member leader = 3;
+}
+
+message GetPDConfigRequest{
+    RequestHeader header = 1;
+    uint64 version = 2 ;
+}
+
+message GetPDConfigResponse{
+    ResponseHeader header = 1;
+    metapb.PDConfig pd_config = 2;
+}
+
+message SetPDConfigRequest{
+    RequestHeader header = 1;
+    metapb.PDConfig pd_config = 2;
+}
+
+message SetPDConfigResponse{
+    ResponseHeader header = 1;
+}
+
+
+message GetGraphSpaceRequest{
+    RequestHeader header = 1;
+    string graph_Space_Name = 2;
+}
+
+message GetGraphSpaceResponse{
+    ResponseHeader header = 1;
+    repeated metapb.GraphSpace graph_space = 2;
+}
+
+message SetGraphSpaceRequest{
+    RequestHeader header = 1;
+    metapb.GraphSpace graph_space = 2;
+}
+
+message SetGraphSpaceResponse{
+    ResponseHeader header = 1;
+}
+
+message GetClusterStatsRequest{
+    RequestHeader header = 1;
+}
+
+message GetClusterStatsResponse{
+    ResponseHeader header = 1;
+    metapb.ClusterStats cluster = 2;
+}
+message ChangePeerListRequest{
+    RequestHeader header = 1;
+    string peer_List = 2;
+}
+message getChangePeerListResponse{
+    ResponseHeader header = 1;
+}
+
+enum OperationMode {
+    Auto = 0;
+    Expert = 1;
+}
+
+message SplitDataParam{
+    // ID of the source partition to split
+    uint32 partition_id = 1;
+    // Target number of partitions
+    uint32 count = 2;
+}
+
+message SplitDataRequest{
+    RequestHeader header = 1;
+    // Working mode
+    //  Auto: automatic split until the partition count on each store reaches the maximum
+    //  Expert: expert mode; splitParams must be specified
+    OperationMode mode = 2;
+    repeated SplitDataParam param = 3;
+}
+
+message SplitGraphDataRequest{
+    RequestHeader header = 1;
+    string graph_name = 2;
+    uint32 to_count = 3;
+}
+
+message SplitDataResponse{
+    ResponseHeader header = 1;
+}
+
+message MovePartitionParam{
+    uint32 partition_id = 1;
+    uint64 src_store_id = 2;
+    uint64 dst_store_id = 3;
+}
+
+message MovePartitionRequest{
+    RequestHeader header = 1;
+    // Working mode
+    //  Auto: automatic transfer until every store holds the same number of partitions
+    //  Expert: expert mode; transferParams must be specified
+    OperationMode mode = 2;
+    repeated MovePartitionParam param = 3;
+}
+
+message MovePartitionResponse{
+    ResponseHeader header = 1;
+}
+
+message ReportTaskRequest{
+    RequestHeader header = 1;
+    metaTask.Task task = 2;
+}
+
+message ReportTaskResponse{
+    ResponseHeader header = 1;
+}
+
+message GetPartitionStatsRequest{
+    RequestHeader header = 1;
+    uint32 partition_id = 2;
+    // If empty, return the stats of this partition ID across all graphs
+    string graph_name = 4;
+}
+
+message GetPartitionStatsResponse{
+    ResponseHeader header = 1;
+    metapb.PartitionStats partition_stats = 2;
+}
+
+message BalanceLeadersRequest{
+    RequestHeader header = 1;
+}
+
+message BalanceLeadersResponse{
+    ResponseHeader header = 1;
+}
+
+message PutLicenseRequest{
+    RequestHeader header = 1;
+    bytes content = 2;
+}
+
+message PutLicenseResponse{
+    ResponseHeader header = 1;
+}
+
+message DbCompactionRequest{
+    RequestHeader header = 1;
+    string tableName = 2;
+}
+
+message DbCompactionResponse{
+    ResponseHeader header = 1;
+}
+
+message CombineClusterRequest {
+    RequestHeader header = 1;
+    uint32 toCount = 2;
+}
+
+message CombineClusterResponse {
+    ResponseHeader header = 1;
+}
+
+message CombineGraphRequest {
+    RequestHeader header = 1;
+    string graphName = 2;
+    uint32 toCount = 3;
+}
+
+message CombineGraphResponse {
+    ResponseHeader header = 1;
+}
+
+message DeleteShardGroupRequest {
+    RequestHeader header = 1;
+    uint32 groupId = 2;
+}
+
+message DeleteShardGroupResponse {
+    ResponseHeader header = 1;
+}
+
+message GetShardGroupRequest{
+    RequestHeader header = 1;
+    uint32 group_id = 2 ;
+}
+
+message GetShardGroupResponse{
+    ResponseHeader header = 1;
+    metapb.ShardGroup shardGroup = 2;
+}
+
+message UpdateShardGroupRequest{
+    RequestHeader header = 1;
+    metapb.ShardGroup shardGroup = 2;
+}
+
+message UpdateShardGroupResponse{
+    ResponseHeader header = 1;
+}
+
+message ChangeShardRequest{
+    RequestHeader header = 1;
+    uint32 groupId = 2;
+    repeated metapb.Shard shards = 3;
+}
+
+message ChangeShardResponse {
+    ResponseHeader header = 1;
+}
+
+message UpdatePdRaftRequest{
+    RequestHeader header = 1;
+    string config = 3;
+}
+
+message UpdatePdRaftResponse{
+    ResponseHeader header = 1;
+    string message = 2;
+}
+message CacheResponse {
+    ResponseHeader header = 1;
+    // Cached stores, shard groups and graphs
+    repeated metapb.Store stores = 2;
+    repeated metapb.ShardGroup shards = 3;
+    repeated metapb.Graph graphs = 4;
+}
+message CachePartitionResponse {
+    ResponseHeader header = 1;
+    repeated metapb.Partition partitions = 2;
+}
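End to end, the PD service is consumed through the generated PDGrpc stubs. A minimal sketch, assuming PD is reachable on 127.0.0.1:8686 (address and port are placeholders) and the default codegen, which puts the request/response messages under the Pdpb outer class:

    import io.grpc.ManagedChannel;
    import io.grpc.ManagedChannelBuilder;

    import org.apache.hugegraph.pd.grpc.PDGrpc;
    import org.apache.hugegraph.pd.grpc.Pdpb;

    public class PdClientExample {

        public static void main(String[] args) {
            ManagedChannel channel = ManagedChannelBuilder.forTarget("127.0.0.1:8686")
                                                          .usePlaintext()
                                                          .build();
            PDGrpc.PDBlockingStub stub = PDGrpc.newBlockingStub(channel);

            // Who is the PD leader?
            Pdpb.GetMembersResponse members =
                    stub.getMembers(Pdpb.GetMembersRequest.newBuilder().build());
            System.out.println("PD leader: " + members.getLeader().getGrpcUrl());

            // Expert-mode split: partition 1 into 4 partitions, which requires
            // explicit SplitDataParam entries (see the comments above)
            Pdpb.SplitDataResponse res = stub.splitData(
                    Pdpb.SplitDataRequest.newBuilder()
                        .setMode(Pdpb.OperationMode.Expert)
                        .addParam(Pdpb.SplitDataParam.newBuilder()
                                                     .setPartitionId(1)
                                                     .setCount(4))
                        .build());
            System.out.println(res.getHeader().getError().getType());
            channel.shutdown();
        }
    }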
diff --git a/hugegraph-pd/hg-pd-test/pom.xml b/hugegraph-pd/hg-pd-test/pom.xml
new file mode 100644
index 0000000000..31c0fd889d
--- /dev/null
+++ b/hugegraph-pd/hg-pd-test/pom.xml
@@ -0,0 +1,259 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements. See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License. You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <groupId>org.apache.hugegraph</groupId>
+        <artifactId>hugegraph-pd</artifactId>
+        <version>${revision}</version>
+        <relativePath>../pom.xml</relativePath>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>hg-pd-test</artifactId>
+
+    <properties>
+        <powermock.version>2.0.0-RC.3</powermock.version>
+    </properties>
+
+    <profiles>
+        <profile>
+            <id>jacoco</id>
+            <activation>
+                <activeByDefault>false</activeByDefault>
+            </activation>
+            <build>
+                <plugins>
+                    <plugin>
+                        <groupId>org.jacoco</groupId>
+                        <artifactId>jacoco-maven-plugin</artifactId>
+                        <version>0.8.4</version>
+                        <configuration>
+                            <excludes>
+                                <exclude>**/grpc/**.*</exclude>
+                                <exclude>**/config/**.*</exclude>
+                            </excludes>
+                        </configuration>
+                        <executions>
+                            <execution>
+                                <goals>
+                                    <goal>prepare-agent</goal>
+                                </goals>
+                            </execution>
+                        </executions>
+                    </plugin>
+                </plugins>
+            </build>
+        </profile>
+    </profiles>
+
+    <dependencies>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.13.2</version>
+        </dependency>
+        <dependency>
+            <groupId>org.projectlombok</groupId>
+            <artifactId>lombok</artifactId>
+            <version>1.18.24</version>
+        </dependency>
+        <dependency>
+            <groupId>org.springframework</groupId>
+            <artifactId>spring-context-support</artifactId>
+            <version>5.3.20</version>
+        </dependency>
+        <dependency>
+            <groupId>org.springframework</groupId>
+            <artifactId>spring-test</artifactId>
+            <version>5.3.20</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.logging.log4j</groupId>
+            <artifactId>log4j-slf4j-impl</artifactId>
+            <version>${log4j2.version}</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.hugegraph</groupId>
+            <artifactId>hg-pd-common</artifactId>
+            <version>${revision}</version>
+        </dependency>
+
+        <dependency>
+            <groupId>com.google.code.gson</groupId>
+            <artifactId>gson</artifactId>
+            <version>2.8.9</version>
+        </dependency>
+        <dependency>
+            <groupId>commons-io</groupId>
+            <artifactId>commons-io</artifactId>
+            <version>2.7</version>
+        </dependency>
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-databind</artifactId>
+            <version>2.13.0</version>
+        </dependency>
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-core</artifactId>
+            <version>2.13.0</version>
+        </dependency>
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-annotations</artifactId>
+            <version>2.13.0</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot-starter-test</artifactId>
+            <version>2.5.14</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>org.springframework.boot</groupId>
+                    <artifactId>spring-boot-starter-logging</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>org.powermock</groupId>
+            <artifactId>powermock-classloading-xstream</artifactId>
+            <version>${powermock.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.powermock</groupId>
+            <artifactId>powermock-module-junit4-rule</artifactId>
+            <version>${powermock.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.powermock</groupId>
+            <artifactId>powermock-api-support</artifactId>
+            <version>${powermock.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.powermock</groupId>
+            <artifactId>powermock-module-junit4</artifactId>
+            <version>${powermock.version}</version>
+            <scope>compile</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.powermock</groupId>
+            <artifactId>powermock-api-mockito2</artifactId>
+            <version>${powermock.version}</version>
+            <scope>compile</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-lang3</artifactId>
+            <version>3.14.0</version>
+            <scope>compile</scope>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-surefire-plugin</artifactId>
+                <version>2.20</version>
+                <executions>
+                    <execution>
+                        <id>pd-common-test</id>
+                        <configuration>
+                            <testSourceDirectory>${basedir}/src/main/java/</testSourceDirectory>
+                            <testClassesDirectory>${basedir}/target/classes/</testClassesDirectory>
+                            <includes>
+                                <include>**/CommonSuiteTest.java</include>
+                            </includes>
+                        </configuration>
+                    </execution>
+                    <execution>
+                        <id>pd-client-test</id>
+                        <configuration>
+                            <testSourceDirectory>${basedir}/src/main/java/</testSourceDirectory>
+                            <testClassesDirectory>${basedir}/target/classes/</testClassesDirectory>
+                            <includes>
+                                <include>**/PDClientSuiteTest.java</include>
+                            </includes>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>org.jacoco</groupId>
+                <artifactId>jacoco-maven-plugin</artifactId>
+                <version>0.8.4</version>
+                <executions>
+                    <execution>
+                        <id>pre-test</id>
+                        <goals>
+                            <goal>prepare-agent</goal>
+                        </goals>
+                    </execution>
+                    <execution>
+                        <id>post-test</id>
+                        <phase>test</phase>
+                        <goals>
+                            <goal>report-aggregate</goal>
+                        </goals>
+                        <configuration>
+                            <outputDirectory>${basedir}/target/site/jacoco</outputDirectory>
+                        </configuration>
+                    </execution>
+                </executions>
+                <configuration>
+                    <excludes>
+                        <exclude>org/apache/hugegraph/pd/rest/*.class</exclude>
+                        <exclude>org/apache/hugegraph/pd/service/*.class</exclude>
+                        <exclude>org/apache/hugegraph/pd/model/*.class</exclude>
+                        <exclude>org/apache/hugegraph/pd/watch/*.class</exclude>
+                        <exclude>org/apache/hugegraph/pd/pulse/*.class</exclude>
+                        <exclude>org/apache/hugegraph/pd/license/*.class</exclude>
+                        <exclude>org/apache/hugegraph/pd/notice/*.class</exclude>
+                        <exclude>org/apache/hugegraph/pd/util/*.class</exclude>
+                        <exclude>org/apache/hugegraph/pd/metrics/*.class</exclude>
+                        <exclude>org/apache/hugegraph/pd/util/grpc/*.class</exclude>
+                        <exclude>org/apache/hugegraph/pd/boot/*.class</exclude>
+                        <exclude>org/apache/hugegraph/pd/grpc/**/*.class</exclude>
+                        <exclude>org/apache/hugegraph/pd/raft/*.class</exclude>
+                        <exclude>**/RaftKVStore.class</exclude>
+                    </excludes>
+                </configuration>
+            </plugin>
+        </plugins>
+        <testResources>
+            <testResource>
+                <directory>src/main/resources/</directory>
+                <filtering>true</filtering>
+            </testResource>
+        </testResources>
+    </build>
+</project>
diff --git a/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/common/BaseCommonTest.java b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/common/BaseCommonTest.java
new file mode 100644
index 0000000000..fb4478e3d6
--- /dev/null
+++ b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/common/BaseCommonTest.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.common;
+
+import org.junit.After;
+import org.junit.BeforeClass;
+
+public class BaseCommonTest {
+
+    @BeforeClass
+    public static void init() {
+
+    }
+
+    @After
+    public void teardown() {
+        // pass
+    }
+}
diff --git a/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/common/CommonSuiteTest.java b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/common/CommonSuiteTest.java
new file mode 100644
index 0000000000..02a5dfca64
--- /dev/null
+++ b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/common/CommonSuiteTest.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.common;
+
+import org.junit.runner.RunWith;
+import org.junit.runners.Suite;
+
+import lombok.extern.slf4j.Slf4j;
+
+@RunWith(Suite.class)
+@Suite.SuiteClasses({
+        PartitionUtilsTest.class,
+        PartitionCacheTest.class,
+        HgAssertTest.class,
+        KVPairTest.class,
+})
+
+@Slf4j
+public class CommonSuiteTest {
+
+}
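CommonSuiteTest is the entry point surefire actually runs for the hg-pd-common tests: the pd-common-test execution in the hg-pd-test pom above includes exactly this class, and the default-active pd-common-test profile in the hugegraph-pd parent pom (further below) binds the test goal. Assuming the module paths introduced by this patch, a local run could look like:

    mvn test -pl hugegraph-pd/hg-pd-test -am -P pd-common-test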
diff --git a/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/common/HgAssertTest.java b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/common/HgAssertTest.java
new file mode 100644
index 0000000000..3e61dd0a94
--- /dev/null
+++ b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/common/HgAssertTest.java
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.common;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+
+import org.junit.Test;
+
+public class HgAssertTest {
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testIsTrue() {
+        HgAssert.isTrue(false, "");
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testIsTrue2() {
+        HgAssert.isTrue(true, null);
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testIsFalse() {
+        HgAssert.isFalse(true, "");
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testIsFalse2() {
+        HgAssert.isTrue(false, null);
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void isArgumentValid() {
+        HgAssert.isArgumentValid(new byte[0], "");
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void isArgumentValidStr() {
+        HgAssert.isArgumentValid("", "");
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testIsArgumentNotNull() {
+        HgAssert.isArgumentNotNull(null, "");
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testIstValid() {
+        HgAssert.istValid(new byte[0], "");
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testIstValidStr() {
+        HgAssert.isValid("", "");
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testIsNotNull() {
+        HgAssert.isNotNull(null, "");
+    }
+
+    @Test
+    public void testIsInvalid() {
+        assertFalse(HgAssert.isInvalid("abc", "test"));
+        assertTrue(HgAssert.isInvalid("", null));
+    }
+
+    @Test
+    public void testIsInvalidByte() {
+        assertTrue(HgAssert.isInvalid(new byte[0]));
+        assertFalse(HgAssert.isInvalid(new byte[1]));
+    }
+
+    @Test
+    public void testIsInvalidMap() {
+        assertTrue(HgAssert.isInvalid(new HashMap()));
+        assertFalse(HgAssert.isInvalid(new HashMap() {{
+            put(1, 1);
+        }}));
+    }
+
+    @Test
+    public void testIsInvalidCollection() {
+        assertTrue(HgAssert.isInvalid(new ArrayList()));
+        assertFalse(HgAssert.isInvalid(new ArrayList() {{
+            add(1);
+        }}));
+    }
+
+    @Test
+    public void testIsContains() {
+        assertTrue(HgAssert.isContains(new Object[]{Integer.valueOf(1), Long.valueOf(2)},
+                                       Long.valueOf(2)));
+        assertFalse(HgAssert.isContains(new Object[]{Integer.valueOf(1), Long.valueOf(2)},
+                                        Long.valueOf(3)));
+    }
+
+    @Test
+    public void testIsContainsT() {
+        assertTrue(HgAssert.isContains(new ArrayList<>() {{
+            add(1);
+        }}, 1));
+        assertFalse(HgAssert.isContains(new ArrayList<>() {{
+            add(1);
+        }}, 2));
+    }
+
+    @Test
+    public void testIsNull() {
+        assertTrue(HgAssert.isNull(null));
+        assertFalse(HgAssert.isNull("abc", "cdf"));
+    }
+
+}
diff --git a/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/common/KVPairTest.java b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/common/KVPairTest.java
new file mode 100644
index 0000000000..9fb676d392
--- /dev/null
+++ b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/common/KVPairTest.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.common;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class KVPairTest {
+
+    KVPair<String, Integer> pair;
+
+    @Before
+    public void init() {
+        this.pair = new KVPair<>("key", 1);
+    }
+
+    @Test
+    public void testGetKey() {
+        assertEquals(this.pair.getKey(), "key");
+    }
+
+    @Test
+    public void testSetKey() {
+        this.pair.setKey("key2");
+        assertEquals(this.pair.getKey(), "key2");
+    }
+
+    @Test
+    public void testGetValue() {
+        assertEquals(1, this.pair.getValue());
+    }
+
+    @Test
+    public void testSetValue() {
+        this.pair.setValue(2);
+        assertEquals(2, this.pair.getValue());
+    }
+
+    @Test
+    public void testToString() {
+        Assert.assertNotNull(this.pair.toString());
+    }
+
+    @Test
+    public void testHashCode() {
+        // equal pairs must produce equal hash codes
+        Assert.assertEquals(new KVPair<>("key", 1).hashCode(), this.pair.hashCode());
+    }
+
+    @Test
+    public void testEquals() {
+        var pair2 = new KVPair<>("key", 1);
+        Assert.assertEquals(pair2, this.pair);
+    }
+}
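KVPair is the small product type PD APIs use for paired results; for instance, PartitionCache.getPartitionById (tested in the next file) returns a KVPair whose key is the Partition and whose value is assumed here to be the leader Shard. A usage sketch, where `cache` stands for a PartitionCache instance:

    import org.apache.hugegraph.pd.common.KVPair;
    import org.apache.hugegraph.pd.grpc.Metapb;

    // Unpack a cache lookup: getKey() is the partition, getValue() the leader shard
    KVPair<Metapb.Partition, Metapb.Shard> pair = cache.getPartitionById("graph0", 0);
    if (pair != null) {
        Metapb.Partition partition = pair.getKey();
        Metapb.Shard leader = pair.getValue();
    }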
diff --git a/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/common/PartitionCacheTest.java b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/common/PartitionCacheTest.java
new file mode 100644
index 0000000000..21e757ffa9
--- /dev/null
+++ b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/common/PartitionCacheTest.java
@@ -0,0 +1,388 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.common;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.io.UnsupportedEncodingException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hugegraph.pd.grpc.Metapb;
+import org.junit.Before;
+import org.junit.Test;
+
+public class PartitionCacheTest {
+
+    private PartitionCache cache;
+
+    private static Metapb.Partition createPartition(int pid, String graphName, long start,
+                                                    long end) {
+        return Metapb.Partition.newBuilder()
+                               .setId(pid)
+                               .setGraphName(graphName)
+                               .setStartKey(start)
+                               .setEndKey(end)
+                               .setState(Metapb.PartitionState.PState_Normal)
+                               .setVersion(1)
+                               .build();
+    }
+
+    private static Metapb.ShardGroup createShardGroup(int pid) {
+        return Metapb.ShardGroup.newBuilder()
+                                .addShards(
+                                        Metapb.Shard.newBuilder().setStoreId(0)
+                                                    .setRole(Metapb.ShardRole.Leader).build()
+                                )
+                                .setId(pid)
+                                .setVersion(0)
+                                .setConfVer(0)
+                                .setState(Metapb.PartitionState.PState_Normal)
+                                .build();
+    }
+
+    private static Metapb.Shard createShard() {
+        return Metapb.Shard.newBuilder()
+                           .setStoreId(0)
+                           .setRole(Metapb.ShardRole.Leader)
+                           .build();
+    }
+
+    private static Metapb.Store createStore(long storeId) {
+        return Metapb.Store.newBuilder()
+                           .setId(storeId)
+                           .setAddress("127.0.0.1")
+                           .setCores(4)
+                           .setVersion("1")
+                           .setDataPath("/tmp/junit")
+                           .setDataVersion(1)
+                           .setLastHeartbeat(System.currentTimeMillis())
+                           .setStartTimestamp(System.currentTimeMillis())
+                           .setState(Metapb.StoreState.Up)
+                           .setDeployPath("/tmp/junit")
+                           .build();
+    }
+
+    private static Metapb.Graph createGraph(String graphName, int partitionCount) {
+        return Metapb.Graph.newBuilder()
+                           .setGraphName(graphName)
+                           .setPartitionCount(partitionCount)
+                           .setState(Metapb.PartitionState.PState_Normal)
+                           .build();
+    }
+
+    private static Metapb.ShardGroup createShardGroup() {
+        List<Metapb.Shard> shards = new ArrayList<>();
+        for (int i = 0; i < 3; i++) {
+            shards.add(Metapb.Shard.newBuilder()
+                                   .setStoreId(i)
+                                   .setRole(i == 0 ? Metapb.ShardRole.Leader :
+                                                     Metapb.ShardRole.Follower)
+                                   .build()
+            );
+        }
+
+        return Metapb.ShardGroup.newBuilder()
+                                .setId(1)
+                                .setVersion(1)
+                                .setConfVer(1)
+                                .setState(Metapb.PartitionState.PState_Normal)
+                                .addAllShards(shards)
+                                .build();
+    }
+
+    @Before
+    public void setup() {
+        this.cache = new PartitionCache();
+    }
+
+    @Test
+    public void testGetPartitionById() {
+        var partition = createPartition(0, "graph0", 0, 65535);
+        this.cache.updateShardGroup(createShardGroup(0));
+        this.cache.updatePartition(partition);
+        var ret = this.cache.getPartitionById("graph0", 0);
+        assertNotNull(ret);
+        assertEquals(ret.getKey(), partition);
+    }
+
+    @Test
+    public void testGetPartitionByKey() throws UnsupportedEncodingException {
+        var partition = createPartition(0, "graph0", 0, 65535);
+        this.cache.updateShardGroup(createShardGroup(0));
+        this.cache.updatePartition(partition);
+        var ret = this.cache.getPartitionByKey("graph0", "0".getBytes(StandardCharsets.UTF_8));
+        assertNotNull(ret);
+        assertEquals(ret.getKey(), partition);
+    }
+
+    @Test
+    public void getPartitionByCode() {
+        var partition = createPartition(0, "graph0", 0, 1024);
+        this.cache.updateShardGroup(createShardGroup(0));
+        this.cache.updatePartition(partition);
+        var ret = this.cache.getPartitionByCode("graph0", 10);
+        assertNotNull(ret);
+        assertEquals(ret.getKey(), partition);
+        assertNull(this.cache.getPartitionByCode("graph0", 2000));
+    }
+
+    @Test
+    public void testGetPartitions() {
+        var partition1 = createPartition(0, "graph0", 0, 1024);
+        this.cache.updateShardGroup(createShardGroup(0));
+        this.cache.updatePartition(partition1);
+        assertEquals(this.cache.getPartitions("graph0").size(), 1);
+        var partition2 = createPartition(1, "graph0", 1024, 2048);
+        this.cache.updateShardGroup(createShardGroup(1));
+        this.cache.updatePartition(partition2);
+        assertEquals(this.cache.getPartitions("graph0").size(), 2);
+        System.out.println(this.cache.debugCacheByGraphName("graph0"));
+    }
+
+    @Test
+    public void testAddPartition() {
+        var partition = createPartition(0, "graph0", 0, 65535);
+        this.cache.addPartition("graph0", 0, partition);
+        var ret = this.cache.getPartitionById("graph0", 0);
+        assertNotNull(ret);
+        assertEquals(ret.getKey(), partition);
+        assertNotNull(this.cache.getPartitionByCode("graph0", 2000));
+        System.out.println(this.cache.debugCacheByGraphName("graph0"));
+        var partition2 = createPartition(0, "graph0", 0, 1024);
+        this.cache.addPartition("graph0", 0, partition2);
+        ret = this.cache.getPartitionById("graph0", 0);
+        assertNotNull(ret);
+        assertEquals(ret.getKey(), partition2);
+        assertNull(this.cache.getPartitionByCode("graph0", 2000));
+        System.out.println(this.cache.debugCacheByGraphName("graph0"));
+    }
+
+    @Test
+    public void testUpdatePartition() {
+        var partition = createPartition(0, "graph0", 0, 65535);
+        this.cache.updateShardGroup(createShardGroup(0));
+        this.cache.addPartition("graph0", 0, partition);
+        var partition2 = createPartition(0, "graph0", 0, 1024);
+        this.cache.updatePartition("graph0", 0, partition2);
+        var ret = this.cache.getPartitionById("graph0", 0);
+        assertNotNull(ret);
+        assertEquals(ret.getKey(), partition2);
+        assertNull(this.cache.getPartitionByCode("graph0", 2000));
+    }
+
+    @Test
+    public void testUpdatePartition2() {
+        var partition = createPartition(0, "graph0", 0, 1024);
+        this.cache.updateShardGroup(createShardGroup(0));
+        assertTrue(this.cache.updatePartition(partition));
+        assertFalse(this.cache.updatePartition(partition));
+        var ret = this.cache.getPartitionById("graph0", 0);
+        assertNotNull(ret);
+        assertEquals(ret.getKey(), partition);
+        assertNull(this.cache.getPartitionByCode("graph0", 2000));
+    }
+
+    @Test
+    public void testRemovePartition() {
+        var partition = createPartition(0, "graph0", 0, 1024);
+        this.cache.updateShardGroup(createShardGroup(0));
+        this.cache.updatePartition(partition);
+        assertNotNull(this.cache.getPartitionById("graph0", 0));
+        this.cache.removePartition("graph0", 0);
+        assertNull(this.cache.getPartitionById("graph0", 0));
+        System.out.println(this.cache.debugCacheByGraphName("graph0"));
+    }
+
+    @Test
+    public void testRange() {
+        var partition1 = createPartition(1, "graph0", 0, 3);
+        var partition2 = createPartition(2, "graph0", 3, 6);
+        this.cache.updatePartition(partition1);
+        this.cache.updatePartition(partition2);
+
+        var partition3 = createPartition(3, "graph0", 1, 2);
+        var partition4 = createPartition(4, "graph0", 2, 3);
+        this.cache.updatePartition(partition3);
+        this.cache.updatePartition(partition4);
+        System.out.println(this.cache.debugCacheByGraphName("graph0"));
+
+        var partition6 = createPartition(1, "graph0", 0, 1);
+        this.cache.updatePartition(partition6);
+        System.out.println(this.cache.debugCacheByGraphName("graph0"));
+
+        var partition5 = createPartition(1, "graph0", 0, 3);
+        this.cache.updatePartition(partition5);
+        System.out.println(this.cache.debugCacheByGraphName("graph0"));
+    }
+
+    @Test
+    public void testRange2() {
+        var partition1 = createPartition(1, "graph0", 0, 3);
+        var partition2 = createPartition(2, "graph0", 3, 6);
+        this.cache.updatePartition(partition1);
+        this.cache.updatePartition(partition2);
+        System.out.println(this.cache.debugCacheByGraphName("graph0"));
+
+        // leave a gap in the middle of the range
+        var partition3 = createPartition(1, "graph0", 2, 3);
+        this.cache.updatePartition(partition3);
+        System.out.println(this.cache.debugCacheByGraphName("graph0"));
+
+        var partition5 = createPartition(1, "graph0", 0, 3);
+        this.cache.updatePartition(partition5);
+        System.out.println(this.cache.debugCacheByGraphName("graph0"));
+    }
+
+    @Test
+    public void testRemovePartitions() {
+        var partition1 = createPartition(0, "graph0", 0, 1024);
+        var partition2 = createPartition(1, "graph0", 1024, 2048);
+        this.cache.updateShardGroup(createShardGroup(0));
+        this.cache.updatePartition(partition1);
+        this.cache.updateShardGroup(createShardGroup(1));
+        this.cache.updatePartition(partition2);
+        assertEquals(this.cache.getPartitions("graph0").size(), 2);
+        this.cache.removePartitions();
+        assertEquals(this.cache.getPartitions("graph0").size(), 0);
+    }
+
+    @Test
+    public void testRemoveAll() {
+        var partition1 = createPartition(0, "graph0", 0, 1024);
+        var partition2 = createPartition(1, "graph0", 1024, 2048);
+        var partition3 = createPartition(0, "graph1", 0, 2048);
+        this.cache.updateShardGroup(createShardGroup(0));
+        this.cache.updateShardGroup(createShardGroup(1));
+        this.cache.updatePartition(partition1);
+        this.cache.updatePartition(partition2);
+        this.cache.updatePartition(partition3);
+
+        assertEquals(this.cache.getPartitions("graph0").size(), 2);
+        assertEquals(this.cache.getPartitions("graph1").size(), 1);
+        this.cache.removeAll("graph0");
+        assertEquals(this.cache.getPartitions("graph0").size(), 0);
+        assertEquals(this.cache.getPartitions("graph1").size(), 1);
+    }
+
+    @Test
+    public void testUpdateShardGroup() {
+        var shardGroup = createShardGroup();
+        this.cache.updateShardGroup(shardGroup);
+        assertNotNull(this.cache.getShardGroup(shardGroup.getId()));
+    }
+
+    @Test
+    public void testGetShardGroup() {
+        var shardGroup = createShardGroup();
+        this.cache.updateShardGroup(shardGroup);
+        assertEquals(this.cache.getShardGroup(shardGroup.getId()), shardGroup);
+    }
+
+    @Test
+    public void testAddStore() {
+        var store = createStore(1);
+        this.cache.addStore(1L, store);
+        assertEquals(this.cache.getStoreById(1L), store);
+    }
+
+    @Test
+    public void testGetStoreById() {
+        var store = createStore(1);
+        this.cache.addStore(1L, store);
+        assertEquals(this.cache.getStoreById(1L), store);
+    }
+
+    @Test
+    public void testRemoveStore() {
+        var store = createStore(1);
+        this.cache.addStore(1L, store);
+        assertEquals(this.cache.getStoreById(1L), store);
+
+        this.cache.removeStore(1L);
+        assertNull(this.cache.getStoreById(1L));
+    }
+
+    @Test
+    public void testHasGraph() {
+        var partition = createPartition(0, "graph0", 0, 65535);
+        this.cache.updateShardGroup(createShardGroup(0));
+        this.cache.updatePartition(partition);
+        assertTrue(this.cache.hasGraph("graph0"));
+        assertFalse(this.cache.hasGraph("graph1"));
+    }
+
+    @Test
+    public void testUpdateGraph() {
+        var graph = createGraph("graph0", 10);
+        this.cache.updateGraph(graph);
+        assertEquals(this.cache.getGraph("graph0"), graph);
+        graph = createGraph("graph0", 12);
+        this.cache.updateGraph(graph);
+        assertEquals(this.cache.getGraph("graph0"), graph);
+    }
+
+    @Test
+    public void testGetGraph() {
+        var graph = createGraph("graph0", 12);
+        this.cache.updateGraph(graph);
+        assertEquals(this.cache.getGraph("graph0"), graph);
+    }
+
+    @Test
+    public void testGetGraphs() {
+        var graph1 = createGraph("graph0", 12);
+        var graph2 = createGraph("graph1", 12);
+        var graph3 = createGraph("graph2", 12);
+        this.cache.updateGraph(graph1);
+        this.cache.updateGraph(graph2);
+        this.cache.updateGraph(graph3);
+        assertEquals(this.cache.getGraphs().size(), 3);
+    }
+
+    @Test
+    public void testReset() {
+        var graph1 = createGraph("graph0", 12);
+        var graph2 = createGraph("graph1", 12);
+        var graph3 = createGraph("graph2", 12);
+        this.cache.updateGraph(graph1);
+        this.cache.updateGraph(graph2);
+        this.cache.updateGraph(graph3);
+        assertEquals(this.cache.getGraphs().size(), 3);
+        this.cache.reset();
+        assertEquals(this.cache.getGraphs().size(), 0);
+    }
+
+    @Test
+    public void testUpdateShardGroupLeader() {
+        var shardGroup = createShardGroup();
+        this.cache.updateShardGroup(shardGroup);
+
+        var leader =
+                Metapb.Shard.newBuilder().setStoreId(2).setRole(Metapb.ShardRole.Leader).build();
+        this.cache.updateShardGroupLeader(shardGroup.getId(), leader);
+
+        assertEquals(this.cache.getLeaderShard(shardGroup.getId()), leader);
+    }
+
+}
diff --git a/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/common/PartitionUtilsTest.java b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/common/PartitionUtilsTest.java
new file mode 100644
index 0000000000..e0742a4838
--- /dev/null
+++ b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/common/PartitionUtilsTest.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.common;
+
+import java.nio.charset.StandardCharsets;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import lombok.extern.slf4j.Slf4j;
+
+@Slf4j
+public class PartitionUtilsTest extends BaseCommonTest {
+
+    @Test
+    public void testCalcHashcode() {
+        byte[] key = new byte[5];
+        long code = PartitionUtils.calcHashcode(key);
+        Assert.assertEquals(code, 31912L);
+    }
+
+    // @Test
+    public void testHashCode() {
+        int partCount = 10;
+        int partSize = PartitionUtils.MAX_VALUE / partCount + 1;
+        int[] counter = new int[partCount];
+        for (int i = 0; i < 10000; i++) {
+            String s = String.format("BATCH-GET-UNIT-%02d", i);
+            int c = PartitionUtils.calcHashcode(s.getBytes(StandardCharsets.UTF_8));
+
+            counter[c / partSize]++;
+
+        }
+
+        for (int i = 0; i < counter.length; i++) {
+            System.out.println(i + " " + counter[i]);
+        }
+    }
+}
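The disabled distribution check above divides the hash space into equal buckets, and the same arithmetic maps a hash code to a partition id when ranges are allocated uniformly. A worked sketch of that bucket math:

    import org.apache.hugegraph.pd.common.PartitionUtils;

    public class BucketExample {

        // With partCount uniform partitions over [0, MAX_VALUE], bucket i
        // covers [i * partSize, (i + 1) * partSize).
        public static int bucketOf(byte[] key, int partCount) {
            int partSize = PartitionUtils.MAX_VALUE / partCount + 1;
            return PartitionUtils.calcHashcode(key) / partSize;
        }
    }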
diff --git a/hugegraph-pd/hg-pd-test/src/main/resources/log4j2.xml b/hugegraph-pd/hg-pd-test/src/main/resources/log4j2.xml
new file mode 100644
index 0000000000..e462bf16e9
--- /dev/null
+++ b/hugegraph-pd/hg-pd-test/src/main/resources/log4j2.xml
@@ -0,0 +1,139 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Configuration>
+    <Properties>
+        <Property name="log-path">logs</Property>
+        <Property name="file-name">hg-pd-test</Property>
+    </Properties>
+    <Appenders>
+        <Console name="console" target="SYSTEM_OUT">
+            <PatternLayout pattern="%d{yyyy-MM-dd HH:mm:ss} [%t] [%p] %c{1.} - %m%n"/>
+        </Console>
+        <RollingRandomAccessFile name="file" fileName="${log-path}/${file-name}.log"
+                                 filePattern="${log-path}/${file-name}-%d{yyyy-MM-dd}-%i.log">
+            <PatternLayout pattern="%d{yyyy-MM-dd HH:mm:ss} [%t] [%p] %c{1.} - %m%n"/>
+            <Policies>
+                <TimeBasedTriggeringPolicy interval="1" modulate="true"/>
+                <SizeBasedTriggeringPolicy size="128MB"/>
+            </Policies>
+        </RollingRandomAccessFile>
+    </Appenders>
+    <Loggers>
+        <Root level="INFO">
+            <AppenderRef ref="console"/>
+            <AppenderRef ref="file"/>
+        </Root>
+    </Loggers>
+</Configuration>
diff --git a/hugegraph-pd/pom.xml b/hugegraph-pd/pom.xml
new file mode 100644
index 0000000000..6253cfd443
--- /dev/null
+++ b/hugegraph-pd/pom.xml
@@ -0,0 +1,184 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements. See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License. You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>hugegraph-pd</artifactId>
+    <version>${revision}</version>
+    <packaging>pom</packaging>
+
+    <parent>
+        <groupId>org.apache.hugegraph</groupId>
+        <artifactId>hugegraph</artifactId>
+        <version>${revision}</version>
+        <relativePath>../pom.xml</relativePath>
+    </parent>
+
+    <modules>
+        <module>hg-pd-grpc</module>
+        <module>hg-pd-common</module>
+        <module>hg-pd-client</module>
+        <module>hg-pd-test</module>
+    </modules>
+
+    <properties>
+        <maven.compiler.source>11</maven.compiler.source>
+        <maven.compiler.target>11</maven.compiler.target>
+        <log4j2.version>2.17.0</log4j2.version>
+    </properties>
+
+    <dependencyManagement>
+        <dependencies>
+            <dependency>
+                <groupId>org.apache.logging.log4j</groupId>
+                <artifactId>log4j-slf4j-impl</artifactId>
+                <version>2.17.0</version>
+            </dependency>
+            <dependency>
+                <groupId>org.apache.hugegraph</groupId>
+                <artifactId>hg-pd-grpc</artifactId>
+                <version>${revision}</version>
+            </dependency>
+            <dependency>
+                <groupId>org.apache.hugegraph</groupId>
+                <artifactId>hg-pd-common</artifactId>
+                <version>${revision}</version>
+            </dependency>
+        </dependencies>
+    </dependencyManagement>
+
+    <dependencies>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.13.2</version>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.jacoco</groupId>
+                <artifactId>jacoco-maven-plugin</artifactId>
+                <version>0.8.4</version>
+                <configuration>
+                    <excludes>
+                        <exclude>**/grpc/**.*</exclude>
+                        <exclude>**/config/**.*</exclude>
+                    </excludes>
+                </configuration>
+                <executions>
+                    <execution>
+                        <goals>
+                            <goal>prepare-agent</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>flatten-maven-plugin</artifactId>
+                <version>1.2.7</version>
+                <configuration>
+                    <updatePomFile>true</updatePomFile>
+                    <flattenMode>resolveCiFriendliesOnly</flattenMode>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>flatten</id>
+                        <phase>process-resources</phase>
+                        <goals>
+                            <goal>flatten</goal>
+                        </goals>
+                    </execution>
+                    <execution>
+                        <id>flatten.clean</id>
+                        <phase>clean</phase>
+                        <goals>
+                            <goal>clean</goal>
+                        </goals>
+                    </execution>
+                    <execution>
+                        <id>remove-flattened-pom</id>
+                        <phase>install</phase>
+                        <goals>
+                            <goal>clean</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-clean-plugin</artifactId>
+                <configuration>
+                    <filesets>
+                        <fileset>
+                            <directory>${project.basedir}/</directory>
+                            <includes>
+                                <include>*.tar</include>
+                                <include>*.tar.gz</include>
+                                <include>.flattened-pom.xml</include>
+                                <include>dist/**</include>
+                            </includes>
+                            <followSymlinks>false</followSymlinks>
+                        </fileset>
+                    </filesets>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+
+    <profiles>
+        <profile>
+            <id>pd-common-test</id>
+            <activation>
+                <activeByDefault>true</activeByDefault>
+            </activation>
+            <build>
+                <plugins>
+                    <plugin>
+                        <groupId>org.apache.maven.plugins</groupId>
+                        <artifactId>maven-surefire-plugin</artifactId>
+                        <version>2.20</version>
+                        <executions>
+                            <execution>
+                                <id>pd-common-test</id>
+                                <phase>test</phase>
+                                <goals>
+                                    <goal>test</goal>
+                                </goals>
+                            </execution>
+                        </executions>
+                    </plugin>
+                </plugins>
+            </build>
+        </profile>
+    </profiles>
+</project>
diff --git a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/version/CoreVersion.java b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/version/CoreVersion.java
index 84dc28dedc..d878ab6ea3 100644
--- a/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/version/CoreVersion.java
+++ b/hugegraph-server/hugegraph-core/src/main/java/org/apache/hugegraph/version/CoreVersion.java
@@ -23,7 +23,7 @@ public class CoreVersion {
 
     public static final String NAME = "hugegraph-core";
 
-    public static final String DEFAULT_VERSION = "1.2.0";
+    public static final String DEFAULT_VERSION = "1.5.0.1";
 
     /**
      * The second parameter of Version.of() is for IDE running without JAR
      */
diff --git a/pom.xml b/pom.xml
index 3de7393dba..1706c79ef3 100644
--- a/pom.xml
+++ b/pom.xml
@@ -84,18 +84,14 @@
         scm:git:https://github.com/apache/hugegraph.git
 
-        <maven>3.5.0</maven>
-
-
-        <revision>1.2.0</revision>
+        <revision>1.5.0.1</revision>
 
         <module>hugegraph-server</module>
-
-
+        <module>hugegraph-pd</module>
+
@@ -180,6 +176,8 @@
                         <exclude>**/hbase-*/**</exclude>
                         <exclude>**/apache-cassandra-*/**</exclude>
                         <exclude>**/pid</exclude>
+
+                        <exclude>**/src/main/java/org/apache/hugegraph/pd/grpc/**</exclude>
                     </excludes>
                     <consoleOutput>true</consoleOutput>
@@ -290,7 +288,7 @@
-
+
         <profile>
             <id>stage</id>
@@ -300,5 +298,18 @@
             </properties>
         </profile>
+
+        <profile>
+            <id>arm-mac</id>
+            <activation>
+                <os>
+                    <family>mac</family>
+                    <arch>aarch64</arch>
+                </os>
+            </activation>
+            <properties>
+                <os.detected.classifier>osx-x86_64</os.detected.classifier>
+            </properties>
+        </profile>
     </profiles>
 </project>

From ffe562599516ca8dbc74a2edba5fec616c591cce Mon Sep 17 00:00:00 2001
From: VGalaxies
Date: Thu, 14 Mar 2024 13:32:13 +0800
Subject: [PATCH 2/7] git add hugegraph-pd/hg-pd-core/

---
 hugegraph-pd/hg-pd-core/pom.xml                 |   88 +
 .../apache/hugegraph/pd/ConfigService.java      |  138 ++
 .../org/apache/hugegraph/pd/IdService.java      |   82 +
 .../org/apache/hugegraph/pd/KvService.java      |  317 ++++
 .../org/apache/hugegraph/pd/LogService.java     |   67 +
 .../pd/PartitionInstructionListener.java        |   52 +
 .../apache/hugegraph/pd/PartitionService.java   | 1563 +++++++++++++++++
 .../hugegraph/pd/PartitionStatusListener.java   |   29 +
 .../apache/hugegraph/pd/RegistryService.java    |   44 +
 .../pd/ShardGroupStatusListener.java            |   26 +
 .../hugegraph/pd/StoreMonitorDataService.java   |  266 +++
 .../apache/hugegraph/pd/StoreNodeService.java   | 1074 +++++++++++
 .../hugegraph/pd/StoreStatusListener.java       |   31 +
 .../hugegraph/pd/TaskScheduleService.java       |  853 +++++++++
 .../apache/hugegraph/pd/config/PDConfig.java    |  277 +++
 .../hugegraph/pd/meta/ConfigMetaStore.java      |   73 +
 .../hugegraph/pd/meta/DiscoveryMetaStore.java   |  105 ++
 .../apache/hugegraph/pd/meta/IdMetaStore.java   |  255 +++
 .../org/apache/hugegraph/pd/meta/LogMeta.java   |   48 +
 .../hugegraph/pd/meta/MetadataFactory.java      |   87 +
 .../hugegraph/pd/meta/MetadataKeyHelper.java    |  378 ++++
 .../pd/meta/MetadataRocksDBStore.java           |  184 ++
 .../hugegraph/pd/meta/MetadataStoreBase.java    |  124 ++
 .../hugegraph/pd/meta/PartitionMeta.java        |  295 ++++
 .../apache/hugegraph/pd/meta/QueueStore.java    |   56 +
 .../hugegraph/pd/meta/StoreInfoMeta.java        |  206 +++
 .../hugegraph/pd/meta/TaskInfoMeta.java         |  132 ++
 .../pd/raft/FutureClosureAdapter.java           |   48 +
 .../apache/hugegraph/pd/raft/KVOperation.java   |  161 ++
 .../hugegraph/pd/raft/KVStoreClosure.java       |   33 +
 .../apache/hugegraph/pd/raft/RaftEngine.java    |  377 ++++
 .../hugegraph/pd/raft/RaftRpcClient.java        |   87 +
 .../hugegraph/pd/raft/RaftRpcProcessor.java     |  127 ++
 .../hugegraph/pd/raft/RaftStateListener.java    |   22 +
 .../hugegraph/pd/raft/RaftStateMachine.java     |  330 ++++
 .../hugegraph/pd/raft/RaftTaskHandler.java      |   27 +
 .../apache/hugegraph/pd/raft/ZipUtils.java      |   93 +
 .../pd/store/BaseKVStoreClosure.java            |   48 +
 .../apache/hugegraph/pd/store/HgKVStore.java    |   58 +
 .../hugegraph/pd/store/HgKVStoreImpl.java       |  343 ++++
 .../org/apache/hugegraph/pd/store/KV.java       |   45 +
 .../hugegraph/pd/store/RaftKVStore.java         |  324 ++++
 42 files changed, 8973 insertions(+)
 create mode 100644 hugegraph-pd/hg-pd-core/pom.xml
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/ConfigService.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/IdService.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/KvService.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/LogService.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/PartitionInstructionListener.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/PartitionService.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/PartitionStatusListener.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/RegistryService.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/ShardGroupStatusListener.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/StoreMonitorDataService.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/StoreNodeService.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/StoreStatusListener.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/TaskScheduleService.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/config/PDConfig.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/ConfigMetaStore.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/DiscoveryMetaStore.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/IdMetaStore.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/LogMeta.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataFactory.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataKeyHelper.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataRocksDBStore.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataStoreBase.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/PartitionMeta.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/QueueStore.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/StoreInfoMeta.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/TaskInfoMeta.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/FutureClosureAdapter.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/KVOperation.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/KVStoreClosure.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftEngine.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftRpcClient.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftRpcProcessor.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftStateListener.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftStateMachine.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftTaskHandler.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/ZipUtils.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/BaseKVStoreClosure.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/HgKVStore.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/HgKVStoreImpl.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/KV.java
 create mode 100644 hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/RaftKVStore.java

diff --git a/hugegraph-pd/hg-pd-core/pom.xml b/hugegraph-pd/hg-pd-core/pom.xml
new file mode 100644
index 0000000000..e59b5ac35e
--- /dev/null
+++ b/hugegraph-pd/hg-pd-core/pom.xml
@@ -0,0 +1,88 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more contributor
+  license agreements; see the NOTICE file. Licensed under the Apache License 2.0:
+  http://www.apache.org/licenses/LICENSE-2.0
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <groupId>org.apache.hugegraph</groupId>
+        <artifactId>hugegraph-pd</artifactId>
+        <version>${revision}</version>
+        <relativePath>../pom.xml</relativePath>
+    </parent>
+
+    <artifactId>hg-pd-core</artifactId>
+
+    0.5.10
+
+    <dependencies>
+        <dependency>
+            <groupId>com.alipay.sofa</groupId>
+            <artifactId>jraft-core</artifactId>
+            <version>1.3.13</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>org.rocksdb</groupId>
+                    <artifactId>rocksdbjni</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+        <dependency>
+            <groupId>org.rocksdb</groupId>
+            <artifactId>rocksdbjni</artifactId>
+            <version>6.29.5</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hugegraph</groupId>
+            <artifactId>hg-pd-grpc</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.springframework</groupId>
+            <artifactId>spring-context</artifactId>
+            <version>5.3.20</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hugegraph</groupId>
+            <artifactId>hg-pd-common</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot</artifactId>
+            <version>2.5.14</version>
+        </dependency>
+        <dependency>
+            <groupId>org.projectlombok</groupId>
+            <artifactId>lombok</artifactId>
+            <version>1.18.24</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-lang3</artifactId>
+            <version>3.12.0</version>
+        </dependency>
+        <dependency>
+            <groupId>com.google.code.gson</groupId>
+            <artifactId>gson</artifactId>
+            <version>2.8.9</version>
+        </dependency>
+    </dependencies>
+</project>
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/ConfigService.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/ConfigService.java
new file mode 100644
index 0000000000..2557745c88
--- /dev/null
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/ConfigService.java
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd;
+
+import java.util.List;
+
+import org.apache.hugegraph.pd.common.PDException;
+import org.apache.hugegraph.pd.config.PDConfig;
+import org.apache.hugegraph.pd.grpc.Metapb;
+import org.apache.hugegraph.pd.meta.ConfigMetaStore;
+import org.apache.hugegraph.pd.meta.MetadataFactory;
+import org.apache.hugegraph.pd.raft.RaftEngine;
+import org.apache.hugegraph.pd.raft.RaftStateListener;
+
+import lombok.extern.slf4j.Slf4j;
+
+@Slf4j
+public class ConfigService implements RaftStateListener {
+
+    private final ConfigMetaStore meta;
+    private PDConfig pdConfig;
+
+    public ConfigService(PDConfig config) {
+        this.pdConfig = config;
+        config.setConfigService(this);
+        meta = MetadataFactory.newConfigMeta(config);
+    }
+
+    public Metapb.PDConfig getPDConfig(long version) throws PDException {
+        return this.meta.getPdConfig(version);
+    }
+
+    public Metapb.PDConfig getPDConfig() throws PDException {
+        return this.meta.getPdConfig(0);
+    }
+
+    public Metapb.PDConfig setPDConfig(Metapb.PDConfig mConfig) throws PDException {
+        Metapb.PDConfig oldCfg = getPDConfig();
+        Metapb.PDConfig.Builder builder = oldCfg.toBuilder().mergeFrom(mConfig)
+                                                .setVersion(oldCfg.getVersion() + 1)
+                                                .setTimestamp(System.currentTimeMillis());
+        mConfig = this.meta.setPdConfig(builder.build());
+        log.info("PDConfig has been modified, new PDConfig is {}", mConfig);
+        updatePDConfig(mConfig);
+        return mConfig;
+    }
+
+    public List<Metapb.GraphSpace> getGraphSpace(String graphSpaceName) throws PDException {
+        return this.meta.getGraphSpace(graphSpaceName);
+    }
+
+    public Metapb.GraphSpace setGraphSpace(Metapb.GraphSpace graphSpace) throws PDException {
+        return this.meta.setGraphSpace(graphSpace.toBuilder()
+                                                 .setTimestamp(System.currentTimeMillis())
+                                                 .build());
+    }
+
+    /**
+     * Read the config items from storage and overwrite the global PDConfig object.
+     *
+     * @return the effective PDConfig
+     */
+    public PDConfig loadConfig() {
+        try {
+            Metapb.PDConfig mConfig = this.meta.getPdConfig(0);
+            if (mConfig == null) {
+                mConfig = Metapb.PDConfig.newBuilder()
+                                         .setPartitionCount(pdConfig.getInitialPartitionCount())
+                                         .setShardCount(pdConfig.getPartition().getShardCount())
+                                         .setVersion(1)
+                                         .setTimestamp(System.currentTimeMillis())
+                                         .setMaxShardsPerStore(
+                                                 pdConfig.getPartition().getMaxShardsPerStore())
+                                         .build();
+            }
+            if (RaftEngine.getInstance().isLeader()) {
+                this.meta.setPdConfig(mConfig);
+            }
+            pdConfig = updatePDConfig(mConfig);
+        } catch (Exception e) {
+            log.error("ConfigService loadConfig exception", e);
+        }
+        return pdConfig;
+    }
+
+    public synchronized PDConfig updatePDConfig(Metapb.PDConfig mConfig) {
+        log.info("update pd config: mConfig:{}", mConfig);
+        pdConfig.getPartition().setShardCount(mConfig.getShardCount());
+        pdConfig.getPartition().setTotalCount(mConfig.getPartitionCount());
+        pdConfig.getPartition().setMaxShardsPerStore(mConfig.getMaxShardsPerStore());
+        return pdConfig;
+    }
+
+    public synchronized PDConfig setPartitionCount(int count) {
+        Metapb.PDConfig mConfig = null;
+        try {
+            mConfig = getPDConfig();
+            mConfig = mConfig.toBuilder().setPartitionCount(count).build();
+            setPDConfig(mConfig);
+        } catch (PDException e) {
+            log.error("ConfigService setPartitionCount exception", e);
+        }
+        return pdConfig;
+    }
+
+    /**
+     * The partition count recorded in the meta store.
+     * It may be affected by partition splits/merges, so the original partition
+     * count from the config file is not a reliable source.
+     *
+     * @return partition count of the cluster
+     * @throws PDException when an IO error occurs
+     */
+    public int getPartitionCount() throws PDException {
+        return getPDConfig().getPartitionCount();
+    }
+
+    @Override
+    public void onRaftLeaderChanged() {
+        loadConfig();
+    }
+}
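For illustration, a minimal usage sketch of the service above (the `pdConfig` instance is assumed to be an already-populated `PDConfig`; error handling elided):

    ConfigService configService = new ConfigService(pdConfig);

    // Load the persisted cluster config, falling back to the local PDConfig
    // defaults on first start (persisting them if this node is the raft leader)
    PDConfig effective = configService.loadConfig();

    // Bump the partition count: setPDConfig() merges the change, increments
    // the stored version and re-applies it through updatePDConfig()
    configService.setPartitionCount(12);
    int persisted = configService.getPartitionCount();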
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/IdService.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/IdService.java
new file mode 100644
index 0000000000..0c854d06db
--- /dev/null
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/IdService.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd;
+
+import org.apache.hugegraph.pd.common.PDException;
+import org.apache.hugegraph.pd.config.PDConfig;
+import org.apache.hugegraph.pd.meta.IdMetaStore;
+import org.apache.hugegraph.pd.meta.MetadataFactory;
+
+public class IdService {
+
+    private final IdMetaStore meta;
+    private PDConfig pdConfig;
+
+    public IdService(PDConfig config) {
+        this.pdConfig = config;
+        meta = MetadataFactory.newHugeServerMeta(config);
+    }
+
+    public PDConfig getPdConfig() {
+        return pdConfig;
+    }
+
+    public void setPdConfig(PDConfig pdConfig) {
+        this.pdConfig = pdConfig;
+    }
+
+    public long getId(String key, int delta) throws PDException {
+        return meta.getId(key, delta);
+    }
+
+    public void resetId(String key) throws PDException {
+        meta.resetId(key);
+    }
+
+    /**
+     * Get a unique auto-increment cyclic id; after the upper limit is reached
+     * it wraps around to 0. Cids that are still in use are skipped automatically.
+     */
+    public long getCId(String key, long max) throws PDException {
+        return meta.getCId(key, max);
+    }
+
+    public long getCId(String key, String name, long max) throws PDException {
+        return meta.getCId(key, name, max);
+    }
+
+    /**
+     * Delete a cyclic id.
+     */
+    public long delCId(String key, long cid) throws PDException {
+        return meta.delCId(key, cid);
+    }
+
+    public long delCIdDelay(String key, String name, long cid) throws PDException {
+        return meta.delCIdDelay(key, name, cid);
+    }
+}
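A short sketch of the id APIs above (key names are hypothetical, and reading `getId` as "advance the sequence by `delta` and return the previous value" is an assumption about `IdMetaStore`, which is not shown here):

    IdService idService = new IdService(pdConfig);

    // plain auto-increment sequence
    long next = idService.getId("partition-id", 1);

    // cyclic id in [0, 64): wraps to 0 at the limit and skips cids in use
    long cid = idService.getCId("store-cid", 64);
    idService.delCId("store-cid", cid);   // release it when the owner goes away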
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/KvService.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/KvService.java
new file mode 100644
index 0000000000..e85cfcb1eb
--- /dev/null
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/KvService.java
@@ -0,0 +1,317 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd;
+
+import java.nio.charset.Charset;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hugegraph.pd.common.PDException;
+import org.apache.hugegraph.pd.config.PDConfig;
+import org.apache.hugegraph.pd.grpc.kv.Kv;
+import org.apache.hugegraph.pd.grpc.kv.V;
+import org.apache.hugegraph.pd.meta.MetadataKeyHelper;
+import org.apache.hugegraph.pd.meta.MetadataRocksDBStore;
+import org.apache.hugegraph.pd.store.KV;
+import org.springframework.stereotype.Service;
+
+import com.google.protobuf.InvalidProtocolBufferException;
+
+import lombok.extern.slf4j.Slf4j;
+
+@Slf4j
+@Service
+public class KvService {
+
+    public static final char KV_DELIMITER = '@';
+    // TODO: append the class name after the main prefix to separate key spaces
+    private static final String TTL_PREFIX = "T";
+    private static final String KV_PREFIX = "K";
+    private static final String LOCK_PREFIX = "L";
+    private static final String KV_PREFIX_DELIMITER = KV_PREFIX + KV_DELIMITER;
+    private static final byte[] EMPTY_VALUE = new byte[0];
+    private final MetadataRocksDBStore meta;
+    private PDConfig pdConfig;
+
+    public KvService(PDConfig config) {
+        this.pdConfig = config;
+        meta = new MetadataRocksDBStore(config);
+    }
+
+    public static String getKey(Object... keys) {
+        StringBuilder builder = MetadataKeyHelper.getStringBuilderHelper();
+        builder.append(KV_PREFIX).append(KV_DELIMITER);
+        for (Object key : keys) {
+            builder.append(key == null ? "" : key).append(KV_DELIMITER);
+        }
+        return builder.substring(0, builder.length() - 1);
+    }
+
+    public static byte[] getKeyBytes(Object... keys) {
+        String key = getKey(keys);
+        return key.getBytes(Charset.defaultCharset());
+    }
+
+    public static String getKeyWithoutPrefix(Object... keys) {
+        StringBuilder builder = MetadataKeyHelper.getStringBuilderHelper();
+        for (Object key : keys) {
+            builder.append(key == null ? "" : key).append(KV_DELIMITER);
+        }
+        return builder.substring(0, builder.length() - 1);
+    }
+
+    public static String getDelimiter() {
+        return String.valueOf(KV_DELIMITER);
+    }
+
+    public PDConfig getPdConfig() {
+        return pdConfig;
+    }
+
+    public void setPdConfig(PDConfig pdConfig) {
+        this.pdConfig = pdConfig;
+    }
+
+    public void put(String key, String value) throws PDException {
+        V storeValue = V.newBuilder().setValue(value).setTtl(0).build();
+        meta.put(getStoreKey(key), storeValue.toByteArray());
+        // log.warn("add key with key-{}:value-{}", key, value);
+    }
+
+    public void put(String key, String value, long ttl) throws PDException {
+        long curTime = System.currentTimeMillis();
+        curTime += ttl;
+        V storeValue = V.newBuilder().setValue(value).setSt(ttl).setTtl(curTime).build();
+        meta.put(getStoreKey(key), storeValue.toByteArray());
+        meta.put(getTTLStoreKey(key, curTime), EMPTY_VALUE);
+        // log.warn("add key with key-{}:value-{}:ttl-{}", key, value, ttl);
+    }
+
+    public String get(String key) throws PDException {
+        byte[] storeKey = getStoreKey(key);
+        return get(storeKey);
+    }
+
+    public String get(byte[] keyBytes) throws PDException {
+        byte[] bytes = meta.getOne(keyBytes);
+        return getValue(keyBytes, bytes);
+    }
+
+    private String getValue(byte[] keyBytes, byte[] valueBytes) throws PDException {
+        if (valueBytes == null || valueBytes.length == 0) {
+            return "";
+        }
+        try {
+            V v = V.parseFrom(valueBytes);
+            if (v.getTtl() == 0 || v.getTtl() >= System.currentTimeMillis()) {
+                return v.getValue();
+            } else {
+                meta.remove(keyBytes);
+                // strip the "K@" prefix so the TTL index key matches the one
+                // written by put(key, value, ttl)
+                String rawKey = new String(keyBytes).replaceFirst(KV_PREFIX_DELIMITER, "");
+                meta.remove(getTTLStoreKey(rawKey, v.getTtl()));
+            }
+        } catch (Exception e) {
+            log.error("parse value with error:{}", e.getMessage());
+            throw new PDException(-1, e.getMessage());
+        }
+        return null;
+    }
+
+    public boolean keepAlive(String key) throws PDException {
+        byte[] bytes = meta.getOne(getStoreKey(key));
+        try {
+            if (bytes == null || bytes.length == 0) {
+                return false;
+            }
+            V v = V.parseFrom(bytes);
+            if (v != null) {
+                long ttl = v.getTtl();
+                long st = v.getSt();
+                meta.remove(getTTLStoreKey(key, ttl));
+                put(key, v.getValue(), st);
+                return true;
+            } else {
+                return false;
+            }
+        } catch (InvalidProtocolBufferException e) {
+            throw new PDException(-1, e.getMessage());
+        }
+    }
+
+    public Kv delete(String key) throws PDException {
+        byte[] storeKey = getStoreKey(key);
+        String value = this.get(storeKey);
+        meta.remove(storeKey);
+        Kv.Builder builder = Kv.newBuilder().setKey(key);
+        if (value != null) {
+            builder.setValue(value);
+        }
+        Kv kv = builder.build();
+        // log.warn("delete kv with key :{}", key);
+        return kv;
+    }
+
+    public List<Kv> deleteWithPrefix(String key) throws PDException {
+        byte[] storeKey = getStoreKey(key);
+        // TODO: too many rows for a single scan
+        List<KV> kvList = meta.scanPrefix(storeKey);
+        LinkedList<Kv> kvs = new LinkedList<>();
+        for (KV kv : kvList) {
+            String kvKey = new String(kv.getKey()).replaceFirst(KV_PREFIX_DELIMITER, "");
+            String kvValue = getValue(kv.getKey(), kv.getValue());
+            if (kvValue != null) {
+                kvs.add(Kv.newBuilder().setKey(kvKey).setValue(kvValue).build());
+            }
+        }
+        meta.removeByPrefix(storeKey);
+        // log.warn("delete kv with key prefix :{}", key);
+        return kvs;
+    }
+
+    /**
+     * Scan the entries whose keys range from keyStart to keyEnd.
+     *
+     * @return the matched records
+     */
+    public Map<String, String> scanRange(String keyStart, String keyEnd) throws PDException {
+        List<KV> list = meta.scanRange(getStoreKey(keyStart), getStoreKey(keyEnd));
+        Map<String, String> map = new HashMap<>();
+        for (KV kv : list) {
+            String kvKey = new String(kv.getKey()).replaceFirst(KV_PREFIX_DELIMITER, "");
+            String kvValue = getValue(kv.getKey(), kv.getValue());
+            if (kvValue != null) {
+                map.put(kvKey, kvValue);
+            }
+        }
+        return map;
+    }
+
+    public Map<String, String> scanWithPrefix(String key) throws PDException {
+        List<KV> kvList = meta.scanPrefix(getStoreKey(key));
+        HashMap<String, String> map = new HashMap<>();
+        for (KV kv : kvList) {
+            String kvKey = new String(kv.getKey()).replaceFirst(KV_PREFIX_DELIMITER, "");
+            String kvValue = getValue(kv.getKey(), kv.getValue());
+            if (kvValue != null) {
+                map.put(kvKey, kvValue);
+            }
+        }
+        return map;
+    }
+
+    public boolean locked(String key) throws PDException {
+        String lockKey = KvService.getKeyWithoutPrefix(KvService.LOCK_PREFIX, key);
+        Map<String, String> allLock = scanWithPrefix(lockKey);
+        return allLock != null && allLock.size() != 0;
+    }
+
+    private boolean owned(String key, long clientId) throws PDException {
+        String lockKey = KvService.getKeyWithoutPrefix(KvService.LOCK_PREFIX, key);
+        Map<String, String> allLock = scanWithPrefix(lockKey);
+        if (allLock.size() == 0) {
+            return true;
+        }
+        for (Map.Entry<String, String> entry : allLock.entrySet()) {
+            String entryKey = entry.getKey();
+            String[] split = entryKey.split(String.valueOf(KV_DELIMITER));
+            if (Long.valueOf(split[split.length - 1]).equals(clientId)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    public boolean lock(String key, long ttl, long clientId) throws PDException {
+        // TODO: improve the locking granularity
+        synchronized (KvService.class) {
+            if (!owned(key, clientId)) {
+                return false;
+            }
+            put(getLockKey(key, clientId), " ", ttl);
+            return true;
+        }
+    }
+
+    public boolean lockWithoutReentrant(String key, long ttl,
+                                        long clientId) throws PDException {
+        synchronized (KvService.class) {
+            if (locked(key)) {
+                return false;
+            }
+            put(getLockKey(key, clientId), " ", ttl);
+            return true;
+        }
+    }
+
+    public boolean unlock(String key, long clientId) throws PDException {
+        synchronized (KvService.class) {
+            if (!owned(key, clientId)) {
+                return false;
+            }
+            delete(getLockKey(key, clientId));
+            return true;
+        }
+    }
+
+    public boolean keepAlive(String key, long clientId) throws PDException {
+        String lockKey = getLockKey(key, clientId);
+        return keepAlive(lockKey);
+    }
+
+    public String getLockKey(String key, long clientId) {
+        return getKeyWithoutPrefix(LOCK_PREFIX, key, clientId);
+    }
+
+    public byte[] getStoreKey(String key) {
+        return getKeyBytes(key);
+    }
+
+    public byte[] getTTLStoreKey(String key, long time) {
+        return getKeyBytes(TTL_PREFIX, time, key);
+    }
+
+    public void clearTTLData() {
+        try {
+            byte[] ttlStartKey = getTTLStoreKey("", 0);
+            byte[] ttlEndKey = getTTLStoreKey("", System.currentTimeMillis());
+            List<KV> kvList = meta.scanRange(ttlStartKey, ttlEndKey);
+            for (KV kv : kvList) {
+                String key = new String(kv.getKey());
+                // skip the "K@T@" prefix (4 chars) and cut after the expiry
+                // timestamp to recover the original key
+                int index = key.indexOf(KV_DELIMITER, 4);
+                String delKey = key.substring(index + 1);
+                delete(delKey);
+                meta.remove(kv.getKey());
+            }
+        } catch (Exception e) {
+            log.error("clear ttl data with error :", e);
+        }
+    }
+
+    public MetadataRocksDBStore getMeta() {
+        return meta;
+    }
+}
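To make the key layout above concrete, a sketch of how the prefixes compose and how the TTL-backed locks are meant to be used (key names and the client id are hypothetical):

    KvService kv = new KvService(pdConfig);

    // plain entries live under "K@": getKey("a", "b") builds the key "K@a@b"
    kv.put("graph/config", "v1");           // no TTL
    kv.put("session/1", "alive", 10_000);   // expires in 10s; a second index
                                            // entry keyed by the expiry time is
                                            // written for clearTTLData()

    // a lock is a TTL entry whose logical key is "L@<key>@<clientId>"
    // (stored under the "K@" prefix like any other entry); re-entrant per client
    long clientId = 99L;
    if (kv.lock("task/split", 10_000, clientId)) {
        kv.keepAlive("task/split", clientId);   // renew before the TTL runs out
        kv.unlock("task/split", clientId);
    }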
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/LogService.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/LogService.java
new file mode 100644
index 0000000000..35959849bc
--- /dev/null
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/LogService.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd;
+
+import java.util.List;
+
+import org.apache.hugegraph.pd.common.PDException;
+import org.apache.hugegraph.pd.config.PDConfig;
+import org.apache.hugegraph.pd.grpc.Metapb;
+import org.apache.hugegraph.pd.meta.LogMeta;
+import org.apache.hugegraph.pd.meta.MetadataFactory;
+import org.springframework.stereotype.Service;
+
+import com.google.protobuf.Any;
+import com.google.protobuf.GeneratedMessageV3;
+
+import lombok.extern.slf4j.Slf4j;
+
+@Slf4j
+@Service
+public class LogService {
+
+    public static final String GRPC = "GRPC";
+    public static final String REST = "REST";
+    public static final String TASK = "TASK";
+    public static final String NODE_CHANGE = "NODE_CHANGE";
+    public static final String PARTITION_CHANGE = "PARTITION_CHANGE";
+    private final LogMeta logMeta;
+
+    public LogService(PDConfig pdConfig) {
+        logMeta = MetadataFactory.newLogMeta(pdConfig);
+    }
+
+    public List<Metapb.LogRecord> getLog(String action, Long start, Long end) throws PDException {
+        return logMeta.getLog(action, start, end);
+    }
+
+    public void insertLog(String action, String message, GeneratedMessageV3 target) {
+        try {
+            Metapb.LogRecord logRecord = Metapb.LogRecord.newBuilder()
+                                                         .setAction(action)
+                                                         .setMessage(message)
+                                                         .setTimestamp(System.currentTimeMillis())
+                                                         .setObject(Any.pack(target))
+                                                         .build();
+            logMeta.insertLog(logRecord);
+        } catch (PDException e) {
+            log.debug("Insert log with error:", e);
+        }
+    }
+}
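A brief sketch of recording and reading an audit entry with the service above (the `graph` message is any protobuf `GeneratedMessageV3`, here assumed to be a `Metapb.Graph`):

    LogService logService = new LogService(pdConfig);

    // the target message is packed into a com.google.protobuf.Any on the record
    logService.insertLog(LogService.PARTITION_CHANGE, "partition 1 moved", graph);

    List<Metapb.LogRecord> records = logService.getLog(
            LogService.PARTITION_CHANGE, 0L, System.currentTimeMillis());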
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/PartitionInstructionListener.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/PartitionInstructionListener.java
new file mode 100644
index 0000000000..2188f6ca78
--- /dev/null
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/PartitionInstructionListener.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd;
+
+import org.apache.hugegraph.pd.common.PDException;
+import org.apache.hugegraph.pd.grpc.Metapb;
+import org.apache.hugegraph.pd.grpc.pulse.ChangeShard;
+import org.apache.hugegraph.pd.grpc.pulse.CleanPartition;
+import org.apache.hugegraph.pd.grpc.pulse.DbCompaction;
+import org.apache.hugegraph.pd.grpc.pulse.MovePartition;
+import org.apache.hugegraph.pd.grpc.pulse.PartitionKeyRange;
+import org.apache.hugegraph.pd.grpc.pulse.SplitPartition;
+import org.apache.hugegraph.pd.grpc.pulse.TransferLeader;
+
+/**
+ * Listener for partition instructions (the commands PD pushes to the stores).
+ */
+public interface PartitionInstructionListener {
+
+    void changeShard(Metapb.Partition partition, ChangeShard changeShard) throws PDException;
+
+    void transferLeader(Metapb.Partition partition, TransferLeader transferLeader) throws
+                                                                                   PDException;
+
+    void splitPartition(Metapb.Partition partition, SplitPartition splitPartition) throws
+                                                                                   PDException;
+
+    void dbCompaction(Metapb.Partition partition, DbCompaction dbCompaction) throws PDException;
+
+    void movePartition(Metapb.Partition partition, MovePartition movePartition) throws PDException;
+
+    void cleanPartition(Metapb.Partition partition, CleanPartition cleanPartition) throws
+                                                                                   PDException;
+
+    void changePartitionKeyRange(Metapb.Partition partition,
+                                 PartitionKeyRange partitionKeyRange) throws PDException;
+}
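For illustration, a skeletal implementation of the interface above that merely logs what it receives; a production listener (registered via `PartitionService#addInstructionListener`, defined below) would forward these commands to the stores over the pulse channel:

    @Slf4j
    public class LoggingInstructionListener implements PartitionInstructionListener {
        @Override
        public void changeShard(Metapb.Partition partition, ChangeShard changeShard) {
            log.info("changeShard {}-{}: {}", partition.getGraphName(),
                     partition.getId(), changeShard.getChangeType());
        }

        @Override
        public void transferLeader(Metapb.Partition partition, TransferLeader transferLeader) { }

        @Override
        public void splitPartition(Metapb.Partition partition, SplitPartition splitPartition) { }

        @Override
        public void dbCompaction(Metapb.Partition partition, DbCompaction dbCompaction) { }

        @Override
        public void movePartition(Metapb.Partition partition, MovePartition movePartition) { }

        @Override
        public void cleanPartition(Metapb.Partition partition, CleanPartition cleanPartition) { }

        @Override
        public void changePartitionKeyRange(Metapb.Partition partition,
                                            PartitionKeyRange partitionKeyRange) { }
    }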
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/PartitionService.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/PartitionService.java
new file mode 100644
index 0000000000..9291a813c9
--- /dev/null
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/PartitionService.java
@@ -0,0 +1,1563 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hugegraph.pd.common.KVPair;
+import org.apache.hugegraph.pd.common.PDException;
+import org.apache.hugegraph.pd.common.PartitionUtils;
+import org.apache.hugegraph.pd.config.PDConfig;
+import org.apache.hugegraph.pd.grpc.MetaTask;
+import org.apache.hugegraph.pd.grpc.Metapb;
+import org.apache.hugegraph.pd.grpc.Pdpb;
+import org.apache.hugegraph.pd.grpc.pulse.ChangeShard;
+import org.apache.hugegraph.pd.grpc.pulse.CleanPartition;
+import org.apache.hugegraph.pd.grpc.pulse.CleanType;
+import org.apache.hugegraph.pd.grpc.pulse.ConfChangeType;
+import org.apache.hugegraph.pd.grpc.pulse.DbCompaction;
+import org.apache.hugegraph.pd.grpc.pulse.MovePartition;
+import org.apache.hugegraph.pd.grpc.pulse.PartitionKeyRange;
+import org.apache.hugegraph.pd.grpc.pulse.SplitPartition;
+import org.apache.hugegraph.pd.grpc.pulse.TransferLeader;
+import org.apache.hugegraph.pd.meta.MetadataFactory;
+import org.apache.hugegraph.pd.meta.PartitionMeta;
+import org.apache.hugegraph.pd.meta.TaskInfoMeta;
+import org.apache.hugegraph.pd.raft.RaftStateListener;
+
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Partition management
+ */
+@Slf4j
+public class PartitionService implements RaftStateListener {
+
+    private final long Partition_Version_Skip = 0x0F;
+    private final StoreNodeService storeService;
+    private final PartitionMeta partitionMeta;
+    private final PDConfig pdConfig;
+    // partition instruction listeners
+    private final List<PartitionInstructionListener> instructionListeners;
+
+    // partition status listeners
+    private final List<PartitionStatusListener> statusListeners;
+
+    public PartitionService(PDConfig config, StoreNodeService storeService) {
+        this.pdConfig = config;
+        this.storeService = storeService;
+        partitionMeta = MetadataFactory.newPartitionMeta(config);
+        instructionListeners = Collections.synchronizedList(new ArrayList<>());
+        statusListeners = Collections.synchronizedList(new ArrayList<>());
+    }
+
+    public void init() throws PDException {
+        partitionMeta.init();
+        storeService.addStatusListener(new StoreStatusListener() {
+            @Override
+            public void onStoreStatusChanged(Metapb.Store store, Metapb.StoreState old,
+                                             Metapb.StoreState status) {
+                if (status == Metapb.StoreState.Tombstone) {
+                    // the store was taken down: tell all partitions on it to
+                    // migrate their data
+                    storeOffline(store);
+                }
+            }
+
+            @Override
+            public void onGraphChange(Metapb.Graph graph,
+                                      Metapb.GraphState stateOld,
+                                      Metapb.GraphState stateNew) {
+
+            }
+
+            @Override
+            public void onStoreRaftChanged(Metapb.Store store) {
+
+            }
+        });
+    }
+
+    /**
+     * Return the partition that the key belongs to.
+     */
+    public Metapb.PartitionShard getPartitionShard(String graphName, byte[] key) throws
+                                                                                 PDException {
+        long code = PartitionUtils.calcHashcode(key);
+        return getPartitionByCode(graphName, code);
+    }
+
+    /**
+     * Return the partition that owns the given hash code.
+     */
+    public Metapb.PartitionShard getPartitionByCode(String graphName, long code) throws
+                                                                                 PDException {
+        if (code < 0 || code >= PartitionUtils.MAX_VALUE) {
+            throw new PDException(Pdpb.ErrorType.NOT_FOUND_VALUE, "code error");
+        }
+        // look up the partition id by code; create a new partition if none exists
+        Metapb.Partition partition = partitionMeta.getPartitionByCode(graphName, code);
+
+        if (partition == null) {
+            synchronized (this) {
+                // re-check under the lock: another thread may have created it
+                partition = partitionMeta.getPartitionByCode(graphName, code);
+                if (partition == null) {
+                    partition = newPartition(graphName, code);
+                }
+            }
+        }
+
+        Metapb.PartitionShard partShard =
+                Metapb.PartitionShard.newBuilder()
+                                     .setPartition(partition)
+                                     .setLeader(storeService.getLeader(partition, 0))
+                                     .build();
+        log.debug(
+                "{} Partition get code = {}, partition id = {}, start = {}, end = {}, leader = {}",
+                graphName, code, partition.getId(), partition.getStartKey(),
+                partition.getEndKey(), partShard.getLeader());
+
+        return partShard;
+    }
+
+    /**
+     * Return partition info by id.
+     */
+    public Metapb.PartitionShard getPartitionShardById(String graphName, int partId) throws
+                                                                                      PDException {
+        Metapb.Partition partition = partitionMeta.getPartitionById(graphName, partId);
+        if (partition == null) {
+            return null;
+        }
+
+        Metapb.PartitionShard partShard =
+                Metapb.PartitionShard.newBuilder()
+                                     .setPartition(partition)
+                                     // should return the real leader here; for
+                                     // now default to the first shard
+                                     .setLeader(storeService.getLeader(partition, 0))
+                                     .build();
+
+        return partShard;
+    }
+
+    public Metapb.Partition getPartitionById(String graphName, int partId) throws PDException {
+        return partitionMeta.getPartitionById(graphName, partId);
+    }
+
+    public List<Metapb.Partition> getPartitionById(int partId) throws PDException {
+        return partitionMeta.getPartitionById(partId);
+    }
+
+    /**
+     * Get all partitions.
+     */
+    public List<Metapb.Partition> getPartitions() {
+        return partitionMeta.getPartitions();
+    }
+
+    public List<Metapb.Partition> getPartitions(String graphName) {
+        if (StringUtils.isAllEmpty(graphName)) {
+            return partitionMeta.getPartitions();
+        }
+        return partitionMeta.getPartitions(graphName);
+    }
+
+    /**
+     * Find all partitions hosted on the given store.
+     */
+    public List<Metapb.Partition> getPartitionByStore(Metapb.Store store) throws PDException {
+        List<Metapb.Partition> partitions = new ArrayList<>();
+        getGraphs().forEach(graph -> {
+            getPartitions(graph.getGraphName()).forEach(partition -> {
+                try {
+                    storeService.getShardGroup(partition.getId()).getShardsList()
+                                .forEach(shard -> {
+                                    if (shard.getStoreId() == store.getId()) {
+                                        partitions.add(partition);
+                                    }
+                                });
+                } catch (PDException e) {
+                    throw new RuntimeException(e);
+                }
+            });
+        });
+        return partitions;
+    }
+
+    /**
+     * Create a new partition.
+     */
+    private Metapb.Partition newPartition(String graphName, long code) throws PDException {
+        Metapb.Graph graph = partitionMeta.getAndCreateGraph(graphName);
+        int partitionSize = PartitionUtils.MAX_VALUE / graph.getPartitionCount();
+        if (PartitionUtils.MAX_VALUE % graph.getPartitionCount() != 0) {
+            // the division has a remainder: round the partition size up
+            partitionSize++;
+        }
+
+        int partitionId = (int) (code / partitionSize);
+        long startKey = (long) partitionSize * partitionId;
+        long endKey = (long) partitionSize * (partitionId + 1);
+
+        // check the local meta first
+        Metapb.Partition partition = partitionMeta.getPartitionById(graphName, partitionId);
+        if (partition == null) {
+            // allocate the stores
+            storeService.allocShards(null, partitionId);
+
+            partition = Metapb.Partition.newBuilder()
+                                        .setId(partitionId)
+                                        .setVersion(0)
+                                        .setState(Metapb.PartitionState.PState_Normal)
+                                        .setStartKey(startKey)
+                                        .setEndKey(endKey)
+                                        .setGraphName(graphName)
+                                        .build();
+
+            log.info("Create newPartition {}", partition);
+        }
+
+        partitionMeta.updatePartition(partition);
+
+        return partition;
+    }
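+
+    // Worked example with illustrative numbers: if PartitionUtils.MAX_VALUE
+    // were 65536 and the graph had 4 partitions, partitionSize would be 16384,
+    // so a key hashing to 40000 lands in partition 40000 / 16384 = 2, covering
+    // the key range [32768, 49152). When MAX_VALUE does not divide evenly, the
+    // size is rounded up by one and the last partition simply ends early.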
+
+    /**
+     * Compute the partition a key belongs to, using hash mapping.
+     */
+    protected int getPartitionId(String graphName, byte[] key) throws PDException {
+        int code = PartitionUtils.calcHashcode(key);
+        Metapb.Partition partition = partitionMeta.getPartitionByCode(graphName, code);
+        return partition != null ? partition.getId() : -1;
+    }
+
+    /**
+     * Get all partitions spanned by a key range.
+     * Computed via hash codes for now; the proper approach is to query by key.
+     */
+    public List<Metapb.PartitionShard> scanPartitions(String graphName, byte[] startKey,
+                                                      byte[] endKey) throws PDException {
+        int startPartId = getPartitionId(graphName, startKey);
+        int endPartId = getPartitionId(graphName, endKey);
+
+        List<Metapb.PartitionShard> partShards = new ArrayList<>();
+        for (int id = startPartId; id <= endPartId; id++) {
+            Metapb.Partition partition = partitionMeta.getPartitionById(graphName, id);
+            partShards.add(
+                    Metapb.PartitionShard.newBuilder()
+                                         .setPartition(partition)
+                                         // should return the real leader here;
+                                         // for now default to the first shard
+                                         .setLeader(storeService.getLeader(partition, 0))
+                                         .build()
+            );
+        }
+        return partShards;
+    }
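+
+    // For example, a range scan resolves its route as
+    //     List<Metapb.PartitionShard> route =
+    //             scanPartitions(graphName, startKey, endKey);
+    // which yields one PartitionShard (partition plus current leader) for each
+    // partition between the hash buckets of startKey and endKey.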
+
+    public synchronized long updatePartition(List<Metapb.Partition> partitions) throws
+                                                                                PDException {
+        for (Metapb.Partition pt : partitions) {
+            Metapb.Partition oldPt = getPartitionById(pt.getGraphName(), pt.getId());
+            partitionMeta.updatePartition(pt);
+            onPartitionChanged(oldPt, pt);
+        }
+        return partitions.size();
+    }
+
+    /**
+     * Update the state of a partition and of its graph.
+     */
+    public synchronized void updatePartitionState(String graph, int partId,
+                                                  Metapb.PartitionState state) throws PDException {
+        Metapb.Partition partition = getPartitionById(graph, partId);
+
+        if (partition.getState() != state) {
+            Metapb.Partition newPartition = partitionMeta.updatePartition(
+                    partition.toBuilder().setState(state).build());
+
+            onPartitionChanged(partition, newPartition);
+        }
+    }
+
+    public synchronized void updateGraphState(String graphName, Metapb.PartitionState state)
+            throws PDException {
+        Metapb.Graph graph = getGraph(graphName);
+        if (graph != null) {
+            partitionMeta.updateGraph(graph.toBuilder()
+                                           .setState(state).build());
+        }
+    }
+
+    public synchronized long removePartition(String graphName, int partId) throws PDException {
+        log.info("Partition {}-{} removePartition", graphName, partId);
+        Metapb.Partition partition = partitionMeta.getPartitionById(graphName, partId);
+        var ret = partitionMeta.removePartition(graphName, partId);
+        partitionMeta.reload();
+        onPartitionRemoved(partition);
+
+        // some sources may be offline; after removal the graph state must be refreshed
+        try {
+            Metapb.PartitionState state = Metapb.PartitionState.PState_Normal;
+            for (Metapb.Partition pt : partitionMeta.getPartitions(partition.getGraphName())) {
+                if (pt.getState().getNumber() > state.getNumber()) {
+                    state = pt.getState();
+                }
+            }
+            updateGraphState(partition.getGraphName(), state);
+
+            state = Metapb.PartitionState.PState_Normal;
+            for (Metapb.ShardGroup group : storeService.getShardGroups()) {
+                if (group.getState().getNumber() > state.getNumber()) {
+                    state = group.getState();
+                }
+            }
+            storeService.updateClusterStatus(state);
+
+        } catch (PDException e) {
+            log.error("onPartitionChanged", e);
+        }
+
+        return ret;
+    }
+
+    public Metapb.PartitionStats getPartitionStats(String graphName, int partitionId) throws
+                                                                                      PDException {
+        return partitionMeta.getPartitionStats(graphName, partitionId);
+    }
+
+    /**
+     * Get the partition stats of a graph.
+     */
+    public List<Metapb.PartitionStats> getPartitionStatus(String graphName) throws PDException {
+        return partitionMeta.getPartitionStats(graphName);
+    }
+
+    /**
+     * Return all graphs.
+     */
+    public List<Metapb.Graph> getGraphs() throws PDException {
+        return partitionMeta.getGraphs();
+    }
+
+    public Metapb.Graph getGraph(String graphName) throws PDException {
+        return partitionMeta.getGraph(graphName);
+    }
+
+    /**
+     * Delete a graph and all of its partitions.
+     */
+    public Metapb.Graph delGraph(String graphName) throws PDException {
+        log.info("delGraph {}", graphName);
+        Metapb.Graph graph = getGraph(graphName);
+        getPartitions(graphName).forEach(partition -> {
+            onPartitionRemoved(partition);
+        });
+        partitionMeta.removeAllPartitions(graphName);
+        partitionMeta.removeGraph(graphName);
+        return graph;
+    }
+
+    /**
+     * Update the graph info; the stores need to be notified.
+     */
+    public synchronized Metapb.Graph updateGraph(Metapb.Graph graph) throws PDException {
+        Metapb.Graph lastGraph = partitionMeta.getAndCreateGraph(graph.getGraphName());
+        log.info("updateGraph graph: {}, last: {}", graph, lastGraph);
+
+        int partCount =
+                (graph.getGraphName().endsWith("/s") || graph.getGraphName().endsWith("/m")) ?
+                1 : pdConfig.getPartition().getTotalCount();
+
+        // set the partition count to the specified value if legal
+        if (graph.getPartitionCount() <= partCount && graph.getPartitionCount() > 0) {
+            partCount = graph.getPartitionCount();
+        }
+
+        if (partCount == 0) {
+            throw new PDException(10010, "update graph error, partition count = 0");
+        }
+
+        graph = lastGraph.toBuilder()
+                         .mergeFrom(graph)
+                         .setPartitionCount(partCount)
+                         .build();
+        partitionMeta.updateGraph(graph);
+
+        // the partition count changed
+        if (lastGraph.getPartitionCount() != graph.getPartitionCount()) {
+            log.info("updateGraph graph: {}, partition count changed from {} to {}",
+                     graph.getGraphName(), lastGraph.getPartitionCount(),
+                     graph.getPartitionCount());
+            // TODO: changing a graph's partition count requires data migration
+        }
+        return graph;
+    }
+
+    // partitionId -> (storeId -> shard committedIndex)
+    public Map<Integer, Map<Long, Long>> getCommittedIndexStats() throws PDException {
+        Map<Integer, Map<Long, Long>> map = new HashMap<>();
+        for (Metapb.Store store : storeService.getActiveStores()) {
+            for (Metapb.RaftStats raftStats : store.getStats().getRaftStatsList()) {
+                int partitionID = raftStats.getPartitionId();
+                if (!map.containsKey(partitionID)) {
+                    map.put(partitionID, new HashMap<>());
+                }
+                Map<Long, Long> storeMap = map.get(partitionID);
+                if (!storeMap.containsKey(store.getId())) {
+                    storeMap.put(store.getId(), raftStats.getCommittedIndex());
+                }
+            }
+        }
+        return map;
+    }
+
+    /**
+     * The store was taken offline: migrate its partition data.
+     */
+    public void storeOffline(Metapb.Store store) {
+        try {
+            log.info("storeOffline store id: {}, address: {}, state: {}",
+                     store.getId(), store.getAddress(), store.getState());
+            List<Metapb.Partition> partitions = getPartitionByStore(store);
+            var partIds = new HashSet<Integer>();
+            for (Metapb.Partition p : partitions) {
+                if (partIds.contains(p.getId())) {
+                    continue;
+                }
+                shardOffline(p, store.getId());
+                partIds.add(p.getId());
+            }
+        } catch (PDException e) {
+            log.error("storeOffline exception: ", e);
+        }
+    }
+
+    /**
+     * A shard's store went offline: reallocate the partition's shards.
+     */
+    public synchronized void shardOffline(Metapb.Partition partition, long storeId) {
+        try {
+            log.info("shardOffline Partition {} - {} shardOffline store : {}",
+                     partition.getGraphName(), partition.getId(), storeId);
+            // partition = getPartitionById(partition.getGraphName(), partition.getId());
+            // Metapb.Partition.Builder builder = Metapb.Partition.newBuilder(partition);
+            // builder.clearShards();
+            // partition.getShardsList().forEach(shard -> {
+            //     if (shard.getStoreId() != storeId)
+            //         builder.addShards(shard);
+            // });
+            // partition = builder.build();
+            Metapb.Graph graph = getGraph(partition.getGraphName());
+            reallocPartitionShards(graph, partition);
+
+        } catch (PDException e) {
+            log.error("storeOffline exception: ", e);
+        }
+    }
+
+    private boolean isShardListEquals(List<Metapb.Shard> list1, List<Metapb.Shard> list2) {
+        if (list1 == list2) {
+            return true;
+        } else if (list1 != null && list2 != null) {
+
+            var s1 = list1.stream().map(Metapb.Shard::getStoreId).sorted(Long::compare)
+                          .collect(Collectors.toList());
+            var s2 = list2.stream().map(Metapb.Shard::getStoreId).sorted(Long::compare)
+                          .collect(Collectors.toList());
+
+            if (s1.size() == s2.size()) {
+                for (int i = 0; i < s1.size(); i++) {
+                    // compare the boxed Longs by value, not by reference
+                    if (!s1.get(i).equals(s2.get(i))) {
+                        return false;
+                    }
+                }
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * Reallocate the shards of a partition.
+     */
+    public void reallocPartitionShards(Metapb.Graph graph, Metapb.Partition partition) throws
+                                                                                       PDException {
+        if (partition == null) {
+            return;
+        }
+        List<Metapb.Shard> originalShards = storeService.getShardList(partition.getId());
+
+        var shardGroup = storeService.getShardGroup(partition.getId());
+
+        List<Metapb.Shard> shards = storeService.reallocShards(shardGroup);
+
+        // only notify the stores when the shard list actually changed
+        if (!isShardListEquals(originalShards, shards)) {
+            log.info("reallocPartitionShards:{} vs {}", shardGroup, shards);
+            // partition = Metapb.Partition.newBuilder(partition)
+            //                             .clearShards().addAllShards(shards)
+            //                             .build();
+            // partitionMeta.updatePartition(partition);
+            fireChangeShard(partition, shards, ConfChangeType.CONF_CHANGE_TYPE_ADJUST);
+        }
+    }
+
+    public synchronized void reallocPartitionShards(String graphName, int partitionId) throws
+                                                                                       PDException {
+        reallocPartitionShards(partitionMeta.getGraph(graphName),
+                               partitionMeta.getPartitionById(graphName, partitionId));
+    }
+
+    /**
+     * Move a partition replica from one store to another.
+     */
+    public synchronized void movePartitionsShard(Integer partitionId, long fromStore,
+                                                 long toStore) {
+        try {
+            log.info("movePartitionsShard partitionId {} from store {} to store {}", partitionId,
+                     fromStore, toStore);
+            for (Metapb.Graph graph : getGraphs()) {
+                Metapb.Partition partition =
+                        this.getPartitionById(graph.getGraphName(), partitionId);
+                if (partition == null) {
+                    continue;
+                }
+
+                var shardGroup = storeService.getShardGroup(partitionId);
+                List<Metapb.Shard> shards = new ArrayList<>();
+                shardGroup.getShardsList().forEach(shard -> {
+                    if (shard.getStoreId() != fromStore) {
+                        shards.add(shard);
+                    }
+                });
+
+                shards.add(Metapb.Shard.newBuilder().setStoreId(toStore)
+                                       .setRole(Metapb.ShardRole.Follower).build());
+
+                // storeService.updateShardGroup(partitionId, shards, -1, -1);
+                // storeService.onShardGroupStatusChanged(shardGroup, newShardGroup);
+                fireChangeShard(partition, shards, ConfChangeType.CONF_CHANGE_TYPE_ADJUST);
+                // shard groups are independent of graphs; moving one is enough
+                break;
+            }
+        } catch (PDException e) {
+            log.error("Partition {} movePartitionsShard exception", partitionId, e);
+        }
+    }
+
+    /**
+     * Split partitions across the whole cluster according to the given list.
+     *
+     * @param splits pairs of (partition id, split count)
+     */
+    public synchronized void splitPartition(List<KVPair<Integer, Integer>> splits) throws
+                                                                                   PDException {
+        var tasks = new HashMap<String, List<KVPair<Integer, Integer>>>();
+
+        for (var pair : splits) {
+            for (var partition : getPartitionById(pair.getKey())) {
+                if (!tasks.containsKey(partition.getGraphName())) {
+                    tasks.put(partition.getGraphName(), new ArrayList<>());
+                }
+                tasks.get(partition.getGraphName()).add(pair);
+            }
+        }
+
+        for (var entry : tasks.entrySet()) {
+            splitPartition(getGraph(entry.getKey()), entry.getValue());
+        }
+    }
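+
+    // Worked example: a graph with 4 partitions may be split to 8 or 12
+    // (splitCount = 2 or 3 per source partition), but not to 10; targets that
+    // are not an integer multiple of the current partition count are rejected
+    // by the check below.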
+
+    /**
+     * Partition split: expand one graph to N partitions.
+     *
+     * @param graph   graph
+     * @param toCount target partition count
+     */
+    public synchronized void splitPartition(Metapb.Graph graph, int toCount) throws PDException {
+
+        var partitionCount = getPartitions(graph.getGraphName()).size();
+        var maxShardsPerStore = pdConfig.getPartition().getMaxShardsPerStore();
+        var shardCount = pdConfig.getPartition().getShardCount();
+
+        if (shardCount * toCount >
+            storeService.getActiveStores().size() * maxShardsPerStore) {
+            throw new PDException(Pdpb.ErrorType.Too_Many_Partitions_Per_Store_VALUE,
+                                  "can't satisfy target shard group count, reached the upper " +
                                  "limit of the cluster");
+        }
+
+        if (toCount % partitionCount != 0 || toCount <= partitionCount) {
+            throw new PDException(Pdpb.ErrorType.Invalid_Split_Partition_Count_VALUE,
+                                  "invalid split partition count, make sure toCount is an " +
+                                  "integer multiple of the current partition count");
+        }
+
+        // since toCount is an integer multiple, the expansion factor is
+        // toCount / current count
+        var splitCount = toCount / partitionCount;
+        var list = new ArrayList<KVPair<Integer, Integer>>();
+        for (int i = 0; i < partitionCount; i++) {
+            list.add(new KVPair<>(i, splitCount));
+        }
+
+        splitPartition(graph, list);
+    }
+
+    private synchronized void splitPartition(Metapb.Graph graph,
+                                             List<KVPair<Integer, Integer>> splits)
+            throws PDException {
+        var taskInfoMeta = storeService.getTaskInfoMeta();
+        if (taskInfoMeta.scanSplitTask(graph.getGraphName()).size() > 0) {
+            return;
+        }
+
+        splits.sort(Comparator.comparing(KVPair::getKey));
+        log.info("split partition, graph: {}, splits:{}", graph, splits);
+
+        // start numbering new partitions after the last existing partition index
+        var i = getPartitions(graph.getGraphName()).size();
+
+        for (var pair : splits) {
+            Metapb.Partition partition =
+                    partitionMeta.getPartitionById(graph.getGraphName(), pair.getKey());
+            if (partition != null) {
+                var splitCount = pair.getValue();
+                long splitLen = (partition.getEndKey() - partition.getStartKey()) / splitCount;
+
+                List<Metapb.Partition> newPartitions = new ArrayList<>();
+                // the first new partition is the original partition itself
+                newPartitions.add(partition.toBuilder()
+                                           .setStartKey(partition.getStartKey())
+                                           .setEndKey(partition.getStartKey() + splitLen)
+                                           .setId(partition.getId())
+                                           .setState(Metapb.PartitionState.PState_Offline)
+                                           .build());
+
+                int idx = 0;
+
+                for (; idx < splitCount - 2; idx++) {
+                    newPartitions.add(partition.toBuilder()
+                                               .setStartKey(newPartitions.get(idx).getEndKey())
+                                               .setEndKey(newPartitions.get(idx).getEndKey() +
+                                                          splitLen)
+                                               .setId(i)
+                                               .setState(Metapb.PartitionState.PState_Offline)
+                                               .build());
+                    i += 1;
+                }
+
+                newPartitions.add(partition.toBuilder()
+                                           .setStartKey(newPartitions.get(idx).getEndKey())
+                                           .setEndKey(partition.getEndKey())
+                                           .setId(i)
+                                           .setState(Metapb.PartitionState.PState_Offline)
+                                           .build());
+                i += 1;
+
+                // try to save the new partitions and repair the shard groups
+                for (int j = 0; j < newPartitions.size(); j++) {
+                    var newPartition = newPartitions.get(j);
+
+                    if (j != 0) {
+                        partitionMeta.updatePartition(newPartition);
+                    }
+                    // create the shard group if absent, cloning the source
+                    // partition's shard group so the replicas stay on the same
+                    // machines; if it already exists, graphs have different
+                    // partition counts, so the store side has to replicate it
+                    // to other machines
+                    var shardGroup = storeService.getShardGroup(newPartition.getId());
+                    if (shardGroup == null) {
+                        shardGroup = storeService.getShardGroup(partition.getId()).toBuilder()
+                                                 .setId(newPartition.getId())
+                                                 .build();
+                        storeService.getStoreInfoMeta().updateShardGroup(shardGroup);
+                        updateShardGroupCache(shardGroup);
+                    }
+
+                    // sanity-check the shard list
+                    if (shardGroup.getShardsCount() != pdConfig.getPartition().getShardCount()) {
+                        storeService.reallocShards(shardGroup);
+                    }
+                }
+
+                SplitPartition splitPartition = SplitPartition.newBuilder()
+                                                              .addAllNewPartition(newPartitions)
+                                                              .build();
+
+                fireSplitPartition(partition, splitPartition);
+                // set the partition offline; it goes back online when the task completes
+                updatePartitionState(partition.getGraphName(), partition.getId(),
+                                     Metapb.PartitionState.PState_Offline);
+
+                // record the task
+                var task = MetaTask.Task.newBuilder().setPartition(partition)
+                                        .setSplitPartition(splitPartition)
+                                        .build();
+                taskInfoMeta.addSplitTask(pair.getKey(), task.getPartition(),
+                                          task.getSplitPartition());
+            }
+        }
+    }
+
+    /**
+     * Transfer the leader to another shard.
+     * Transferring one partition is sufficient.
+     */
+    public void transferLeader(Integer partId, Metapb.Shard shard) {
+        try {
+            var partitions = getPartitionById(partId);
+            if (partitions.size() > 0) {
+                fireTransferLeader(partitions.get(0),
+                                   TransferLeader.newBuilder().setShard(shard).build());
+            }
+//            for (Metapb.Graph graph : getGraphs()) {
+//                Metapb.Partition partition = this.getPartitionById(graph.getGraphName(), partId);
+//                if (partition != null) {
+//                    fireTransferLeader(partition, TransferLeader.newBuilder().setShard(shard)
+//                                                                .build());
+//                }
+//            }
+        } catch (PDException e) {
+            log.error("Partition {} transferLeader exception", partId, e);
+        }
+    }
+
+    /**
+     * Combine partitions: shrink the whole cluster down to toCount partitions.
+     *
+     * @param toCount target partition count
+     * @throws PDException when query errors
+     */
+    public void combinePartition(int toCount) throws PDException {
+
+        int shardsTotalCount = getShardGroupCount();
+        for (var graph : getGraphs()) {
+            // shrink every graph that has more than toCount partitions
+            if (graph.getPartitionCount() > toCount) {
+                combineGraphPartition(graph, toCount, shardsTotalCount);
+            }
+        }
+    }
+
+    /**
+     * Combine the partitions of a single graph.
+     *
+     * @param graphName the name of the graph
+     * @param toCount   the target partition count
+     * @throws PDException when query errors
+     */
+    public void combineGraphPartition(String graphName, int toCount) throws PDException {
+        combineGraphPartition(getGraph(graphName), toCount, getShardGroupCount());
+    }
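+
+    // For illustration: combining 12 partitions into 4 merges each group of 3
+    // key-adjacent partitions into one target; sources and targets are taken
+    // offline, MovePartition tasks stream the data, and handleMoveTask() below
+    // finalizes the batch (or rolls it back) once every subtask has reported.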
+
+    /**
+     * Internal implementation of combining a single graph.
+     *
+     * @param graph      the graph
+     * @param toCount    the target partition count
+     * @param shardCount the shard group count of the cluster
+     * @throws PDException when query errors
+     */
+    private synchronized void combineGraphPartition(Metapb.Graph graph, int toCount,
+                                                    int shardCount) throws PDException {
+        if (graph == null) {
+            throw new PDException(1,
+                                  "Graph does not exist, try the full graph name, like " +
+                                  "/DEFAULT/GRAPH_NAME/g");
+        }
+
+        log.info("Combine graph {} partition, from {}, to {}, with shard count:{}",
+                 graph.getGraphName(), graph.getPartitionCount(), toCount, shardCount);
+
+        if (!checkTargetCount(graph.getPartitionCount(), toCount, shardCount)) {
+            log.error("Combine partition, illegal toCount:{}, graph:{}", toCount,
+                      graph.getGraphName());
+            throw new PDException(2,
+                                  "illegal partition toCount, should be between 1 and the " +
+                                  "shard group count, and divide the current partition count " +
+                                  "evenly");
+        }
+
+        var taskInfoMeta = storeService.getTaskInfoMeta();
+        if (taskInfoMeta.scanMoveTask(graph.getGraphName()).size() > 0) {
+            throw new PDException(3, "Graph Combine process exists");
+        }
+
+        // sort by start key, so the combined key ranges are contiguous
+        var partitions = getPartitions(graph.getGraphName()).stream()
+                .sorted(Comparator.comparing(Metapb.Partition::getStartKey))
+                .collect(Collectors.toList());
+
+        // partition ids are not necessarily consecutive
+        var sortPartitions = getPartitions(graph.getGraphName())
+                .stream()
+                .sorted(Comparator.comparing(Metapb.Partition::getId))
+                .collect(Collectors.toList());
+
+        var groupSize = partitions.size() / toCount; // merge group size
+        // e.g. 12 partitions (0..11) combined into 4:
+        // plan: 0,1,2 => 0; 3,4,5 => 1; 6,7,8 => 2; 9,10,11 => 3,
+        // which keeps the partitions contiguous
+        for (int i = 0; i < toCount; i++) {
+            var startKey = partitions.get(i * groupSize).getStartKey();
+            var endKey = partitions.get(i * groupSize + groupSize - 1).getEndKey();
+            // compose the key range; the start and end keys take effect only
+            // when the combine succeeds
+
+            var targetPartition = Metapb.Partition.newBuilder(sortPartitions.get(i))
+                                                  .setStartKey(startKey)
+                                                  .setEndKey(endKey)
+                                                  .build();
+
+            for (int j = 0; j < groupSize; j++) {
+                var partition = partitions.get(i * groupSize + j);
+                // skip the partition that already has the target id
+                if (i == partition.getId()) {
+                    continue;
+                }
+
+                log.info("combine partition of graph :{}, from part id {} to {}",
+                         partition.getGraphName(),
+                         partition.getId(), targetPartition.getId());
+                MovePartition movePartition = MovePartition.newBuilder()
+                                                           .setTargetPartition(targetPartition)
+                                                           .setKeyStart(partition.getStartKey())
+                                                           .setKeyEnd(partition.getEndKey())
+                                                           .build();
+                taskInfoMeta.addMovePartitionTask(partition, movePartition);
+                // take the source offline
+                updatePartitionState(partition.getGraphName(), partition.getId(),
+                                     Metapb.PartitionState.PState_Offline);
+                fireMovePartition(partition, movePartition);
+            }
+            // take the target offline
+            updatePartitionState(targetPartition.getGraphName(), targetPartition.getId(),
+                                 Metapb.PartitionState.PState_Offline);
+        }
+
+        storeService.updateClusterStatus(Metapb.ClusterState.Cluster_Offline);
+    }
+
+    /**
+     * Get the total number of raft groups via storeService.
+     *
+     * @return the count of raft groups
+     */
+    private int getShardGroupCount() {
+        try {
+            return Optional.ofNullable(storeService.getShardGroups()).orElseGet(ArrayList::new)
+                           .size();
+        } catch (PDException e) {
+            log.error("get shard group failed", e);
+        }
+        return 0;
+    }
+
+    /**
+     * Check whether a graph can be combined from fromCount down to toCount partitions.
+     *
+     * @param fromCount current partition count
+     * @param toCount   target partition count
+     * @return true when feasible, false otherwise
+     */
+    private boolean checkTargetCount(int fromCount, int toCount, int shardCount) {
+        // must lie strictly between 1 and the current count, divide it evenly,
+        // and stay below the shard group count
+        return toCount >= 1 && toCount < fromCount && fromCount % toCount == 0 &&
+               toCount < shardCount;
+    }
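+
+    // e.g. checkTargetCount(12, 4, 12) is true (4 < 12, 12 % 4 == 0, 4 < 12),
+    // while checkTargetCount(12, 5, 12) fails the divisibility check and
+    // checkTargetCount(12, 12, 12) fails toCount < fromCount.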
+
+    /**
+     * Handle a partition heartbeat and record the leader info.
+     * Check the term and version to decide whether the message is the latest.
+     */
+    public void partitionHeartbeat(Metapb.PartitionStats stats) throws PDException {
+
+        Metapb.ShardGroup shardGroup = storeService.getShardGroup(stats.getId());
+        // shard group version changes
+        // (the shard group is controlled by PD; after splits and similar
+        // operations it may be briefly inconsistent, and PD is authoritative)
+        // the store controls the shard leader
+        if (shardGroup != null &&
+            (shardGroup.getVersion() < stats.getLeaderTerm() ||
+             shardGroup.getConfVer() < stats.getConfVer())) {
+            storeService.updateShardGroup(stats.getId(),
+                                          stats.getShardList(), stats.getLeaderTerm(),
+                                          stats.getConfVer());
+        }
+
+        List<Metapb.Partition> partitions = getPartitionById(stats.getId());
+        for (Metapb.Partition partition : partitions) {
+            // partitionMeta.getAndCreateGraph(partition.getGraphName());
+            checkShardState(partition, stats);
+        }
+        // statistics
+        partitionMeta.updatePartitionStats(stats.toBuilder()
+                                                .setTimestamp(System.currentTimeMillis())
+                                                .build());
+    }
+
+    /**
+     * Check the shard states; offline shards affect the partition state.
+     */
+    private void checkShardState(Metapb.Partition partition, Metapb.PartitionStats stats) {
+
+        try {
+            int offCount = 0;
+            for (Metapb.ShardStats shard : stats.getShardStatsList()) {
+                if (shard.getState() == Metapb.ShardState.SState_Offline) {
+                    offCount++;
+                }
+            }
+            if (partition.getState() != Metapb.PartitionState.PState_Offline) {
+                if (offCount == 0) {
+                    updatePartitionState(partition.getGraphName(), partition.getId(),
+                                         Metapb.PartitionState.PState_Normal);
+                } else if (offCount * 2 < stats.getShardStatsCount()) {
+                    updatePartitionState(partition.getGraphName(), partition.getId(),
+                                         Metapb.PartitionState.PState_Warn);
+                } else {
+                    updatePartitionState(partition.getGraphName(), partition.getId(),
+                                         Metapb.PartitionState.PState_Warn);
+                }
+            }
+        } catch (Exception e) {
+            log.error("Partition {}-{} checkShardState exception",
+                      partition.getGraphName(), partition.getId(), e);
+        }
+    }
+
+    public void addInstructionListener(PartitionInstructionListener event) {
+        instructionListeners.add(event);
+    }
+
+    public void addStatusListener(PartitionStatusListener listener) {
+        statusListeners.add(listener);
+    }
+
+    /**
+     * Fire a change-shard instruction.
+     */
+    protected void fireChangeShard(Metapb.Partition partition, List<Metapb.Shard> shards,
+                                   ConfChangeType changeType) {
+        log.info("fireChangeShard partition: {}-{}, changeType:{} {}", partition.getGraphName(),
+                 partition.getId(), changeType, shards);
+        instructionListeners.forEach(cmd -> {
+            try {
+                cmd.changeShard(partition, ChangeShard.newBuilder()
+                                                      .addAllShard(shards)
+                                                      .setChangeType(changeType)
+                                                      .build());
+            } catch (Exception e) {
+                log.error("fireChangeShard", e);
+            }
+        });
+    }
+
+    public void changeShard(int groupId, List<Metapb.Shard> shards) throws PDException {
+        var partitions = getPartitionById(groupId);
+        if (partitions.size() == 0) {
+            return;
+        }
+        fireChangeShard(partitions.get(0), shards, ConfChangeType.CONF_CHANGE_TYPE_ADJUST);
+    }
+
+    /**
+     * Send a partition-split message.
+     */
+    protected void fireSplitPartition(Metapb.Partition partition, SplitPartition splitPartition) {
+        log.info("fireSplitPartition partition: {}-{}, split :{}",
+                 partition.getGraphName(), partition.getId(), splitPartition);
+        instructionListeners.forEach(cmd -> {
+            try {
+                cmd.splitPartition(partition, splitPartition);
+            } catch (Exception e) {
+                log.error("fireSplitPartition", e);
+            }
+        });
+    }
+
+    /**
+     * Send a leader-transfer message.
+     */
+    protected void fireTransferLeader(Metapb.Partition partition, TransferLeader transferLeader) {
+        log.info("fireTransferLeader partition: {}-{}, leader :{}",
+                 partition.getGraphName(), partition.getId(), transferLeader);
+        instructionListeners.forEach(cmd -> {
+            try {
+                cmd.transferLeader(partition, transferLeader);
+            } catch (Exception e) {
+                log.error("fireTransferLeader", e);
+            }
+        });
+    }
+
+    /**
+     * Send a move-partition (data migration) message.
+     *
+     * @param partition     the source partition
+     * @param movePartition the target partition, including the key range
+     */
+    protected void fireMovePartition(Metapb.Partition partition, MovePartition movePartition) {
+        log.info("fireMovePartition partition: {} -> {}",
+                 partition, movePartition);
+
+        instructionListeners.forEach(cmd -> {
+            try {
+                cmd.movePartition(partition, movePartition);
+            } catch (Exception e) {
+                log.error("fireMovePartition", e);
+            }
+        });
+    }
+
+    protected void fireCleanPartition(Metapb.Partition partition, CleanPartition cleanPartition) {
+        log.info("fireCleanPartition partition: {} -> just keep : {}->{}",
+                 partition.getId(), cleanPartition.getKeyStart(), cleanPartition.getKeyEnd());
+
+        instructionListeners.forEach(cmd -> {
+            try {
+                cmd.cleanPartition(partition, cleanPartition);
+            } catch (Exception e) {
+                log.error("fireCleanPartition", e);
+            }
+        });
+    }
+
+    protected void fireChangePartitionKeyRange(Metapb.Partition partition,
+                                               PartitionKeyRange partitionKeyRange) {
+        log.info("fireChangePartitionKeyRange partition: {}-{} -> key range {}",
+                 partition.getGraphName(), partition.getId(), partitionKeyRange);
+
+        instructionListeners.forEach(cmd -> {
+            try {
+                cmd.changePartitionKeyRange(partition, partitionKeyRange);
+            } catch (Exception e) {
+                log.error("fireChangePartitionKeyRange", e);
+            }
+        });
+    }
+ + /** + * Handles graph (partition) move tasks + * + * @param task + */ + public synchronized void handleMoveTask(MetaTask.Task task) throws PDException { + var taskInfoMeta = storeService.getTaskInfoMeta(); + var partition = task.getPartition(); + var movePartition = task.getMovePartition(); + + MetaTask.Task pdMetaTask = taskInfoMeta.getMovePartitionTask(partition.getGraphName(), + movePartition.getTargetPartition().getId(), + partition.getId()); + + log.info("report move task, graph:{}, pid : {}->{}, state: {}", + task.getPartition().getGraphName(), + task.getPartition().getId(), task.getMovePartition().getTargetPartition().getId(), + task.getState()); + + // only proceed if the task still exists (it may already have been handled after an earlier failure) + if (pdMetaTask != null) { + var newTask = pdMetaTask.toBuilder().setState(task.getState()).build(); + taskInfoMeta.updateMovePartitionTask(newTask); + + List<MetaTask.Task> subTasks = taskInfoMeta.scanMoveTask(partition.getGraphName()); + + var finished = subTasks.stream().allMatch(t -> + t.getState() == MetaTask.TaskState.Task_Success || + t.getState() == MetaTask.TaskState.Task_Failure); + + if (finished) { + var allSuccess = subTasks.stream().allMatch( + t -> t.getState() == MetaTask.TaskState.Task_Success); + if (allSuccess) { + log.info("graph:{} combine task all success!", partition.getGraphName()); + handleMoveTaskAllSuccess(subTasks, partition.getGraphName(), taskInfoMeta); + } else { + log.info("graph:{} combine task failed!", partition.getGraphName()); + handleMoveTaskIfFailed(partition.getGraphName(), taskInfoMeta); + } + } + } + } + + /** + * When all move sub-tasks succeed: + * 1. send the instruction to clean up the source partitions + * 2. bring the targets online, update the key ranges and the graph partition count + * 3. delete the move task; the job is done + * + * @param subTasks all move sub tasks + * @param graphName graph name + * @param taskInfoMeta task info meta + * @throws PDException if writing to the db fails + */ + private void handleMoveTaskAllSuccess(List<MetaTask.Task> subTasks, String graphName, + TaskInfoMeta taskInfoMeta) throws PDException { + + var targetPartitionIds = new HashSet<Integer>(); + var targetPartitions = new ArrayList<Metapb.Partition>(); + var deleteFlags = + subTasks.stream().map(task -> task.getMovePartition().getTargetPartition().getId()) + .collect(Collectors.toSet()); + + for (MetaTask.Task subTask : subTasks) { + var source = subTask.getPartition(); + var targetPartition = subTask.getMovePartition().getTargetPartition(); + // skip targets that were already handled + if (!targetPartitionIds.contains(targetPartition.getId())) { + // update the key range + var old = getPartitionById(targetPartition.getGraphName(), targetPartition.getId()); + var newPartition = Metapb.Partition.newBuilder(old) + .setStartKey(targetPartition.getStartKey()) + .setEndKey(targetPartition.getEndKey()) + .setState(Metapb.PartitionState.PState_Normal) + .build(); + // update before sending the key-range change, so a store that misses the partition can still look it up on PD + updatePartition(List.of(newPartition)); + targetPartitions.add(newPartition); + + // send the key-range change message + PartitionKeyRange partitionKeyRange = PartitionKeyRange.newBuilder() + .setPartitionId(old.getId()) + .setKeyStart(targetPartition.getStartKey()) + .setKeyEnd(targetPartition.getEndKey()) + .build(); + // notify the store + fireChangePartitionKeyRange( + old.toBuilder().setState(Metapb.PartitionState.PState_Normal).build(), + partitionKeyRange); + + // Bring the target online; the source may in theory have been deleted, so leave it alone
updatePartitionState(newPartition.getGraphName(), newPartition.getId(), + Metapb.PartitionState.PState_Normal); + + targetPartitionIds.add(targetPartition.getId()); + } + + CleanPartition cleanPartition = CleanPartition.newBuilder() + .setKeyStart(source.getStartKey()) + .setKeyEnd(source.getEndKey()) + .setCleanType(CleanType.CLEAN_TYPE_EXCLUDE_RANGE) + // the target partition only needs its data cleaned, not the partition itself deleted + .setDeletePartition(!deleteFlags.contains(source.getId())) + .build(); + + log.info("pd clean data: {}-{}, key range:{}-{}, type:{}, delete partition:{}", + source.getGraphName(), + source.getId(), + cleanPartition.getKeyStart(), + cleanPartition.getKeyEnd(), + CleanType.CLEAN_TYPE_EXCLUDE_RANGE, + cleanPartition.getDeletePartition()); + + // clean up the data of the moved source partition + fireCleanPartition(source, cleanPartition); + } + + // the key ranges were already updated above, both locally and for clients + // updatePartition(targetPartitions); + + // update the target partitions' state; the source may have been deleted, so leave it alone + targetPartitions.forEach(p -> { + try { + updatePartitionState(p.getGraphName(), p.getId(), + Metapb.PartitionState.PState_Normal); + } catch (PDException e) { + throw new RuntimeException(e); + } + }); + + partitionMeta.reload(); + + // update the graph's partition count + var graph = getGraph(graphName).toBuilder() + .setPartitionCount(targetPartitionIds.size()) + .build(); + updateGraph(graph); + + // the transaction is complete + taskInfoMeta.removeMoveTaskPrefix(graphName); + } + + /** + * If any shrink (merge) sub-task failed, roll back the merge: + * 1. clean the original target partitions, deleting the data that was migrated in + * 2. bring the source/target partitions back online + * 3. delete the task; the job is done + * + * @param graphName graph name + * @param taskInfoMeta task info meta + * @throws PDException if writing to the db fails + */ + private void handleMoveTaskIfFailed(String graphName, TaskInfoMeta taskInfoMeta) throws + PDException { + // send clean-up tasks for the target partitions to roll them back + var targetPartitionIds = new HashSet<Integer>(); + for (var metaTask : taskInfoMeta.scanMoveTask(graphName)) { + + var source = metaTask.getPartition(); + // bring the source back online + updatePartitionState(source.getGraphName(), source.getId(), + Metapb.PartitionState.PState_Normal); + var movedPartition = metaTask.getMovePartition().getTargetPartition(); + + if (targetPartitionIds.contains(movedPartition.getId())) { + continue; + } + + var targetPartition = getPartitionById(graphName, movedPartition.getId()); + + CleanPartition cleanPartition = CleanPartition.newBuilder() + .setKeyStart(targetPartition.getStartKey()) + .setKeyEnd(targetPartition.getEndKey()) + .setCleanType(CleanType.CLEAN_TYPE_KEEP_RANGE) + .setDeletePartition(false) + .build(); + fireCleanPartition(targetPartition, cleanPartition); + targetPartitionIds.add(targetPartition.getId()); + + // bring the target back online + updatePartitionState(targetPartition.getGraphName(), targetPartition.getId(), + Metapb.PartitionState.PState_Normal); + } + // clear the task list + taskInfoMeta.removeMoveTaskPrefix(graphName); + } + + /** + * Handles clean tasks + * + * @param task clean task + */ + public void handleCleanPartitionTask(MetaTask.Task task) { + log.info("clean task {} -{}, key range:{}~{}, report: {}", + task.getPartition().getGraphName(), + task.getPartition().getId(), + task.getCleanPartition().getKeyStart(), + task.getCleanPartition().getKeyEnd(), + task.getState() + ); + + // Retry on failure?
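+ // A possible follow-up (not implemented in this patch): on Task_Failure, re-send + // fireCleanPartition with the same key range, bounded by a retry limit.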
+ } + + public synchronized void handleSplitTask(MetaTask.Task task) throws PDException { + + var taskInfoMeta = storeService.getTaskInfoMeta(); + var partition = task.getPartition(); + + MetaTask.Task pdMetaTask = + taskInfoMeta.getSplitTask(partition.getGraphName(), partition.getId()); + + log.info("report split task, graph:{}, pid : {}, state: {}", + task.getPartition().getGraphName(), + task.getPartition().getId(), task.getState()); + + if (pdMetaTask != null) { + var newTask = pdMetaTask.toBuilder().setState(task.getState()).build(); + taskInfoMeta.updateSplitTask(newTask); + + List<MetaTask.Task> subTasks = taskInfoMeta.scanSplitTask(partition.getGraphName()); + + var finished = subTasks.stream().allMatch(t -> + t.getState() == MetaTask.TaskState.Task_Success || + t.getState() == MetaTask.TaskState.Task_Failure); + + if (finished) { + var allSuccess = subTasks.stream().allMatch( + t -> t.getState() == MetaTask.TaskState.Task_Success); + if (allSuccess) { + log.info("graph:{} split task all success!", partition.getGraphName()); + handleSplitTaskAllSuccess(subTasks, partition.getGraphName(), taskInfoMeta); + } else { + handleSplitTaskIfFailed(subTasks, partition.getGraphName(), taskInfoMeta); + } + } + } + } + + private void handleSplitTaskAllSuccess(List<MetaTask.Task> subTasks, String graphName, + TaskInfoMeta taskInfoMeta) + throws PDException { + + int addedPartitions = 0; + var partitions = new ArrayList<Metapb.Partition>(); + for (MetaTask.Task subTask : subTasks) { + var source = subTask.getPartition(); + var newPartition = subTask.getSplitPartition().getNewPartitionList().get(0); + + // send the key-range change message + PartitionKeyRange partitionKeyRange = PartitionKeyRange.newBuilder() + .setPartitionId(source.getId()) + .setKeyStart(newPartition.getStartKey()) + .setKeyEnd(newPartition.getEndKey()) + .build(); + // notify the store + fireChangePartitionKeyRange(source, partitionKeyRange); + // The targets are brought online below (PState_Normal); the source may in theory have been deleted, so leave it alone
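+ // Split semantics (as implemented below): entry 0 of getNewPartitionList() keeps the + // source partition id with a narrowed key range, while the remaining entries are brand-new + // partitions, hence addedPartitions grows by newPartitionCount - 1 per sub-task.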
+ + CleanPartition cleanPartition = CleanPartition.newBuilder() + .setKeyStart(newPartition.getStartKey()) + .setKeyEnd(newPartition.getEndKey()) + .setCleanType(CleanType.CLEAN_TYPE_KEEP_RANGE) + // the partition only needs its data cleaned, not the partition itself deleted + .setDeletePartition(false) + .build(); + + log.info("pd clean data: {}-{}, key range:{}-{}, type:{}, delete partition:{}", + source.getGraphName(), + source.getId(), + cleanPartition.getKeyStart(), + cleanPartition.getKeyEnd(), + cleanPartition.getCleanType(), + cleanPartition.getDeletePartition()); + + fireCleanPartition(source, cleanPartition); + + // update the partition states + for (var sp : subTask.getSplitPartition().getNewPartitionList()) { + partitions.add( + sp.toBuilder().setState(Metapb.PartitionState.PState_Normal).build()); + } + + addedPartitions += subTask.getSplitPartition().getNewPartitionCount() - 1; + } + + updatePartition(partitions); + partitionMeta.reload(); + + var graph = getGraph(graphName); + + // set partition count + if (pdConfig.getConfigService().getPartitionCount() != + storeService.getShardGroups().size()) { + pdConfig.getConfigService().setPartitionCount(storeService.getShardGroups().size()); + log.info("set the partition count of config server to {}", + storeService.getShardGroups().size()); + } + + // update the graph's partition count + var newGraph = graph.toBuilder() + .setPartitionCount(graph.getPartitionCount() + addedPartitions) + .build(); + updateGraph(newGraph); + + // the transaction is complete + taskInfoMeta.removeSplitTaskPrefix(graphName); + } + + private void handleSplitTaskIfFailed(List<MetaTask.Task> subTasks, String graphName, + TaskInfoMeta taskInfoMeta) + throws PDException { + for (var metaTask : subTasks) { + var splitPartitions = metaTask.getSplitPartition().getNewPartitionList(); + for (int i = 1; i < splitPartitions.size(); i++) { + var split = splitPartitions.get(i); + CleanPartition cleanPartition = CleanPartition.newBuilder() + .setKeyStart(split.getStartKey()) + .setKeyEnd(split.getEndKey()) + .setCleanType(CleanType.CLEAN_TYPE_EXCLUDE_RANGE) + .setDeletePartition(true) + .build(); + + fireCleanPartition(split, cleanPartition); + } + + // set the partition state back to normal + var partition = metaTask.getPartition(); + updatePartitionState(partition.getGraphName(), partition.getId(), + Metapb.PartitionState.PState_Normal); + } + // clear the task list + taskInfoMeta.removeSplitTaskPrefix(graphName); + } + + /** + * Receives partition-change messages: + * updates the graph state and triggers partition-change notifications + */ + protected void onPartitionChanged(Metapb.Partition old, Metapb.Partition partition) { + log.info("onPartitionChanged partition: {}", partition); + if (old != null && old.getState() != partition.getState()) { + // the state changed; recompute the graph's state (the worst partition state wins) + Metapb.PartitionState state = Metapb.PartitionState.PState_Normal; + for (Metapb.Partition pt : partitionMeta.getPartitions(partition.getGraphName())) { + if (pt.getState().getNumber() > state.getNumber()) { + state = pt.getState(); + } + } + try { + updateGraphState(partition.getGraphName(), state); + } catch (PDException e) { + log.error("onPartitionChanged", e); + } + + } + + statusListeners.forEach(e -> { + e.onPartitionChanged(old, partition); + }); + } + + protected void onPartitionRemoved(Metapb.Partition partition) { + log.info("onPartitionRemoved partition: {}", partition); + statusListeners.forEach(e -> { + e.onPartitionRemoved(partition); + }); + } + + /** + * The PD leader has changed; the cached data needs to be reloaded + */ + @Override + public void onRaftLeaderChanged() { + log.info("Partition service reload cache from rocksdb, due to leader change"); + try { + partitionMeta.reload(); + } catch (PDException e) { + log.error("Partition meta reload exception", e); + }
+ } + + /** + * A partition's state has changed; the change needs to propagate to the graph and the cluster + * + * @param graph + * @param partId + * @param state + */ + public void onPartitionStateChanged(String graph, int partId, + Metapb.PartitionState state) throws PDException { + updatePartitionState(graph, partId, state); + } + + /** + * A shard's state has changed; the change needs to propagate to the partition, the graph and the cluster + * + * @param graph + * @param partId + * @param state + */ + public void onShardStateChanged(String graph, int partId, Metapb.PartitionState state) { + + } + + /** + * Sends a RocksDB compaction message + * + * @param partId + * @param tableName + */ + public void fireDbCompaction(int partId, String tableName) { + + try { + for (Metapb.Graph graph : getGraphs()) { + Metapb.Partition partition = + partitionMeta.getPartitionById(graph.getGraphName(), partId); + + DbCompaction dbCompaction = DbCompaction.newBuilder() + .setTableName(tableName) + .build(); + instructionListeners.forEach(cmd -> { + try { + cmd.dbCompaction(partition, dbCompaction); + } catch (Exception e) { + log.error("fireDbCompaction", e); + } + }); + } + } catch (PDException e) { + log.error("fireDbCompaction", e); + } + + } + + public void updateShardGroupCache(Metapb.ShardGroup group) { + partitionMeta.getPartitionCache().updateShardGroup(group); + } +} diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/PartitionStatusListener.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/PartitionStatusListener.java new file mode 100644 index 0000000000..933822f109 --- /dev/null +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/PartitionStatusListener.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd; + +import org.apache.hugegraph.pd.grpc.Metapb; + +/** + * Partition status listener + */ +public interface PartitionStatusListener { + void onPartitionChanged(Metapb.Partition partition, Metapb.Partition newPartition); + + void onPartitionRemoved(Metapb.Partition partition); +} diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/RegistryService.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/RegistryService.java new file mode 100644 index 0000000000..223889cadf --- /dev/null +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/RegistryService.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd; + +import org.apache.hugegraph.pd.common.PDException; +import org.apache.hugegraph.pd.config.PDConfig; +import org.apache.hugegraph.pd.grpc.discovery.NodeInfo; +import org.apache.hugegraph.pd.grpc.discovery.NodeInfos; +import org.apache.hugegraph.pd.grpc.discovery.Query; +import org.apache.hugegraph.pd.meta.DiscoveryMetaStore; +import org.apache.hugegraph.pd.meta.MetadataFactory; + +public class RegistryService { + private final PDConfig pdConfig; + private final DiscoveryMetaStore meta; + + public RegistryService(PDConfig config) { + this.pdConfig = config; + meta = MetadataFactory.newDiscoveryMeta(config); + } + + public void register(NodeInfo nodeInfo, int outTimes) throws PDException { + meta.register(nodeInfo, outTimes); + } + + public NodeInfos getNodes(Query query) { + return meta.getNodes(query); + } +} diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/ShardGroupStatusListener.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/ShardGroupStatusListener.java new file mode 100644 index 0000000000..342a335ff6 --- /dev/null +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/ShardGroupStatusListener.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd; + +import org.apache.hugegraph.pd.grpc.Metapb; + +public interface ShardGroupStatusListener { + void onShardListChanged(Metapb.ShardGroup shardGroup, Metapb.ShardGroup newShardGroup); + + void onShardListOp(Metapb.ShardGroup shardGroup); +} diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/StoreMonitorDataService.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/StoreMonitorDataService.java new file mode 100644 index 0000000000..7be54db0cf --- /dev/null +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/StoreMonitorDataService.java @@ -0,0 +1,266 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd; + +import java.time.Instant; +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import org.apache.hugegraph.pd.common.PDException; +import org.apache.hugegraph.pd.config.PDConfig; +import org.apache.hugegraph.pd.grpc.Metapb; +import org.apache.hugegraph.pd.meta.MetadataKeyHelper; +import org.springframework.stereotype.Service; + +import lombok.extern.slf4j.Slf4j; + + +@Slf4j +@Service +public class StoreMonitorDataService { + private static final String MONITOR_DATA_PREFIX = "SMD"; + private final PDConfig pdConfig; + private final KvService kvService; + /** + * the last timestamp of the store monitor data, + * used to determine the gap since the store's last heartbeat. + */ + private final Map<Long, Long> lastStoreStateTimestamp; + + + public StoreMonitorDataService(PDConfig pdConfig) { + this.pdConfig = pdConfig; + this.kvService = new KvService(pdConfig); + this.lastStoreStateTimestamp = new HashMap<>(); + } + + /** + * save the store stats + * + * @param storeStats + */ + public void saveMonitorData(Metapb.StoreStats storeStats) throws PDException { + long storeId = storeStats.getStoreId(); + // load the latest store timestamp at startup or after a leader change + if (!lastStoreStateTimestamp.containsKey(storeId)) { + long lastTimestamp = getLatestStoreMonitorDataTimeStamp(storeId); + log.debug("store id : {}, last timestamp :{}", storeId, lastTimestamp); + lastStoreStateTimestamp.put(storeId, lastTimestamp); + } + + long current = System.currentTimeMillis() / 1000; + long interval = this.pdConfig.getStore().getMonitorInterval(); + + // the monitor interval has elapsed + if (current - lastStoreStateTimestamp.getOrDefault(storeId, 0L) >= interval) { + saveMonitorDataToDb(storeStats, current); + log.debug("store id: {}, system info:{}", storeId, + debugMonitorInfo(storeStats.getSystemMetricsList())); + lastStoreStateTimestamp.put(storeId, current); + } + } + + /** + * save a snapshot of the store status + * + * @param storeStats store status + * @param ts timestamp + * @throws PDException + */ + private void saveMonitorDataToDb(Metapb.StoreStats storeStats, long ts) throws PDException { + String key = getMonitorDataKey(storeStats.getStoreId(), ts); + log.debug("store id: {}, save monitor data, ts:{}, key:{}", storeStats.getStoreId(), + ts, key); + kvService.put(key, extractMetricsFromStoreStatus(storeStats)); + } + + public String debugMonitorInfo(List<Metapb.RecordPair> systemInfo) { + StringBuilder sb = new StringBuilder(); + sb.append("["); + for (Metapb.RecordPair pair : systemInfo) { + sb.append(pair.getKey()); + sb.append(":"); + sb.append(pair.getValue()); + sb.append(","); + } + sb.append("]"); + return sb.toString(); + }
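+ + // Key/value layout written by this service (illustrative example, derived from + // getMonitorDataKey and extractMetricsFromStoreStatus below): + // key "SMD/<storeId>/<ts>" -> value "\"cpu\":13,\"mem\":75"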
+ /** + * get the historical monitor data for a store, in the range (start, end) + * + * @param storeId store id + * @param start range start + * @param end range end + * @return map of key -> serialized store stats + */ + public Map<String, String> getStoreMonitorData(long storeId, long start, long end) throws + PDException { + log.debug("get monitor data, store id:{}, start:{}, end:{}", + storeId, + getMonitorDataKey(storeId, start), + getMonitorDataKey(storeId, end)); + return kvService.scanRange(getMonitorDataKey(storeId, start), + getMonitorDataKey(storeId, end)); + } + + /** + * for the api service + * + * @param storeId + * @return + * @throws PDException + */ + public List<Map<String, Long>> getStoreMonitorData(long storeId) throws PDException { + List<Map<String, Long>> result = new LinkedList<>(); + long current = System.currentTimeMillis() / 1000; + long start = current - this.pdConfig.getStore().getRetentionPeriod(); + + try { + for (Map.Entry<String, String> entry : getStoreMonitorData(storeId, start, + current).entrySet()) { + String[] arr = + entry.getKey().split(String.valueOf(MetadataKeyHelper.getDelimiter())); + Map<String, Long> map = new HashMap<>(); + long timestamp = Long.parseLong(arr[arr.length - 1]); + map.put("ts", timestamp); + for (String pair : entry.getValue().split(",")) { + String[] p = pair.split(":"); + if (p.length == 2) { + map.put(p[0], Long.parseLong(p[1])); + } + } + result.add(map); + } + result.sort((o1, o2) -> o1.get("ts").compareTo(o2.get("ts"))); + } catch (PDException e) { + log.error(e.getMessage()); + } + return result; + } + + /** + * for the api service: export as text + * + * @param storeId + * @return + * @throws PDException + */ + public String getStoreMonitorDataText(long storeId) throws PDException { + + List<Map<String, Long>> result = getStoreMonitorData(storeId); + StringBuilder sb = new StringBuilder(); + if (!result.isEmpty()) { + DateTimeFormatter dtf = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); + Map<String, Long> lastRow = result.get(result.size() - 1); + List<String> columns = new ArrayList<>(); + // construct the columns: ts + sorted metric keys + columns.add("ts"); + columns.addAll(lastRow.keySet().stream() + .filter(x -> !"ts".equals(x)) + .sorted() + .collect(Collectors.toList())); + sb.append(String.join(",", columns).replace("\"", "")).append("\r\n");
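+ // Example output (illustrative values): + // ts,cpu,mem + // 2024-03-11 17:03:49,13,75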
+ if ("ts".equals(key)) { + // format ts + sb.append(dtf.format( + LocalDateTime.ofInstant(Instant.ofEpochSecond(row.get(key)), + ZoneId.systemDefault()))); + continue; + } else { + sb.append(",").append(row.getOrDefault(key, 0L)); + } + } + sb.append("\r\n"); + } + } + return sb.toString(); + } + + /** + * remove the monitor data of the store that before till(not include) + * + * @param storeId store id + * @param till expire time + * @return affect rows + */ + public int removeExpiredMonitorData(long storeId, long till) throws PDException { + String keyStart = getMonitorDataKey(storeId, 1); + String keyEnd = getMonitorDataKey(storeId, till); + int records = 0; + for (String key : kvService.scanRange(keyStart, keyEnd).keySet()) { + kvService.delete(key); + log.debug("remove monitor data, key: {}", key); + records += 1; + } + return records; + } + + /** + * get the latest timestamp of the store monitor data + * + * @param storeId + * @return timestamp(by seconds) + */ + public long getLatestStoreMonitorDataTimeStamp(long storeId) { + long maxId = 0L; + long current = System.currentTimeMillis() / 1000; + long start = current - this.pdConfig.getStore().getMonitorInterval(); + String keyStart = getMonitorDataKey(storeId, start); + String keyEnd = getMonitorDataKey(storeId, current); + try { + for (String key : kvService.scanRange(keyStart, keyEnd).keySet()) { + String[] arr = key.split(String.valueOf(MetadataKeyHelper.getDelimiter())); + maxId = Math.max(maxId, Long.parseLong(arr[arr.length - 1])); + } + } catch (PDException e) { + } + return maxId; + } + + private String getMonitorDataKey(long storeId, long ts) { + String builder = MONITOR_DATA_PREFIX + + MetadataKeyHelper.getDelimiter() + + storeId + + MetadataKeyHelper.getDelimiter() + + ts; + return builder; + } + + private String extractMetricsFromStoreStatus(Metapb.StoreStats storeStats) { + List list = new ArrayList<>(); + for (Metapb.RecordPair pair : storeStats.getSystemMetricsList()) { + list.add("\"" + pair.getKey() + "\":" + pair.getValue()); + } + return String.join(",", list); + } +} diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/StoreNodeService.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/StoreNodeService.java new file mode 100644 index 0000000000..bfd4f88032 --- /dev/null +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/StoreNodeService.java @@ -0,0 +1,1074 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hugegraph.pd; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Random; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hugegraph.pd.common.KVPair; +import org.apache.hugegraph.pd.common.PDException; +import org.apache.hugegraph.pd.config.PDConfig; +import org.apache.hugegraph.pd.grpc.Metapb; +import org.apache.hugegraph.pd.grpc.Metapb.GraphMode; +import org.apache.hugegraph.pd.grpc.Metapb.GraphModeReason; +import org.apache.hugegraph.pd.grpc.Metapb.GraphState; +import org.apache.hugegraph.pd.grpc.Pdpb; +import org.apache.hugegraph.pd.grpc.Pdpb.CacheResponse; +import org.apache.hugegraph.pd.grpc.pulse.ConfChangeType; +import org.apache.hugegraph.pd.meta.MetadataFactory; +import org.apache.hugegraph.pd.meta.MetadataKeyHelper; +import org.apache.hugegraph.pd.meta.StoreInfoMeta; +import org.apache.hugegraph.pd.meta.TaskInfoMeta; + +import com.google.gson.Gson; + +import lombok.extern.slf4j.Slf4j; + + +/** + * HgStore registration and keep-alive management + */ +@Slf4j +public class StoreNodeService { + + private static final Long STORE_HEART_BEAT_INTERVAL = 30000L; + private static final String graphSpaceConfPrefix = "HUGEGRAPH/hg/GRAPHSPACE/CONF/"; + // Store status listeners + private final List<StoreStatusListener> statusListeners; + private final List<ShardGroupStatusListener> shardGroupStatusListeners; + private final StoreInfoMeta storeInfoMeta; + private final TaskInfoMeta taskInfoMeta; + private final Random random = new Random(System.currentTimeMillis()); + private final KvService kvService; + private final ConfigService configService; + private final PDConfig pdConfig; + private PartitionService partitionService; + private final Runnable quotaChecker = () -> { + try { + getQuota(); + } catch (Exception e) { + log.error("obtaining and sending graph space quota information with error: ", e); + } + }; + private Metapb.ClusterStats clusterStats; + + public StoreNodeService(PDConfig config) { + this.pdConfig = config; + storeInfoMeta = MetadataFactory.newStoreInfoMeta(pdConfig); + taskInfoMeta = MetadataFactory.newTaskInfoMeta(pdConfig); + shardGroupStatusListeners = Collections.synchronizedList(new ArrayList<>()); + statusListeners = Collections.synchronizedList(new ArrayList<>()); + clusterStats = Metapb.ClusterStats.newBuilder() + .setState(Metapb.ClusterState.Cluster_Not_Ready) + .setTimestamp(System.currentTimeMillis()) + .build(); + kvService = new KvService(pdConfig); + configService = new ConfigService(pdConfig); + }
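+ + // init() wires a PartitionStatusListener so that a partition state change is folded into + // its shard group's state and then into the overall cluster state (the worst state wins).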
log.error("onPartitionChanged exception: ", e); + } + } + } + + @Override + public void onPartitionRemoved(Metapb.Partition partition) { + + } + }); + } + + /** + * 集群是否准备就绪 + * + * @return + */ + public boolean isOK() { + return this.clusterStats.getState().getNumber() < + Metapb.ClusterState.Cluster_Offline.getNumber(); + } + + /** + * Store注册,记录Store的ip地址,首次注册需要生成store_ID + * + * @param store + */ + public Metapb.Store register(Metapb.Store store) throws PDException { + if (store.getId() == 0) { + // 初始注册,生成新id,保证Id不重复。 + store = newStoreNode(store); + } + + if (!storeInfoMeta.storeExists(store.getId())) { + log.error("Store id {} does not belong to this PD, address = {}", store.getId(), + store.getAddress()); + // storeId不存在,抛出异常 + throw new PDException(Pdpb.ErrorType.STORE_ID_NOT_EXIST_VALUE, + String.format("Store id %d doest not exist.", store.getId())); + } + + // 如果store状态为Tombstone拒绝注册。 + Metapb.Store lastStore = storeInfoMeta.getStore(store.getId()); + if (lastStore.getState() == Metapb.StoreState.Tombstone) { + log.error("Store id {} has been removed, Please reinitialize, address = {}", + store.getId(), store.getAddress()); + // storeId不存在,抛出异常 + throw new PDException(Pdpb.ErrorType.STORE_HAS_BEEN_REMOVED_VALUE, + String.format("Store id %d has been removed. %s", store.getId(), + store.getAddress())); + } + + // offline或者up,或者在初始激活列表中,自动上线 + Metapb.StoreState storeState = lastStore.getState(); + if (storeState == Metapb.StoreState.Offline || storeState == Metapb.StoreState.Up + || inInitialStoreList(store)) { + storeState = Metapb.StoreState.Up; + } else { + storeState = Metapb.StoreState.Pending; + } + + store = Metapb.Store.newBuilder(lastStore) + .setAddress(store.getAddress()) + .setRaftAddress(store.getRaftAddress()) + .setDataVersion(store.getDataVersion()) + .setDeployPath(store.getDeployPath()) + .setVersion(store.getVersion()) + .setDataPath(store.getDataPath()) + .setState(storeState).setCores(store.getCores()) + .clearLabels().addAllLabels(store.getLabelsList()) + .setLastHeartbeat(System.currentTimeMillis()).build(); + + long current = System.currentTimeMillis(); + boolean raftChanged = false; + // 上线状态的Raft Address 发生了变更 + if (!Objects.equals(lastStore.getRaftAddress(), store.getRaftAddress()) && + storeState == Metapb.StoreState.Up) { + // 时间间隔太短,而且raft有变更,则认为是无效的store + if (current - lastStore.getLastHeartbeat() < STORE_HEART_BEAT_INTERVAL * 0.8) { + throw new PDException(Pdpb.ErrorType.STORE_PROHIBIT_DUPLICATE_VALUE, + String.format("Store id %d may be duplicate. 
addr: %s", + store.getId(), store.getAddress())); + } else if (current - lastStore.getLastHeartbeat() > STORE_HEART_BEAT_INTERVAL * 1.2) { + // 认为发生了变更 + raftChanged = true; + } else { + // 等待下次注册 + return Metapb.Store.newBuilder(store).setId(0L).build(); + } + } + + // 存储store信息 + storeInfoMeta.updateStore(store); + if (storeState == Metapb.StoreState.Up) { + // 更新store 活跃状态 + storeInfoMeta.keepStoreAlive(store); + onStoreStatusChanged(store, Metapb.StoreState.Offline, Metapb.StoreState.Up); + checkStoreStatus(); + } + + // 等store信息保存后,再发送变更 + if (raftChanged) { + onStoreRaftAddressChanged(store); + } + + log.info("Store register, id = {} {}", store.getId(), store); + return store; + } + + private boolean inInitialStoreList(Metapb.Store store) { + return this.pdConfig.getInitialStoreMap().containsKey(store.getAddress()); + } + + /** + * 产生一个新的store对象 + * + * @param store + * @return + * @throws PDException + */ + private synchronized Metapb.Store newStoreNode(Metapb.Store store) throws PDException { + long id = random.nextLong() & Long.MAX_VALUE; + while (id == 0 || storeInfoMeta.storeExists(id)) { + id = random.nextLong() & Long.MAX_VALUE; + } + store = Metapb.Store.newBuilder(store) + .setId(id) + .setState(Metapb.StoreState.Pending) + .setStartTimestamp(System.currentTimeMillis()).build(); + storeInfoMeta.updateStore(store); + return store; + } + + /** + * 根据store_id返回Store信息 + * + * @param id + * @return + * @throws PDException + */ + public Metapb.Store getStore(long id) throws PDException { + Metapb.Store store = storeInfoMeta.getStore(id); + if (store == null) { + throw new PDException(Pdpb.ErrorType.STORE_ID_NOT_EXIST_VALUE, + String.format("Store id %x doest not exist.", id)); + } + return store; + } + + /** + * 更新Store信息,检测Store状态的变化,通知到Hugestore + */ + public synchronized Metapb.Store updateStore(Metapb.Store store) throws PDException { + log.info("updateStore storeId: {}, address: {}, state: {}", store.getId(), + store.getAddress(), store.getState()); + Metapb.Store lastStore = storeInfoMeta.getStore(store.getId()); + if (lastStore == null) { + return null; + } + Metapb.Store.Builder builder = + Metapb.Store.newBuilder(lastStore).clearLabels().clearStats(); + store = builder.mergeFrom(store).build(); + if (store.getState() == Metapb.StoreState.Tombstone) { + List activeStores = getStores(); + if (lastStore.getState() == Metapb.StoreState.Up + && activeStores.size() - 1 < pdConfig.getMinStoreCount()) { + throw new PDException(Pdpb.ErrorType.LESS_ACTIVE_STORE_VALUE, + "The number of active stores is less then " + + pdConfig.getMinStoreCount()); + } + } + + storeInfoMeta.updateStore(store); + if (store.getState() != Metapb.StoreState.Unknown && + store.getState() != lastStore.getState()) { + // 如果希望将store下线 + if (store.getState() == Metapb.StoreState.Exiting) { + if (lastStore.getState() == Metapb.StoreState.Exiting) { + //如果已经是下线中的状态,则不作进一步处理 + return lastStore; + } + + List activeStores = this.getActiveStores(); + Map storeMap = new HashMap<>(); + activeStores.forEach(s -> { + storeMap.put(s.getId(), s); + }); + //如果store已经离线,直接从活跃中删除,如果store在线,暂时不从活跃中删除,等把状态置成Tombstone的时候再删除 + if (!storeMap.containsKey(store.getId())) { + log.info("updateStore removeActiveStores store {}", store.getId()); + storeInfoMeta.removeActiveStore(store); + } + storeTurnoff(store); + } else if (store.getState() == Metapb.StoreState.Offline) { //监控到store已经离线,从活跃中删除 + storeInfoMeta.removeActiveStore(store); + } else if (store.getState() == Metapb.StoreState.Tombstone) { + // 
+ // the state changed and the store is shutting down: adjust the shardGroups and migrate the replicas + log.info("updateStore removeActiveStores store {}", store.getId()); + storeInfoMeta.removeActiveStore(store); + // the store goes offline + storeTurnoff(store); + } else if (store.getState() == Metapb.StoreState.Up) { + storeInfoMeta.keepStoreAlive(store); + checkStoreStatus(); + } + onStoreStatusChanged(lastStore, lastStore.getState(), store.getState()); + } + return store; + } + + /** + * The store was shut down: reassign the shards of its shardGroups + * + * @param store + * @throws PDException + */ + public synchronized void storeTurnoff(Metapb.Store store) throws PDException { + // iterate over the ShardGroups and reassign their shards + for (Metapb.ShardGroup group : getShardGroupsByStore(store.getId())) { + Metapb.ShardGroup.Builder builder = Metapb.ShardGroup.newBuilder(group); + builder.clearShards(); + group.getShardsList().forEach(shard -> { + if (shard.getStoreId() != store.getId()) { + builder.addShards(shard); + } + }); + reallocShards(builder.build()); + } + } + + /** + * Returns the stores for the given graph name; if graphName is empty, returns all stores + * + * @throws PDException + */ + public List<Metapb.Store> getStores() throws PDException { + return storeInfoMeta.getStores(null); + } + + public List<Metapb.Store> getStores(String graphName) throws PDException { + return storeInfoMeta.getStores(graphName); + } + + public List<Metapb.Store> getStoreStatus(boolean isActive) throws PDException { + return storeInfoMeta.getStoreStatus(isActive); + } + + public List<Metapb.ShardGroup> getShardGroups() throws PDException { + return storeInfoMeta.getShardGroups(); + } + + public Metapb.ShardGroup getShardGroup(int groupId) throws PDException { + return storeInfoMeta.getShardGroup(groupId); + } + + public List<Metapb.Shard> getShardList(int groupId) throws PDException { + var shardGroup = getShardGroup(groupId); + if (shardGroup != null) { + return shardGroup.getShardsList(); + } + return new ArrayList<>(); + } + + public List<Metapb.ShardGroup> getShardGroupsByStore(long storeId) throws PDException { + List<Metapb.ShardGroup> shardGroups = new ArrayList<>(); + storeInfoMeta.getShardGroups().forEach(shardGroup -> { + shardGroup.getShardsList().forEach(shard -> { + if (shard.getStoreId() == storeId) { + shardGroups.add(shardGroup); + } + }); + }); + return shardGroups; + } + + /** + * Returns the active stores + * + * @param graphName + * @return + * @throws PDException + */ + public List<Metapb.Store> getActiveStores(String graphName) throws PDException { + return storeInfoMeta.getActiveStores(graphName); + } + + public List<Metapb.Store> getActiveStores() throws PDException { + return storeInfoMeta.getActiveStores(); + } + + public List<Metapb.Store> getTombStores() throws PDException { + List<Metapb.Store> stores = new ArrayList<>(); + for (Metapb.Store store : this.getStores()) { + if (store.getState() == Metapb.StoreState.Tombstone) { + stores.add(store); + } + } + return stores; + } + + public long removeStore(Long storeId) throws PDException { + return storeInfoMeta.removeStore(storeId); + } + + /** + * Allocates stores for a partition; the graph's configuration decides how many peers to allocate. + * All shards are allocated in one pass and the ShardGroup objects are saved + * (the stores do not change afterwards; this runs only once) + */ + public synchronized List<Metapb.Shard> allocShards(Metapb.Graph graph, int partId) throws + PDException { + // multiple graphs share the raft groups, so shard allocation depends only on the partitionId
+ // a graph can choose its number of partitions based on data size, but the total cannot exceed the number of raft groups + if (storeInfoMeta.getShardGroup(partId) == null) { + // fetch the active stores + // the store assignment is derived from the partitionId + List<Metapb.Store> stores = storeInfoMeta.getActiveStores(); + + if (stores.isEmpty()) { + throw new PDException(Pdpb.ErrorType.NO_ACTIVE_STORE_VALUE, + "There is no online store"); + } + + if (stores.size() < pdConfig.getMinStoreCount()) { + throw new PDException(Pdpb.ErrorType.LESS_ACTIVE_STORE_VALUE, + "The number of active stores is less than " + + pdConfig.getMinStoreCount()); + } + + int shardCount = pdConfig.getPartition().getShardCount(); + shardCount = Math.min(shardCount, stores.size()); + // two shards cannot elect a leader, and the count must not be 0 + if (shardCount == 2 || shardCount < 1) { + shardCount = 1; + } + + // create all ShardGroups in one pass, so the initial groupIDs are ordered and easy to read + for (int groupId = 0; groupId < pdConfig.getConfigService().getPartitionCount(); + groupId++) { + int storeIdx = groupId % stores.size(); // store assignment rule, simplified to modulo + List<Metapb.Shard> shards = new ArrayList<>(); + for (int i = 0; i < shardCount; i++) { + Metapb.Shard shard = + Metapb.Shard.newBuilder().setStoreId(stores.get(storeIdx).getId()) + .setRole(i == 0 ? Metapb.ShardRole.Leader : + Metapb.ShardRole.Follower) + .build(); + shards.add(shard); + storeIdx = (storeIdx + 1) >= stores.size() ? 0 : ++storeIdx; // pick sequentially + } + + Metapb.ShardGroup group = Metapb.ShardGroup.newBuilder() + .setId(groupId) + .setState(Metapb.PartitionState.PState_Normal) + .addAllShards(shards).build(); + + // new group + storeInfoMeta.updateShardGroup(group); + partitionService.updateShardGroupCache(group); + onShardGroupStatusChanged(group, group); + log.info("alloc shard group: id {}", groupId); + } + } + return storeInfoMeta.getShardGroup(partId).getShardsList(); + }
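+ + // Illustrative walk-through (assuming 3 active stores and shardCount = 3): group 0 is + // placed on stores[0,1,2] with its leader on stores[0], group 1 on stores[1,2,0] with its + // leader on stores[1], and so on, round-robin from groupId % stores.size().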
+ + /** + * Reassigns the shards according to the graph's shard_count + * and sends the change-shard instruction + */ + public synchronized List<Metapb.Shard> reallocShards(Metapb.ShardGroup shardGroup) throws + PDException { + List<Metapb.Store> stores = storeInfoMeta.getActiveStores(); + + if (stores.isEmpty()) { + throw new PDException(Pdpb.ErrorType.NO_ACTIVE_STORE_VALUE, + "There is no online store"); + } + + if (stores.size() < pdConfig.getMinStoreCount()) { + throw new PDException(Pdpb.ErrorType.LESS_ACTIVE_STORE_VALUE, + "The number of active stores is less than " + + pdConfig.getMinStoreCount()); + } + + int shardCount = pdConfig.getPartition().getShardCount(); + shardCount = Math.min(shardCount, stores.size()); + if (shardCount == 2 || shardCount < 1) { + // two shards cannot elect a leader, and the count must not be 0 + shardCount = 1; + } + + List<Metapb.Shard> shards = new ArrayList<>(shardGroup.getShardsList()); + + if (shardCount > shards.size()) { + // shards need to be added + log.info("reallocShards ShardGroup {}, add shards from {} to {}", + shardGroup.getId(), shards.size(), shardCount); + int storeIdx = shardGroup.getId() % stores.size(); // store assignment rule, simplified to modulo + for (int addCount = shardCount - shards.size(); addCount > 0; ) { + // skip stores that already hold a shard of this group + if (!isStoreInShards(shards, stores.get(storeIdx).getId())) { + Metapb.Shard shard = Metapb.Shard.newBuilder() + .setStoreId(stores.get(storeIdx).getId()) + .build(); + shards.add(shard); + addCount--; + } + storeIdx = (storeIdx + 1) >= stores.size() ? 0 : ++storeIdx; // pick sequentially + } + } else if (shardCount < shards.size()) { + // shards need to be removed + log.info("reallocShards ShardGroup {}, remove shards from {} to {}", + shardGroup.getId(), shards.size(), shardCount); + + int subCount = shards.size() - shardCount; + Iterator<Metapb.Shard> iterator = shards.iterator(); + while (iterator.hasNext() && subCount > 0) { + if (iterator.next().getRole() != Metapb.ShardRole.Leader) { + iterator.remove(); + subCount--; + } + } + } else { + return shards; + } + + Metapb.ShardGroup group = Metapb.ShardGroup.newBuilder(shardGroup) + .clearShards() + .addAllShards(shards).build(); + storeInfoMeta.updateShardGroup(group); + partitionService.updateShardGroupCache(group); + // change shard group + onShardGroupStatusChanged(shardGroup, group); + + var partitions = partitionService.getPartitionById(shardGroup.getId()); + if (!partitions.isEmpty()) { + // send one message; a change-shard is independent of partition/graph + partitionService.fireChangeShard(partitions.get(0), shards, + ConfChangeType.CONF_CHANGE_TYPE_ADJUST); + } + + log.info("reallocShards ShardGroup {}, shards: {}", group.getId(), group.getShardsList()); + return shards; + } + + /** + * Allocates group shards according to the number of partitions + * + * @param groups list of (partition id, count) + * @return total group count + */ + public synchronized int splitShardGroups(List<KVPair<Integer, Integer>> groups) throws + PDException { + int sum = groups.stream().map(pair -> pair.getValue()).reduce(0, Integer::sum); + // too many shard groups for the cluster + if (sum > getActiveStores().size() * pdConfig.getPartition().getMaxShardsPerStore()) { + throw new PDException(Pdpb.ErrorType.Too_Many_Partitions_Per_Store_VALUE, + "can't satisfy target shard group count"); + } + + partitionService.splitPartition(groups); + + return sum; + } + + /** + * Checks whether the given store already holds one of the shards + */ + private boolean isStoreInShards(List<Metapb.Shard> shards, long storeId) { + AtomicBoolean exist = new AtomicBoolean(false); + shards.forEach(s -> { + if (s.getStoreId() == storeId) { + exist.set(true); + } + }); + return exist.get(); + }
+ + /** + * Updates the shard group and cache, and sends the shard-group change message + * + * @param groupId shard group id + * @param shards shard list + * @param version term version, ignored if less than 0 + * @param confVersion conf version, ignored if less than 0 + * @return the previous shard group, or null if the group does not exist + */ + public synchronized Metapb.ShardGroup updateShardGroup(int groupId, List<Metapb.Shard> shards, + long version, long confVersion) throws + PDException { + Metapb.ShardGroup group = this.storeInfoMeta.getShardGroup(groupId); + + if (group == null) { + return null; + } + + var builder = Metapb.ShardGroup.newBuilder(group); + if (version >= 0) { + builder.setVersion(version); + } + + if (confVersion >= 0) { + builder.setConfVer(confVersion); + } + + var newGroup = builder.clearShards().addAllShards(shards).build(); + + storeInfoMeta.updateShardGroup(newGroup); + partitionService.updateShardGroupCache(newGroup); + onShardGroupStatusChanged(group, newGroup); + log.info("Raft {} updateShardGroup {}", groupId, newGroup); + return group; + } + + /** + * Tells the stores to rebuild the shard group + * + * @param groupId raft group id + * @param shards shard list: if empty, the corresponding partition engine is deleted + */ + public void shardGroupOp(int groupId, List<Metapb.Shard> shards) throws PDException { + + var shardGroup = getShardGroup(groupId); + + if (shardGroup == null) { + return; + } + + var newGroup = shardGroup.toBuilder().clearShards().addAllShards(shards).build(); + if (shards.isEmpty()) { + var partitions = partitionService.getPartitionById(groupId); + for (var partition : partitions) { + partitionService.removePartition(partition.getGraphName(), groupId); + } + deleteShardGroup(groupId); + } + + onShardGroupOp(newGroup); + } + + /** + * Deletes a shard group + * + * @param groupId shard group id + */ + public synchronized void deleteShardGroup(int groupId) throws PDException { + Metapb.ShardGroup group = this.storeInfoMeta.getShardGroup(groupId); + if (group != null) { + storeInfoMeta.deleteShardGroup(groupId); + } + + onShardGroupStatusChanged(group, null); + + // fix the store's partition count (partition merges can shrink it)
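+ // e.g. (illustrative) after a merge leaves group ids 0..3 while the configured partition + // count is still 12, the count is reset to maxGroupId + 1 = 4 below.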
+ var shardGroups = getShardGroups(); + if (shardGroups != null) { + var partitionCount = pdConfig.getConfigService().getPDConfig().getPartitionCount(); + var maxGroupId = + getShardGroups().stream().map(Metapb.ShardGroup::getId).max(Integer::compareTo); + if (maxGroupId.get() < partitionCount) { + pdConfig.getConfigService().setPartitionCount(maxGroupId.get() + 1); + } + } + } + + public synchronized void updateShardGroupState(int groupId, Metapb.PartitionState state) throws + PDException { + Metapb.ShardGroup shardGroup = storeInfoMeta.getShardGroup(groupId) + .toBuilder() + .setState(state).build(); + storeInfoMeta.updateShardGroup(shardGroup); + partitionService.updateShardGroupCache(shardGroup); + } + + /** + * Receives a store's heartbeat + * + * @param storeStats + * @throws PDException + */ + public Metapb.ClusterStats heartBeat(Metapb.StoreStats storeStats) throws PDException { + this.storeInfoMeta.updateStoreStats(storeStats); + Metapb.Store lastStore = this.getStore(storeStats.getStoreId()); + if (lastStore == null) { + // the store does not exist + throw new PDException(Pdpb.ErrorType.STORE_ID_NOT_EXIST_VALUE, + String.format("Store id %d does not exist.", + storeStats.getStoreId())); + } + if (lastStore.getState() == Metapb.StoreState.Tombstone) { + throw new PDException(Pdpb.ErrorType.STORE_HAS_BEEN_REMOVED_VALUE, + String.format( + "Store id %d is useless since its state is Tombstone", + storeStats.getStoreId())); + } + Metapb.Store nowStore; + // the store is being decommissioned + if (lastStore.getState() == Metapb.StoreState.Exiting) { + List<Metapb.Store> activeStores = this.getActiveStores(); + Map<Long, Metapb.Store> storeMap = new HashMap<>(); + activeStores.forEach(store -> { + storeMap.put(store.getId(), store); + }); + // a partition count of 0 on the exiting store means migration has finished and it can + // go offline; a non-zero count means migration is still in progress and we have to wait + if (storeStats.getPartitionCount() > 0 && + storeMap.containsKey(storeStats.getStoreId())) { + nowStore = Metapb.Store.newBuilder(lastStore) + .setStats(storeStats) + .setLastHeartbeat(System.currentTimeMillis()) + .setState(Metapb.StoreState.Exiting).build(); + this.storeInfoMeta.updateStore(nowStore); + return this.clusterStats; + } else { + nowStore = Metapb.Store.newBuilder(lastStore) + .setStats(storeStats) + .setLastHeartbeat(System.currentTimeMillis()) + .setState(Metapb.StoreState.Tombstone).build(); + this.storeInfoMeta.updateStore(nowStore); + storeInfoMeta.removeActiveStore(nowStore); + return this.clusterStats; + } + } + + if (lastStore.getState() == Metapb.StoreState.Pending) { + nowStore = Metapb.Store.newBuilder(lastStore) + .setStats(storeStats) + .setLastHeartbeat(System.currentTimeMillis()) + .setState(Metapb.StoreState.Pending).build(); + this.storeInfoMeta.updateStore(nowStore); + return this.clusterStats; + } else { + if (lastStore.getState() == Metapb.StoreState.Offline) { + this.updateStore( + Metapb.Store.newBuilder(lastStore).setState(Metapb.StoreState.Up).build()); + } + nowStore = Metapb.Store.newBuilder(lastStore) + .setState(Metapb.StoreState.Up) + .setStats(storeStats) + .setLastHeartbeat(System.currentTimeMillis()).build(); + this.storeInfoMeta.updateStore(nowStore); + this.storeInfoMeta.keepStoreAlive(nowStore); + this.checkStoreStatus(); + return this.clusterStats; + } + } + + public synchronized Metapb.ClusterStats updateClusterStatus(Metapb.ClusterState state) { + this.clusterStats = clusterStats.toBuilder().setState(state).build(); + return this.clusterStats; + } + + public Metapb.ClusterStats updateClusterStatus(Metapb.PartitionState state) { + Metapb.ClusterState cstate = Metapb.ClusterState.Cluster_OK; + switch (state) {
+ case PState_Normal: + cstate = Metapb.ClusterState.Cluster_OK; + break; + case PState_Warn: + cstate = Metapb.ClusterState.Cluster_Warn; + break; + case PState_Fault: + cstate = Metapb.ClusterState.Cluster_Fault; + break; + case PState_Offline: + cstate = Metapb.ClusterState.Cluster_Offline; + break; + } + return updateClusterStatus(cstate); + } + + public Metapb.ClusterStats getClusterStats() { + return this.clusterStats; + } + + /** + * Checks the cluster's health: + * whether the number of active machines exceeds the minimum threshold, + * and whether more than half of each partition's shards are online + */ + public synchronized void checkStoreStatus() { + Metapb.ClusterStats.Builder builder = Metapb.ClusterStats.newBuilder() + .setState(Metapb.ClusterState.Cluster_OK); + try { + List<Metapb.Store> activeStores = this.getActiveStores(); + if (activeStores.size() < pdConfig.getMinStoreCount()) { + builder.setState(Metapb.ClusterState.Cluster_Not_Ready); + builder.setMessage("The number of active stores is " + activeStores.size() + + ", less than pd.initial-store-count:" + + pdConfig.getMinStoreCount()); + } + Map<Long, Metapb.Store> storeMap = new HashMap<>(); + activeStores.forEach(store -> { + storeMap.put(store.getId(), store); + }); + + if (builder.getState() == Metapb.ClusterState.Cluster_OK) { + // check that more than half of each partition's shards are online + for (Metapb.ShardGroup group : this.getShardGroups()) { + int count = 0; + for (Metapb.Shard shard : group.getShardsList()) { + count += storeMap.containsKey(shard.getStoreId()) ? 1 : 0; + } + if (count * 2 < group.getShardsList().size()) { + builder.setState(Metapb.ClusterState.Cluster_Not_Ready); + builder.setMessage( + "Less than half of the shards are active, partitionId is " + group.getId()); + break; + } + } + } + + } catch (PDException e) { + log.error("StoreNodeService checkStoreStatus exception", e); + } + this.clusterStats = builder.setTimestamp(System.currentTimeMillis()).build(); + if (this.clusterStats.getState() != Metapb.ClusterState.Cluster_OK) { + log.error("The cluster is not ready, {}", this.clusterStats); + } + }
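+ + // Majority rule used above and in checkStoreCanOffline below (illustrative): a group with + // 3 shards needs at least 2 of them on active stores, because count * 2 < size marks the + // cluster as not ready.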
+ + public void addStatusListener(StoreStatusListener listener) { + statusListeners.add(listener); + } + + protected void onStoreRaftAddressChanged(Metapb.Store store) { + log.info("onStoreRaftAddressChanged storeId = {}, new raft addr: {}", store.getId(), + store.getRaftAddress()); + statusListeners.forEach(e -> { + e.onStoreRaftChanged(store); + }); + } + + public void addShardGroupStatusListener(ShardGroupStatusListener listener) { + shardGroupStatusListeners.add(listener); + } + + protected void onStoreStatusChanged(Metapb.Store store, Metapb.StoreState old, + Metapb.StoreState status) { + log.info("onStoreStatusChanged storeId = {} from {} to {}", store.getId(), old, status); + statusListeners.forEach(e -> { + e.onStoreStatusChanged(store, old, status); + }); + } + + protected void onShardGroupStatusChanged(Metapb.ShardGroup group, Metapb.ShardGroup newGroup) { + log.info("onShardGroupStatusChanged, groupId: {}, from {} to {}", group.getId(), group, + newGroup); + shardGroupStatusListeners.forEach(e -> e.onShardListChanged(group, newGroup)); + } + + protected void onShardGroupOp(Metapb.ShardGroup shardGroup) { + log.info("onShardGroupOp, group id: {}, shard group:{}", shardGroup.getId(), shardGroup); + shardGroupStatusListeners.forEach(e -> e.onShardListOp(shardGroup)); + } + + /** + * Checks whether the given store can go offline: + * not if the number of remaining active machines would drop below the minimum threshold, + * and not if any partition would be left with half or fewer of its shards online + */ + public boolean checkStoreCanOffline(Metapb.Store currentStore) { + try { + long currentStoreId = currentStore.getId(); + List<Metapb.Store> activeStores = this.getActiveStores(); + Map<Long, Metapb.Store> storeMap = new HashMap<>(); + activeStores.forEach(store -> { + if (store.getId() != currentStoreId) { + storeMap.put(store.getId(), store); + } + }); + + if (storeMap.size() < pdConfig.getMinStoreCount()) { + return false; + } + + // check that more than half of each partition's shards would stay online + for (Metapb.ShardGroup group : this.getShardGroups()) { + int count = 0; + for (Metapb.Shard shard : group.getShardsList()) { + long storeId = shard.getStoreId(); + count += storeMap.containsKey(storeId) ? 1 : 0; + } + if (count * 2 < group.getShardsList().size()) { + return false; + } + } + } catch (PDException e) { + log.error("StoreNodeService checkStoreCanOffline exception", e); + return false; + } + + return true; + } + + /** + * Compacts RocksDB on the stores + * + * @param groupId + * @param tableName + * @return + */ + public synchronized void shardGroupsDbCompaction(int groupId, String tableName) throws + PDException { + + // tell all stores to compact RocksDB + partitionService.fireDbCompaction(groupId, tableName); + // TODO: how should exceptions be handled? + } + + public Map<String, Boolean> getQuota() throws PDException { + List<Metapb.Graph> graphs = partitionService.getGraphs(); + String delimiter = String.valueOf(MetadataKeyHelper.DELIMITER); + HashMap<String, Long> storages = new HashMap<>(); + for (Metapb.Graph g : graphs) { + String graphName = g.getGraphName(); + String[] splits = graphName.split(delimiter); + if (!graphName.endsWith("/g") || splits.length < 2) { + continue; + } + String graphSpace = splits[0]; + storages.putIfAbsent(graphSpace, 0L); + List<Metapb.Store> stores = getStores(graphName); + long dataSize = 0; + for (Metapb.Store store : stores) { + List<Metapb.GraphStats> gss = store.getStats().getGraphStatsList(); + for (Metapb.GraphStats gs : gss) { + boolean nameEqual = graphName.equals(gs.getGraphName()); + boolean roleEqual = Metapb.ShardRole.Leader.equals(gs.getRole()); + if (nameEqual && roleEqual) { + dataSize += gs.getApproximateSize(); + } + } + } + Long size = storages.get(graphSpace); + size += dataSize; + storages.put(graphSpace, size); + + }
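+ + // Graph names are assumed to follow "<graphSpace>/<graph>/g" here, so splits[0] is the + // graph space; e.g. a leader replica of "DEFAULT/hugegraph/g" is accounted against the + // "DEFAULT" quota.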
+ GraphState.Builder stateBuilder = GraphState.newBuilder() + .setMode(GraphMode.ReadOnly) + .setReason( + GraphModeReason.Quota); + for (Metapb.Graph g : graphs) { + String graphName = g.getGraphName(); + String[] splits = graphName.split(delimiter); + if (!graphName.endsWith("/g") || splits.length < 2) { + continue; + } + String graphSpace = splits[0]; + Metapb.GraphState gsOld = g.getGraphState(); + GraphMode gmOld = gsOld != null ? gsOld.getMode() : GraphMode.ReadWrite; + GraphMode gmNew = limits.get( + graphSpace) ? GraphMode.ReadOnly : GraphMode.ReadWrite; + if (gmOld == null || gmOld.getNumber() != gmNew.getNumber()) { + stateBuilder.setMode(gmNew); + if (gmNew.getNumber() == GraphMode.ReadOnly.getNumber()) { + stateBuilder.setReason(GraphModeReason.Quota); + } + GraphState gsNew = stateBuilder.build(); + Metapb.Graph newGraph = g.toBuilder().setGraphState(gsNew) + .build(); + partitionService.updateGraph(newGraph); + statusListeners.forEach(listener -> { + listener.onGraphChange(newGraph, gsOld, gsNew); + }); + } + } + + return limits; + } + + public Runnable getQuotaChecker() { + return quotaChecker; + } + + public TaskInfoMeta getTaskInfoMeta() { + return taskInfoMeta; + } + + public StoreInfoMeta getStoreInfoMeta() { + return storeInfoMeta; + } + + /** + * Get the Leader shard of a partition + * + * @param partition + * @param initIdx + * @return + */ + public Metapb.Shard getLeader(Metapb.Partition partition, int initIdx) { + Metapb.Shard leader = null; + try { + var shardGroup = this.getShardGroup(partition.getId()); + for (Metapb.Shard shard : shardGroup.getShardsList()) { + if (shard.getRole() == Metapb.ShardRole.Leader) { + leader = shard; + } + } + } catch (Exception e) { + log.error("get leader error: group id:{}, error: {}", + partition.getId(), e.getMessage()); + } + return leader; + } + + public CacheResponse getCache() throws PDException { + + List<Metapb.Store> stores = getStores(); + List<Metapb.ShardGroup> groups = getShardGroups(); + List<Metapb.Graph> graphs = partitionService.getGraphs(); + CacheResponse cache = CacheResponse.newBuilder().addAllGraphs(graphs) + .addAllShards(groups) + .addAllStores(stores) + .build(); + return cache; + } +} diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/StoreStatusListener.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/StoreStatusListener.java new file mode 100644 index 0000000000..a5b96cf307 --- /dev/null +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/StoreStatusListener.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hugegraph.pd; + +import org.apache.hugegraph.pd.grpc.Metapb; + +public interface StoreStatusListener { + + void onStoreStatusChanged(Metapb.Store store, Metapb.StoreState old, + Metapb.StoreState status); + + void onGraphChange(Metapb.Graph graph, Metapb.GraphState stateOld, + Metapb.GraphState stateNew); + + void onStoreRaftChanged(Metapb.Store store); +} diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/TaskScheduleService.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/TaskScheduleService.java new file mode 100644 index 0000000000..9ec8152a0d --- /dev/null +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/TaskScheduleService.java @@ -0,0 +1,853 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.PriorityQueue; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; + +import org.apache.hugegraph.pd.common.KVPair; +import org.apache.hugegraph.pd.common.PDException; +import org.apache.hugegraph.pd.config.PDConfig; +import org.apache.hugegraph.pd.grpc.MetaTask; +import org.apache.hugegraph.pd.grpc.Metapb; +import org.apache.hugegraph.pd.grpc.Pdpb; +import org.apache.hugegraph.pd.meta.TaskInfoMeta; +import org.apache.hugegraph.pd.raft.RaftEngine; + +import lombok.extern.slf4j.Slf4j; + + +/** + * Task scheduling service: periodically checks the status of stores, resources and + * partitions, migrates data in time, and handles failed nodes. + * 1. Monitor whether stores are offline + * 2. Monitor whether partition replicas are correct + * 3. Monitor whether partitions' working mode is correct + * 4. Monitor whether partitions need to split, and whether splits have completed + */ +@Slf4j +public class TaskScheduleService { + private static final String BALANCE_SHARD_KEY = "BALANCE_SHARD_KEY"; + private final long TurnOffAndBalanceInterval = 30 * 60 * 1000; // dynamic rebalancing is allowed only 30 minutes after a store goes offline + private final long BalanceLeaderInterval = 30 * 1000; // interval between leader rebalances + private final PDConfig pdConfig; + private final long clusterStartTime; + private final StoreNodeService storeService; + private final PartitionService partitionService; + private final ScheduledExecutorService executor; + private final TaskInfoMeta taskInfoMeta; + private final StoreMonitorDataService storeMonitorDataService; + private final KvService kvService; + private final LogService logService; + // sort by value first, then by key + private final Comparator<KVPair<Long, Integer>> kvPairComparatorAsc = (o1, o2) -> { + if (o1.getValue().equals(o2.getValue())) { + return o1.getKey().compareTo(o2.getKey()); + } + return
o1.getValue().compareTo(o2.getValue()); + }; + // sort by value in descending order, then by key in descending order + private final Comparator<KVPair<Long, Integer>> kvPairComparatorDesc = (o1, o2) -> { + if (o1.getValue().equals(o2.getValue())) { + return o2.getKey().compareTo(o1.getKey()); + } + return o2.getValue().compareTo(o1.getValue()); + }; + private long lastStoreTurnoffTime = 0; + private long lastBalanceLeaderTime = 0; + + + public TaskScheduleService(PDConfig config, StoreNodeService storeService, + PartitionService partitionService) { + this.pdConfig = config; + this.storeService = storeService; + this.partitionService = partitionService; + this.taskInfoMeta = new TaskInfoMeta(config); + this.logService = new LogService(pdConfig); + this.storeMonitorDataService = new StoreMonitorDataService(pdConfig); + this.clusterStartTime = System.currentTimeMillis(); + this.kvService = new KvService(pdConfig); + this.executor = new ScheduledThreadPoolExecutor(16); + } + + public void init() { + executor.scheduleWithFixedDelay(() -> { + try { + patrolStores(); + } catch (Throwable e) { + log.error("patrolStores exception: ", e); + } + + }, 60, 60, TimeUnit.SECONDS); + executor.scheduleWithFixedDelay(() -> { + try { + patrolPartitions(); + balancePartitionLeader(false); + balancePartitionShard(); + } catch (Throwable e) { + log.error("patrolPartitions exception: ", e); + } + }, pdConfig.getPatrolInterval(), pdConfig.getPatrolInterval(), TimeUnit.SECONDS); + executor.scheduleWithFixedDelay(() -> { + if (isLeader()) { + kvService.clearTTLData(); + } + }, 1000, 1000, TimeUnit.MILLISECONDS); + executor.scheduleWithFixedDelay( + () -> { + if (isLeader()) { + // run the quota checker periodically + storeService.getQuotaChecker().run(); + } + }, 2, 30, + TimeUnit.SECONDS); + // clean expired monitor data each 10 minutes, delay 3min. + if (isLeader() && this.pdConfig.getStore().isMonitorDataEnabled()) { + executor.scheduleAtFixedRate(() -> { + Long expTill = System.currentTimeMillis() / 1000 - + this.pdConfig.getStore().getRetentionPeriod(); + log.debug("monitor data keys before " + expTill + " will be deleted"); + int records = 0; + try { + for (Metapb.Store store : storeService.getStores()) { + int cnt = + this.storeMonitorDataService.removeExpiredMonitorData(store.getId(), + expTill); + log.debug("store id :{}, records:{}", store.getId(), cnt); + records += cnt; + } + } catch (PDException e) { + throw new RuntimeException(e); + } + log.debug(String.format("%d records have been deleted", records)); + }, 180, 600, TimeUnit.SECONDS); + } + + storeService.addStatusListener(new StoreStatusListener() { + @Override + public void onStoreStatusChanged(Metapb.Store store, Metapb.StoreState old, + Metapb.StoreState status) { + if (status == Metapb.StoreState.Tombstone) { + lastStoreTurnoffTime = System.currentTimeMillis(); + } + + if (status == Metapb.StoreState.Up) { + executor.schedule(() -> { + try { + // after a store comes online, wait one minute before rebalancing leaders + balancePartitionLeader(false); + } catch (PDException e) { + log.error("balancePartitionLeader exception: ", e); + } + }, BalanceLeaderInterval, TimeUnit.MILLISECONDS); + + } + } + + @Override + public void onGraphChange(Metapb.Graph graph, + Metapb.GraphState stateOld, + Metapb.GraphState stateNew) { + + } + + @Override + public void onStoreRaftChanged(Metapb.Store store) { + + } + }); + } + + public void shutDown() { + executor.shutdownNow(); + } + + private boolean isLeader() { + return RaftEngine.getInstance().isLeader(); + } + + /** + * Patrol all stores, checking whether they are online and have sufficient storage space + */ + public List<Metapb.Store> patrolStores() throws PDException { + if (!isLeader()) { + return null; + } + + List<Metapb.Store> changedStores = new ArrayList<>(); + // Check the stores' online status
List<Metapb.Store> stores = storeService.getStores(""); + Map<Long, Metapb.Store> activeStores = storeService.getActiveStores("") + .stream().collect( + Collectors.toMap(Metapb.Store::getId, t -> t)); + for (Metapb.Store store : stores) { + Metapb.Store changeStore = null; + if ((store.getState() == Metapb.StoreState.Up + || store.getState() == Metapb.StoreState.Unknown) + && !activeStores.containsKey(store.getId())) { + // not online, mark the store Offline + changeStore = Metapb.Store.newBuilder(store) + .setState(Metapb.StoreState.Offline) + .build(); + + } else if ((store.getState() == Metapb.StoreState.Exiting && + !activeStores.containsKey(store.getId())) || + (store.getState() == Metapb.StoreState.Offline && + (System.currentTimeMillis() - store.getLastHeartbeat() > + pdConfig.getStore().getMaxDownTime() * 1000) && + (System.currentTimeMillis() - clusterStartTime > + pdConfig.getStore().getMaxDownTime() * 1000))) { + // manually set to Exiting, or Offline for longer than the allowed down time: + // mark the store Tombstone, guarded by the checkStoreCanOffline check + if (storeService.checkStoreCanOffline(store)) { + changeStore = Metapb.Store.newBuilder(store) + .setState(Metapb.StoreState.Tombstone).build(); + this.logService.insertLog(LogService.NODE_CHANGE, + LogService.TASK, changeStore); + log.info("patrolStores store {} Tombstone", changeStore.getId()); + } + } + if (changeStore != null) { + storeService.updateStore(changeStore); + changedStores.add(changeStore); + } + } + return changedStores; + } + + + /** + * Patrol all partitions and check whether the replica count is correct + */ + public List<Metapb.Partition> patrolPartitions() throws PDException { + if (!isLeader()) { + return null; + } + + // replica count mismatch, reallocate shards + for (Metapb.ShardGroup group : storeService.getShardGroups()) { + if (group.getShardsCount() != pdConfig.getPartition().getShardCount()) { + storeService.reallocShards(group); + // prevent the subsequent balance-partition-shard task from running immediately + kvService.put(BALANCE_SHARD_KEY, "DOING", 180 * 1000); + } + } + // check whether the shards are online + Map<Long, Metapb.Store> tombStores = storeService.getTombStores().stream().collect( + Collectors.toMap(Metapb.Store::getId, t -> t)); + + var partIds = new HashSet<Integer>(); + + for (var pair : tombStores.entrySet()) { + for (var partition : partitionService.getPartitionByStore(pair.getValue())) { + if (partIds.contains(partition.getId())) { + continue; + } + partIds.add(partition.getId()); + + storeService.storeTurnoff(pair.getValue()); + partitionService.shardOffline(partition, pair.getValue().getId()); + } + + } + + return null; + } + + + /** + * Balance the number of partitions across stores. + * Dynamic rebalancing is allowed only half an hour after stores turn Up. + */ + public synchronized Map<Integer, KVPair<Long, Long>> balancePartitionShard() throws + PDException { + log.info("balancePartitions starting, isLeader:{}", isLeader()); + + if (!isLeader()) { + return null; + } + + if (System.currentTimeMillis() - lastStoreTurnoffTime < TurnOffAndBalanceInterval) { + return null; // rebalancing is allowed only half an hour after a store goes offline + } + + + int activeStores = storeService.getActiveStores().size(); + if (activeStores == 0) { + log.warn("balancePartitionShard no active stores, skip balancePartitionShard"); + return null; + } + + // Avoid frequent invocation: when the replica count changes, the shard list is adjusted + // and partitions are rebalanced at the same time; duplicate commands would make the + // result unpredictable and, in the worst case, delete partitions.
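+ // For example, with 12 partitions and shard count 3 there are 36 shards in total; across + // 5 active stores the code below computes averageCount = 7 and remainder = 1, so the + // store with the most shards targets 8 shards and every other store targets 7.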
+ if (Objects.equals(kvService.get(BALANCE_SHARD_KEY), "DOING")) { + return null; + } + + int totalShards = pdConfig.getConfigService().getPartitionCount() * + pdConfig.getPartition().getShardCount(); + int averageCount = totalShards / activeStores; + int remainder = totalShards % activeStores; + + // count the partitions on each store: StoreId -> (PartitionID -> ShardRole) + Map<Long, Map<Integer, Metapb.ShardRole>> partitionMap = new HashMap<>(); + storeService.getActiveStores().forEach(store -> { + partitionMap.put(store.getId(), new HashMap<>()); + }); + + // a Learner shard means a migration is in progress; do not submit duplicate tasks + AtomicReference<Boolean> isLearner = new AtomicReference<>(false); + partitionService.getPartitions().forEach(partition -> { + + try { + storeService.getShardList(partition.getId()).forEach(shard -> { + Long storeId = shard.getStoreId(); + // check whether the shard is a Learner or the partition is in an abnormal state + if (shard.getRole() == Metapb.ShardRole.Learner + || partition.getState() != Metapb.PartitionState.PState_Normal) { + isLearner.set(true); + } + if (partitionMap.containsKey(storeId)) { + partitionMap.get(storeId).put(partition.getId(), shard.getRole()); + } + }); + } catch (PDException e) { + log.error("get partition {} shard list error:{}.", partition.getId(), + e.getMessage()); + } + }); + + if (isLearner.get()) { + log.warn("a migration is in progress, skip this balancePartitionShard task"); + return null; + } + + // sort the stores by shard count, from high to low + List<KVPair<Long, Integer>> sortedList = new ArrayList<>(); + partitionMap.forEach((storeId, shards) -> { + sortedList.add(new KVPair<>(storeId, shards.size())); + }); + // list sorted from largest to smallest + sortedList.sort(((o1, o2) -> o2.getValue().compareTo(o1.getValue()))); + // max-heap + PriorityQueue<KVPair<Long, Integer>> maxHeap = new PriorityQueue<>(sortedList.size(), + (o1, o2) -> o2.getValue() + .compareTo( + o1.getValue())); + + // committedIndex of each replica + Map<Integer, Map<Long, Long>> committedIndexMap = partitionService.getCommittedIndexStats(); + // partition ID -> (source store ID, target store ID) + Map<Integer, KVPair<Long, Long>> movedPartitions = new HashMap<>(); + // Remove excess shards: iterate the stores from most to fewest shards; the remainder is + // assigned to the stores with more shards first, to reduce the chance of migration + for (int index = 0; index < sortedList.size(); index++) { + long storeId = sortedList.get(index).getKey(); + if (!partitionMap.containsKey(storeId)) { + log.error("cannot find storeId {} in partitionMap", storeId); + return null; + } + Map<Integer, Metapb.ShardRole> shards = partitionMap.get(storeId); + int targetCount = index < remainder ? averageCount + 1 : averageCount; + // remove the excess shards and record the source store ID: prefer non-Leader shards, + // and move each partition only once
+ if (shards.size() > targetCount) { + int movedCount = shards.size() - targetCount; + log.info( + "balancePartitionShard storeId {}, shardsSize {}, targetCount {}, " + + "moveCount {}", + storeId, shards.size(), targetCount, movedCount); + for (Iterator<Integer> iterator = shards.keySet().iterator(); + movedCount > 0 && iterator.hasNext(); ) { + Integer id = iterator.next(); + + if (!movedPartitions.containsKey(id)) { + log.info("store {}, shard of partition {} can be moved", storeId, id); + movedPartitions.put(id, new KVPair<>(storeId, 0L)); + movedCount--; + } + } + } else if (shards.size() < targetCount) { + int addCount = targetCount - shards.size(); + log.info( + "balancePartitionShard storeId {}, shardsSize {}, targetCount {}, " + + "addCount {}", + storeId, shards.size(), targetCount, addCount); + maxHeap.add(new KVPair<>(storeId, addCount)); + } + } + + if (movedPartitions.size() == 0) { + log.warn( + "movedPartitions is empty, totalShards:{} averageCount:{} remainder:{} " + + "sortedList:{}", + totalShards, averageCount, remainder, sortedList); + } + Iterator<Map.Entry<Integer, KVPair<Long, Long>>> moveIterator = + movedPartitions.entrySet().iterator(); + + while (moveIterator.hasNext()) { + if (maxHeap.size() == 0) { + break; + } + Map.Entry<Integer, KVPair<Long, Long>> moveEntry = moveIterator.next(); + int partitionId = moveEntry.getKey(); + long sourceStoreId = moveEntry.getValue().getKey(); + + List<KVPair<Long, Integer>> tmpList = new ArrayList<>(maxHeap.size()); + while (maxHeap.size() > 0) { + KVPair<Long, Integer> pair = maxHeap.poll(); + long destStoreId = pair.getKey(); + boolean destContains = false; + if (partitionMap.containsKey(destStoreId)) { + destContains = partitionMap.get(destStoreId).containsKey(partitionId); + } + // if the target store already contains this partition, try the next store + if (!destContains) { + moveEntry.getValue().setValue(pair.getKey()); + log.info( + "balancePartitionShard will move partition {} from store {} to store " + + "{}", + moveEntry.getKey(), + moveEntry.getValue().getKey(), + moveEntry.getValue().getValue()); + if (pair.getValue() > 1) { + pair.setValue(pair.getValue() - 1); + tmpList.add(pair); + } + break; + } + tmpList.add(pair); + } + maxHeap.addAll(tmpList); + } + + kvService.put(BALANCE_SHARD_KEY, "DOING", 180 * 1000); + + // start the migration + movedPartitions.forEach((partId, storePair) -> { + // both the source and target store IDs are non-zero + if (storePair.getKey() > 0 && storePair.getValue() > 0) { + partitionService.movePartitionsShard(partId, storePair.getKey(), + storePair.getValue()); + } else { + log.warn("balancePartitionShard key or value is zero, partId:{} storePair:{}", + partId, storePair); + } + }); + return movedPartitions; + } + + /** + * Balance the number of partition Leaders across stores + */ + public synchronized Map<Integer, Long> balancePartitionLeader(boolean immediately) throws + PDException { + Map<Integer, Long> results = new HashMap<>(); + + if (!isLeader()) { + return results; + } + + if (!immediately && + System.currentTimeMillis() - lastBalanceLeaderTime < BalanceLeaderInterval) { + return results; + } + lastBalanceLeaderTime = System.currentTimeMillis(); + + List<Metapb.ShardGroup> shardGroups = storeService.getShardGroups(); + + // abort while a split or scale-in task is running + var taskMeta = storeService.getTaskInfoMeta(); + if (taskMeta.hasSplitTaskDoing() || taskMeta.hasMoveTaskDoing()) { + throw new PDException(1001, "split or combine task is processing, please try later!"); + } + + // abort while data migration is in progress + if (Objects.equals(kvService.get(BALANCE_SHARD_KEY), "DOING")) { + throw new PDException(1001, "balance shard is processing, please try later!"); + } + + if (shardGroups.size() == 0) { + return results; + } + + Map<Long, Integer> storeShardCount = new HashMap<>(); +
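+ // For example, with 12 shard groups of 3 shards spread evenly over 4 stores, each store + // holds 9 shards, so the targets computed below are max(9 / 3, 1) = 3 leaders for each of + // the first three stores and 12 - 9 = 3 for the last one, keeping the total at 12.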
shardGroups.forEach(group -> { + group.getShardsList().forEach(shard -> { + storeShardCount.put(shard.getStoreId(), + storeShardCount.getOrDefault(shard.getStoreId(), 0) + 1); + }); + }); + + log.info("balancePartitionLeader, shard group size: {}, by store: {}", shardGroups.size(), + storeShardCount); + + // stable ordering by target count, then by store id + PriorityQueue<KVPair<Long, Integer>> targetCount = + new PriorityQueue<>(kvPairComparatorDesc); + + var sortedGroups = storeShardCount.entrySet().stream() + .map(entry -> new KVPair<>(entry.getKey(), + entry.getValue())) + .sorted(kvPairComparatorAsc) + .collect(Collectors.toList()); + int sum = 0; + + for (int i = 0; i < sortedGroups.size() - 1; i++) { + // at least one + int v = Math.max( + sortedGroups.get(i).getValue() / pdConfig.getPartition().getShardCount(), 1); + targetCount.add(new KVPair<>(sortedGroups.get(i).getKey(), v)); + sum += v; + } + // the last store takes the remainder so the total stays correct + targetCount.add(new KVPair<>(sortedGroups.get(sortedGroups.size() - 1).getKey(), + shardGroups.size() - sum)); + log.info("target count: {}", targetCount); + + for (var group : shardGroups) { + var map = group.getShardsList().stream() + .collect(Collectors.toMap(Metapb.Shard::getStoreId, shard -> shard)); + var tmpList = new ArrayList<KVPair<Long, Integer>>(); + // With many stores, a group may not contain a given store id: stash non-matching + // stores in a temporary list until a suitable store is found + while (!targetCount.isEmpty()) { + var pair = targetCount.poll(); + var storeId = pair.getKey(); + if (map.containsKey(storeId)) { + if (map.get(storeId).getRole() != Metapb.ShardRole.Leader) { + log.info("shard group {}, store id:{}, set to leader", group.getId(), + storeId); + partitionService.transferLeader(group.getId(), map.get(storeId)); + results.put(group.getId(), storeId); + } else { + log.info("shard group {}, store id :{}, is leader, no need change", + group.getId(), storeId); + } + + if (pair.getValue() > 1) { + // decrement the remaining count + pair.setValue(pair.getValue() - 1); + tmpList.add(pair); + } + // found one, this group is done + break; + } else { + tmpList.add(pair); + } + } + targetCount.addAll(tmpList); + } + + return results; + } + + + private long getMaxIndexGap(Map<Integer, Map<Long, Long>> committedIndexMap, int partitionId) { + long maxGap = Long.MAX_VALUE; + if (committedIndexMap == null || !committedIndexMap.containsKey(partitionId)) { + return maxGap; + } + Map<Long, Long> shardMap = committedIndexMap.get(partitionId); + if (shardMap == null || shardMap.size() == 0) { + return maxGap; + } + List<Long> sortedList = new ArrayList<>(); + shardMap.forEach((storeId, committedIndex) -> { + sortedList.add(committedIndex); + }); + // sorted in descending order + sortedList.sort(Comparator.reverseOrder()); + maxGap = sortedList.get(0) - sortedList.get(sortedList.size() - 1); + return maxGap; + } + + + /** + * Perform a partition split, either automatically or manually + * + * @return + * @throws PDException + */ + public List<Metapb.Partition> splitPartition( + Pdpb.OperationMode mode, List<Pdpb.SplitDataParam> params) throws PDException { + + if (mode == Pdpb.OperationMode.Auto) { + return autoSplitPartition(); + } + + var list = params.stream() + .map(param -> new KVPair<>(param.getPartitionId(), param.getCount())) + .collect(Collectors.toList()); + + storeService.splitShardGroups(list); + return null; + } + + /** + * Split partitions automatically until each store reaches its maximum partition count. + * Precondition: after splitting, each store holds fewer partitions than + * partition.max-partitions-per-store + * + * @throws PDException + */ + public List<Metapb.Partition> autoSplitPartition() throws PDException { + if (!isLeader()) { + return null; + } + + if (Metapb.ClusterState.Cluster_OK != storeService.getClusterStats().getState()) { + if (Metapb.ClusterState.Cluster_Offline == storeService.getClusterStats().getState()) { + throw new
PDException(Pdpb.ErrorType.Split_Partition_Doing_VALUE, + "The data is splitting"); + } else { + throw new PDException(Pdpb.ErrorType.Cluster_State_Forbid_Splitting_VALUE, + "The current state of the cluster prohibits splitting data"); + } + } + + //For TEST + // pdConfig.getPartition().setMaxShardsPerStore(pdConfig.getPartition() + // .getMaxShardsPerStore()*2); + + // compute the maximum split count the cluster can support + int splitCount = pdConfig.getPartition().getMaxShardsPerStore() * + storeService.getActiveStores().size() / + (storeService.getShardGroups().size() * + pdConfig.getPartition().getShardCount()); + + if (splitCount < 2) { + throw new PDException(Pdpb.ErrorType.Too_Many_Partitions_Per_Store_VALUE, + "Too many partitions per store, partition.store-max-shard-count" + + " = " + + pdConfig.getPartition().getMaxShardsPerStore()); + } + + // the stores have not reached the maximum partition count, proceed to split + log.info("Start to split partitions..., split count = {}", splitCount); + + // set the cluster state to Offline + storeService.updateClusterStatus(Metapb.ClusterState.Cluster_Offline); + // update the default partition count + // pdConfig.getConfigService().setPartitionCount(storeService.getShardGroups().size() * + // splitCount); + + var list = storeService.getShardGroups().stream() + .map(shardGroup -> new KVPair<>(shardGroup.getId(), splitCount)) + .collect(Collectors.toList()); + storeService.splitShardGroups(list); + + return null; + } + + + /** + * A store reports task status; when a partition's state changes, recompute the state of + * its ShardGroup, the graph and the whole cluster + * + * @param task + */ + public void reportTask(MetaTask.Task task) { + try { + switch (task.getType()) { + case Split_Partition: + partitionService.handleSplitTask(task); + break; + case Move_Partition: + partitionService.handleMoveTask(task); + break; + case Clean_Partition: + partitionService.handleCleanPartitionTask(task); + break; + default: + break; + } + } catch (Exception e) { + log.error("Report task exception, task: {}", task, e); + } + } + + /** + * Compact RocksDB + * + * @throws PDException + */ + public Boolean dbCompaction(String tableName) throws PDException { + if (!isLeader()) { + return false; + } + + for (Metapb.ShardGroup shardGroup : storeService.getShardGroups()) { + storeService.shardGroupsDbCompaction(shardGroup.getId(), tableName); + } + + return true; + } + + /** + * Determine whether all partitions on a store can be moved out, returning the verdict + * and a migration plan + */ + public Map<String, Object> canAllPartitionsMovedOut(Metapb.Store sourceStore) throws + PDException { + if (!isLeader()) { + return null; + } + // analyze whether the partitions on this store can be fully moved out + Map<String, Object> resultMap = new HashMap<>(); + // partitions on the source store: StoreId -> (PartitionID -> ShardRole) + Map<Long, Map<Integer, Metapb.ShardRole>> sourcePartitionMap = new HashMap<>(); + sourcePartitionMap.put(sourceStore.getId(), new HashMap<>()); + // partitions on the other active stores: StoreId -> (PartitionID -> ShardRole) + Map<Long, Map<Integer, Metapb.ShardRole>> otherPartitionMap = new HashMap<>(); + Map<Long, Long> availableDiskSpace = new HashMap<>(); // remaining disk space per store + Map<Integer, Long> partitionDataSize = new HashMap<>(); // data size of each partition to be moved + + storeService.getActiveStores().forEach(store -> { + if (store.getId() != sourceStore.getId()) { + otherPartitionMap.put(store.getId(), new HashMap<>()); + // record the other stores' remaining disk space, in bytes + availableDiskSpace.put(store.getId(), store.getStats().getAvailable()); + } else { + resultMap.put("current_store_is_online", true); + } + }); + // sum the data size of the partitions to be moved (from storeStats, in KB) + for (Metapb.GraphStats graphStats : sourceStore.getStats().getGraphStatsList()) { + partitionDataSize.put(graphStats.getPartitionId(), + partitionDataSize.getOrDefault(graphStats.getPartitionId(), 0L) + + graphStats.getApproximateSize()); + } + // populate sourcePartitionMap and otherPartitionMap +
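+ // The planner below keeps a min-heap of the other stores and, for each partition to move, + // picks the store with the fewest partitions whose free space (bytes / 1024) still covers + // the partition's approximateSize (KB).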
partitionService.getPartitions().forEach(partition -> { + try { + storeService.getShardList(partition.getId()).forEach(shard -> { + long storeId = shard.getStoreId(); + if (storeId == sourceStore.getId()) { + sourcePartitionMap.get(storeId).put(partition.getId(), shard.getRole()); + } else { + if (otherPartitionMap.containsKey(storeId)) { + otherPartitionMap.get(storeId).put(partition.getId(), shard.getRole()); + } + } + + }); + } catch (PDException e) { + throw new RuntimeException(e); + } + }); + // collect the partitions to remove: all partitions on the source store + Map<Integer, KVPair<Long, Long>> movedPartitions = new HashMap<>(); + for (Map.Entry<Integer, Metapb.ShardRole> entry : sourcePartitionMap.get( + sourceStore.getId()).entrySet()) { + movedPartitions.put(entry.getKey(), new KVPair<>(sourceStore.getId(), 0L)); + } + // count the partitions on the other stores in a min-heap, so stores with fewer + // partitions are always considered first + PriorityQueue<KVPair<Long, Integer>> minHeap = new PriorityQueue<>(otherPartitionMap.size(), + (o1, o2) -> o1.getValue() + .compareTo( + o2.getValue())); + otherPartitionMap.forEach((storeId, shards) -> { + minHeap.add(new KVPair<>(storeId, shards.size())); + }); + // iterate over the partitions to move, preferring the stores with fewer partitions + Iterator<Map.Entry<Integer, KVPair<Long, Long>>> moveIterator = + movedPartitions.entrySet().iterator(); + while (moveIterator.hasNext()) { + Map.Entry<Integer, KVPair<Long, Long>> moveEntry = moveIterator.next(); + int partitionId = moveEntry.getKey(); + List<KVPair<Long, Integer>> tmpList = new ArrayList<>(); // elements already popped from the queue + while (minHeap.size() > 0) { + KVPair<Long, Integer> pair = minHeap.poll(); // pop the head element + long storeId = pair.getKey(); + int partitionCount = pair.getValue(); + Map<Integer, Metapb.ShardRole> shards = otherPartitionMap.get(storeId); + final int unitRate = 1024; // conversion rate between the storage units + if ((!shards.containsKey(partitionId)) && ( + availableDiskSpace.getOrDefault(storeId, 0L) / unitRate >= + partitionDataSize.getOrDefault(partitionId, 0L))) { + // the target store does not hold this partition and its remaining space can + // accommodate it, so migrate there + moveEntry.getValue().setValue(storeId); // set the migration target store + log.info("plan to move partition {} to store {}, " + + "available disk space {}, current partitionSize:{}", + partitionId, + storeId, + availableDiskSpace.getOrDefault(storeId, 0L) / unitRate, + partitionDataSize.getOrDefault(partitionId, 0L) + ); + // update the store's expected remaining space + availableDiskSpace.put(storeId, availableDiskSpace.getOrDefault(storeId, 0L) + - partitionDataSize.getOrDefault(partitionId, + 0L) * + unitRate); + // update the store's partition count in the stats + partitionCount += 1; + pair.setValue(partitionCount); + tmpList.add(pair); + break; + } else { + tmpList.add(pair); + } + } + minHeap.addAll(tmpList); + } + // check whether any partition still has no target store assigned + List<Integer> remainPartitions = new ArrayList<>(); + movedPartitions.forEach((partId, storePair) -> { + if (storePair.getValue() == 0L) { + remainPartitions.add(partId); + } + }); + if (remainPartitions.size() > 0) { + resultMap.put("flag", false); + resultMap.put("movedPartitions", null); + } else { + resultMap.put("flag", true); + resultMap.put("movedPartitions", movedPartitions); + } + return resultMap; + + } + + public Map<Integer, KVPair<Long, Long>> movePartitions( + Map<Integer, KVPair<Long, Long>> movedPartitions) { + if (!isLeader()) { + return null; + } + // start the migration + log.info("begin to move partitions"); + movedPartitions.forEach((partId, storePair) -> { + // both the source and target store IDs are non-zero + if (storePair.getKey() > 0 && storePair.getValue() > 0) { + partitionService.movePartitionsShard(partId, storePair.getKey(), + storePair.getValue()); + } + }); + return movedPartitions; + } + + +} diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/config/PDConfig.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/config/PDConfig.java new file mode 100644 index 0000000000..abc54a94dc --- /dev/null +++
b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/config/PDConfig.java @@ -0,0 +1,277 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd.config; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.hugegraph.pd.ConfigService; +import org.apache.hugegraph.pd.IdService; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Configuration; +import org.springframework.stereotype.Component; + +import lombok.Data; + + +/** + * PD configuration + */ +@Data +@Component +public class PDConfig { + + @Value("${pd.cluster_id:1}") + private long clusterId; // cluster ID + + @Value("${pd.patrol-interval:300}") + private long patrolInterval = 300; // patrol task interval + @Value("${pd.data-path}") + private String dataPath; + @Value("${pd.initial-store-count:3}") + private int minStoreCount; + + // initial store list; stores in this list are activated automatically + @Value("${pd.initial-store-list: ''}") + private String initialStoreList; + @Value("${grpc.host}") + private String host; + + @Value("${license.verify-path}") + private String verifyPath; + @Value("${license.license-path}") + private String licensePath; + @Autowired + private ThreadPoolGrpc threadPoolGrpc; + @Autowired + private Raft raft; + @Autowired + private Store store; + @Autowired + private Partition partition; + @Autowired + private Discovery discovery; + private Map<String, String> initialStoreMap = null; + private ConfigService configService; + private IdService idService; + + public Map<String, String> getInitialStoreMap() { + if (initialStoreMap == null) { + initialStoreMap = new HashMap<>(); + Arrays.asList(initialStoreList.split(",")).forEach(s -> { + initialStoreMap.put(s, s); + }); + } + return initialStoreMap; + } + + /** + * Initial partition count = + * store count * max shards per store / shards per partition + * + * @return + */ + public int getInitialPartitionCount() { + return getInitialStoreMap().size() * partition.getMaxShardsPerStore() + / partition.getShardCount(); + }
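+ + // For example, 3 initial stores with partition.store-max-shard-count = 24 and + // partition.default-shard-count = 3 yield 3 * 24 / 3 = 24 initial partitions.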
+ + public ConfigService getConfigService() { + return configService; + } + + public void setConfigService(ConfigService configService) { + this.configService = configService; + } + + public IdService getIdService() { + return idService; + } + + public void setIdService(IdService idService) { + this.idService = idService; + } + + @Data + @Configuration + public class ThreadPoolGrpc { + @Value("${thread.pool.grpc.core:600}") + private int core; + @Value("${thread.pool.grpc.max:1000}") + private int max; + @Value("${thread.pool.grpc.queue:" + Integer.MAX_VALUE + "}") + private int queue; + } + + @Data + @Configuration + public class Raft { + @Value("${raft.enable:true}") + private boolean enable; + @Value("${raft.address}") + private String address; + @Value("${pd.data-path}") + private String dataPath; + @Value("${raft.peers-list}") + private String peersList; + @Value("${raft.snapshotInterval:300}") + private int snapshotInterval; + @Value("${raft.rpc-timeout:10000}") + private int rpcTimeout; + @Value("${grpc.host}") + private String host; + @Value("${server.port}") + private int port; + + @Value("${pd.cluster_id:1}") + private long clusterId; // cluster ID + @Value("${grpc.port}") + private int grpcPort; + + public String getGrpcAddress() { + return host + ":" + grpcPort; + } + } + + @Data + @Configuration + public class Store { + // store heartbeat timeout, in seconds + @Value("${store.keepAlive-timeout:300}") + private long keepAliveTimeout = 300; + @Value("${store.max-down-time:1800}") + private long maxDownTime = 1800; + + @Value("${store.monitor_data_enabled:true}") + private boolean monitorDataEnabled = true; + + @Value("${store.monitor_data_interval: 1 minute}") + private String monitorDataInterval = "1 minute"; + + @Value("${store.monitor_data_retention: 1 day}") + private String monitorDataRetention = "1 day"; + + /** + * interval -> seconds. + * minimum value is 1 second. + * + * @return the seconds of the interval + */ + public Long getMonitorInterval() { + return parseTimeExpression(this.monitorDataInterval); + } + + /** + * monitor data saved in RocksDB will be deleted once it falls outside the + * retention period + * + * @return the period the monitor data should be kept + */ + public Long getRetentionPeriod() { + return parseTimeExpression(this.monitorDataRetention); + } + + /** + * Parse a time expression. Supported pattern: + * [1-9][ ](second, minute, hour, day, month, year) + * The unit must not be null; the number part defaults to 1. + * + * @param exp + * @return the seconds value of the expression; 1 is returned for an illegal expression
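+ * e.g. "30 second" -> 30, "5 minute" -> 300, "hour" -> 3600, "2 day" -> 172800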
+ */ + private Long parseTimeExpression(String exp) { + if (exp != null) { + Pattern pattern = Pattern.compile( + "(?<n>(\\d+)*)(\\s)*(?<unit>(second|minute|hour|day|month|year)$)"); + Matcher matcher = pattern.matcher(exp.trim()); + if (matcher.find()) { + String n = matcher.group("n"); + String unit = matcher.group("unit"); + + if (null == n || n.length() == 0) { + n = "1"; + } + + Long interval; + switch (unit) { + case "minute": + interval = 60L; + break; + case "hour": + interval = 3600L; + break; + case "day": + interval = 86400L; + break; + case "month": + interval = 86400L * 30; + break; + case "year": + interval = 86400L * 365; + break; + case "second": + default: + interval = 1L; + } + // avoid n == '0' + return Math.max(1L, interval * Integer.parseInt(n)); + } + } + return 1L; + } + + } + + @Data + @Configuration + public class Partition { + private int totalCount = 0; + + // maximum number of shards per store + @Value("${partition.store-max-shard-count:24}") + private int maxShardsPerStore = 24; + + // default number of shards per partition + @Value("${partition.default-shard-count:3}") + private int shardCount = 3; + + public int getTotalCount() { + if (totalCount == 0) { + totalCount = getInitialPartitionCount(); + } + return totalCount; + } + + public void setTotalCount(int totalCount) { + this.totalCount = totalCount; + } + } + + @Data + @Configuration + public class Discovery { + // maximum number of missed heartbeats after a client registers; beyond this, + // the previous registration information is deleted + @Value("${discovery.heartbeat-try-count:3}") + private int heartbeatOutTimes = 3; + } + +} diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/ConfigMetaStore.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/ConfigMetaStore.java new file mode 100644 index 0000000000..5ec6fe2171 --- /dev/null +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/ConfigMetaStore.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hugegraph.pd.meta; + +import java.util.List; +import java.util.Optional; + +import org.apache.hugegraph.pd.common.PDException; +import org.apache.hugegraph.pd.config.PDConfig; +import org.apache.hugegraph.pd.grpc.Metapb; + +public class ConfigMetaStore extends MetadataRocksDBStore { + + + private final long clusterId; + + public ConfigMetaStore(PDConfig pdConfig) { + super(pdConfig); + this.clusterId = pdConfig.getClusterId(); + } + + /** + * Update the storage status of a graph space + */ + public Metapb.GraphSpace setGraphSpace(Metapb.GraphSpace graphSpace) throws PDException { + byte[] graphSpaceKey = MetadataKeyHelper.getGraphSpaceKey(graphSpace.getName()); + graphSpace = graphSpace.toBuilder().setTimestamp(System.currentTimeMillis()).build(); + put(graphSpaceKey, graphSpace.toByteArray()); + return graphSpace; + } + + public List<Metapb.GraphSpace> getGraphSpace(String graphSpace) throws PDException { + byte[] graphSpaceKey = MetadataKeyHelper.getGraphSpaceKey(graphSpace); + return scanPrefix(Metapb.GraphSpace.parser(), graphSpaceKey); + } + + public Metapb.PDConfig setPdConfig(Metapb.PDConfig pdConfig) throws PDException { + byte[] graphSpaceKey = + MetadataKeyHelper.getPdConfigKey(String.valueOf(pdConfig.getVersion())); + Metapb.PDConfig config = Metapb.PDConfig.newBuilder( + pdConfig).setTimestamp(System.currentTimeMillis()).build(); + put(graphSpaceKey, config.toByteArray()); + return config; + } + + public Metapb.PDConfig getPdConfig(long version) throws PDException { + byte[] graphSpaceKey = MetadataKeyHelper.getPdConfigKey(version <= 0 ? null : + String.valueOf(version)); + Optional<Metapb.PDConfig> max = scanPrefix( + Metapb.PDConfig.parser(), graphSpaceKey).stream().max( + (o1, o2) -> (o1.getVersion() > o2.getVersion()) ? 1 : -1); + return max.orElse(null); + } + + +} diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/DiscoveryMetaStore.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/DiscoveryMetaStore.java new file mode 100644 index 0000000000..78bfe3473c --- /dev/null +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/DiscoveryMetaStore.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hugegraph.pd.meta; + +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hugegraph.pd.common.PDException; +import org.apache.hugegraph.pd.config.PDConfig; +import org.apache.hugegraph.pd.grpc.discovery.NodeInfo; +import org.apache.hugegraph.pd.grpc.discovery.NodeInfos; +import org.apache.hugegraph.pd.grpc.discovery.Query; + +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class DiscoveryMetaStore extends MetadataRocksDBStore { + + /** + * appName --> address --> registryInfo + */ + private static final String PREFIX = "REGIS-"; + private static final String SPLITTER = "-"; + + public DiscoveryMetaStore(PDConfig pdConfig) { + super(pdConfig); + } + + public void register(NodeInfo nodeInfo, int outTimes) throws PDException { + putWithTTL(toKey(nodeInfo.getAppName(), nodeInfo.getVersion(), nodeInfo.getAddress()), + nodeInfo.toByteArray(), (nodeInfo.getInterval() / 1000) * outTimes); + } + + byte[] toKey(String appName, String version, String address) { + StringBuilder builder = getPrefixBuilder(appName, version); + builder.append(SPLITTER); + builder.append(address); + return builder.toString().getBytes(); + } + + private StringBuilder getPrefixBuilder(String appName, String version) { + StringBuilder builder = new StringBuilder(); + builder.append(PREFIX); + if (!StringUtils.isEmpty(appName)) { + builder.append(appName); + builder.append(SPLITTER); + } + if (!StringUtils.isEmpty(version)) { + builder.append(version); + } + return builder; + } + + public NodeInfos getNodes(Query query) { + List<NodeInfo> nodeInfos = null; + try { + StringBuilder builder = getPrefixBuilder(query.getAppName(), + query.getVersion()); + nodeInfos = getInstanceListWithTTL( + NodeInfo.parser(), + builder.toString().getBytes()); + builder.setLength(0); + } catch (PDException e) { + log.error("An error occurred getting data from the store: ", e); + } + if (nodeInfos == null) { + return NodeInfos.newBuilder().build(); + } + if (query.getLabelsMap() != null && !query.getLabelsMap().isEmpty()) { + List<NodeInfo> result = new LinkedList<>(); + for (NodeInfo node : nodeInfos) { + if (labelMatch(node, query)) { + result.add(node); + } + } + return NodeInfos.newBuilder().addAllInfo(result).build(); + } + return NodeInfos.newBuilder().addAllInfo(nodeInfos).build(); + + } + + private boolean labelMatch(NodeInfo node, Query query) { + Map<String, String> labelsMap = node.getLabelsMap(); + for (Map.Entry<String, String> entry : query.getLabelsMap().entrySet()) { + if (!entry.getValue().equals(labelsMap.get(entry.getKey()))) { + return false; + } + } + return true; + } +} diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/IdMetaStore.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/IdMetaStore.java new file mode 100644 index 0000000000..70e4c501f9 --- /dev/null +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/IdMetaStore.java @@ -0,0 +1,255 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd.meta; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.util.List; +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.hugegraph.pd.common.PDException; +import org.apache.hugegraph.pd.config.PDConfig; +import org.apache.hugegraph.pd.store.KV; + +import com.caucho.hessian.io.Hessian2Input; +import com.caucho.hessian.io.Hessian2Output; + +import lombok.extern.slf4j.Slf4j; + +/** + * Implementation of auto-increment ids + */ +@Slf4j +public class IdMetaStore extends MetadataRocksDBStore { + + + private static final String ID_PREFIX = "@ID@"; + private static final String CID_PREFIX = "@CID@"; + private static final String CID_SLOT_PREFIX = "@CID_SLOT@"; + private static final String CID_DEL_SLOT_PREFIX = "@CID_DEL_SLOT@"; + private static final String SEPARATOR = "@"; + private static final ConcurrentHashMap<String, Object> SEQUENCES = new ConcurrentHashMap<>(); + public static long CID_DEL_TIMEOUT = 24 * 3600 * 1000; + private final long clusterId; + + public IdMetaStore(PDConfig pdConfig) { + super(pdConfig); + this.clusterId = pdConfig.getClusterId(); + } + + public static long bytesToLong(byte[] b) { + ByteBuffer buf = ByteBuffer.wrap(b); + return buf.getLong(); + } + + public static byte[] longToBytes(long l) { + ByteBuffer buf = ByteBuffer.wrap(new byte[Long.BYTES]); + buf.putLong(l); + buf.flip(); + return buf.array(); + } + + /** + * Get an auto-increment id + * + * @param key + * @param delta + * @return + * @throws PDException + */ + public long getId(String key, int delta) throws PDException { + Object probableLock = getLock(key); + byte[] keyBs = (ID_PREFIX + key).getBytes(Charset.defaultCharset()); + synchronized (probableLock) { + byte[] bs = getOne(keyBs); + long current = bs != null ?
bytesToLong(bs) : 0L; + long next = current + delta; + put(keyBs, longToBytes(next)); + return current; + } + } + + private Object getLock(String key) { + Object probableLock = new Object(); + Object currentLock = SEQUENCES.putIfAbsent(key, probableLock); + if (currentLock != null) { + probableLock = currentLock; + } + return probableLock; + } + + public void resetId(String key) throws PDException { + Object probableLock = getLock(key); + byte[] keyBs = (ID_PREFIX + key).getBytes(Charset.defaultCharset()); + synchronized (probableLock) { + removeByPrefix(keyBs); + } + } + + /** + * Within 24 hours of deleting the cid identified by name, re-requesting a cid for the + * same name returns the same value. + * This is designed to prevent cache inconsistency from corrupting data. + * + * @param key + * @param name cid identifier + * @param max + * @return + * @throws PDException + */ + public long getCId(String key, String name, long max) throws PDException { + // check for expired cids; graphs are deleted infrequently, so the performance impact is small + byte[] delKeyPrefix = (CID_DEL_SLOT_PREFIX + + key + SEPARATOR).getBytes(Charset.defaultCharset()); + synchronized (this) { + scanPrefix(delKeyPrefix).forEach(kv -> { + long[] value = (long[]) deserialize(kv.getValue()); + if (value.length >= 2) { + if (System.currentTimeMillis() - value[1] > CID_DEL_TIMEOUT) { + try { + delCId(key, value[0]); + remove(kv.getKey()); + } catch (Exception e) { + log.error("Exception ", e); + } + } + } + }); + + // restore the key from the delayed-deletion queue + byte[] cidDelayKey = getCIDDelayKey(key, name); + byte[] value = getOne(cidDelayKey); + if (value != null) { + // remove it from the delayed-deletion queue + remove(cidDelayKey); + return ((long[]) deserialize(value))[0]; + } else { + return getCId(key, max); + } + } + } + + /** + * Add to the deletion queue for deferred deletion + */ + public long delCIdDelay(String key, String name, long cid) throws PDException { + byte[] delKey = getCIDDelayKey(key, name); + put(delKey, serialize(new long[]{cid, System.currentTimeMillis()})); + return cid; + } + + /** + * Get a cyclic, non-repeating auto-increment id; when the upper bound is reached, + * the counter restarts from 0 + * + * @param key + * @param max upper bound of the id; once reached, ids restart from 0 + * @return + * @throws PDException + */ + public long getCId(String key, long max) throws PDException { + Object probableLock = getLock(key); + byte[] keyBs = (CID_PREFIX + key).getBytes(Charset.defaultCharset()); + synchronized (probableLock) { + byte[] bs = getOne(keyBs); + long current = bs != null ? bytesToLong(bs) : 0L; + long last = current == 0 ? max - 1 : current - 1;
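+ // e.g. with max = 100 and slots 5 and 6 already occupied, a scan starting at current = 5 + // advances past both and returns 7; once current reaches max, the scan wraps around to 0.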
+ { // find an unused cid + List<KV> kvs = scanRange(genCIDSlotKey(key, current), genCIDSlotKey(key, max)); + for (KV kv : kvs) { + if (current == bytesToLong(kv.getValue())) { + current++; + } else { + break; + } + } + } + if (current == max) { + current = 0; + List<KV> kvs = scanRange(genCIDSlotKey(key, current), genCIDSlotKey(key, last)); + for (KV kv : kvs) { + if (current == bytesToLong(kv.getValue())) { + current++; + } else { + break; + } + } + } + if (current == last) { + return -1; + } + put(genCIDSlotKey(key, current), longToBytes(current)); + put(keyBs, longToBytes(current + 1)); + return current; + } + } + + private byte[] genCIDSlotKey(String key, long value) { + byte[] keySlot = (CID_SLOT_PREFIX + key + SEPARATOR).getBytes(Charset.defaultCharset()); + ByteBuffer buf = ByteBuffer.allocate(keySlot.length + Long.BYTES); + buf.put(keySlot); + buf.put(longToBytes(value)); + return buf.array(); + } + + private byte[] getCIDDelayKey(String key, String name) { + byte[] bsKey = (CID_DEL_SLOT_PREFIX + + key + SEPARATOR + + name).getBytes(Charset.defaultCharset()); + return bsKey; + } + + /** + * Delete a cyclic id and release its value + * + * @param key + * @param cid + * @return + * @throws PDException + */ + public long delCId(String key, long cid) throws PDException { + return remove(genCIDSlotKey(key, cid)); + } + + private byte[] serialize(Object obj) { + try (ByteArrayOutputStream bos = new ByteArrayOutputStream()) { + Hessian2Output output = new Hessian2Output(bos); + output.writeObject(obj); + output.flush(); + return bos.toByteArray(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private Object deserialize(byte[] bytes) { + try (ByteArrayInputStream bis = new ByteArrayInputStream(bytes)) { + Hessian2Input input = new Hessian2Input(bis); + Object obj = input.readObject(); + input.close(); + return obj; + } catch (IOException e) { + throw new RuntimeException(e); + } + } +} diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/LogMeta.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/LogMeta.java new file mode 100644 index 0000000000..ee791d5e04 --- /dev/null +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/LogMeta.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hugegraph.pd.meta; + +import java.util.List; + +import org.apache.hugegraph.pd.common.PDException; +import org.apache.hugegraph.pd.config.PDConfig; +import org.apache.hugegraph.pd.grpc.Metapb; + +public class LogMeta extends MetadataRocksDBStore { + + private final PDConfig pdConfig; + + public LogMeta(PDConfig pdConfig) { + super(pdConfig); + this.pdConfig = pdConfig; + } + + public void insertLog(Metapb.LogRecord record) throws PDException { + byte[] storeLogKey = MetadataKeyHelper.getLogKey(record); + put(storeLogKey, record.toByteArray()); + + } + + public List<Metapb.LogRecord> getLog(String action, Long start, Long end) throws PDException { + byte[] keyStart = MetadataKeyHelper.getLogKeyPrefix(action, start); + byte[] keyEnd = MetadataKeyHelper.getLogKeyPrefix(action, end); + List<Metapb.LogRecord> records = this.scanRange(Metapb.LogRecord.parser(), + keyStart, keyEnd); + return records; + } +} diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataFactory.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataFactory.java new file mode 100644 index 0000000000..c70eec489d --- /dev/null +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataFactory.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd.meta; + +import org.apache.hugegraph.pd.config.PDConfig; +import org.apache.hugegraph.pd.raft.RaftEngine; +import org.apache.hugegraph.pd.store.HgKVStore; +import org.apache.hugegraph.pd.store.HgKVStoreImpl; +import org.apache.hugegraph.pd.store.RaftKVStore; + +/** + * Storage factory that creates the store objects for the metadata classes + */ +public class MetadataFactory { + + private static HgKVStore store = null; + + public static HgKVStore getStore(PDConfig pdConfig) { + if (store == null) { + synchronized (MetadataFactory.class) { + if (store == null) { + HgKVStore proto = new HgKVStoreImpl(); + //proto.init(pdConfig); + store = pdConfig.getRaft().isEnable() ? + new RaftKVStore(RaftEngine.getInstance(), proto) : + proto; + store.init(pdConfig); + } + } + } + return store; + }
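+ + // The store above is created lazily with double-checked locking; when raft is enabled, + // the local HgKVStoreImpl is wrapped in a RaftKVStore so that writes go through RaftEngine.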
+ + public static void closeStore() { + if (store != null) { + store.close(); + } + } + + public static StoreInfoMeta newStoreInfoMeta(PDConfig pdConfig) { + return new StoreInfoMeta(pdConfig); + } + + public static PartitionMeta newPartitionMeta(PDConfig pdConfig) { + return new PartitionMeta(pdConfig); + } + + public static IdMetaStore newHugeServerMeta(PDConfig pdConfig) { + return new IdMetaStore(pdConfig); + } + + public static DiscoveryMetaStore newDiscoveryMeta(PDConfig pdConfig) { + return new DiscoveryMetaStore(pdConfig); + } + + public static ConfigMetaStore newConfigMeta(PDConfig pdConfig) { + return new ConfigMetaStore(pdConfig); + } + + public static TaskInfoMeta newTaskInfoMeta(PDConfig pdConfig) { + return new TaskInfoMeta(pdConfig); + } + + + public static QueueStore newQueueStore(PDConfig pdConfig) { + return new QueueStore(pdConfig); + } + + public static LogMeta newLogMeta(PDConfig pdConfig) { + return new LogMeta(pdConfig); + } +} diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataKeyHelper.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataKeyHelper.java new file mode 100644 index 0000000000..8a421c2d60 --- /dev/null +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataKeyHelper.java @@ -0,0 +1,378 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hugegraph.pd.meta; + +import java.nio.charset.Charset; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hugegraph.pd.grpc.Metapb; + +public class MetadataKeyHelper { + + public static final char DELIMITER = '/'; + + private static final String STORE = "STORE"; + private static final String ACTIVESTORE = "ACTIVESTORE"; + private static final String STORESTATUS = "STORESTATUS"; + private static final String PARTITION = "PARTITION"; + private static final String PARTITION_V36 = "PARTITION_V36"; + private static final String SHARDGROUP = "SHARDGROUP"; + + private static final String PARTITION_STATUS = "PARTITION_STATUS"; + private static final String GRAPH = "GRAPH"; + private static final String GRAPHMETA = "GRAPHMETA"; + private static final String GRAPH_SPACE = "GRAPH_SPACE"; + private static final String PD_CONFIG = "PD_CONFIG"; + private static final String TASK_SPLIT = "TASK_SPLIT"; + private static final String TASK_MOVE = "TASK_MOVE"; + private static final String LOG_RECORD = "LOG_RECORD"; + + private static final String QUEUE = "QUEUE"; + + public static byte[] getStoreInfoKey(final long storeId) { + //STORE/{storeId} + String key = StringBuilderHelper.get() + .append(STORE).append(DELIMITER) + .append(storeId) + .toString(); + return key.getBytes(Charset.defaultCharset()); + } + + public static byte[] getActiveStoreKey(final long storeId) { + //ACTIVESTORE/{storeId} + String key = StringBuilderHelper.get() + .append(ACTIVESTORE).append(DELIMITER) + .append(storeId) + .toString(); + return key.getBytes(Charset.defaultCharset()); + } + + public static byte[] getActiveStorePrefix() { + //ACTIVESTORE/ + String key = StringBuilderHelper.get() + .append(ACTIVESTORE).append(DELIMITER) + .toString(); + return key.getBytes(Charset.defaultCharset()); + } + + public static byte[] getStorePrefix() { + //STORE/ + String key = StringBuilderHelper.get() + .append(STORE).append(DELIMITER) + .toString(); + return key.getBytes(Charset.defaultCharset()); + } + + public static byte[] getStoreStatusKey(final long storeId) { + //STORESTATUS/{storeId} + String key = StringBuilderHelper.get() + .append(STORESTATUS).append(DELIMITER) + .append(storeId) + .toString(); + return key.getBytes(Charset.defaultCharset()); + } + + public static byte[] getShardGroupKey(final long groupId) { + //SHARDGROUP/{groupId} + String key = StringBuilderHelper.get() + .append(SHARDGROUP).append(DELIMITER) + .append(groupId) + .toString(); + return key.getBytes(Charset.defaultCharset()); + } + + public static byte[] getShardGroupPrefix() { + //SHARDGROUP/ + String key = StringBuilderHelper.get() + .append(SHARDGROUP).append(DELIMITER) + .toString(); + return key.getBytes(Charset.defaultCharset()); + } + + public static byte[] getPartitionKey(final String graphName, final int partId) { + //GRAPH/{graphName}/PARTITION/{partId} + String key = StringBuilderHelper.get() + .append(GRAPH).append(DELIMITER) + .append(graphName).append(DELIMITER) + .append(PARTITION).append(DELIMITER) + .append(partId) + .toString(); + return key.getBytes(Charset.defaultCharset()); + } + + public static byte[] getPartitionV36Key(final String graphName, final int partId) { + // GRAPH/{graphName}/PARTITION_V36/{partId} + String key = StringBuilderHelper.get() + .append(GRAPH).append(DELIMITER) + .append(graphName).append(DELIMITER) + .append(PARTITION_V36).append(DELIMITER) + .append(partId) + .toString(); + return key.getBytes(Charset.defaultCharset()); + }
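+ + // Key layout examples: getPartitionKey("g1", 5) yields "GRAPH/g1/PARTITION/5" and + // getPartitionPrefix("g1") yields "GRAPH/g1/PARTITION/", which the prefix scans rely on.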
graphName) { + //GRAPH/{graph}/Partition + String key = StringBuilderHelper.get() + .append(GRAPH).append(DELIMITER) + .append(graphName).append(DELIMITER) + .append(PARTITION).append(DELIMITER) + .toString(); + return key.getBytes(Charset.defaultCharset()); + } + + public static byte[] getShardKey(final long storeId, final int partId) { + //SHARD/{graphName}/{type} + String key = StringBuilderHelper.get() + .append(SHARDGROUP).append(DELIMITER) + .append(storeId).append(DELIMITER) + .append(partId) + .toString(); + return key.getBytes(Charset.defaultCharset()); + } + + public static byte[] getShardPrefix(final long storeId) { + //SHARD/{graphName}/{type} + String key = StringBuilderHelper.get() + .append(SHARDGROUP).append(DELIMITER) + .append(storeId).append(DELIMITER) + .toString(); + return key.getBytes(Charset.defaultCharset()); + } + + public static byte[] getGraphKey(final String graphName) { + //GRAPHMETA/{graphName} + String key = StringBuilderHelper.get() + .append(GRAPHMETA).append(DELIMITER) + .append(graphName).append(DELIMITER) + .toString(); + return key.getBytes(Charset.defaultCharset()); + } + + public static byte[] getGraphPrefix() { + //GRAPHMETA/{ + String key = StringBuilderHelper.get() + .append(GRAPHMETA).append(DELIMITER) + .toString(); + return key.getBytes(Charset.defaultCharset()); + } + + public static byte[] getPartitionStatusKey(String graphName, int id) { + //PARTITION_STATUS/{ + String key = StringBuilderHelper.get() + .append(PARTITION_STATUS) + .append(DELIMITER) + .append(graphName).append(DELIMITER) + .append(id).append(DELIMITER) + .toString(); + return key.getBytes(Charset.defaultCharset()); + } + + public static byte[] getPartitionStatusPrefixKey(String graphName) { + //PARTITION_STATUS/{ + StringBuilder builder = StringBuilderHelper.get().append(PARTITION_STATUS) + .append(DELIMITER); + if (!StringUtils.isEmpty(graphName)) { + builder.append(graphName).append(DELIMITER); + } + return builder.toString().getBytes(Charset.defaultCharset()); + } + + public static byte[] getGraphSpaceKey(String graphSpace) { + //GRAPH_SPACE/{ + StringBuilder builder = StringBuilderHelper.get().append( + GRAPH_SPACE).append(DELIMITER); + if (!StringUtils.isEmpty(graphSpace)) { + builder.append(graphSpace).append(DELIMITER); + } + return builder.toString().getBytes(Charset.defaultCharset()); + } + + public static byte[] getPdConfigKey(String configKey) { + //PD_CONFIG/{ + StringBuilder builder = StringBuilderHelper.get().append( + PD_CONFIG).append(DELIMITER); + if (!StringUtils.isEmpty(configKey)) { + builder.append(configKey).append(DELIMITER); + } + return builder.toString().getBytes(Charset.defaultCharset()); + } + + public static byte[] getQueueItemPrefix() { + //QUEUE + String key = StringBuilderHelper.get() + .append(QUEUE).append(DELIMITER) + .toString(); + return key.getBytes(Charset.defaultCharset()); + } + + public static byte[] getQueueItemKey(String itemId) { + //QUEUE + StringBuilder builder = StringBuilderHelper.get() + .append(QUEUE).append(DELIMITER); + if (!StringUtils.isEmpty(itemId)) { + builder.append(itemId).append(DELIMITER); + } + return builder.toString().getBytes(Charset.defaultCharset()); + } + + public static byte[] getSplitTaskKey(String graphName, int groupId) { + // TASK_SPLIT/{GraphName}/{partitionID} + StringBuilder builder = StringBuilderHelper.get() + .append(TASK_SPLIT).append(DELIMITER) + .append(graphName).append(DELIMITER) + .append(groupId); + return builder.toString().getBytes(Charset.defaultCharset()); + } + + public static byte[] 
getSplitTaskPrefix(String graphName) { + // TASK_SPLIT/{GraphName}/ + StringBuilder builder = StringBuilderHelper.get() + .append(TASK_SPLIT).append(DELIMITER) + .append(graphName); + return builder.toString().getBytes(Charset.defaultCharset()); + } + + public static byte[] getAllSplitTaskPrefix() { + // TASK_SPLIT/{GraphName}/ + StringBuilder builder = StringBuilderHelper.get() + .append(TASK_SPLIT).append(DELIMITER); + return builder.toString().getBytes(Charset.defaultCharset()); + } + + public static byte[] getMoveTaskKey(String graphName, int targetGroupId, int groupId) { + // TASK_MOVE/{GraphName}/to PartitionID/{source partitionID} + StringBuilder builder = StringBuilderHelper.get() + .append(TASK_MOVE).append(DELIMITER) + .append(graphName).append(DELIMITER) + .append(targetGroupId).append(DELIMITER) + .append(groupId); + return builder.toString().getBytes(Charset.defaultCharset()); + } + + public static byte[] getMoveTaskPrefix(String graphName) { + // TASK_MOVE/{graphName}/toPartitionId/ + StringBuilder builder = StringBuilderHelper.get() + .append(TASK_MOVE).append(DELIMITER) + .append(graphName); + return builder.toString().getBytes(Charset.defaultCharset()); + } + + public static byte[] getAllMoveTaskPrefix() { + // TASK_MOVE/{graphName}/toPartitionId/ + StringBuilder builder = StringBuilderHelper.get() + .append(TASK_MOVE).append(DELIMITER); + return builder.toString().getBytes(Charset.defaultCharset()); + } + + public static byte[] getLogKey(Metapb.LogRecord record) { + //LOG_RECORD/{action}/{time}/ + StringBuilder builder = StringBuilderHelper.get() + .append(LOG_RECORD) + .append(DELIMITER) + .append(record.getAction()) + .append(DELIMITER) + .append(record.getTimestamp()); + return builder.toString().getBytes(Charset.defaultCharset()); + } + + public static byte[] getLogKeyPrefix(String action, long time) { + //LOG_DATA_SPLIT/{time}/{GraphName} + StringBuilder builder = StringBuilderHelper.get() + .append(LOG_RECORD) + .append(DELIMITER) + .append(action) + .append(DELIMITER) + .append(time); + return builder.toString().getBytes(Charset.defaultCharset()); + } + + public static byte[] getKVPrefix(String prefix, String key) { + //K@/{key} + StringBuilder builder = StringBuilderHelper.get() + .append(prefix).append(DELIMITER); + if (!StringUtils.isEmpty(key)) { + builder.append(key).append(DELIMITER); + } + return builder.toString().getBytes(Charset.defaultCharset()); + } + + public static byte[] getKVTTLPrefix(String ttlPrefix, String prefix, String key) { + StringBuilder builder = StringBuilderHelper.get().append(ttlPrefix) + .append(prefix).append(DELIMITER); + if (!StringUtils.isEmpty(key)) { + builder.append(key).append(DELIMITER); + } + return builder.toString().getBytes(Charset.defaultCharset()); + } + + public static String getKVWatchKeyPrefix(String key, String watchDelimiter, long clientId) { + StringBuilder builder = StringBuilderHelper.get(); + builder.append(watchDelimiter).append(DELIMITER); + builder.append(key == null ? "" : key).append(DELIMITER); + builder.append(clientId); + return builder.toString(); + } + + public static String getKVWatchKeyPrefix(String key, String watchDelimiter) { + StringBuilder builder = StringBuilderHelper.get(); + builder.append(watchDelimiter).append(DELIMITER); + builder.append(key == null ? 
"" : key).append(DELIMITER); + return builder.toString(); + } + + public static char getDelimiter() { + return DELIMITER; + } + + public static StringBuilder getStringBuilderHelper() { + return StringBuilderHelper.get(); + } + + static class StringBuilderHelper { + private static final int DISCARD_LIMIT = 1024 << 3; // 8k + + private static final ThreadLocal holderThreadLocal = ThreadLocal + .withInitial(StringBuilderHolder::new); + + public static StringBuilder get() { + final StringBuilderHolder holder = holderThreadLocal.get(); + return holder.getStringBuilder(); + } + + public static void truncate() { + final StringBuilderHolder holder = holderThreadLocal.get(); + holder.truncate(); + } + + private static class StringBuilderHolder { + + private final StringBuilder buf = new StringBuilder(); + + private StringBuilder getStringBuilder() { + truncate(); + return buf; + } + + private void truncate() { + buf.setLength(0); + } + } + } + +} diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataRocksDBStore.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataRocksDBStore.java new file mode 100644 index 0000000000..bf77e41c05 --- /dev/null +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataRocksDBStore.java @@ -0,0 +1,184 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataRocksDBStore.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataRocksDBStore.java
new file mode 100644
index 0000000000..bf77e41c05
--- /dev/null
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataRocksDBStore.java
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.meta;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.commons.lang3.ArrayUtils;
+import org.apache.hugegraph.pd.common.PDException;
+import org.apache.hugegraph.pd.config.PDConfig;
+import org.apache.hugegraph.pd.grpc.Pdpb;
+import org.apache.hugegraph.pd.store.HgKVStore;
+import org.apache.hugegraph.pd.store.KV;
+
+import com.google.protobuf.Parser;
+
+public class MetadataRocksDBStore extends MetadataStoreBase {
+
+    HgKVStore store;
+
+    PDConfig pdConfig;
+
+    public MetadataRocksDBStore(PDConfig pdConfig) {
+        store = MetadataFactory.getStore(pdConfig);
+        this.pdConfig = pdConfig;
+    }
+
+    public HgKVStore getStore() {
+        if (store == null) {
+            store = MetadataFactory.getStore(pdConfig);
+        }
+        return store;
+    }
+
+    @Override
+    public byte[] getOne(byte[] key) throws PDException {
+        try {
+            return store.get(key);
+        } catch (Exception e) {
+            throw new PDException(Pdpb.ErrorType.ROCKSDB_READ_ERROR_VALUE, e);
+        }
+    }
+
+    @Override
+    public <E> E getOne(Parser<E> parser, byte[] key) throws PDException {
+        try {
+            byte[] bytes = store.get(key);
+            if (ArrayUtils.isEmpty(bytes)) {
+                return null;
+            }
+            return parser.parseFrom(bytes);
+        } catch (Exception e) {
+            throw new PDException(Pdpb.ErrorType.ROCKSDB_READ_ERROR_VALUE, e);
+        }
+    }
+
+    @Override
+    public void put(byte[] key, byte[] value) throws PDException {
+        try {
+            getStore().put(key, value);
+        } catch (Exception e) {
+            throw new PDException(Pdpb.ErrorType.ROCKSDB_WRITE_ERROR_VALUE, e);
+        }
+    }
+
+    @Override
+    public void putWithTTL(byte[] key, byte[] value, long ttl) throws PDException {
+        this.store.putWithTTL(key, value, ttl);
+    }
+
+    @Override
+    public void putWithTTL(byte[] key, byte[] value, long ttl,
+                           TimeUnit timeUnit) throws PDException {
+        this.store.putWithTTL(key, value, ttl, timeUnit);
+    }
+
+    @Override
+    public byte[] getWithTTL(byte[] key) throws PDException {
+        return this.store.getWithTTL(key);
+    }
+
+    @Override
+    public List<byte[]> getListWithTTL(byte[] key) throws PDException {
+        return this.store.getListWithTTL(key);
+    }
+
+    @Override
+    public void removeWithTTL(byte[] key) throws PDException {
+        this.store.removeWithTTL(key);
+    }
+
+    @Override
+    public List<KV> scanPrefix(byte[] prefix) throws PDException {
+        // TODO: use the RocksDB prefix iterator for this scan
+        try {
+            return this.store.scanPrefix(prefix);
+        } catch (Exception e) {
+            throw new PDException(Pdpb.ErrorType.ROCKSDB_READ_ERROR_VALUE, e);
+        }
+    }
+
+    @Override
+    public List<KV> scanRange(byte[] start, byte[] end) throws PDException {
+        return this.store.scanRange(start, end);
+    }
+
+    @Override
+    public <E> List<E> scanRange(Parser<E> parser, byte[] start, byte[] end) throws PDException {
+        List<E> stores = new LinkedList<>();
+        try {
+            List<KV> kvs = this.scanRange(start, end);
+            for (KV keyValue : kvs) {
+                stores.add(parser.parseFrom(keyValue.getValue()));
+            }
+        } catch (Exception e) {
+            throw new PDException(Pdpb.ErrorType.ROCKSDB_READ_ERROR_VALUE, e);
+        }
+        return stores;
+    }
+
+    @Override
+    public <E> List<E> scanPrefix(Parser<E> parser, byte[] prefix) throws PDException {
+        List<E> stores = new LinkedList<>();
+        try {
+            List<KV> kvs = this.scanPrefix(prefix);
+            for (KV keyValue : kvs) {
+                stores.add(parser.parseFrom(keyValue.getValue()));
+            }
+        } catch (Exception e) {
+            throw new PDException(Pdpb.ErrorType.ROCKSDB_READ_ERROR_VALUE, e);
+        }
+        return stores;
+    }
+
+    @Override
+    public boolean containsKey(byte[] key) throws PDException {
+        return !ArrayUtils.isEmpty(store.get(key));
+    }
+
+    @Override
+    public long remove(byte[] key) throws PDException {
+        try {
+            return this.store.remove(key);
+        } catch (Exception e) {
+            throw new PDException(Pdpb.ErrorType.ROCKSDB_WRITE_ERROR_VALUE, e);
+        }
+    }
+
+    @Override
+    public long removeByPrefix(byte[] prefix) throws PDException {
+        try {
+            return this.store.removeByPrefix(prefix);
+        } catch (Exception e) {
+            throw new PDException(Pdpb.ErrorType.ROCKSDB_WRITE_ERROR_VALUE, e);
+        }
+    }
+
+    @Override
+    public void clearAllCache() throws PDException {
+        this.store.clear();
+    }
+
+    @Override
+    public void close() {
+        // nothing to release here: the underlying HgKVStore is managed by MetadataFactory
+    }
+}
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataStoreBase.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataStoreBase.java
new file mode 100644
index 0000000000..10c38a3ec6
--- /dev/null
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataStoreBase.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.meta;
+
+import java.io.IOException;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.hugegraph.pd.common.PDException;
+import org.apache.hugegraph.pd.grpc.Pdpb;
+import org.apache.hugegraph.pd.store.KV;
+
+import com.google.protobuf.Parser;
+
+public abstract class MetadataStoreBase {
+
+    // public long timeout = 3; // request timeout in seconds, 3 by default
+
+    public abstract byte[] getOne(byte[] key) throws PDException;
+
+    public abstract <E> E getOne(Parser<E> parser, byte[] key) throws PDException;
+
+    public abstract void put(byte[] key, byte[] value) throws PDException;
+
+    /**
+     * Put with an expiration time (TTL).
+     */
+    public abstract void putWithTTL(byte[] key,
+                                    byte[] value,
+                                    long ttl) throws PDException;
+
+    public abstract void putWithTTL(byte[] key,
+                                    byte[] value,
+                                    long ttl, TimeUnit timeUnit) throws PDException;
+
+    public abstract byte[] getWithTTL(byte[] key) throws PDException;
+
+    public abstract List<byte[]> getListWithTTL(byte[] key) throws PDException;
+
+    public abstract void removeWithTTL(byte[] key) throws PDException;
+
+    /**
+     * Scan all entries whose key starts with the given prefix.
+     *
+     * @param prefix key prefix
+     * @return matched key-value pairs
+     * @throws PDException on read error
+     */
+    public abstract List<KV> scanPrefix(byte[] prefix) throws PDException;
+
+    /**
+     * Scan all entries whose key starts with the given prefix and parse the values.
+     *
+     * @param parser protobuf parser for the value type
+     * @param prefix key prefix
+     * @return parsed values
+     * @throws PDException on read error
+     */
+    public abstract <E> List<E> scanPrefix(Parser<E> parser, byte[] prefix) throws PDException;
+
+    public abstract List<KV> scanRange(byte[] start, byte[] end) throws PDException;
+
+    public abstract <E> List<E> scanRange(Parser<E> parser, byte[] start,
+                                          byte[] end) throws PDException;
+
+    /**
+     * Check whether the key exists.
+     *
+     * @param key the key to check
+     * @return true if the key exists
+     * @throws PDException on read error
+     */
+    public abstract boolean containsKey(byte[] key) throws PDException;
+
+    public abstract long remove(byte[] key) throws PDException;
+
+    public abstract long removeByPrefix(byte[] prefix) throws PDException;
+
+    public abstract void clearAllCache() throws PDException;
+
+    public abstract void close() throws IOException;
+
+    public <T> T getInstanceWithTTL(Parser<T> parser, byte[] key) throws PDException {
+        try {
+            byte[] withTTL = this.getWithTTL(key);
+            return parser.parseFrom(withTTL);
+        } catch (Exception e) {
+            throw new PDException(Pdpb.ErrorType.ROCKSDB_READ_ERROR_VALUE, e);
+        }
+    }
+
+    public <T> List<T> getInstanceListWithTTL(Parser<T> parser, byte[] key) throws PDException {
+        try {
+            List<byte[]> withTTL = this.getListWithTTL(key);
+            LinkedList<T> ts = new LinkedList<>();
+            for (int i = 0; i < withTTL.size(); i++) {
+                ts.add(parser.parseFrom(withTTL.get(i)));
+            }
+            return ts;
+        } catch (Exception e) {
+            throw new PDException(Pdpb.ErrorType.ROCKSDB_READ_ERROR_VALUE, e);
+        }
+    }
+}
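A usage sketch for the Parser-based accessors defined above (the `pdConfig` instance is assumed to come from the surrounding service; this is illustrative only):

    MetadataRocksDBStore store = new MetadataRocksDBStore(pdConfig);
    byte[] key = MetadataKeyHelper.getStoreInfoKey(1L);
    Metapb.Store one = store.getOne(Metapb.Store.parser(), key);      // null when the key is absent
    List<Metapb.Store> all = store.scanPrefix(Metapb.Store.parser(),
                                              MetadataKeyHelper.getStorePrefix());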
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/PartitionMeta.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/PartitionMeta.java
new file mode 100644
index 0000000000..09a4eb8e20
--- /dev/null
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/PartitionMeta.java
@@ -0,0 +1,295 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.meta;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hugegraph.pd.common.PDException;
+import org.apache.hugegraph.pd.common.PartitionCache;
+import org.apache.hugegraph.pd.config.PDConfig;
+import org.apache.hugegraph.pd.grpc.Metapb;
+
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Partition metadata management.
+ */
+@Slf4j
+public class PartitionMeta extends MetadataRocksDBStore {
+
+    static String CID_GRAPH_ID_KEY = "GraphID";
+    static int CID_GRAPH_ID_MAX = 0xFFFE;
+    private final PDConfig pdConfig;
+    private final PartitionCache cache;
+
+    public PartitionMeta(PDConfig pdConfig) {
+        super(pdConfig);
+        this.pdConfig = pdConfig;
+        //this.timeout = pdConfig.getEtcd().getTimeout();
+        this.cache = new PartitionCache();
+    }
+
+    /**
+     * Initialize: load all shard groups and partitions.
+     */
+    public void init() throws PDException {
+        loadShardGroups();
+        loadGraphs();
+    }
+
+    public void reload() throws PDException {
+        cache.clear();
+        loadShardGroups();
+        loadGraphs();
+    }
+
+    private void loadGraphs() throws PDException {
+        byte[] key = MetadataKeyHelper.getGraphPrefix();
+        List<Metapb.Graph> graphs = scanPrefix(Metapb.Graph.parser(), key);
+        for (Metapb.Graph graph : graphs) {
+            cache.updateGraph(graph);
+            loadPartitions(graph);
+        }
+    }
+
+    /**
+     * Partitions and shard groups are stored separately, so both must be
+     * loaded into the cache during init.
+     */
+    private void loadShardGroups() throws PDException {
+        byte[] shardGroupPrefix = MetadataKeyHelper.getShardGroupPrefix();
+        for (var shardGroup : scanPrefix(Metapb.ShardGroup.parser(), shardGroupPrefix)) {
+            cache.updateShardGroup(shardGroup);
+        }
+    }
+
+    private void loadPartitions(Metapb.Graph graph) throws PDException {
+        byte[] prefix = MetadataKeyHelper.getPartitionPrefix(graph.getGraphName());
+        List<Metapb.Partition> partitions = scanPrefix(Metapb.Partition.parser(), prefix);
+        partitions.forEach(cache::updatePartition);
+    }
+
+    /**
+     * Find a partition by id: check the cache first, then fall back to the database.
+     */
+    public Metapb.Partition getPartitionById(String graphName, int partId) throws PDException {
+        var pair = cache.getPartitionById(graphName, partId);
+        Metapb.Partition partition;
+        if (pair == null) {
+            byte[] key = MetadataKeyHelper.getPartitionKey(graphName, partId);
+            partition = getOne(Metapb.Partition.parser(), key);
+            if (partition != null) {
+                cache.updatePartition(partition);
+            }
+        } else {
+            partition = pair.getKey();
+        }
+        return partition;
+    }
+
+    public List<Metapb.Partition> getPartitionById(int partId) throws PDException {
+        List<Metapb.Partition> partitions = new ArrayList<>();
+        cache.getGraphs().forEach(graph -> {
+            cache.getPartitions(graph.getGraphName()).forEach(partition -> {
+                if (partition.getId() == partId) {
+                    partitions.add(partition);
+                }
+            });
+        });
+        return partitions;
+    }
+
+    /**
+     * Find the partition that owns the given hash code.
+     */
+    public Metapb.Partition getPartitionByCode(String graphName, long code) throws PDException {
+        var pair = cache.getPartitionByCode(graphName, code);
+        if (pair != null) {
+            return pair.getKey();
+        }
+        return null;
+    }
+
+    public Metapb.Graph getAndCreateGraph(String graphName) throws PDException {
+        return getAndCreateGraph(graphName, pdConfig.getPartition().getTotalCount());
+    }
+
+    public Metapb.Graph getAndCreateGraph(String graphName,
+                                          int partitionCount) throws PDException {
+        if (partitionCount > pdConfig.getPartition().getTotalCount()) {
+            partitionCount = pdConfig.getPartition().getTotalCount();
+        }
+
+        // System (management) graphs have only one partition
+        if (graphName.endsWith("/s") || graphName.endsWith("/m")) {
+            partitionCount = 1;
+        }
+
+        Metapb.Graph graph = cache.getGraph(graphName);
+        if (graph == null) {
+            // Save the graph info
+            graph = Metapb.Graph.newBuilder()
+                    .setGraphName(graphName)
+                    .setPartitionCount(partitionCount)
+                    .setState(Metapb.PartitionState.PState_Normal)
+                    .build();
+            updateGraph(graph);
+        }
+        return graph;
+    }
+
+    /**
+     * Save the partition info.
+     */
+    public Metapb.Partition updatePartition(Metapb.Partition partition) throws PDException {
+        if (!cache.hasGraph(partition.getGraphName())) {
+            getAndCreateGraph(partition.getGraphName());
+        }
+        byte[] key = MetadataKeyHelper.getPartitionKey(partition.getGraphName(),
+                                                       partition.getId());
+        put(key, partition.toByteString().toByteArray());
+        cache.updatePartition(partition);
+        return partition;
+    }
+
+    /**
+     * Make sure the graph exists in the database (create it if absent), then
+     * update the partition's version, conf version and shard list.
+     */
+    public Metapb.Partition updateShardList(Metapb.Partition partition) throws PDException {
+        if (!cache.hasGraph(partition.getGraphName())) {
+            getAndCreateGraph(partition.getGraphName());
+        }
+
+        Metapb.Partition pt = getPartitionById(partition.getGraphName(), partition.getId());
+        // pt = pt.toBuilder().setVersion(partition.getVersion())
+        //        .setConfVer(partition.getConfVer())
+        //        .clearShards()
+        //        .addAllShards(partition.getShardsList()).build();
+
+        byte[] key = MetadataKeyHelper.getPartitionKey(pt.getGraphName(), pt.getId());
+        put(key, pt.toByteString().toByteArray());
+        cache.updatePartition(pt);
+        return partition;
+    }
+
+    /**
+     * Remove all partitions of a graph.
+     */
+    public long removeAllPartitions(String graphName) throws PDException {
+        cache.removeAll(graphName);
+        byte[] prefix = MetadataKeyHelper.getPartitionPrefix(graphName);
+        return removeByPrefix(prefix);
+    }
+
+    public long removePartition(String graphName, int id) throws PDException {
+        cache.remove(graphName, id);
+        byte[] key = MetadataKeyHelper.getPartitionKey(graphName, id);
+        return remove(key);
+    }
+
+    public void updatePartitionStats(Metapb.PartitionStats stats) throws PDException {
+        for (String graphName : stats.getGraphNameList()) {
+            byte[] prefix = MetadataKeyHelper.getPartitionStatusKey(graphName, stats.getId());
+            put(prefix, stats.toByteArray());
+        }
+    }
+
+    /**
+     * Get the stats of one partition.
+     */
+    public Metapb.PartitionStats getPartitionStats(String graphName, int id) throws PDException {
+        byte[] prefix = MetadataKeyHelper.getPartitionStatusKey(graphName, id);
+        return getOne(Metapb.PartitionStats.parser(), prefix);
+    }
+
+    /**
+     * Get the stats of all partitions of a graph.
+     */
+    public List<Metapb.PartitionStats> getPartitionStats(String graphName) throws PDException {
+        byte[] prefix = MetadataKeyHelper.getPartitionStatusPrefixKey(graphName);
+        return scanPrefix(Metapb.PartitionStats.parser(), prefix);
+    }
+
+    /**
+     * Update the graph info.
+     */
+    public Metapb.Graph updateGraph(Metapb.Graph graph) throws PDException {
+        log.info("updateGraph {}", graph);
+        byte[] key = MetadataKeyHelper.getGraphKey(graph.getGraphName());
+        // Save the graph info
+        put(key, graph.toByteString().toByteArray());
+        cache.updateGraph(graph);
+        return graph;
+    }
+
+    public List<Metapb.Partition> getPartitions() {
+        List<Metapb.Partition> partitions = new ArrayList<>();
+        List<Metapb.Graph> graphs = cache.getGraphs();
+        graphs.forEach(e -> partitions.addAll(cache.getPartitions(e.getGraphName())));
+        return partitions;
+    }
+
+    public List<Metapb.Partition> getPartitions(String graphName) {
+        return cache.getPartitions(graphName);
+    }
+
+    public List<Metapb.Graph> getGraphs() throws PDException {
+        byte[] key = MetadataKeyHelper.getGraphPrefix();
+        return scanPrefix(Metapb.Graph.parser(), key);
+    }
+
+    public Metapb.Graph getGraph(String graphName) throws PDException {
+        byte[] key = MetadataKeyHelper.getGraphKey(graphName);
+        return getOne(Metapb.Graph.parser(), key);
+    }
+
+    /**
+     * Remove the graph and its graph id.
+     */
+    public long removeGraph(String graphName) throws PDException {
+        byte[] key = MetadataKeyHelper.getGraphKey(graphName);
+        return remove(key);
+    }
+
+    public PartitionCache getPartitionCache() {
+        return cache;
+    }
+}
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/QueueStore.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/QueueStore.java
new file mode 100644
index 0000000000..74820ab023
--- /dev/null
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/QueueStore.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.meta;
+
+import java.util.List;
+
+import org.apache.hugegraph.pd.common.HgAssert;
+import org.apache.hugegraph.pd.common.PDException;
+import org.apache.hugegraph.pd.config.PDConfig;
+import org.apache.hugegraph.pd.grpc.Metapb;
+import org.apache.hugegraph.pd.raft.RaftEngine;
+import org.apache.hugegraph.pd.store.RaftKVStore;
+
+public class QueueStore extends MetadataRocksDBStore {
+
+    QueueStore(PDConfig pdConfig) {
+        super(pdConfig);
+    }
+
+    public void addItem(Metapb.QueueItem queueItem) throws PDException {
+        HgAssert.isArgumentNotNull(queueItem, "queueItem");
+        byte[] key = MetadataKeyHelper.getQueueItemKey(queueItem.getItemId());
+        put(key, queueItem.toByteString().toByteArray());
+    }
+
+    public void removeItem(String itemId) throws PDException {
+        if (RaftEngine.getInstance().isLeader()) {
+            remove(MetadataKeyHelper.getQueueItemKey(itemId));
+        } else {
+            var store = getStore();
+            // TODO: delete the record via client
+            if (store instanceof RaftKVStore) {
+                ((RaftKVStore) store).doRemove(MetadataKeyHelper.getQueueItemKey(itemId));
+            }
+        }
+    }
+
+    public List<Metapb.QueueItem> getQueue() throws PDException {
+        byte[] prefix = MetadataKeyHelper.getQueueItemPrefix();
+        return scanPrefix(Metapb.QueueItem.parser(), prefix);
+    }
+}
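PartitionMeta reads are cache-first with a write-back on miss; a minimal usage sketch (same assumption about `pdConfig` as above):

    PartitionMeta meta = new PartitionMeta(pdConfig);
    meta.init();                                              // loads shard groups, then graphs and partitions
    Metapb.Partition p = meta.getPartitionById("graph1", 0);  // cache hit, or RocksDB read + cache update
    List<Metapb.Partition> parts = meta.getPartitions("graph1");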
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/StoreInfoMeta.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/StoreInfoMeta.java
new file mode 100644
index 0000000000..45959211d8
--- /dev/null
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/StoreInfoMeta.java
@@ -0,0 +1,206 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.meta;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.ListIterator;
+
+import org.apache.hugegraph.pd.common.PDException;
+import org.apache.hugegraph.pd.config.PDConfig;
+import org.apache.hugegraph.pd.grpc.Metapb;
+
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Store metadata storage.
+ */
+@Slf4j
+public class StoreInfoMeta extends MetadataRocksDBStore {
+
+    private final PDConfig pdConfig;
+
+    public StoreInfoMeta(PDConfig pdConfig) {
+        super(pdConfig);
+        this.pdConfig = pdConfig;
+        // this.timeout = pdConfig.getDiscovery().getHeartbeatOutTimes();
+    }
+
+    public static boolean shardGroupEquals(List<Metapb.Shard> g1, List<Metapb.Shard> g2) {
+        ListIterator<Metapb.Shard> e1 = g1.listIterator();
+        ListIterator<Metapb.Shard> e2 = g2.listIterator();
+        while (e1.hasNext() && e2.hasNext()) {
+            Metapb.Shard o1 = e1.next();
+            Metapb.Shard o2 = e2.next();
+            if (!(o1 == null ? o2 == null : o1.getStoreId() == o2.getStoreId())) {
+                return false;
+            }
+        }
+        return !(e1.hasNext() || e2.hasNext());
+    }
+
+    /**
+     * Update the store info.
+     */
+    public void updateStore(Metapb.Store store) throws PDException {
+        byte[] storeInfoKey = MetadataKeyHelper.getStoreInfoKey(store.getId());
+        put(storeInfoKey, store.toByteArray());
+    }
+
+    /**
+     * Refresh the store's liveness record (a TTL key).
+     */
+    public void keepStoreAlive(Metapb.Store store) throws PDException {
+        byte[] activeStoreKey = MetadataKeyHelper.getActiveStoreKey(store.getId());
+        putWithTTL(activeStoreKey, store.toByteArray(),
+                   pdConfig.getStore().getKeepAliveTimeout());
+    }
+
+    public void removeActiveStore(Metapb.Store store) throws PDException {
+        byte[] activeStoreKey = MetadataKeyHelper.getActiveStoreKey(store.getId());
+        removeWithTTL(activeStoreKey);
+    }
+
+    public Metapb.Store getStore(Long storeId) throws PDException {
+        byte[] storeInfoKey = MetadataKeyHelper.getStoreInfoKey(storeId);
+        return getOne(Metapb.Store.parser(), storeInfoKey);
+    }
+
+    /**
+     * Get all stores (the graphName parameter is currently unused).
+     */
+    public List<Metapb.Store> getStores(String graphName) throws PDException {
+        byte[] storePrefix = MetadataKeyHelper.getStorePrefix();
+        return scanPrefix(Metapb.Store.parser(), storePrefix);
+    }
+
+    /**
+     * Get the active stores (the graphName parameter is currently unused).
+     */
+    public List<Metapb.Store> getActiveStores(String graphName) throws PDException {
+        byte[] activePrefix = MetadataKeyHelper.getActiveStorePrefix();
+        return getInstanceListWithTTL(Metapb.Store.parser(), activePrefix);
+    }
+
+    public List<Metapb.Store> getActiveStores() throws PDException {
+        byte[] activePrefix = MetadataKeyHelper.getActiveStorePrefix();
+        return getInstanceListWithTTL(Metapb.Store.parser(), activePrefix);
+    }
+
+    /**
+     * Check whether the store id exists.
+     */
+    public boolean storeExists(Long storeId) throws PDException {
+        byte[] storeInfoKey = MetadataKeyHelper.getStoreInfoKey(storeId);
+        return containsKey(storeInfoKey);
+    }
+
+    /**
+     * Update the store statistics.
+     */
+    public Metapb.StoreStats updateStoreStats(Metapb.StoreStats storeStats) throws PDException {
+        byte[] storeStatusKey = MetadataKeyHelper.getStoreStatusKey(storeStats.getStoreId());
+        put(storeStatusKey, storeStats.toByteArray());
+        return storeStats;
+    }
+
+    public long removeStore(long storeId) throws PDException {
+        byte[] storeInfoKey = MetadataKeyHelper.getStoreInfoKey(storeId);
+        return remove(storeInfoKey);
+    }
+
+    public long removeAll() throws PDException {
+        byte[] storePrefix = MetadataKeyHelper.getStorePrefix();
+        return this.removeByPrefix(storePrefix);
+    }
+
+    public void updateShardGroup(Metapb.ShardGroup group) throws PDException {
+        byte[] shardGroupKey = MetadataKeyHelper.getShardGroupKey(group.getId());
+        put(shardGroupKey, group.toByteArray());
+    }
+
+    public void deleteShardGroup(int groupId) throws PDException {
+        byte[] shardGroupKey = MetadataKeyHelper.getShardGroupKey(groupId);
+        remove(shardGroupKey);
+    }
+
+    public Metapb.ShardGroup getShardGroup(int groupId) throws PDException {
+        byte[] shardGroupKey = MetadataKeyHelper.getShardGroupKey(groupId);
+        return getOne(Metapb.ShardGroup.parser(), shardGroupKey);
+    }
+
+    public int getShardGroupCount() throws PDException {
+        byte[] shardGroupPrefix = MetadataKeyHelper.getShardGroupPrefix();
+        return scanPrefix(Metapb.ShardGroup.parser(), shardGroupPrefix).size();
+    }
+
+    public List<Metapb.ShardGroup> getShardGroups() throws PDException {
+        byte[] shardGroupPrefix = MetadataKeyHelper.getShardGroupPrefix();
+        return scanPrefix(Metapb.ShardGroup.parser(), shardGroupPrefix);
+    }
+
+    public Metapb.StoreStats getStoreStats(long storeId) throws PDException {
+        byte[] storeStatusKey = MetadataKeyHelper.getStoreStatusKey(storeId);
+        return getOne(Metapb.StoreStats.parser(), storeStatusKey);
+    }
+
+    /**
+     * @return stores together with their status info
+     * @throws PDException on read error
+     */
+    public List<Metapb.Store> getStoreStatus(boolean isActive) throws PDException {
+        byte[] storePrefix = MetadataKeyHelper.getStorePrefix();
+        List<Metapb.Store> stores = isActive ? getActiveStores()
+                                             : scanPrefix(Metapb.Store.parser(), storePrefix);
+        LinkedList<Metapb.Store> list = new LinkedList<>();
+        for (int i = 0; i < stores.size(); i++) {
+            Metapb.Store store = stores.get(i);
+            Metapb.StoreStats stats = getStoreStats(store.getId());
+            if (stats != null) {
+                store = Metapb.Store.newBuilder(store).setStats(stats).build();
+            }
+            list.add(store);
+        }
+        return list;
+    }
+}
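Store liveness hinges on the two key families above: STORE/{storeId} is the persistent record, while ACTIVESTORE/{storeId} is a TTL key that heartbeats keep refreshing, so a silent store simply ages out of getActiveStores(). A sketch of the heartbeat side (the real caller lives in the PD service layer, which is outside this patch):

    StoreInfoMeta storeMeta = new StoreInfoMeta(pdConfig);
    storeMeta.updateStore(store);       // persistent: STORE/{storeId}
    storeMeta.keepStoreAlive(store);    // TTL: ACTIVESTORE/{storeId}, expires after keepAliveTimeout
    List<Metapb.Store> alive = storeMeta.getActiveStores();  // stores whose TTL key has not expired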
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/TaskInfoMeta.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/TaskInfoMeta.java
new file mode 100644
index 0000000000..148101de4d
--- /dev/null
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/TaskInfoMeta.java
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.meta;
+
+import java.util.List;
+
+import org.apache.hugegraph.pd.common.PDException;
+import org.apache.hugegraph.pd.config.PDConfig;
+import org.apache.hugegraph.pd.grpc.MetaTask;
+import org.apache.hugegraph.pd.grpc.Metapb;
+import org.apache.hugegraph.pd.grpc.pulse.MovePartition;
+import org.apache.hugegraph.pd.grpc.pulse.SplitPartition;
+
+/**
+ * Task management.
+ */
+public class TaskInfoMeta extends MetadataRocksDBStore {
+
+    public TaskInfoMeta(PDConfig pdConfig) {
+        super(pdConfig);
+    }
+
+    /**
+     * Add a partition split task.
+     */
+    public void addSplitTask(int groupID, Metapb.Partition partition,
+                             SplitPartition splitPartition) throws PDException {
+        byte[] key = MetadataKeyHelper.getSplitTaskKey(partition.getGraphName(), groupID);
+        MetaTask.Task task = MetaTask.Task.newBuilder()
+                .setType(MetaTask.TaskType.Split_Partition)
+                .setState(MetaTask.TaskState.Task_Doing)
+                .setStartTimestamp(System.currentTimeMillis())
+                .setPartition(partition)
+                .setSplitPartition(splitPartition)
+                .build();
+        put(key, task.toByteString().toByteArray());
+    }
+
+    public void updateSplitTask(MetaTask.Task task) throws PDException {
+        var partition = task.getPartition();
+        byte[] key = MetadataKeyHelper.getSplitTaskKey(partition.getGraphName(),
+                                                       partition.getId());
+        put(key, task.toByteString().toByteArray());
+    }
+
+    public MetaTask.Task getSplitTask(String graphName, int groupID) throws PDException {
+        byte[] key = MetadataKeyHelper.getSplitTaskKey(graphName, groupID);
+        return getOne(MetaTask.Task.parser(), key);
+    }
+
+    public List<MetaTask.Task> scanSplitTask(String graphName) throws PDException {
+        byte[] prefix = MetadataKeyHelper.getSplitTaskPrefix(graphName);
+        return scanPrefix(MetaTask.Task.parser(), prefix);
+    }
+
+    public void removeSplitTaskPrefix(String graphName) throws PDException {
+        byte[] key = MetadataKeyHelper.getSplitTaskPrefix(graphName);
+        removeByPrefix(key);
+    }
+
+    public boolean hasSplitTaskDoing() throws PDException {
+        byte[] key = MetadataKeyHelper.getAllSplitTaskPrefix();
+        return !scanPrefix(key).isEmpty();
+    }
+
+    public void addMovePartitionTask(Metapb.Partition partition,
+                                     MovePartition movePartition) throws PDException {
+        byte[] key = MetadataKeyHelper.getMoveTaskKey(partition.getGraphName(),
+                                                      movePartition.getTargetPartition().getId(),
+                                                      partition.getId());
+
+        MetaTask.Task task = MetaTask.Task.newBuilder()
+                .setType(MetaTask.TaskType.Move_Partition)
+                .setState(MetaTask.TaskState.Task_Doing)
+                .setStartTimestamp(System.currentTimeMillis())
+                .setPartition(partition)
+                .setMovePartition(movePartition)
+                .build();
+        put(key, task.toByteArray());
+    }
+
+    public void updateMovePartitionTask(MetaTask.Task task) throws PDException {
+        byte[] key = MetadataKeyHelper.getMoveTaskKey(task.getPartition().getGraphName(),
+                                                      task.getMovePartition()
+                                                          .getTargetPartition().getId(),
+                                                      task.getPartition().getId());
+        put(key, task.toByteArray());
+    }
+
+    public MetaTask.Task getMovePartitionTask(String graphName, int targetId,
+                                              int partId) throws PDException {
+        byte[] key = MetadataKeyHelper.getMoveTaskKey(graphName, targetId, partId);
+        return getOne(MetaTask.Task.parser(), key);
+    }
+
+    public List<MetaTask.Task> scanMoveTask(String graphName) throws PDException {
+        byte[] prefix = MetadataKeyHelper.getMoveTaskPrefix(graphName);
+        return scanPrefix(MetaTask.Task.parser(), prefix);
+    }
+
+    /**
+     * Remove the move tasks of one graph by prefix, a whole batch at a time.
+     *
+     * @param graphName graph name
+     * @throws PDException on I/O error
+     */
+    public void removeMoveTaskPrefix(String graphName) throws PDException {
+        byte[] key = MetadataKeyHelper.getMoveTaskPrefix(graphName);
+        removeByPrefix(key);
+    }
+
+    public boolean hasMoveTaskDoing() throws PDException {
+        byte[] key = MetadataKeyHelper.getAllMoveTaskPrefix();
+        return !scanPrefix(key).isEmpty();
+    }
+}
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/FutureClosureAdapter.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/FutureClosureAdapter.java
new file mode 100644
index 0000000000..1991a78db9
--- /dev/null
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/FutureClosureAdapter.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.raft;
+
+import java.util.concurrent.CompletableFuture;
+
+import com.alipay.sofa.jraft.Closure;
+import com.alipay.sofa.jraft.Status;
+
+public class FutureClosureAdapter<T> implements Closure {
+
+    public final CompletableFuture<T> future = new CompletableFuture<>();
+    private T resp;
+
+    public T getResponse() {
+        return this.resp;
+    }
+
+    public void setResponse(T resp) {
+        this.resp = resp;
+        future.complete(resp);
+        run(Status.OK());
+    }
+
+    public void failure(Throwable t) {
+        future.completeExceptionally(t);
+        run(new Status(-1, t.getMessage()));
+    }
+
+    @Override
+    public void run(Status status) {
+        // no-op by default; subclasses may override to observe the status
+    }
+}
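FutureClosureAdapter bridges jraft's callback style into a CompletableFuture, so asynchronous RPC results can be consumed synchronously. A small consumption sketch (the timeout value is arbitrary):

    FutureClosureAdapter<RaftRpcProcessor.GetMemberResponse> closure = new FutureClosureAdapter<>();
    // hand `closure` to an async call; the callee invokes setResponse(...) or failure(...)
    RaftRpcProcessor.GetMemberResponse resp = closure.future.get(3, TimeUnit.SECONDS);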
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/KVOperation.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/KVOperation.java
new file mode 100644
index 0000000000..9169a248dc
--- /dev/null
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/KVOperation.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.raft;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+
+import com.alipay.sofa.jraft.util.BytesUtil;
+import com.alipay.sofa.jraft.util.Requires;
+import com.caucho.hessian.io.Hessian2Input;
+import com.caucho.hessian.io.Hessian2Output;
+
+import lombok.Data;
+
+@Data
+public class KVOperation {
+
+    /**
+     * Put operation
+     */
+    public static final byte PUT = 0x01;
+    /**
+     * Get operation
+     */
+    public static final byte GET = 0x02;
+    public static final byte DEL = 0x03;
+    public static final byte REMOVE_BY_PREFIX = 0x04;
+    public static final byte REMOVE = 0x05;
+    public static final byte PUT_WITH_TTL = 0x06;
+    public static final byte CLEAR = 0x07;
+    public static final byte PUT_WITH_TTL_UNIT = 0x08;
+    public static final byte REMOVE_WITH_TTL = 0x09;
+    /**
+     * Snapshot operation
+     */
+    public static final byte SAVE_SNAPSHOT = 0x10;
+    public static final byte LOAD_SNAPSHOT = 0x11;
+
+    private byte[] key;
+    private byte[] value;
+    private Object attach; // the original object, kept for local processing to skip one deserialization
+    private Object arg;
+    private byte op;
+
+    public KVOperation() {
+    }
+
+    public KVOperation(byte[] key, byte[] value, Object attach, byte op) {
+        this.key = key;
+        this.value = value;
+        this.attach = attach;
+        this.op = op;
+    }
+
+    public KVOperation(byte[] key, byte[] value, Object attach, byte op, Object arg) {
+        this.key = key;
+        this.value = value;
+        this.attach = attach;
+        this.op = op;
+        this.arg = arg;
+    }
+
+    public static KVOperation fromByteArray(byte[] value) throws IOException {
+        try (ByteArrayInputStream bis = new ByteArrayInputStream(value, 1, value.length - 1)) {
+            Hessian2Input input = new Hessian2Input(bis);
+            KVOperation op = new KVOperation();
+            op.op = value[0];
+            op.key = input.readBytes();
+            op.value = input.readBytes();
+            op.arg = input.readObject();
+            input.close();
+            return op;
+        }
+    }
+
+    public static KVOperation createPut(final byte[] key, final byte[] value) {
+        Requires.requireNonNull(key, "key");
+        Requires.requireNonNull(value, "value");
+        return new KVOperation(key, value, null, PUT);
+    }
+
+    public static KVOperation createGet(final byte[] key) {
+        Requires.requireNonNull(key, "key");
+        return new KVOperation(key, BytesUtil.EMPTY_BYTES, null, GET);
+    }
+
+    public static KVOperation createPutWithTTL(byte[] key, byte[] value, long ttl) {
+        Requires.requireNonNull(key, "key");
+        Requires.requireNonNull(value, "value");
+        return new KVOperation(key, value, value, PUT_WITH_TTL, ttl);
+    }
+
+    public static KVOperation createPutWithTTL(byte[] key, byte[] value, long ttl,
+                                               TimeUnit timeUnit) {
+        Requires.requireNonNull(key, "key");
+        Requires.requireNonNull(value, "value");
+        return new KVOperation(key, value, value, PUT_WITH_TTL_UNIT,
+                               new Object[]{ttl, timeUnit});
+    }
+
+    public static KVOperation createRemoveWithTTL(byte[] key) {
+        Requires.requireNonNull(key, "key");
+        return new KVOperation(key, key, null, REMOVE_WITH_TTL);
+    }
+
+    public static KVOperation createRemoveByPrefix(byte[] key) {
+        Requires.requireNonNull(key, "key");
+        return new KVOperation(key, key, null, REMOVE_BY_PREFIX);
+    }
+
+    public static KVOperation createRemove(byte[] key) {
+        Requires.requireNonNull(key, "key");
+        return new KVOperation(key, key, null, REMOVE);
+    }
+
+    public static KVOperation createClear() {
+        return new KVOperation(null, null, null, CLEAR);
+    }
+
+    public static KVOperation createSaveSnapshot(String snapshotPath) {
+        Requires.requireNonNull(snapshotPath, "snapshotPath");
+        return new KVOperation(null, null, snapshotPath, SAVE_SNAPSHOT);
+    }
+
+    public static KVOperation createLoadSnapshot(String snapshotPath) {
+        Requires.requireNonNull(snapshotPath, "snapshotPath");
+        return new KVOperation(null, null, snapshotPath, LOAD_SNAPSHOT);
+    }
+
+    public byte[] toByteArray() throws IOException {
+        try (ByteArrayOutputStream bos = new ByteArrayOutputStream()) {
+            bos.write(op);
+            Hessian2Output output = new Hessian2Output(bos);
+            output.writeObject(key);
+            output.writeObject(value);
+            output.writeObject(arg);
+            output.flush();
+            return bos.toByteArray();
+        }
+    }
+}
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/KVStoreClosure.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/KVStoreClosure.java
new file mode 100644
index 0000000000..cb5291703a
--- /dev/null
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/KVStoreClosure.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hugegraph.pd.raft;
+
+import org.apache.hugegraph.pd.grpc.Pdpb;
+
+import com.alipay.sofa.jraft.Closure;
+
+public interface KVStoreClosure extends Closure {
+
+    Pdpb.Error getError();
+
+    void setError(final Pdpb.Error error);
+
+    Object getData();
+
+    void setData(final Object data);
+}
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd.raft; + +import java.io.File; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.atomic.AtomicReference; + +import org.apache.hugegraph.pd.common.PDException; +import org.apache.hugegraph.pd.config.PDConfig; +import org.apache.hugegraph.pd.grpc.Metapb; +import org.apache.hugegraph.pd.grpc.Pdpb; + +import com.alipay.sofa.jraft.JRaftUtils; +import com.alipay.sofa.jraft.Node; +import com.alipay.sofa.jraft.RaftGroupService; +import com.alipay.sofa.jraft.ReplicatorGroup; +import com.alipay.sofa.jraft.Status; +import com.alipay.sofa.jraft.conf.Configuration; +import com.alipay.sofa.jraft.core.Replicator; +import com.alipay.sofa.jraft.entity.PeerId; +import com.alipay.sofa.jraft.entity.Task; +import com.alipay.sofa.jraft.error.RaftError; +import com.alipay.sofa.jraft.option.NodeOptions; +import com.alipay.sofa.jraft.option.RaftOptions; +import com.alipay.sofa.jraft.option.RpcOptions; +import com.alipay.sofa.jraft.rpc.RaftRpcServerFactory; +import com.alipay.sofa.jraft.rpc.RpcServer; +import com.alipay.sofa.jraft.util.Endpoint; +import com.alipay.sofa.jraft.util.ThreadId; +import com.alipay.sofa.jraft.util.internal.ThrowUtil; + +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class RaftEngine { + private static final RaftEngine INSTANCE = new RaftEngine(); + private final RaftStateMachine stateMachine; + private PDConfig.Raft config; + private RaftGroupService raftGroupService; + private RpcServer rpcServer; + private Node raftNode; + private RaftRpcClient raftRpcClient; + + public RaftEngine() { + this.stateMachine = new RaftStateMachine(); + } + + public static RaftEngine getInstance() { + return INSTANCE; + } + + public boolean init(PDConfig.Raft config) { + if (this.raftNode != null) { + return false; + } + this.config = config; + + raftRpcClient = new RaftRpcClient(); + raftRpcClient.init(new RpcOptions()); + + String groupId = "pd_raft"; + String raftPath = config.getDataPath() + "/" + groupId; + new File(raftPath).mkdirs(); + + new File(config.getDataPath()).mkdirs(); + Configuration initConf = new Configuration(); + initConf.parse(config.getPeersList()); + if (config.isEnable() && config.getPeersList().length() < 3) { + log.error("The RaftEngine parameter is incorrect." 
+ + " When RAFT is enabled, the number of peers " + "cannot be less than 3"); + } + // 设置 Node 参数,包括日志存储路径和状态机实例 + NodeOptions nodeOptions = new NodeOptions(); + nodeOptions.setFsm(stateMachine); + nodeOptions.setEnableMetrics(true); + // 日志路径 + nodeOptions.setLogUri(raftPath + "/log"); + // raft 元数据路径 + nodeOptions.setRaftMetaUri(raftPath + "/meta"); + // 快照路径 + nodeOptions.setSnapshotUri(raftPath + "/snapshot"); + // 初始集群 + nodeOptions.setInitialConf(initConf); + // 快照时间间隔 + nodeOptions.setSnapshotIntervalSecs(config.getSnapshotInterval()); + + nodeOptions.setRpcConnectTimeoutMs(config.getRpcTimeout()); + nodeOptions.setRpcDefaultTimeout(config.getRpcTimeout()); + nodeOptions.setRpcInstallSnapshotTimeout(config.getRpcTimeout()); + // 设置 raft 配置 + RaftOptions raftOptions = nodeOptions.getRaftOptions(); + + nodeOptions.setEnableMetrics(true); + + final PeerId serverId = JRaftUtils.getPeerId(config.getAddress()); + + rpcServer = createRaftRpcServer(config.getAddress()); + // 构建 raft 组并启动 raft + this.raftGroupService = + new RaftGroupService(groupId, serverId, nodeOptions, rpcServer, true); + this.raftNode = raftGroupService.start(false); + log.info("RaftEngine start successfully: id = {}, peers list = {}", groupId, + nodeOptions.getInitialConf().getPeers()); + return this.raftNode != null; + } + + /** + * 创建 raft rpc server,用于 pd 之间通讯 + */ + private RpcServer createRaftRpcServer(String raftAddr) { + Endpoint endpoint = JRaftUtils.getEndPoint(raftAddr); + RpcServer rpcServer = RaftRpcServerFactory.createRaftRpcServer(endpoint); + RaftRpcProcessor.registerProcessor(rpcServer, this); + rpcServer.init(null); + return rpcServer; + } + + public void shutDown() { + if (this.raftGroupService != null) { + this.raftGroupService.shutdown(); + try { + this.raftGroupService.join(); + } catch (final InterruptedException e) { + this.raftNode = null; + ThrowUtil.throwException(e); + } + this.raftGroupService = null; + } + if (this.rpcServer != null) { + this.rpcServer.shutdown(); + this.rpcServer = null; + } + if (this.raftNode != null) { + this.raftNode.shutdown(); + } + this.raftNode = null; + } + + public boolean isLeader() { + return this.raftNode.isLeader(true); + } + + /** + * 添加 Raft 任务,grpc 通过该接口给 raft 发送数据 + */ + public void addTask(Task task) { + if (!isLeader()) { + KVStoreClosure closure = (KVStoreClosure) task.getDone(); + closure.setError(Pdpb.Error.newBuilder().setType(Pdpb.ErrorType.NOT_LEADER).build()); + closure.run(new Status(RaftError.EPERM, "Not leader")); + return; + } + this.raftNode.apply(task); + } + + public void addStateListener(RaftStateListener listener) { + this.stateMachine.addStateListener(listener); + } + + public void addTaskHandler(RaftTaskHandler handler) { + this.stateMachine.addTaskHandler(handler); + } + + public PDConfig.Raft getConfig() { + return this.config; + } + + public PeerId getLeader() { + return raftNode.getLeaderId(); + } + + /** + * 向 leader 发消息,获取 grpc 地址; + */ + public String getLeaderGrpcAddress() throws ExecutionException, InterruptedException { + if (isLeader()) { + return config.getGrpcAddress(); + } + + if (raftNode.getLeaderId() == null) { + waitingForLeader(10000); + } + + return raftRpcClient.getGrpcAddress(raftNode.getLeaderId().getEndpoint().toString()).get() + .getGrpcAddress(); + } + + public Metapb.Member getLocalMember() { + Metapb.Member.Builder builder = Metapb.Member.newBuilder(); + builder.setClusterId(config.getClusterId()); + builder.setRaftUrl(config.getAddress()); + builder.setDataPath(config.getDataPath()); + 
builder.setGrpcUrl(config.getGrpcAddress()); + builder.setState(Metapb.StoreState.Up); + return builder.build(); + } + + public List getMembers() { + List members = new ArrayList<>(); + + List peers = raftNode.listPeers(); + peers.addAll(raftNode.listLearners()); + var learners = new HashSet<>(raftNode.listLearners()); + + for (PeerId peerId : peers) { + Metapb.Member.Builder builder = Metapb.Member.newBuilder(); + builder.setClusterId(config.getClusterId()); + CompletableFuture future = + raftRpcClient.getGrpcAddress(peerId.getEndpoint().toString()); + + Metapb.ShardRole role = Metapb.ShardRole.Follower; + if (peerEquals(peerId, raftNode.getLeaderId())) { + role = Metapb.ShardRole.Leader; + } else if (learners.contains(peerId)) { + role = Metapb.ShardRole.Learner; + var state = getReplicatorState(peerId); + if (state != null) { + builder.setReplicatorState(state.name()); + } + } + + builder.setRole(role); + + try { + if (future.isCompletedExceptionally()) { + log.error("failed to getGrpcAddress of {}", peerId.getEndpoint().toString()); + builder.setState(Metapb.StoreState.Offline); + builder.setRaftUrl(peerId.getEndpoint().toString()); + members.add(builder.build()); + } else { + RaftRpcProcessor.GetMemberResponse response = future.get(); + builder.setState(Metapb.StoreState.Up); + builder.setRaftUrl(response.getRaftAddress()); + builder.setDataPath(response.getDatePath()); + builder.setGrpcUrl(response.getGrpcAddress()); + builder.setRestUrl(response.getRestAddress()); + members.add(builder.build()); + } + } catch (Exception e) { + log.error("failed to getGrpcAddress of {}.", peerId.getEndpoint().toString(), e); + builder.setState(Metapb.StoreState.Offline); + builder.setRaftUrl(peerId.getEndpoint().toString()); + members.add(builder.build()); + } + + } + return members; + } + + public Status changePeerList(String peerList) { + AtomicReference result = new AtomicReference<>(); + try { + String[] peers = peerList.split(",", -1); + if ((peers.length & 1) != 1) { + throw new PDException(-1, "the number of peer list must be odd."); + } + Configuration newPeers = new Configuration(); + newPeers.parse(peerList); + CountDownLatch latch = new CountDownLatch(1); + this.raftNode.changePeers(newPeers, status -> { + result.set(status); + latch.countDown(); + }); + latch.await(); + } catch (Exception e) { + log.error("failed to changePeerList to {}", peerList, e); + result.set(new Status(-1, e.getMessage())); + } + return result.get(); + } + + public PeerId waitingForLeader(long timeOut) { + PeerId leader = getLeader(); + if (leader != null) { + return leader; + } + + synchronized (this) { + leader = getLeader(); + long start = System.currentTimeMillis(); + while ((System.currentTimeMillis() - start < timeOut) && (leader == null)) { + try { + this.wait(1000); + } catch (InterruptedException e) { + log.error("Raft wait for leader exception", e); + } + leader = getLeader(); + } + return leader; + } + + } + + public Node getRaftNode() { + return raftNode; + } + + private boolean peerEquals(PeerId p1, PeerId p2) { + if (p1 == null && p2 == null) { + return true; + } + if (p1 == null || p2 == null) { + return false; + } + return Objects.equals(p1.getIp(), p2.getIp()) && Objects.equals(p1.getPort(), p2.getPort()); + } + + private Replicator.State getReplicatorState(PeerId peerId) { + var replicateGroup = getReplicatorGroup(); + if (replicateGroup == null) { + return null; + } + + ThreadId threadId = replicateGroup.getReplicator(peerId); + if (threadId == null) { + return null; + } else { + Replicator r = 
(Replicator) threadId.lock(); + if (r == null) { + return Replicator.State.Probe; + } + Replicator.State result = getState(r); + threadId.unlock(); + return result; + } + } + + private ReplicatorGroup getReplicatorGroup() { + var clz = this.raftNode.getClass(); + try { + var f = clz.getDeclaredField("replicatorGroup"); + f.setAccessible(true); + var group = (ReplicatorGroup) f.get(this.raftNode); + f.setAccessible(false); + return group; + } catch (NoSuchFieldException | IllegalAccessException e) { + log.info("getReplicatorGroup: error {}", e.getMessage()); + return null; + } + } + + private Replicator.State getState(Replicator r) { + var clz = r.getClass(); + try { + var f = clz.getDeclaredField("state"); + f.setAccessible(true); + // read the "state" field from the Replicator instance itself + var state = (Replicator.State) f.get(r); + f.setAccessible(false); + return state; + } catch (NoSuchFieldException | IllegalAccessException e) { + log.info("getState: error {}", e.getMessage()); + return null; + } + } +} diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftRpcClient.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftRpcClient.java new file mode 100644 index 0000000000..2e17a65eef --- /dev/null +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftRpcClient.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
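A short usage sketch of the membership helpers above. The addresses are placeholders; note that `changePeerList` rejects an even-sized peer list up front and blocks on a latch until JRaft reports the configuration change:

```java
import org.apache.hugegraph.pd.grpc.Metapb;
import org.apache.hugegraph.pd.raft.RaftEngine;

import com.alipay.sofa.jraft.Status;

public class MembershipExample {

    // Reconfigure the group to three peers, then dump the membership view,
    // including learner replicator state when it can be resolved.
    static void demo(RaftEngine engine) {
        Status status = engine.changePeerList("10.0.0.1:8610,10.0.0.2:8610,10.0.0.3:8610");
        if (!status.isOk()) {
            System.err.println("change peers failed: " + status.getErrorMsg());
        }
        for (Metapb.Member m : engine.getMembers()) {
            System.out.println(m.getRaftUrl() + " role=" + m.getRole() +
                               " state=" + m.getState());
        }
    }
}
```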
+ */ + +package org.apache.hugegraph.pd.raft; + +import java.util.concurrent.CompletableFuture; + +import com.alipay.sofa.jraft.JRaftUtils; +import com.alipay.sofa.jraft.Status; +import com.alipay.sofa.jraft.option.RpcOptions; +import com.alipay.sofa.jraft.rpc.InvokeCallback; +import com.alipay.sofa.jraft.rpc.InvokeContext; +import com.alipay.sofa.jraft.rpc.RaftRpcFactory; +import com.alipay.sofa.jraft.rpc.RpcClient; +import com.alipay.sofa.jraft.util.Endpoint; +import com.alipay.sofa.jraft.util.RpcFactoryHelper; + +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class RaftRpcClient { + protected volatile RpcClient rpcClient; + private RpcOptions rpcOptions; + + public synchronized boolean init(final RpcOptions rpcOptions) { + this.rpcOptions = rpcOptions; + final RaftRpcFactory factory = RpcFactoryHelper.rpcFactory(); + this.rpcClient = + factory.createRpcClient(factory.defaultJRaftClientConfigHelper(this.rpcOptions)); + return this.rpcClient.init(null); + } + + /** + * 请求快照 + */ + public CompletableFuture + getGrpcAddress(final String address) { + RaftRpcProcessor.GetMemberRequest request = new RaftRpcProcessor.GetMemberRequest(); + FutureClosureAdapter response = + new FutureClosureAdapter<>(); + internalCallAsyncWithRpc(JRaftUtils.getEndPoint(address), request, response); + return response.future; + } + + private void internalCallAsyncWithRpc(final Endpoint endpoint, + final RaftRpcProcessor.BaseRequest request, + final FutureClosureAdapter closure) { + final InvokeContext invokeCtx = new InvokeContext(); + final InvokeCallback invokeCallback = new InvokeCallback() { + + @Override + public void complete(final Object result, final Throwable err) { + if (err == null) { + final RaftRpcProcessor.BaseResponse response = + (RaftRpcProcessor.BaseResponse) result; + closure.setResponse((V) response); + } else { + closure.failure(err); + closure.run(new Status(-1, err.getMessage())); + } + } + }; + + try { + this.rpcClient.invokeAsync(endpoint, request, invokeCtx, invokeCallback, + this.rpcOptions.getRpcDefaultTimeout()); + } catch (final Throwable t) { + log.error("failed to call rpc to {}. {}", endpoint, t.getMessage()); + closure.failure(t); + closure.run(new Status(-1, t.getMessage())); + } + } +} diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftRpcProcessor.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftRpcProcessor.java new file mode 100644 index 0000000000..1286515de2 --- /dev/null +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftRpcProcessor.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
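Despite its name (and the stale "请求快照" comment, literally "request snapshot"), `getGrpcAddress` returns the peer's full member info. A usage sketch with a placeholder address and an assumed timeout:

```java
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeUnit;

import org.apache.hugegraph.pd.raft.RaftRpcClient;
import org.apache.hugegraph.pd.raft.RaftRpcProcessor;

import com.alipay.sofa.jraft.option.RpcOptions;

public class RaftRpcClientExample {

    // Ask a peer (by raft address) for its member info, with a bounded wait.
    static void demo() throws Exception {
        RaftRpcClient client = new RaftRpcClient();
        client.init(new RpcOptions());
        CompletableFuture<RaftRpcProcessor.GetMemberResponse> future =
                client.getGrpcAddress("10.0.0.2:8610");
        RaftRpcProcessor.GetMemberResponse resp = future.get(3, TimeUnit.SECONDS);
        System.out.println("grpc=" + resp.getGrpcAddress() + ", rest=" + resp.getRestAddress());
    }
}
```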
+ */ + +package org.apache.hugegraph.pd.raft; + +import java.io.Serializable; + +import com.alipay.sofa.jraft.rpc.RpcContext; +import com.alipay.sofa.jraft.rpc.RpcProcessor; +import com.alipay.sofa.jraft.rpc.RpcServer; + +import lombok.Data; + +public class RaftRpcProcessor implements RpcProcessor { + + + private final Class requestClass; + private final RaftEngine raftEngine; + + public RaftRpcProcessor(Class requestClass, RaftEngine raftEngine) { + this.requestClass = requestClass; + this.raftEngine = raftEngine; + } + + public static void registerProcessor(final RpcServer rpcServer, RaftEngine raftEngine) { + rpcServer.registerProcessor(new RaftRpcProcessor<>(GetMemberRequest.class, raftEngine)); + } + + @Override + public void handleRequest(RpcContext rpcCtx, T request) { + if (request.magic() == BaseRequest.GET_GRPC_ADDRESS) { + rpcCtx.sendResponse(getGrpcAddress()); + } + } + + @Override + public String interest() { + return this.requestClass.getName(); + } + + private GetMemberResponse getGrpcAddress() { + GetMemberResponse rep = new GetMemberResponse(); + rep.setGrpcAddress(raftEngine.getConfig().getGrpcAddress()); + rep.setClusterId(raftEngine.getConfig().getClusterId()); + rep.setDatePath(raftEngine.getConfig().getDataPath()); + rep.setRaftAddress(raftEngine.getConfig().getAddress()); + rep.setRestAddress( + raftEngine.getConfig().getHost() + ":" + raftEngine.getConfig().getPort()); + rep.setStatus(Status.OK); + return rep; + } + + public enum Status implements Serializable { + UNKNOWN(-1, "unknown"), + OK(0, "ok"), + COMPLETE(0, "Transmission completed"), + INCOMPLETE(1, "Incomplete transmission"), + NO_PARTITION(10, "Partition not found"), + IO_ERROR(11, "io error"), + EXCEPTION(12, "exception"), + ABORT(100, "Transmission aborted"); + + private final int code; + private String msg; + + Status(int code, String msg) { + this.code = code; + this.msg = msg; + } + + public int getCode() { + return this.code; + } + + public Status setMsg(String msg) { + this.msg = msg; + return this; + } + + public boolean isOK() { + return this.code == 0; + } + } + + public abstract static class BaseRequest implements Serializable { + public static final byte GET_GRPC_ADDRESS = 0x01; + + public abstract byte magic(); + } + + @Data + public abstract static class BaseResponse implements Serializable { + private Status status; + + } + + @Data + public static class GetMemberRequest extends BaseRequest { + @Override + public byte magic() { + return GET_GRPC_ADDRESS; + } + } + + @Data + public static class GetMemberResponse extends BaseResponse { + private long clusterId; + private String raftAddress; + private String grpcAddress; + private String datePath; + private String restAddress; + } +} diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftStateListener.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftStateListener.java new file mode 100644 index 0000000000..020be6f8bb --- /dev/null +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftStateListener.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
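The processor dispatches on a one-byte magic value rather than on the request class, and `interest()` binds one processor instance per request class name. A hypothetical sketch of what a second request type would look like; the `GetLeaderTermRequest` class and `GET_LEADER_TERM` constant are assumptions, not part of this patch:

```java
import org.apache.hugegraph.pd.raft.RaftRpcProcessor;

// A new request type would also need its own branch in handleRequest and its
// own registerProcessor(...) call, since one processor serves one class.
public class GetLeaderTermRequest extends RaftRpcProcessor.BaseRequest {

    public static final byte GET_LEADER_TERM = 0x02; // assumed unused magic value

    @Override
    public byte magic() {
        return GET_LEADER_TERM;
    }
}
```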
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd.raft; + +public interface RaftStateListener { + void onRaftLeaderChanged(); +} diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftStateMachine.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftStateMachine.java new file mode 100644 index 0000000000..4733212022 --- /dev/null +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftStateMachine.java @@ -0,0 +1,330 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd.raft; + +import java.io.File; +import java.io.IOException; +import java.util.List; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicLong; +import java.util.zip.Checksum; + +import org.apache.commons.io.FileUtils; +import org.apache.hugegraph.pd.common.PDException; +import org.apache.hugegraph.pd.grpc.Pdpb; +import org.springframework.util.CollectionUtils; + +import com.alipay.sofa.jraft.Closure; +import com.alipay.sofa.jraft.Iterator; +import com.alipay.sofa.jraft.Status; +import com.alipay.sofa.jraft.conf.Configuration; +import com.alipay.sofa.jraft.core.StateMachineAdapter; +import com.alipay.sofa.jraft.entity.LeaderChangeContext; +import com.alipay.sofa.jraft.entity.LocalFileMetaOutter; +import com.alipay.sofa.jraft.error.RaftError; +import com.alipay.sofa.jraft.error.RaftException; +import com.alipay.sofa.jraft.storage.snapshot.SnapshotReader; +import com.alipay.sofa.jraft.storage.snapshot.SnapshotWriter; +import com.alipay.sofa.jraft.util.CRC64; +import com.alipay.sofa.jraft.util.Utils; + +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class RaftStateMachine extends StateMachineAdapter { + private static final String SNAPSHOT_DIR_NAME = "snapshot"; + private static final String SNAPSHOT_ARCHIVE_NAME = "snapshot.zip"; + private final AtomicLong leaderTerm = new AtomicLong(-1); + private final List taskHandlers; + private final List stateListeners; + + public RaftStateMachine() { + this.taskHandlers = new CopyOnWriteArrayList<>(); + this.stateListeners = new CopyOnWriteArrayList<>(); + } + + public void addTaskHandler(RaftTaskHandler handler) { + taskHandlers.add(handler); + } + + public void addStateListener(RaftStateListener listener) { + stateListeners.add(listener); + 
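A minimal listener sketch; registration goes through `RaftEngine.addStateListener` shown earlier, and `RaftStateMachine` below fires the callback from both `onLeaderStart` and `onStartFollowing`:

```java
import org.apache.hugegraph.pd.raft.RaftStateListener;

// React to leadership changes, e.g. invalidate a cached leader address or
// re-register watchers. Invoked from a JRaft utility thread.
public class LeaderChangeListener implements RaftStateListener {

    @Override
    public void onRaftLeaderChanged() {
        System.out.println("PD raft leadership changed");
    }
}
```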
} + + public boolean isLeader() { + return this.leaderTerm.get() > 0; + } + + @Override + public void onApply(Iterator iter) { + while (iter.hasNext()) { + final RaftClosureAdapter done = (RaftClosureAdapter) iter.done(); + try { + KVOperation kvOp; + if (done != null) { + kvOp = done.op; + } else { + kvOp = KVOperation.fromByteArray(iter.getData().array()); + } + for (RaftTaskHandler taskHandler : taskHandlers) { + taskHandler.invoke(kvOp, done); + } + if (done != null) { + done.run(Status.OK()); + } + } catch (Throwable t) { + log.error("StateMachine meet critical error: {}.", t); + if (done != null) { + done.run(new Status(RaftError.EINTERNAL, t.getMessage())); + } + } + iter.next(); + } + } + + @Override + public void onError(final RaftException e) { + log.error("Raft StateMachine on error {}", e); + } + + @Override + public void onShutdown() { + super.onShutdown(); + } + + @Override + public void onLeaderStart(final long term) { + this.leaderTerm.set(term); + super.onLeaderStart(term); + + log.info("Raft becomes leader"); + Utils.runInThread(() -> { + if (!CollectionUtils.isEmpty(stateListeners)) { + stateListeners.forEach(listener -> { + listener.onRaftLeaderChanged(); + }); + } + }); + } + + @Override + public void onLeaderStop(final Status status) { + this.leaderTerm.set(-1); + super.onLeaderStop(status); + log.info("Raft lost leader "); + } + + @Override + public void onStartFollowing(final LeaderChangeContext ctx) { + super.onStartFollowing(ctx); + Utils.runInThread(() -> { + if (!CollectionUtils.isEmpty(stateListeners)) { + stateListeners.forEach(listener -> { + listener.onRaftLeaderChanged(); + }); + } + }); + } + + @Override + public void onStopFollowing(final LeaderChangeContext ctx) { + super.onStopFollowing(ctx); + } + + + @Override + public void onConfigurationCommitted(final Configuration conf) { + log.info("Raft onConfigurationCommitted {}", conf); + } + + @Override + public void onSnapshotSave(final SnapshotWriter writer, final Closure done) { + + String snapshotDir = writer.getPath() + File.separator + SNAPSHOT_DIR_NAME; + try { + FileUtils.deleteDirectory(new File(snapshotDir)); + FileUtils.forceMkdir(new File(snapshotDir)); + } catch (IOException e) { + log.error("Failed to create snapshot directory {}", snapshotDir); + done.run(new Status(RaftError.EIO, e.toString())); + return; + } + + CountDownLatch latch = new CountDownLatch(taskHandlers.size()); + for (RaftTaskHandler taskHandler : taskHandlers) { + Utils.runInThread(() -> { + try { + KVOperation op = KVOperation.createSaveSnapshot(snapshotDir); + taskHandler.invoke(op, null); + log.info("Raft onSnapshotSave success"); + latch.countDown(); + } catch (PDException e) { + log.error("Raft onSnapshotSave failed. {}", e.toString()); + done.run(new Status(RaftError.EIO, e.toString())); + } + }); + } + try { + latch.await(); + } catch (InterruptedException e) { + log.error("Raft onSnapshotSave failed. 
{}", e.toString()); + done.run(new Status(RaftError.EIO, e.toString())); + return; + } + + // compress + try { + compressSnapshot(writer); + FileUtils.deleteDirectory(new File(snapshotDir)); + } catch (Exception e) { + log.error("Failed to delete snapshot directory {}, {}", snapshotDir, e.toString()); + done.run(new Status(RaftError.EIO, e.toString())); + return; + } + done.run(Status.OK()); + } + + @Override + public boolean onSnapshotLoad(final SnapshotReader reader) { + if (isLeader()) { + log.warn("Leader is not supposed to load snapshot"); + return false; + } + String snapshotDir = reader.getPath() + File.separator + SNAPSHOT_DIR_NAME; + String snapshotArchive = reader.getPath() + File.separator + SNAPSHOT_ARCHIVE_NAME; + // 2. decompress snapshot archive + try { + decompressSnapshot(reader); + } catch (PDException e) { + log.error("Failed to delete snapshot directory {}, {}", snapshotDir, e.toString()); + return true; + } + + CountDownLatch latch = new CountDownLatch(taskHandlers.size()); + for (RaftTaskHandler taskHandler : taskHandlers) { + try { + KVOperation op = KVOperation.createLoadSnapshot(snapshotDir); + taskHandler.invoke(op, null); + log.info("Raft onSnapshotLoad success"); + latch.countDown(); + } catch (PDException e) { + log.error("Raft onSnapshotLoad failed. {}", e.toString()); + return false; + } + } + try { + latch.await(); + } catch (InterruptedException e) { + log.error("Raft onSnapshotSave failed. {}", e.toString()); + return false; + } + + + try { + // TODO: remove file from meta + // SnapshotReader 沒有提供刪除文件的接口 + FileUtils.deleteDirectory(new File(snapshotDir)); + File file = new File(snapshotArchive); + if (file.exists()) { + FileUtils.forceDelete(file); + } + } catch (IOException e) { + log.error("Failed to delete snapshot directory {} and file {}", snapshotDir, + snapshotArchive); + return false; + } + + return true; + } + + private void compressSnapshot(final SnapshotWriter writer) throws PDException { + final Checksum checksum = new CRC64(); + final String snapshotArchive = writer.getPath() + File.separator + SNAPSHOT_ARCHIVE_NAME; + try { + ZipUtils.compress(writer.getPath(), SNAPSHOT_DIR_NAME, snapshotArchive, checksum); + LocalFileMetaOutter.LocalFileMeta.Builder metaBuild = + LocalFileMetaOutter.LocalFileMeta.newBuilder(); + metaBuild.setChecksum(Long.toHexString(checksum.getValue())); + if (!writer.addFile(SNAPSHOT_ARCHIVE_NAME, metaBuild.build())) { + throw new PDException(Pdpb.ErrorType.ROCKSDB_SAVE_SNAPSHOT_ERROR_VALUE, + "failed to add file to LocalFileMeta"); + } + } catch (IOException e) { + throw new PDException(Pdpb.ErrorType.ROCKSDB_SAVE_SNAPSHOT_ERROR_VALUE, e); + } + } + + private void decompressSnapshot(final SnapshotReader reader) throws PDException { + final LocalFileMetaOutter.LocalFileMeta meta = + (LocalFileMetaOutter.LocalFileMeta) reader.getFileMeta(SNAPSHOT_ARCHIVE_NAME); + final Checksum checksum = new CRC64(); + final String snapshotArchive = reader.getPath() + File.separator + SNAPSHOT_ARCHIVE_NAME; + try { + ZipUtils.decompress(snapshotArchive, reader.getPath(), checksum); + if (meta.hasChecksum()) { + if (!meta.getChecksum().equals(Long.toHexString(checksum.getValue()))) { + throw new PDException(Pdpb.ErrorType.ROCKSDB_LOAD_SNAPSHOT_ERROR_VALUE, + "Snapshot checksum failed"); + } + } + } catch (IOException e) { + throw new PDException(Pdpb.ErrorType.ROCKSDB_LOAD_SNAPSHOT_ERROR_VALUE, e); + } + } + + + public static class RaftClosureAdapter implements KVStoreClosure { + private final KVOperation op; + private final KVStoreClosure 
closure; + + public RaftClosureAdapter(KVOperation op, KVStoreClosure closure) { + this.op = op; + this.closure = closure; + } + + public KVStoreClosure getClosure() { + return closure; + } + + @Override + public void run(Status status) { + closure.run(status); + } + + @Override + public Pdpb.Error getError() { + return null; + } + + @Override + public void setError(Pdpb.Error error) { + + } + + @Override + public Object getData() { + return null; + } + + @Override + public void setData(Object data) { + + } + } +} diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftTaskHandler.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftTaskHandler.java new file mode 100644 index 0000000000..6dfced4c98 --- /dev/null +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftTaskHandler.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd.raft; + +import org.apache.hugegraph.pd.common.PDException; + +/** + * 接收raft发送的数据 + */ +public interface RaftTaskHandler { + boolean invoke(final KVOperation op, KVStoreClosure response) throws PDException; +} diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/ZipUtils.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/ZipUtils.java new file mode 100644 index 0000000000..ed75b54211 --- /dev/null +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/ZipUtils.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
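A minimal `RaftTaskHandler` sketch, assuming the `KVOperation` op-code accessors that `RaftKVStore` uses later in this patch; note that `RaftStateMachine.onApply` invokes every registered handler per log entry and ignores the boolean return value:

```java
import org.apache.hugegraph.pd.common.PDException;
import org.apache.hugegraph.pd.raft.KVOperation;
import org.apache.hugegraph.pd.raft.KVStoreClosure;
import org.apache.hugegraph.pd.raft.RaftTaskHandler;

// A handler that only reacts to PUT operations and ignores everything else.
public class PutOnlyHandler implements RaftTaskHandler {

    @Override
    public boolean invoke(KVOperation op, KVStoreClosure response) throws PDException {
        if (op.getOp() == KVOperation.PUT) {
            System.out.println("apply put, key length=" + op.getKey().length);
        }
        return true;
    }
}
```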
+ */ + +package org.apache.hugegraph.pd.raft; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.file.Paths; +import java.util.zip.CheckedInputStream; +import java.util.zip.CheckedOutputStream; +import java.util.zip.Checksum; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; +import java.util.zip.ZipOutputStream; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.commons.io.output.NullOutputStream; + +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public final class ZipUtils { + + public static void compress(final String rootDir, final String sourceDir, + final String outputFile, final Checksum checksum) throws + IOException { + try (final FileOutputStream fos = new FileOutputStream(outputFile); + final CheckedOutputStream cos = new CheckedOutputStream(fos, checksum); + final ZipOutputStream zos = new ZipOutputStream(new BufferedOutputStream(cos))) { + ZipUtils.compressDirectoryToZipFile(rootDir, sourceDir, zos); + zos.flush(); + fos.getFD().sync(); + } + } + + private static void compressDirectoryToZipFile(final String rootDir, final String sourceDir, + final ZipOutputStream zos) throws IOException { + final String dir = Paths.get(rootDir, sourceDir).toString(); + final File[] files = new File(dir).listFiles(); + for (final File file : files) { + final String child = Paths.get(sourceDir, file.getName()).toString(); + if (file.isDirectory()) { + compressDirectoryToZipFile(rootDir, child, zos); + } else { + zos.putNextEntry(new ZipEntry(child)); + try (final FileInputStream fis = new FileInputStream(file); + final BufferedInputStream bis = new BufferedInputStream(fis)) { + IOUtils.copy(bis, zos); + } + } + } + } + + public static void decompress(final String sourceFile, final String outputDir, + final Checksum checksum) throws IOException { + try (final FileInputStream fis = new FileInputStream(sourceFile); + final CheckedInputStream cis = new CheckedInputStream(fis, checksum); + final ZipInputStream zis = new ZipInputStream(new BufferedInputStream(cis))) { + ZipEntry entry; + while ((entry = zis.getNextEntry()) != null) { + final String fileName = entry.getName(); + final File entryFile = new File(Paths.get(outputDir, fileName).toString()); + FileUtils.forceMkdir(entryFile.getParentFile()); + try (final FileOutputStream fos = new FileOutputStream(entryFile); + final BufferedOutputStream bos = new BufferedOutputStream(fos)) { + IOUtils.copy(zis, bos); + bos.flush(); + fos.getFD().sync(); + } + } + IOUtils.copy(cis, NullOutputStream.NULL_OUTPUT_STREAM); + } + } +} diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/BaseKVStoreClosure.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/BaseKVStoreClosure.java new file mode 100644 index 0000000000..84974aea27 --- /dev/null +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/BaseKVStoreClosure.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
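A usage sketch for `ZipUtils` with placeholder paths: compress a snapshot directory, then verify round-trip integrity by comparing CRC64 values, mirroring what `RaftStateMachine` records in `LocalFileMeta`:

```java
import java.util.zip.Checksum;

import org.apache.hugegraph.pd.raft.ZipUtils;

import com.alipay.sofa.jraft.util.CRC64;

public class SnapshotZipExample {

    // Compress <root>/snapshot into snapshot.zip, then decompress with a fresh
    // CRC64 and compare checksums to confirm the archive is intact.
    public static void main(String[] args) throws Exception {
        Checksum out = new CRC64();
        ZipUtils.compress("/tmp/raft", "snapshot", "/tmp/raft/snapshot.zip", out);

        Checksum in = new CRC64();
        ZipUtils.decompress("/tmp/raft/snapshot.zip", "/tmp/raft/restore", in);
        System.out.println("checksum match: " + (out.getValue() == in.getValue()));
    }
}
```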
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd.store; + +import org.apache.hugegraph.pd.grpc.Pdpb; +import org.apache.hugegraph.pd.raft.KVStoreClosure; + +public abstract class BaseKVStoreClosure implements KVStoreClosure { + private Pdpb.Error error; + private Object data; + + @Override + public Pdpb.Error getError() { + return error; + } + + @Override + public void setError(Pdpb.Error error) { + this.error = error; + } + + @Override + public Object getData() { + return data; + } + + @Override + public void setData(Object data) { + this.data = data; + } + + +} diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/HgKVStore.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/HgKVStore.java new file mode 100644 index 0000000000..bfa2f1ded9 --- /dev/null +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/HgKVStore.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hugegraph.pd.store; + +import java.util.List; +import java.util.concurrent.TimeUnit; + +import org.apache.hugegraph.pd.common.PDException; +import org.apache.hugegraph.pd.config.PDConfig; + +public interface HgKVStore { + void init(PDConfig config); + + void put(byte[] key, byte[] value) throws PDException; + + byte[] get(byte[] key) throws PDException; + + List scanPrefix(byte[] prefix); + + long remove(byte[] bytes) throws PDException; + + long removeByPrefix(byte[] bytes) throws PDException; + + void putWithTTL(byte[] key, byte[] value, long ttl) throws PDException; + + void putWithTTL(byte[] key, byte[] value, long ttl, TimeUnit timeUnit) throws PDException; + + byte[] getWithTTL(byte[] key) throws PDException; + + void removeWithTTL(byte[] key) throws PDException; + + List getListWithTTL(byte[] key) throws PDException; + + void clear() throws PDException; + + void saveSnapshot(String snapshotPath) throws PDException; + + void loadSnapshot(String snapshotPath) throws PDException; + + List scanRange(byte[] start, byte[] end); + + void close(); +} diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/HgKVStoreImpl.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/HgKVStoreImpl.java new file mode 100644 index 0000000000..88ebd5ca28 --- /dev/null +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/HgKVStoreImpl.java @@ -0,0 +1,343 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
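A sketch of the `HgKVStore` contract against the RocksDB-backed implementation that follows; the `PDConfig` wiring is assumed to come from the caller:

```java
import java.nio.charset.StandardCharsets;
import java.util.List;

import org.apache.hugegraph.pd.config.PDConfig;
import org.apache.hugegraph.pd.store.HgKVStore;
import org.apache.hugegraph.pd.store.HgKVStoreImpl;
import org.apache.hugegraph.pd.store.KV;

public class KvStoreExample {

    static void demo(PDConfig config) throws Exception {
        HgKVStore store = new HgKVStoreImpl();
        store.init(config); // opens RocksDB under <data-path>/rocksdb/
        store.put("k1".getBytes(StandardCharsets.UTF_8),
                  "v1".getBytes(StandardCharsets.UTF_8));
        List<KV> hits = store.scanPrefix("k".getBytes(StandardCharsets.UTF_8));
        System.out.println("prefix hits: " + hits.size());
        store.close();
    }
}
```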
+ */ + +package org.apache.hugegraph.pd.store; + +import java.io.File; +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +import org.apache.commons.io.FileUtils; +import org.apache.hugegraph.pd.common.PDException; +import org.apache.hugegraph.pd.config.PDConfig; +import org.apache.hugegraph.pd.grpc.Pdpb; +import org.apache.hugegraph.pd.grpc.discovery.RegisterInfo; +import org.rocksdb.Checkpoint; +import org.rocksdb.Options; +import org.rocksdb.ReadOptions; +import org.rocksdb.RocksDB; +import org.rocksdb.RocksDBException; +import org.rocksdb.RocksIterator; +import org.rocksdb.Slice; + +import com.alipay.sofa.jraft.util.Utils; +import com.google.common.cache.CacheBuilder; +import com.google.common.primitives.Bytes; + +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class HgKVStoreImpl implements HgKVStore { + private static final ConcurrentHashMap> CACHE = new ConcurrentHashMap(); + private final ReadWriteLock readWriteLock = new ReentrantReadWriteLock(); + private RocksDB db; + private String dbPath; + private Options dbOptions; + + @Override + public void init(PDConfig config) { + dbOptions = new Options().setCreateIfMissing(true); + + final Lock writeLock = this.readWriteLock.writeLock(); + writeLock.lock(); + try { + this.dbPath = config.getDataPath() + "/rocksdb/"; + File file = new File(this.dbPath); + if (!file.exists()) { + try { + FileUtils.forceMkdir(file); + } catch (IOException e) { + log.warn("Failed to create data file,{}", e); + } + } + openRocksDB(dbPath); + } catch (PDException e) { + log.error("Failed to open data file,{}", e); + } finally { + writeLock.unlock(); + } + } + + @Override + public void put(byte[] key, byte[] value) throws PDException { + final Lock readLock = this.readWriteLock.readLock(); + readLock.lock(); + try { + db.put(key, value); + } catch (RocksDBException e) { + throw new PDException(Pdpb.ErrorType.ROCKSDB_WRITE_ERROR_VALUE, e); + } finally { + readLock.unlock(); + } + } + + @Override + public byte[] get(byte[] key) throws PDException { + final Lock readLock = this.readWriteLock.readLock(); + readLock.lock(); + try { + return db.get(key); + } catch (RocksDBException e) { + throw new PDException(Pdpb.ErrorType.ROCKSDB_READ_ERROR_VALUE, e); + } finally { + readLock.unlock(); + } + } + + @Override + public List scanPrefix(byte[] prefix) { + final Lock readLock = this.readWriteLock.readLock(); + readLock.lock(); + try (ReadOptions options = new ReadOptions() + .setIterateLowerBound(new Slice(prefix))) { + List kvs = new ArrayList<>(); + RocksIterator iterator = db.newIterator(options); + iterator.seekToFirst(); + while (iterator.isValid() && 0 == Bytes.indexOf(iterator.key(), prefix)) { + kvs.add(new KV(iterator.key(), iterator.value())); + iterator.next(); + } + return kvs; + } finally { + readLock.unlock(); + } + } + + @Override + public long remove(byte[] key) throws PDException { + final Lock readLock = this.readWriteLock.readLock(); + readLock.lock(); + try { + db.delete(key); + } catch (RocksDBException e) { + throw new PDException(Pdpb.ErrorType.ROCKSDB_DEL_ERROR_VALUE, e); + } finally { + readLock.unlock(); + } + return 0; + } + + @Override + 
public long removeByPrefix(byte[] prefix) throws PDException { + final Lock readLock = this.readWriteLock.readLock(); + readLock.lock(); + try (ReadOptions options = new ReadOptions() + .setIterateLowerBound(new Slice(prefix))) { + RocksIterator iterator = db.newIterator(options); + iterator.seekToFirst(); + + while (iterator.isValid()) { + if (0 == Bytes.indexOf(iterator.key(), prefix)) { + db.delete(iterator.key()); + } else { + break; + } + iterator.next(); + } + } catch (Exception e) { + throw new PDException(Pdpb.ErrorType.ROCKSDB_WRITE_ERROR_VALUE, e); + } finally { + readLock.unlock(); + } + return 0; + } + + @Override + public void clear() throws PDException { + CACHE.clear(); + } + + @Override + public List getListWithTTL(byte[] key) throws PDException { + String storeKey = new String(key, Charset.defaultCharset()); + LinkedList ts = new LinkedList<>(); + CACHE.keySet().forEach((cacheKey) -> { + if (cacheKey.startsWith(storeKey)) { + ConcurrentMap map; + if ((map = CACHE.get(cacheKey)) == null) { + return; + } + map.values().forEach((element) -> { + ts.add((byte[]) element); + }); + } + }); + return ts; + } + + @Override + public byte[] getWithTTL(byte[] key) throws PDException { + ConcurrentMap map; + String storeKey = new String(key, Charset.defaultCharset()); + if ((map = CACHE.get(storeKey)) == null) { + return null; + } + Object value = map.get(storeKey); + return value == null ? null : (byte[]) value; + } + + @Override + public void removeWithTTL(byte[] key) throws PDException { + ConcurrentMap map; + String storeKey = new String(key, Charset.defaultCharset()); + if ((map = CACHE.get(storeKey)) == null) { + return; + } + map.remove(storeKey); + } + + @Override + public void putWithTTL(byte[] key, byte[] value, long ttl) throws PDException { + this.putWithTTL(key, value, ttl, TimeUnit.SECONDS); + } + + @Override + public void putWithTTL(byte[] key, byte[] value, long ttl, TimeUnit timeUnit) throws + PDException { + try { + ConcurrentMap spaceNode = CacheBuilder.newBuilder().initialCapacity(200) + .expireAfterWrite(ttl, + timeUnit) + .build().asMap(); + String storeKey = new String(key, Charset.defaultCharset()); + ConcurrentMap space = CACHE.putIfAbsent(storeKey, spaceNode); + if (space == null) { + space = spaceNode; + } + space.put(storeKey, value); + } catch (Exception e) { + throw new PDException(Pdpb.ErrorType.ROCKSDB_WRITE_ERROR_VALUE, e); + } + } + + @Override + public void saveSnapshot(String snapshotPath) throws PDException { + log.info("begin save snapshot at {}", snapshotPath); + final Lock writeLock = this.readWriteLock.writeLock(); + writeLock.lock(); + try (final Checkpoint checkpoint = Checkpoint.create(this.db)) { + final String tempPath = Paths.get(snapshotPath) + "_temp"; + final File tempFile = new File(tempPath); + FileUtils.deleteDirectory(tempFile); + checkpoint.createCheckpoint(tempPath); + final File snapshotFile = new File(snapshotPath); + FileUtils.deleteDirectory(snapshotFile); + if (!Utils.atomicMoveFile(tempFile, snapshotFile, true)) { + log.error("Fail to rename {} to {}", tempPath, snapshotPath); + throw new PDException(Pdpb.ErrorType.ROCKSDB_SAVE_SNAPSHOT_ERROR_VALUE, + String.format("Fail to rename %s to %s", tempPath, + snapshotPath)); + } + } catch (final PDException e) { + throw e; + } catch (final Exception e) { + log.error("Fail to write snapshot at path: {}", snapshotPath, e); + throw new PDException(Pdpb.ErrorType.ROCKSDB_SAVE_SNAPSHOT_ERROR_VALUE, e); + } finally { + writeLock.unlock(); + } + log.info("saved snapshot into {}", 
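One detail worth calling out from the implementation above: the `*WithTTL` family stores values in an in-memory Guava cache keyed by the string form of the key, not in RocksDB, so entries expire by wall clock and do not survive a restart. A small sketch of the resulting semantics (the key name is a placeholder):

```java
import java.nio.charset.StandardCharsets;
import java.util.concurrent.TimeUnit;

import org.apache.hugegraph.pd.store.HgKVStore;

public class TtlExample {

    // Demonstrates the in-memory TTL semantics; store must already be init()-ed.
    static void demo(HgKVStore store) throws Exception {
        byte[] key = "lease/node-1".getBytes(StandardCharsets.UTF_8);
        store.putWithTTL(key, "alive".getBytes(StandardCharsets.UTF_8), 5, TimeUnit.SECONDS);
        System.out.println(store.getWithTTL(key) != null); // true within the TTL
        Thread.sleep(6_000);
        System.out.println(store.getWithTTL(key) != null); // false after expiry
    }
}
```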
snapshotPath); + } + + @Override + public void loadSnapshot(String snapshotPath) throws PDException { + log.info("begin load snapshot from {}", snapshotPath); + final Lock writeLock = this.readWriteLock.writeLock(); + writeLock.lock(); + try { + final File snapshotFile = new File(snapshotPath); + if (!snapshotFile.exists()) { + log.error("Snapshot file {} not exists.", snapshotPath); + return; + } + // close DB + closeRocksDB(); + // replace rocksdb data with snapshot data + final File dbFile = new File(this.dbPath); + FileUtils.deleteDirectory(dbFile); + if (!Utils.atomicMoveFile(snapshotFile, dbFile, true)) { + log.error("Fail to rename {} to {}", snapshotPath, this.dbPath); + throw new PDException(Pdpb.ErrorType.ROCKSDB_LOAD_SNAPSHOT_ERROR_VALUE, + String.format("Fail to rename %s to %s", snapshotPath, + this.dbPath)); + } + // reopen the db + openRocksDB(this.dbPath); + } catch (final PDException e) { + throw e; + } catch (final Exception e) { + log.error("failed to load snapshot from {}", snapshotPath); + throw new PDException(Pdpb.ErrorType.ROCKSDB_LOAD_SNAPSHOT_ERROR_VALUE, e); + } finally { + writeLock.unlock(); + } + log.info("loaded snapshot from {}", snapshotPath); + } + + @Override + public List scanRange(byte[] start, byte[] end) { + final Lock readLock = this.readWriteLock.readLock(); + readLock.lock(); + try (ReadOptions options = new ReadOptions() + .setIterateLowerBound(new Slice(start)) + .setIterateUpperBound(new Slice(end))) { + List kvs = new ArrayList<>(); + RocksIterator iterator = db.newIterator(options); + iterator.seekToFirst(); + while (iterator.isValid()) { + kvs.add(new KV(iterator.key(), iterator.value())); + iterator.next(); + } + return kvs; + } finally { + readLock.unlock(); + } + } + + @Override + public void close() { + closeRocksDB(); + } + + + private void closeRocksDB() { + if (this.db != null) { + this.db.close(); + this.db = null; + } + } + + private void openRocksDB(String dbPath) throws PDException { + try { + this.db = RocksDB.open(dbOptions, dbPath); + } catch (RocksDBException e) { + log.error("Failed to open RocksDB from {}", dbPath, e); + throw new PDException(Pdpb.ErrorType.ROCKSDB_LOAD_SNAPSHOT_ERROR_VALUE, e); + } + } +} diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/KV.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/KV.java new file mode 100644 index 0000000000..763a8541a8 --- /dev/null +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/KV.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hugegraph.pd.store; + + +public class KV { + private byte[] key; + private byte[] value; + + public KV(byte[] key, byte[] value) { + this.key = key; + this.value = value; + } + + public byte[] getKey() { + return key; + } + + public void setKey(byte[] key) { + this.key = key; + } + + public byte[] getValue() { + return value; + } + + public void setValue(byte[] value) { + this.value = value; + } +} diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/RaftKVStore.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/RaftKVStore.java new file mode 100644 index 0000000000..b9e373ce8b --- /dev/null +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/RaftKVStore.java @@ -0,0 +1,324 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd.store; + +import java.nio.ByteBuffer; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; + +import org.apache.hugegraph.pd.common.PDException; +import org.apache.hugegraph.pd.config.PDConfig; +import org.apache.hugegraph.pd.grpc.Pdpb; +import org.apache.hugegraph.pd.raft.KVOperation; +import org.apache.hugegraph.pd.raft.KVStoreClosure; +import org.apache.hugegraph.pd.raft.RaftEngine; +import org.apache.hugegraph.pd.raft.RaftStateMachine; +import org.apache.hugegraph.pd.raft.RaftTaskHandler; + +import com.alipay.sofa.jraft.Status; +import com.alipay.sofa.jraft.entity.Task; +import com.alipay.sofa.jraft.error.RaftError; + +import lombok.extern.slf4j.Slf4j; + + +@Slf4j +public class RaftKVStore implements HgKVStore, RaftTaskHandler { + + private final RaftEngine engine; + private final HgKVStore store; + + public RaftKVStore(RaftEngine engine, HgKVStore store) { + this.engine = engine; + this.store = store; + } + + @Override + public void init(PDConfig config) { + this.store.init(config); + this.engine.addTaskHandler(this); + } + + private BaseKVStoreClosure createClosure() { + return new BaseKVStoreClosure() { + @Override + public void run(Status status) { + if (!status.isOk()) { + log.error("An exception occurred while performing the RAFT,{}", + status.getErrorMsg()); + } else { + log.info("RAFT done!"); + } + } + }; + } + + @Override + public void put(byte[] key, byte[] value) throws PDException { + KVOperation operation = KVOperation.createPut(key, value); + try { + applyOperation(operation).get(); + } catch (Exception e) { + throw new PDException(Pdpb.ErrorType.UNKNOWN_VALUE, e.getMessage()); + } + } + + /** + * 查询可以不走raft,直接读取 + */ + @Override + public byte[] get(byte[] key) throws PDException { + return store.get(key); + + } + + @Override + public List scanPrefix(byte[] prefix) { + return store.scanPrefix(prefix); + } + + 
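A wiring sketch for the raft-backed store defined here; the engine handle is assumed to come from the caller:

```java
import org.apache.hugegraph.pd.config.PDConfig;
import org.apache.hugegraph.pd.raft.RaftEngine;
import org.apache.hugegraph.pd.store.HgKVStoreImpl;
import org.apache.hugegraph.pd.store.RaftKVStore;

public class RaftStoreWiring {

    // Compose the raft layer over the local RocksDB store. init() initializes
    // the inner store and registers the RaftKVStore as a RaftTaskHandler, so
    // replicated KVOperations get applied to RocksDB on every member.
    static RaftKVStore build(RaftEngine engine, PDConfig config) {
        RaftKVStore store = new RaftKVStore(engine, new HgKVStoreImpl());
        store.init(config);
        return store;
    }
}
```

Writes then block on the `CompletableFuture` inside `applyOperation`, while `get`/`scanPrefix`/`scanRange` read the local store directly, trading read linearizability for latency, as the "查询可以不走raft,直接读取" ("reads may bypass raft and hit the store directly") comment notes.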
@Override + public long remove(byte[] bytes) throws PDException { + try { + applyOperation(KVOperation.createRemove(bytes)).get(); + } catch (Exception e) { + throw new PDException(Pdpb.ErrorType.UNKNOWN_VALUE, e.getMessage()); + } + return 0; + } + + + @Override + public long removeByPrefix(byte[] bytes) throws PDException { + try { + applyOperation(KVOperation.createRemoveByPrefix(bytes)).get(); + } catch (Exception e) { + throw new PDException(Pdpb.ErrorType.UNKNOWN_VALUE, e.getMessage()); + } + return 0; + } + + @Override + public void clear() throws PDException { + try { + applyOperation(KVOperation.createClear()).get(); + } catch (Exception e) { + throw new PDException(Pdpb.ErrorType.UNKNOWN_VALUE, e.getMessage()); + } + } + + @Override + public void putWithTTL(byte[] key, byte[] value, long ttl) throws PDException { + try { + applyOperation(KVOperation.createPutWithTTL(key, value, ttl)).get(); + } catch (Exception e) { + throw new PDException(Pdpb.ErrorType.UNKNOWN_VALUE, e.getMessage()); + } + } + + @Override + public void putWithTTL(byte[] key, byte[] value, long ttl, TimeUnit timeUnit) throws + PDException { + try { + applyOperation(KVOperation.createPutWithTTL(key, value, ttl, timeUnit)).get(); + } catch (Exception e) { + throw new PDException(Pdpb.ErrorType.UNKNOWN_VALUE, e.getMessage()); + } + } + + @Override + public List getListWithTTL(byte[] key) throws PDException { + return store.getListWithTTL(key); + } + + @Override + public byte[] getWithTTL(byte[] key) throws PDException { + return store.getWithTTL(key); + } + + @Override + public void removeWithTTL(byte[] key) throws PDException { + try { + applyOperation(KVOperation.createRemoveWithTTL(key)).get(); + } catch (Exception e) { + throw new PDException(Pdpb.ErrorType.UNKNOWN_VALUE, e.getMessage()); + } + } + + @Override + public void saveSnapshot(String snapshotPath) throws PDException { + store.saveSnapshot(snapshotPath); + } + + @Override + public void loadSnapshot(String snapshotPath) throws PDException { + store.loadSnapshot(snapshotPath); + } + + @Override + public List scanRange(byte[] start, byte[] end) { + return store.scanRange(start, end); + } + + @Override + public void close() { + store.close(); + } + + /** + * 需要走Raft的真实操作 + */ + private void doPut(byte[] key, byte[] value) throws PDException { + + store.put(key, value); + } + + public long doRemove(byte[] bytes) throws PDException { + return this.store.remove(bytes); + } + + public long doRemoveByPrefix(byte[] bytes) throws PDException { + return this.store.removeByPrefix(bytes); + } + + public void doRemoveWithTTL(byte[] key) throws PDException { + this.store.removeWithTTL(key); + } + + public void doClear() throws PDException { + this.store.clear(); + } + + public void doPutWithTTL(byte[] key, byte[] value, long ttl) throws PDException { + this.store.putWithTTL(key, value, ttl); + } + + public void doPutWithTTL(byte[] key, byte[] value, long ttl, TimeUnit timeUnit) throws + PDException { + this.store.putWithTTL(key, value, ttl, timeUnit); + } + + public void doSaveSnapshot(String snapshotPath) throws PDException { + this.store.saveSnapshot(snapshotPath); + } + + public void doLoadSnapshot(String snapshotPath) throws PDException { + this.store.loadSnapshot(snapshotPath); + } + + private CompletableFuture applyOperation(final KVOperation op) throws PDException { + CompletableFuture future = new CompletableFuture<>(); + try { + final Task task = new Task(); + task.setData(ByteBuffer.wrap(op.toByteArray())); + task.setDone(new 
RaftStateMachine.RaftClosureAdapter(op, new KVStoreClosure() { + Object data; + Pdpb.Error error; + + @Override + public Pdpb.Error getError() { + return error; + } + + @Override + public void setError(Pdpb.Error error) { + this.error = error; + } + + @Override + public Object getData() { + return data; + } + + @Override + public void setData(Object data) { + this.data = data; + } + + @Override + public void run(Status status) { + if (status.isOk()) { + future.complete((T) data); + } else { + RaftError raftError = status.getRaftError(); + Pdpb.ErrorType type; + if (RaftError.EPERM.equals(raftError)) { + type = Pdpb.ErrorType.NOT_LEADER; + } else { + type = Pdpb.ErrorType.UNKNOWN; + } + error = Pdpb.Error.newBuilder().setType(type) + .setMessage(status.getErrorMsg()) + .build(); + future.completeExceptionally( + new PDException(error.getTypeValue())); + } + } + })); + this.engine.addTask(task); + return future; + } catch (Exception e) { + future.completeExceptionally(e); + return future; + } + } + + private boolean isLeader() { + return this.engine.isLeader(); + } + + @Override + public boolean invoke(KVOperation op, KVStoreClosure response) throws PDException { + switch (op.getOp()) { + case KVOperation.GET: + break; + case KVOperation.PUT: + doPut(op.getKey(), op.getValue()); + break; + case KVOperation.REMOVE: + doRemove(op.getKey()); + break; + case KVOperation.PUT_WITH_TTL: + doPutWithTTL(op.getKey(), op.getValue(), (long) op.getArg()); + break; + case KVOperation.PUT_WITH_TTL_UNIT: + Object[] arg = (Object[]) op.getArg(); + doPutWithTTL(op.getKey(), op.getValue(), (long) arg[0], (TimeUnit) arg[1]); + break; + case KVOperation.REMOVE_BY_PREFIX: + doRemoveByPrefix(op.getKey()); + break; + case KVOperation.REMOVE_WITH_TTL: + doRemoveWithTTL(op.getKey()); + break; + case KVOperation.CLEAR: + doClear(); + break; + case KVOperation.SAVE_SNAPSHOT: + doSaveSnapshot((String) op.getAttach()); + break; + case KVOperation.LOAD_SNAPSHOT: + doLoadSnapshot((String) op.getAttach()); + break; + default: + log.error("Err op {}", op.getOp()); + } + return false; + } +} From 24ad2748e77819e38249f6bf4d0a60319082c6ff Mon Sep 17 00:00:00 2001 From: VGalaxies Date: Thu, 14 Mar 2024 13:35:35 +0800 Subject: [PATCH 3/7] adjust hugegraph-pd/pom.xml --- hugegraph-pd/pom.xml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hugegraph-pd/pom.xml b/hugegraph-pd/pom.xml index 6253cfd443..8647775d3c 100644 --- a/hugegraph-pd/pom.xml +++ b/hugegraph-pd/pom.xml @@ -36,11 +36,10 @@ hg-pd-common hg-pd-client hg-pd-test + hg-pd-core - - From d40b270dc3479662e20c92a45ff37f5e4ece84fc Mon Sep 17 00:00:00 2001 From: VGalaxies Date: Thu, 14 Mar 2024 14:06:59 +0800 Subject: [PATCH 4/7] format hg-pd-core --- hugegraph-pd/hg-pd-core/pom.xml | 2 +- .../java/org/apache/hugegraph/pd/ConfigService.java | 1 - .../main/java/org/apache/hugegraph/pd/KvService.java | 1 - .../hugegraph/pd/PartitionInstructionListener.java | 1 + .../java/org/apache/hugegraph/pd/PartitionService.java | 1 - .../apache/hugegraph/pd/PartitionStatusListener.java | 1 + .../java/org/apache/hugegraph/pd/RegistryService.java | 1 + .../apache/hugegraph/pd/ShardGroupStatusListener.java | 1 + .../apache/hugegraph/pd/StoreMonitorDataService.java | 3 +-- .../java/org/apache/hugegraph/pd/StoreNodeService.java | 1 - .../org/apache/hugegraph/pd/TaskScheduleService.java | 10 +--------- .../java/org/apache/hugegraph/pd/config/PDConfig.java | 8 ++++++-- .../org/apache/hugegraph/pd/meta/ConfigMetaStore.java | 2 -- 
.../java/org/apache/hugegraph/pd/meta/IdMetaStore.java | 1 - .../org/apache/hugegraph/pd/meta/MetadataFactory.java | 1 - .../apache/hugegraph/pd/meta/MetadataKeyHelper.java | 1 + .../apache/hugegraph/pd/meta/MetadataStoreBase.java | 1 - .../org/apache/hugegraph/pd/meta/PartitionMeta.java | 2 +- .../java/org/apache/hugegraph/pd/meta/QueueStore.java | 1 + .../org/apache/hugegraph/pd/meta/StoreInfoMeta.java | 1 + .../org/apache/hugegraph/pd/meta/TaskInfoMeta.java | 1 + .../apache/hugegraph/pd/raft/FutureClosureAdapter.java | 1 + .../java/org/apache/hugegraph/pd/raft/RaftEngine.java | 1 + .../org/apache/hugegraph/pd/raft/RaftRpcClient.java | 1 + .../org/apache/hugegraph/pd/raft/RaftRpcProcessor.java | 5 ++++- .../apache/hugegraph/pd/raft/RaftStateListener.java | 1 + .../org/apache/hugegraph/pd/raft/RaftStateMachine.java | 5 ++--- .../org/apache/hugegraph/pd/raft/RaftTaskHandler.java | 1 + .../apache/hugegraph/pd/store/BaseKVStoreClosure.java | 2 +- .../java/org/apache/hugegraph/pd/store/HgKVStore.java | 1 + .../org/apache/hugegraph/pd/store/HgKVStoreImpl.java | 2 +- .../main/java/org/apache/hugegraph/pd/store/KV.java | 2 +- .../org/apache/hugegraph/pd/store/RaftKVStore.java | 2 -- 33 files changed, 33 insertions(+), 33 deletions(-) diff --git a/hugegraph-pd/hg-pd-core/pom.xml b/hugegraph-pd/hg-pd-core/pom.xml index e59b5ac35e..1f23259d21 100644 --- a/hugegraph-pd/hg-pd-core/pom.xml +++ b/hugegraph-pd/hg-pd-core/pom.xml @@ -37,7 +37,7 @@ com.alipay.sofa jraft-core - + 1.3.13 diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/ConfigService.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/ConfigService.java index 2557745c88..cc28c1b0a7 100644 --- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/ConfigService.java +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/ConfigService.java @@ -41,7 +41,6 @@ public ConfigService(PDConfig config) { meta = MetadataFactory.newConfigMeta(config); } - public Metapb.PDConfig getPDConfig(long version) throws PDException { return this.meta.getPdConfig(version); } diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/KvService.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/KvService.java index e85cfcb1eb..f31196f81c 100644 --- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/KvService.java +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/KvService.java @@ -43,7 +43,6 @@ @Service public class KvService { - public static final char KV_DELIMITER = '@'; // TODO 主前缀之后,增加类名做区分 private static final String TTL_PREFIX = "T"; diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/PartitionInstructionListener.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/PartitionInstructionListener.java index 2188f6ca78..2b1e4a6375 100644 --- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/PartitionInstructionListener.java +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/PartitionInstructionListener.java @@ -31,6 +31,7 @@ * 分区命令监听 */ public interface PartitionInstructionListener { + void changeShard(Metapb.Partition partition, ChangeShard changeShard) throws PDException; void transferLeader(Metapb.Partition partition, TransferLeader transferLeader) throws diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/PartitionService.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/PartitionService.java index 9291a813c9..c8ec3e3e7d 
100644 --- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/PartitionService.java +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/PartitionService.java @@ -991,7 +991,6 @@ private void checkShardState(Metapb.Partition partition, Metapb.PartitionStats s } } - public void addInstructionListener(PartitionInstructionListener event) { instructionListeners.add(event); } diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/PartitionStatusListener.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/PartitionStatusListener.java index 933822f109..fea0ce35d9 100644 --- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/PartitionStatusListener.java +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/PartitionStatusListener.java @@ -23,6 +23,7 @@ * 分区状态监听 */ public interface PartitionStatusListener { + void onPartitionChanged(Metapb.Partition partition, Metapb.Partition newPartition); void onPartitionRemoved(Metapb.Partition partition); diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/RegistryService.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/RegistryService.java index 223889cadf..86922d56d3 100644 --- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/RegistryService.java +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/RegistryService.java @@ -26,6 +26,7 @@ import org.apache.hugegraph.pd.meta.MetadataFactory; public class RegistryService { + private final PDConfig pdConfig; private final DiscoveryMetaStore meta; diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/ShardGroupStatusListener.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/ShardGroupStatusListener.java index 342a335ff6..d5c068de94 100644 --- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/ShardGroupStatusListener.java +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/ShardGroupStatusListener.java @@ -20,6 +20,7 @@ import org.apache.hugegraph.pd.grpc.Metapb; public interface ShardGroupStatusListener { + void onShardListChanged(Metapb.ShardGroup shardGroup, Metapb.ShardGroup newShardGroup); void onShardListOp(Metapb.ShardGroup shardGroup); diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/StoreMonitorDataService.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/StoreMonitorDataService.java index 7be54db0cf..54ff6b6e8d 100644 --- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/StoreMonitorDataService.java +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/StoreMonitorDataService.java @@ -36,10 +36,10 @@ import lombok.extern.slf4j.Slf4j; - @Slf4j @Service public class StoreMonitorDataService { + private static final String MONITOR_DATA_PREFIX = "SMD"; private final PDConfig pdConfig; private final KvService kvService; @@ -49,7 +49,6 @@ public class StoreMonitorDataService { */ private final Map lastStoreStateTimestamp; - public StoreMonitorDataService(PDConfig pdConfig) { this.pdConfig = pdConfig; this.kvService = new KvService(pdConfig); diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/StoreNodeService.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/StoreNodeService.java index bfd4f88032..b755326340 100644 --- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/StoreNodeService.java +++ 
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/StoreNodeService.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/StoreNodeService.java
index bfd4f88032..b755326340 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/StoreNodeService.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/StoreNodeService.java
@@ -47,7 +47,6 @@
 
 import lombok.extern.slf4j.Slf4j;
 
-
 /**
  * HgStore registration and keep-alive management
 */
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/TaskScheduleService.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/TaskScheduleService.java
index 9ec8152a0d..889e5a0234 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/TaskScheduleService.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/TaskScheduleService.java
@@ -43,7 +43,6 @@
 
 import lombok.extern.slf4j.Slf4j;
 
-
 /**
  * Task scheduling service: periodically checks store, resource and partition status, migrates data promptly and handles faulty nodes
  * 1. Monitors whether a store goes offline
@@ -53,6 +52,7 @@
  */
 @Slf4j
 public class TaskScheduleService {
+
     private static final String BALANCE_SHARD_KEY = "BALANCE_SHARD_KEY";
     private final long TurnOffAndBalanceInterval = 30 * 60 * 1000; // dynamic balancing is allowed only 30 minutes after a store goes offline
     private final long BalanceLeaderInterval = 30 * 1000; // interval between leader balancing runs
@@ -82,7 +82,6 @@ public class TaskScheduleService {
     private long lastStoreTurnoffTime = 0;
     private long lastBalanceLeaderTime = 0;
 
-
     public TaskScheduleService(PDConfig config, StoreNodeService storeService,
                                PartitionService partitionService) {
         this.pdConfig = config;
@@ -239,7 +238,6 @@ public List patrolStores() throws PDException {
         return changedStores;
     }
 
-
     /**
      * Patrols all partitions and checks whether the replica count is correct
     */
@@ -278,7 +276,6 @@ public List patrolPartitions() throws PDException {
         return null;
     }
 
-
     /**
      * Balances the number of partitions across stores;
     * dynamic balancing can run only half an hour after machines turn UP
     */
@@ -295,7 +292,6 @@ public synchronized Map> balancePartitionShard() thr
             return null; // dynamic balancing can run only half an hour after a store goes offline
         }
 
-
         int activeStores = storeService.getActiveStores().size();
         if (activeStores == 0) {
             log.warn("balancePartitionShard non active stores, skip to balancePartitionShard");
@@ -565,7 +561,6 @@ public synchronized Map balancePartitionLeader(boolean immediatel
         return results;
     }
 
-
     private long getMaxIndexGap(Map<Integer, Map<Long, Long>> committedIndexMap, int partitionId) {
         long maxGap = Long.MAX_VALUE;
         if (committedIndexMap == null || !committedIndexMap.containsKey(partitionId)) {
@@ -585,7 +580,6 @@ private long getMaxIndexGap(Map<Integer, Map<Long, Long>> committedIndexMap, int
         return maxGap;
     }
 
-
     /**
      * Performs partition split, either automatic or manual
      *
@@ -663,7 +657,6 @@ public List autoSplitPartition() throws PDException {
         return null;
     }
 
-
     /**
      * A store reports task status
     * When partition state changes, recompute the states of the partition's ShardGroup, the graph and the whole cluster
     */
@@ -849,5 +842,4 @@ public Map> movePartitions(
         return movedPartitions;
     }
 
-
 }
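The interval fields above encode the scheduling policy: rebalancing is deferred until stores have been back UP for half an hour. A condensed sketch of that guard, with the field names taken from the diff and the method shape assumed for illustration:

    // Hypothetical helper: true once the last store turnoff is older than
    // TurnOffAndBalanceInterval (30 * 60 * 1000 ms), i.e. balancing may run.
    private boolean turnoffQuietPeriodOver() {
        return System.currentTimeMillis() - lastStoreTurnoffTime >= TurnOffAndBalanceInterval;
    }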
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/config/PDConfig.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/config/PDConfig.java
index abc54a94dc..6ff66459ef 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/config/PDConfig.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/config/PDConfig.java
@@ -32,7 +32,6 @@
 
 import lombok.Data;
 
-
 /**
  * PD configuration file
 */
@@ -114,6 +113,7 @@ public void setIdService(IdService idService) {
     @Data
     @Configuration
     public class ThreadPoolGrpc {
+
         @Value("${thread.pool.grpc.core:600}")
         private int core;
         @Value("${thread.pool.grpc.max:1000}")
@@ -125,6 +125,7 @@ public class ThreadPoolGrpc {
     @Data
     @Configuration
     public class Raft {
+
         @Value("${raft.enable:true }")
         private boolean enable;
         @Value("${raft.address}")
@@ -155,6 +156,7 @@ public String getGrpcAddress() {
     @Data
     @Configuration
     public class Store {
+
         // store heartbeat timeout
         @Value("${store.keepAlive-timeout:300}")
         private long keepAliveTimeout = 300;
@@ -201,7 +203,7 @@ public Long getRetentionPeriod() {
         private Long parseTimeExpression(String exp) {
             if (exp != null) {
                 Pattern pattern = Pattern.compile(
-                    "(?<n>(\\d+)*)(\\s)*(?<unit>(second|minute|hour|day|month|year)$)");
+                        "(?<n>(\\d+)*)(\\s)*(?<unit>(second|minute|hour|day|month|year)$)");
                 Matcher matcher = pattern.matcher(exp.trim());
                 if (matcher.find()) {
                     String n = matcher.group("n");
@@ -244,6 +246,7 @@ private Long parseTimeExpression(String exp) {
     @Data
     @Configuration
     public class Partition {
+
         private int totalCount = 0;
 
         // maximum number of replicas per store
@@ -269,6 +272,7 @@ public void setTotalCount(int totalCount) {
     @Data
     @Configuration
     public class Discovery {
+
         // max times a client may miss heartbeats after registering; beyond that, its previous registration info is deleted
         @Value("${discovery.heartbeat-try-count:3}")
         private int heartbeatOutTimes = 3;
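The parseTimeExpression hunk above is only re-indented, but the named groups are worth seeing in action. The group name "n" appears in the diff via matcher.group("n"); the name "unit" is inferred from the pattern, since the consuming line is outside the hunk. A standalone demo:

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    public class TimeExpressionDemo {

        public static void main(String[] args) {
            // Same pattern as PDConfig.Store#parseTimeExpression: a number,
            // optional whitespace, then a time unit anchored at end of input
            Pattern pattern = Pattern.compile(
                    "(?<n>(\\d+)*)(\\s)*(?<unit>(second|minute|hour|day|month|year)$)");
            Matcher matcher = pattern.matcher("7 day".trim());
            if (matcher.find()) {
                System.out.println(matcher.group("n"));    // prints: 7
                System.out.println(matcher.group("unit")); // prints: day
            }
        }
    }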
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/ConfigMetaStore.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/ConfigMetaStore.java
index 5ec6fe2171..df332f46b6 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/ConfigMetaStore.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/ConfigMetaStore.java
@@ -26,7 +26,6 @@
 
 public class ConfigMetaStore extends MetadataRocksDBStore {
 
-
     private final long clusterId;
 
     public ConfigMetaStore(PDConfig pdConfig) {
@@ -69,5 +68,4 @@ public Metapb.PDConfig getPdConfig(long version) throws PDException {
         return max.isPresent() ? max.get() : null;
     }
 
-
 }
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/IdMetaStore.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/IdMetaStore.java
index 70e4c501f9..177e4255b5 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/IdMetaStore.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/IdMetaStore.java
@@ -40,7 +40,6 @@
 
 @Slf4j
 public class IdMetaStore extends MetadataRocksDBStore {
-
     private static final String ID_PREFIX = "@ID@";
     private static final String CID_PREFIX = "@CID@";
     private static final String CID_SLOT_PREFIX = "@CID_SLOT@";
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataFactory.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataFactory.java
index c70eec489d..cc247041cf 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataFactory.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataFactory.java
@@ -76,7 +76,6 @@ public static TaskInfoMeta newTaskInfoMeta(PDConfig pdConfig) {
         return new TaskInfoMeta(pdConfig);
     }
 
-
     public static QueueStore newQueueStore(PDConfig pdConfig) {
         return new QueueStore(pdConfig);
     }
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataKeyHelper.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataKeyHelper.java
index 8a421c2d60..2b29734c23 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataKeyHelper.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataKeyHelper.java
@@ -345,6 +345,7 @@ public static StringBuilder getStringBuilderHelper() {
     }
 
     static class StringBuilderHelper {
+
         private static final int DISCARD_LIMIT = 1024 << 3; // 8k
 
         private static final ThreadLocal<StringBuilderHelper> holderThreadLocal = ThreadLocal
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataStoreBase.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataStoreBase.java
index 10c38a3ec6..4cd9e1d364 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataStoreBase.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataStoreBase.java
@@ -17,7 +17,6 @@
 
 package org.apache.hugegraph.pd.meta;
 
-
 import java.io.IOException;
 import java.util.LinkedList;
 import java.util.List;
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/PartitionMeta.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/PartitionMeta.java
index 09a4eb8e20..713a0046d7 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/PartitionMeta.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/PartitionMeta.java
@@ -32,6 +32,7 @@
  */
 @Slf4j
 public class PartitionMeta extends MetadataRocksDBStore {
+
     static String CID_GRAPH_ID_KEY = "GraphID";
     static int CID_GRAPH_ID_MAX = 0xFFFE;
     private final PDConfig pdConfig;
@@ -233,7 +234,6 @@ public Metapb.PartitionStats getPartitionStats(String graphName, int id) throws
         return getOne(Metapb.PartitionStats.parser(), prefix);
     }
 
-
     /**
      * Gets the partition status
      */
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/QueueStore.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/QueueStore.java
index 74820ab023..e1b8437a48 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/QueueStore.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/QueueStore.java
@@ -27,6 +27,7 @@
 import org.apache.hugegraph.pd.store.RaftKVStore;
 
 public class QueueStore extends MetadataRocksDBStore {
+
     QueueStore(PDConfig pdConfig) {
         super(pdConfig);
     }
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/StoreInfoMeta.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/StoreInfoMeta.java
index 45959211d8..2a50b0448c 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/StoreInfoMeta.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/StoreInfoMeta.java
@@ -32,6 +32,7 @@
  */
 @Slf4j
 public class StoreInfoMeta extends MetadataRocksDBStore {
+
     private final PDConfig pdConfig;
 
     public StoreInfoMeta(PDConfig pdConfig) {
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/TaskInfoMeta.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/TaskInfoMeta.java
index 148101de4d..756be71e98 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/TaskInfoMeta.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/TaskInfoMeta.java
@@ -30,6 +30,7 @@
  * Task management
  */
 public class TaskInfoMeta extends MetadataRocksDBStore {
+
     public TaskInfoMeta(PDConfig pdConfig) {
         super(pdConfig);
     }
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/FutureClosureAdapter.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/FutureClosureAdapter.java
index 1991a78db9..d90c50c6c9 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/FutureClosureAdapter.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/FutureClosureAdapter.java
@@ -23,6 +23,7 @@
 import com.alipay.sofa.jraft.Status;
 
 public class FutureClosureAdapter<T> implements Closure {
+
     public final CompletableFuture<T> future = new CompletableFuture<>();
     private T resp;
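FutureClosureAdapter<T> above bridges jraft's callback-style Closure onto a CompletableFuture so callers can block or compose. Only the two fields appear in the hunk; the sketch below fills in plausible run/setResponse bodies for illustration and is not the patch's exact code:

    import java.util.concurrent.CompletableFuture;

    import com.alipay.sofa.jraft.Closure;
    import com.alipay.sofa.jraft.Status;

    public class FutureClosureSketch<T> implements Closure {

        public final CompletableFuture<T> future = new CompletableFuture<>();
        private T resp;

        public void setResponse(T resp) {
            this.resp = resp;
        }

        @Override
        public void run(Status status) {
            // jraft invokes this when the raft task finishes; map the outcome
            // onto the future so callers can use future.get() or thenApply()
            if (status.isOk()) {
                future.complete(resp);
            } else {
                future.completeExceptionally(new RuntimeException(status.getErrorMsg()));
            }
        }
    }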
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftEngine.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftEngine.java
index f3089ed074..9ed62b0e61 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftEngine.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftEngine.java
@@ -55,6 +55,7 @@
 
 @Slf4j
 public class RaftEngine {
+
     private static final RaftEngine INSTANCE = new RaftEngine();
     private final RaftStateMachine stateMachine;
     private PDConfig.Raft config;
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftRpcClient.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftRpcClient.java
index 2e17a65eef..6e47ce4e59 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftRpcClient.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftRpcClient.java
@@ -33,6 +33,7 @@
 
 @Slf4j
 public class RaftRpcClient {
+
     protected volatile RpcClient rpcClient;
     private RpcOptions rpcOptions;
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftRpcProcessor.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftRpcProcessor.java
index 1286515de2..ed950a4ee1 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftRpcProcessor.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftRpcProcessor.java
@@ -27,7 +27,6 @@
 public class RaftRpcProcessor<T> implements RpcProcessor<T> {
 
-
     private final Class<T> requestClass;
     private final RaftEngine raftEngine;
@@ -97,6 +96,7 @@ public boolean isOK() {
     }
 
     public abstract static class BaseRequest implements Serializable {
+
         public static final byte GET_GRPC_ADDRESS = 0x01;
 
         public abstract byte magic();
@@ -104,12 +104,14 @@ public abstract static class BaseRequest implements Serializable {
 
     @Data
     public abstract static class BaseResponse implements Serializable {
+
         private Status status;
     }
 
     @Data
     public static class GetMemberRequest extends BaseRequest {
+
         @Override
         public byte magic() {
             return GET_GRPC_ADDRESS;
@@ -118,6 +120,7 @@ public byte magic() {
 
     @Data
     public static class GetMemberResponse extends BaseResponse {
+
         private long clusterId;
         private String raftAddress;
         private String grpcAddress;
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftStateListener.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftStateListener.java
index 020be6f8bb..56f39e3ad4 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftStateListener.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftStateListener.java
@@ -18,5 +18,6 @@
 package org.apache.hugegraph.pd.raft;
 
 public interface RaftStateListener {
+
     void onRaftLeaderChanged();
 }
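For context on the GetMemberRequest/GetMemberResponse pair above: jraft dispatches such requests through an RpcProcessor keyed by interest(). A rough sketch of a processor for this request type; the response values are placeholders, and the real wiring lives in RaftRpcProcessor and RaftEngine:

    import com.alipay.sofa.jraft.rpc.RpcContext;
    import com.alipay.sofa.jraft.rpc.RpcProcessor;

    public class GetMemberProcessorSketch
            implements RpcProcessor<RaftRpcProcessor.GetMemberRequest> {

        @Override
        public void handleRequest(RpcContext rpcCtx, RaftRpcProcessor.GetMemberRequest request) {
            RaftRpcProcessor.GetMemberResponse resp = new RaftRpcProcessor.GetMemberResponse();
            // @Data generates this setter; a real processor would read the
            // address from the raft/grpc configuration, not hard-code it
            resp.setGrpcAddress("127.0.0.1:8686"); // placeholder address
            rpcCtx.sendResponse(resp);
        }

        @Override
        public String interest() {
            return RaftRpcProcessor.GetMemberRequest.class.getName();
        }
    }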
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftStateMachine.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftStateMachine.java
index 4733212022..dafa209cb3 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftStateMachine.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftStateMachine.java
@@ -48,6 +48,7 @@
 
 @Slf4j
 public class RaftStateMachine extends StateMachineAdapter {
+
     private static final String SNAPSHOT_DIR_NAME = "snapshot";
     private static final String SNAPSHOT_ARCHIVE_NAME = "snapshot.zip";
     private final AtomicLong leaderTerm = new AtomicLong(-1);
@@ -147,7 +148,6 @@ public void onStopFollowing(final LeaderChangeContext ctx) {
         super.onStopFollowing(ctx);
     }
 
-
     @Override
     public void onConfigurationCommitted(final Configuration conf) {
         log.info("Raft onConfigurationCommitted {}", conf);
@@ -235,7 +235,6 @@ public boolean onSnapshotLoad(final SnapshotReader reader) {
             return false;
         }
 
-
         try {
             // TODO: remove file from meta
             // SnapshotReader does not provide an interface for deleting files
@@ -288,8 +287,8 @@ private void decompressSnapshot(final SnapshotReader reader) throws PDException
         }
     }
 
-
     public static class RaftClosureAdapter implements KVStoreClosure {
+
         private final KVOperation op;
         private final KVStoreClosure closure;
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftTaskHandler.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftTaskHandler.java
index 6dfced4c98..ec8120cc83 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftTaskHandler.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftTaskHandler.java
@@ -23,5 +23,6 @@
  * Receives data sent by raft
  */
 public interface RaftTaskHandler {
+
     boolean invoke(final KVOperation op, KVStoreClosure response) throws PDException;
 }
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/BaseKVStoreClosure.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/BaseKVStoreClosure.java
index 84974aea27..3cc4dbb54a 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/BaseKVStoreClosure.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/BaseKVStoreClosure.java
@@ -21,6 +21,7 @@
 import org.apache.hugegraph.pd.raft.KVStoreClosure;
 
 public abstract class BaseKVStoreClosure implements KVStoreClosure {
+
     private Pdpb.Error error;
     private Object data;
@@ -44,5 +45,4 @@ public void setData(Object data) {
         this.data = data;
     }
 
-
 }
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/HgKVStore.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/HgKVStore.java
index bfa2f1ded9..263cb70b28 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/HgKVStore.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/HgKVStore.java
@@ -24,6 +24,7 @@
 import org.apache.hugegraph.pd.config.PDConfig;
 
 public interface HgKVStore {
+
     void init(PDConfig config);
 
     void put(byte[] key, byte[] value) throws PDException;
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/HgKVStoreImpl.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/HgKVStoreImpl.java
index 88ebd5ca28..bd2e7a9e22 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/HgKVStoreImpl.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/HgKVStoreImpl.java
@@ -52,6 +52,7 @@
 
 @Slf4j
 public class HgKVStoreImpl implements HgKVStore {
+
     private static final ConcurrentHashMap> CACHE = new ConcurrentHashMap();
     private final ReadWriteLock readWriteLock = new ReentrantReadWriteLock();
@@ -324,7 +325,6 @@ public void close() {
         closeRocksDB();
     }
 
-
     private void closeRocksDB() {
         if (this.db != null) {
             this.db.close();
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/KV.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/KV.java
index 763a8541a8..35dce065b5 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/KV.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/KV.java
@@ -17,8 +17,8 @@
 
 package org.apache.hugegraph.pd.store;
 
-
 public class KV {
+
     private byte[] key;
     private byte[] value;
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/RaftKVStore.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/RaftKVStore.java
index b9e373ce8b..ed97d13f71 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/RaftKVStore.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/store/RaftKVStore.java
@@ -37,7 +37,6 @@
 
 import lombok.extern.slf4j.Slf4j;
 
-
 @Slf4j
 public class RaftKVStore implements HgKVStore, RaftTaskHandler {
 
@@ -103,7 +102,6 @@ public long remove(byte[] bytes) throws PDException {
         return 0;
     }
 
-
     @Override
     public long removeByPrefix(byte[] bytes) throws PDException {
         try {

From cc156bfe4049dc953a74a8e86c0847efe3659c61 Mon Sep 17 00:00:00 2001
From: VGalaxies
Date: Thu, 14 Mar 2024 14:09:47 +0800
Subject: [PATCH 5/7] add deps for hg-pd-test

---
 hugegraph-pd/hg-pd-test/pom.xml | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/hugegraph-pd/hg-pd-test/pom.xml b/hugegraph-pd/hg-pd-test/pom.xml
index 31c0fd889d..f2e187cbe1 100644
--- a/hugegraph-pd/hg-pd-test/pom.xml
+++ b/hugegraph-pd/hg-pd-test/pom.xml
@@ -99,6 +99,16 @@
             <artifactId>hg-pd-common</artifactId>
             <version>${revision}</version>
         </dependency>
+        <dependency>
+            <groupId>org.apache.hugegraph</groupId>
+            <artifactId>hg-pd-client</artifactId>
+            <version>${revision}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hugegraph</groupId>
+            <artifactId>hg-pd-core</artifactId>
+            <version>${revision}</version>
+        </dependency>
         <dependency>
             <groupId>com.google.code.gson</groupId>

From b7c9b6869a80db412cc155f89ba192b5fab08f42 Mon Sep 17 00:00:00 2001
From: VGalaxies
Date: Thu, 14 Mar 2024 22:40:40 +0800
Subject: [PATCH 6/7] fix comment

---
 .../java/org/apache/hugegraph/pd/meta/MetadataKeyHelper.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataKeyHelper.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataKeyHelper.java
index 2b29734c23..193b3b7229 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataKeyHelper.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/meta/MetadataKeyHelper.java
@@ -292,7 +292,7 @@ public static byte[] getLogKey(Metapb.LogRecord record) {
     }
 
     public static byte[] getLogKeyPrefix(String action, long time) {
-        //LOG_DATA_SPLIT/{time}/{GraphName}
+        //LOG_RECORD/{action}/{time}/
        StringBuilder builder = StringBuilderHelper.get()
                                                   .append(LOG_RECORD)
                                                   .append(DELIMITER)
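The corrected comment now matches what the builder chain produces. Roughly, the prefix layout is as below; '/' stands in for the DELIMITER constant, whose real value is defined in MetadataKeyHelper and is not visible in this hunk:

    // Sketch of the LOG_RECORD/{action}/{time}/ layout from the fixed comment;
    // the delimiter character is an assumption for readability.
    static String logKeyPrefix(String action, long time) {
        final char delimiter = '/'; // assumed rendering of DELIMITER
        return "LOG_RECORD" + delimiter + action + delimiter + time + delimiter;
    }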
From c1ca5bf8756b90ad1cc56c2c6fd201ca5357b72d Mon Sep 17 00:00:00 2001
From: VGalaxies
Date: Sun, 14 Apr 2024 21:41:23 +0800
Subject: [PATCH 7/7] fix: arbitrary file access during archive extraction

---
 .../org/apache/hugegraph/pd/raft/RaftStateMachine.java   | 2 +-
 .../main/java/org/apache/hugegraph/pd/raft/ZipUtils.java | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftStateMachine.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftStateMachine.java
index dafa209cb3..e747518668 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftStateMachine.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftStateMachine.java
@@ -275,7 +275,7 @@ private void decompressSnapshot(final SnapshotReader reader) throws PDException
         final Checksum checksum = new CRC64();
         final String snapshotArchive = reader.getPath() + File.separator + SNAPSHOT_ARCHIVE_NAME;
         try {
-            ZipUtils.decompress(snapshotArchive, reader.getPath(), checksum);
+            ZipUtils.decompress(snapshotArchive, new File(reader.getPath()), checksum);
             if (meta.hasChecksum()) {
                 if (!meta.getChecksum().equals(Long.toHexString(checksum.getValue()))) {
                     throw new PDException(Pdpb.ErrorType.ROCKSDB_LOAD_SNAPSHOT_ERROR_VALUE,
diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/ZipUtils.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/ZipUtils.java
index ed75b54211..a570e0ba93 100644
--- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/ZipUtils.java
+++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/ZipUtils.java
@@ -70,7 +70,7 @@ private static void compressDirectoryToZipFile(final String rootDir, final Strin
         }
     }
 
-    public static void decompress(final String sourceFile, final String outputDir,
+    public static void decompress(final String sourceFile, final File outputDir,
                                   final Checksum checksum) throws IOException {
         try (final FileInputStream fis = new FileInputStream(sourceFile);
              final CheckedInputStream cis = new CheckedInputStream(fis, checksum);
@@ -78,7 +78,10 @@ public static void decompress(final String sourceFile, final String outputDir,
             ZipEntry entry;
             while ((entry = zis.getNextEntry()) != null) {
                 final String fileName = entry.getName();
-                final File entryFile = new File(Paths.get(outputDir, fileName).toString());
+                final File entryFile = new File(outputDir, fileName);
+                if (!entryFile.toPath().normalize().startsWith(outputDir.toPath())) {
+                    throw new IOException("Bad zip entry");
+                }
                 FileUtils.forceMkdir(entryFile.getParentFile());
                 try (final FileOutputStream fos = new FileOutputStream(entryFile);
                      final BufferedOutputStream bos = new BufferedOutputStream(fos)) {
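The guard added above closes a classic zip-slip hole: an entry name like "../../x" would otherwise escape the target directory once resolved. A standalone check mirroring the patched logic; the class name, paths and entry names below are illustrative only:

    import java.io.File;
    import java.io.IOException;

    public class ZipSlipGuardDemo {

        // Same check as the patched ZipUtils.decompress: reject any entry that
        // normalizes to a path outside outputDir before writing it.
        static void checkEntry(File outputDir, String entryName) throws IOException {
            File entryFile = new File(outputDir, entryName);
            if (!entryFile.toPath().normalize().startsWith(outputDir.toPath())) {
                throw new IOException("Bad zip entry: " + entryName);
            }
        }

        public static void main(String[] args) throws IOException {
            File out = new File("/tmp/snapshot");   // placeholder directory
            checkEntry(out, "data/000001.sst");     // passes
            checkEntry(out, "../../etc/crontab");   // throws IOException
        }
    }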