From c663887171e4c255a1fc952a30cc8f75d4aa59c6 Mon Sep 17 00:00:00 2001 From: Zhangmei Li Date: Sat, 19 Mar 2022 00:13:36 +0800 Subject: [PATCH] StringEncoding: improve empty bytes decode Change-Id: I6649c70ef63305acb9eada4abf307a099cf4a997 --- .../serializer/BinaryBackendEntry.java | 4 +-- .../backend/serializer/BinarySerializer.java | 34 ++++++++++++------- .../backend/serializer/BytesBuffer.java | 8 ++++- .../baidu/hugegraph/structure/HugeVertex.java | 5 ++- .../baidu/hugegraph/util/StringEncoding.java | 7 ++++ .../backend/store/hbase/HbaseTables.java | 6 ++-- 6 files changed, 43 insertions(+), 21 deletions(-) diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/backend/serializer/BinaryBackendEntry.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/backend/serializer/BinaryBackendEntry.java index 5344dc66f1..ecfc88dbf6 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/backend/serializer/BinaryBackendEntry.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/backend/serializer/BinaryBackendEntry.java @@ -35,8 +35,6 @@ public class BinaryBackendEntry implements BackendEntry { - private static final byte[] EMPTY_BYTES = new byte[]{}; - private final HugeType type; private final BinaryId id; private Id subId; @@ -123,7 +121,7 @@ public void column(BackendColumn column) { public void column(byte[] name, byte[] value) { E.checkNotNull(name, "name"); - value = value != null ? value : EMPTY_BYTES; + value = value != null ? 
value : BytesBuffer.BYTES_EMPTY; this.columns.add(BackendColumn.of(name, value)); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/backend/serializer/BinarySerializer.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/backend/serializer/BinarySerializer.java index 2393f5a25f..a07515db98 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/backend/serializer/BinarySerializer.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/backend/serializer/BinarySerializer.java @@ -19,9 +19,12 @@ package com.baidu.hugegraph.backend.serializer; -import java.util.*; +import java.util.Arrays; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Map; -import com.baidu.hugegraph.config.HugeConfig; import org.apache.commons.lang.NotImplementedException; import com.baidu.hugegraph.HugeGraph; @@ -39,6 +42,7 @@ import com.baidu.hugegraph.backend.serializer.BinaryBackendEntry.BinaryId; import com.baidu.hugegraph.backend.store.BackendEntry; import com.baidu.hugegraph.backend.store.BackendEntry.BackendColumn; +import com.baidu.hugegraph.config.HugeConfig; import com.baidu.hugegraph.schema.EdgeLabel; import com.baidu.hugegraph.schema.IndexLabel; import com.baidu.hugegraph.schema.PropertyKey; @@ -71,8 +75,6 @@ public class BinarySerializer extends AbstractSerializer { - public static final byte[] EMPTY_BYTES = new byte[0]; - /* * Id is stored in column name if keyWithIdPrefix=true like RocksDB, * else stored in rowkey like HBase. @@ -415,7 +417,8 @@ public BackendEntry writeVertex(HugeVertex vertex) { } // Fill column - byte[] name = this.keyWithIdPrefix ? entry.id().asBytes() : EMPTY_BYTES; + byte[] name = this.keyWithIdPrefix ? 
+ entry.id().asBytes() : BytesBuffer.BYTES_EMPTY; entry.column(name, buffer.bytes()); return entry; @@ -427,16 +430,19 @@ public BackendEntry writeOlapVertex(HugeVertex vertex) { BytesBuffer buffer = BytesBuffer.allocate(8 + 16); Collection<HugeProperty<?>> properties = vertex.getProperties(); - E.checkArgument(properties.size() == 1, - "Expect only 1 property for olap vertex, but got %s", - properties.size()); + if (properties.size() != 1) { + E.checkArgument(false, + "Expect 1 property for olap vertex, but got %s", + properties.size()); + } HugeProperty property = properties.iterator().next(); PropertyKey propertyKey = property.propertyKey(); buffer.writeVInt(SchemaElement.schemaId(propertyKey.id())); buffer.writeProperty(propertyKey, property.value()); // Fill column - byte[] name = this.keyWithIdPrefix ? entry.id().asBytes() : EMPTY_BYTES; + byte[] name = this.keyWithIdPrefix ? + entry.id().asBytes() : BytesBuffer.BYTES_EMPTY; entry.column(name, buffer.bytes()); entry.subId(propertyKey.id()); entry.olap(true); @@ -493,8 +499,7 @@ protected void parseVertexOlap(byte[] value, HugeVertex vertex) { public BackendEntry writeEdge(HugeEdge edge) { BinaryBackendEntry entry = newBackendEntry(edge); byte[] name = this.keyWithIdPrefix ? 
- entry.id().asBytes() : EMPTY_BYTES; - + entry.id().asBytes() : BytesBuffer.BYTES_EMPTY; byte[] value = this.formatEdgeValue(edge); entry.column(name, value); @@ -515,8 +520,11 @@ public BackendEntry writeEdgeProperty(HugeEdgeProperty prop) { public HugeEdge readEdge(HugeGraph graph, BackendEntry bytesEntry) { HugeVertex vertex = this.readVertex(graph, bytesEntry); Collection<HugeEdge> edges = vertex.getEdges(); - E.checkState(edges.size() == 1, - "Expect one edge in vertex, but got %s", edges.size()); + if (edges.size() != 1) { + E.checkState(false, + "Expect 1 edge in vertex, but got %s", + edges.size()); + } return edges.iterator().next(); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/backend/serializer/BytesBuffer.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/backend/serializer/BytesBuffer.java index a93b3f007f..3d29a51e67 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/backend/serializer/BytesBuffer.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/backend/serializer/BytesBuffer.java @@ -78,6 +78,8 @@ public final class BytesBuffer extends OutputStream { public static final int BUF_EDGE_ID = 128; public static final int BUF_PROPERTY = 64; + public static final byte[] BYTES_EMPTY = new byte[0]; + private ByteBuffer buffer; private final boolean resize; @@ -908,7 +910,8 @@ private long readNumber(byte b) { private byte[] readBytesWithEnding() { int start = this.buffer.position(); boolean foundEnding = false; - while (this.remaining() > 0) { + int remaining = this.remaining(); + for (int i = 0; i < remaining; i++) { byte current = this.read(); if (current == STRING_ENDING_BYTE) { foundEnding = true; @@ -919,6 +922,9 @@ private byte[] readBytesWithEnding() { Bytes.toHex(STRING_ENDING_BYTE)); int end = this.buffer.position() - 1; int len = end - start; + if (len <= 0) { + return BYTES_EMPTY; + } byte[] bytes = new byte[len]; System.arraycopy(this.array(), start, bytes, 0, len); return bytes; diff --git 
a/hugegraph-core/src/main/java/com/baidu/hugegraph/structure/HugeVertex.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/structure/HugeVertex.java index 5de8e59d19..f8021ea87d 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/structure/HugeVertex.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/structure/HugeVertex.java @@ -173,7 +173,10 @@ public void assignId(Id id, boolean force) { protected void checkIdLength() { assert this.id != null; int len = this.id.asBytes().length; - E.checkArgument(len <= BytesBuffer.ID_LEN_MAX, + if (len <= BytesBuffer.ID_LEN_MAX) { + return; + } + E.checkArgument(false, "The max length of vertex id is %s, but got %s {%s}", BytesBuffer.ID_LEN_MAX, len, this.id); } diff --git a/hugegraph-core/src/main/java/com/baidu/hugegraph/util/StringEncoding.java b/hugegraph-core/src/main/java/com/baidu/hugegraph/util/StringEncoding.java index 3bee33e79c..e8eabb9fcf 100644 --- a/hugegraph-core/src/main/java/com/baidu/hugegraph/util/StringEncoding.java +++ b/hugegraph-core/src/main/java/com/baidu/hugegraph/util/StringEncoding.java @@ -52,6 +52,7 @@ public final class StringEncoding { private static final MessageDigest DIGEST; private static final byte[] BYTES_EMPTY = new byte[0]; + private static final String STRING_EMPTY = ""; private static final int BLOCK_SIZE = 4096; static { @@ -117,6 +118,9 @@ public static byte[] encode(String value) { } public static String decode(byte[] bytes) { + if (bytes.length == 0) { + return STRING_EMPTY; + } try { return new String(bytes, "UTF-8"); } catch (UnsupportedEncodingException e) { @@ -125,6 +129,9 @@ public static String decode(byte[] bytes) { } public static String decode(byte[] bytes, int offset, int length) { + if (length == 0) { + return STRING_EMPTY; + } try { return new String(bytes, offset, length, "UTF-8"); } catch (UnsupportedEncodingException e) { diff --git a/hugegraph-hbase/src/main/java/com/baidu/hugegraph/backend/store/hbase/HbaseTables.java 
b/hugegraph-hbase/src/main/java/com/baidu/hugegraph/backend/store/hbase/HbaseTables.java index 658f7fd5f8..1f47516113 100644 --- a/hugegraph-hbase/src/main/java/com/baidu/hugegraph/backend/store/hbase/HbaseTables.java +++ b/hugegraph-hbase/src/main/java/com/baidu/hugegraph/backend/store/hbase/HbaseTables.java @@ -35,7 +35,7 @@ import com.baidu.hugegraph.backend.query.Query; import com.baidu.hugegraph.backend.serializer.BinaryBackendEntry; import com.baidu.hugegraph.backend.serializer.BinaryEntryIterator; -import com.baidu.hugegraph.backend.serializer.BinarySerializer; +import com.baidu.hugegraph.backend.serializer.BytesBuffer; import com.baidu.hugegraph.backend.store.BackendEntry; import com.baidu.hugegraph.backend.store.BackendEntry.BackendColumn; import com.baidu.hugegraph.backend.store.BackendEntryIterator; @@ -208,10 +208,10 @@ public void insert(Session session, BackendEntry entry) { long ttl = entry.ttl(); if (ttl == 0L) { session.put(this.table(), CF, col.name, - BinarySerializer.EMPTY_BYTES, col.value); + BytesBuffer.BYTES_EMPTY, col.value); } else { session.put(this.table(), CF, col.name, - BinarySerializer.EMPTY_BYTES, col.value, ttl); + BytesBuffer.BYTES_EMPTY, col.value, ttl); } }