Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refact: use standard UTF-8 charset & enhance CI configs #2095

Merged
merged 18 commits into from
Mar 7, 2023
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,13 @@
* CassandraShard is used for cassandra scanning operations.
* Each shard represents a range of tokens for a node.
* Reading data from a given shard does not cross multiple nodes.
* <p>
* Refer to AbstractColumnFamilyInputFormat from:
* <a href="https://github.com/2013Commons/hive-cassandra/">...</a>
*/
public class CassandraShard {

/* The minimal shard size should >= 1M to prevent too many number of shards */
/** The minimal shard size should >= 1M to prevent too many number of shards */
imbajin marked this conversation as resolved.
Show resolved Hide resolved
private static final int MIN_SHARD_SIZE = (int) Bytes.MB;

private CassandraSessionPool.Session session;
Expand Down Expand Up @@ -228,7 +229,7 @@ private static Map<TokenRange, Long> describeSplits(
tokenRange.getEnd().toString());
Row row = resultSet.one();

long meanPartitionSize = 0L;
long meanPartitionSize;
long partitionsCount = 0L;
long splitCount = 0L;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public AnsjAnalyzer(String mode) {

@Override
public Set<String> segment(String text) {
Result terms = null;
Result terms;
switch (this.analysis) {
case "BaseAnalysis":
terms = BaseAnalysis.parse(text);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,11 +165,11 @@ private void require(int size) {
E.checkState(this.resize, "Can't resize for wrapped buffer");

// Extra capacity as buffer
int newcapacity = size + this.buffer.limit() + DEFAULT_CAPACITY;
E.checkArgument(newcapacity <= MAX_BUFFER_CAPACITY,
int newCapacity = size + this.buffer.limit() + DEFAULT_CAPACITY;
E.checkArgument(newCapacity <= MAX_BUFFER_CAPACITY,
"Capacity exceeds max buffer capacity: %s",
MAX_BUFFER_CAPACITY);
ByteBuffer newBuffer = ByteBuffer.allocate(newcapacity);
ByteBuffer newBuffer = ByteBuffer.allocate(newCapacity);
((Buffer) this.buffer).flip();
newBuffer.put(this.buffer);
this.buffer = newBuffer;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,8 @@ public final class HugeScriptTraversal<S, E> extends DefaultTraversal<S, E> {

private Object result;

public HugeScriptTraversal(TraversalSource traversalSource,
String language, String script,
Map<String, Object> bindings,
Map<String, String> aliases) {
public HugeScriptTraversal(TraversalSource traversalSource, String language, String script,
Map<String, Object> bindings, Map<String, String> aliases) {
this.graph = traversalSource.getGraph();
this.language = language;
this.script = script;
Expand All @@ -75,8 +73,7 @@ public String script() {

@Override
public void applyStrategies() throws IllegalStateException {
ScriptEngine engine =
SingleGremlinScriptEngineManager.get(this.language);
ScriptEngine engine = SingleGremlinScriptEngineManager.get(this.language);

Bindings bindings = engine.createBindings();
bindings.putAll(this.bindings);
Expand All @@ -94,9 +91,8 @@ public void applyStrategies() throws IllegalStateException {
for (Map.Entry<String, String> entry : this.aliases.entrySet()) {
Object value = bindings.get(entry.getValue());
if (value == null) {
throw new IllegalArgumentException(String.format(
"Invalid aliase '%s':'%s'",
entry.getKey(), entry.getValue()));
throw new IllegalArgumentException(String.format("Invalid alias '%s':'%s'",
entry.getKey(), entry.getValue()));
}
bindings.put(entry.getKey(), value);
}
Expand All @@ -105,7 +101,7 @@ public void applyStrategies() throws IllegalStateException {
Object result = engine.eval(this.script, bindings);

if (result instanceof Admin) {
@SuppressWarnings({ "unchecked", "resource" })
@SuppressWarnings({ "unchecked"})
Admin<S, E> traversal = (Admin<S, E>) result;
traversal.getSideEffects().mergeInto(this.sideEffects);
traversal.getSteps().forEach(this::addStep);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ public enum Cardinality implements SerialEnum {
*/
SET(3, "set");

private byte code = 0;
private String name = null;
private final byte code;
private final String name;

static {
SerialEnum.register(Cardinality.class);
Expand Down Expand Up @@ -78,8 +78,8 @@ public static Cardinality convert(VertexProperty.Cardinality cardinality) {
case set:
return SET;
default:
throw new AssertionError(String.format(
"Unrecognized cardinality: '%s'", cardinality));
throw new AssertionError(String.format("Unrecognized cardinality: '%s'",
cardinality));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,43 +37,36 @@ public static BytesBuffer compress(byte[] bytes, int blockSize) {
return compress(bytes, blockSize, DEFAULT_BUFFER_RATIO);
}

public static BytesBuffer compress(byte[] bytes, int blockSize,
float bufferRatio) {
public static BytesBuffer compress(byte[] bytes, int blockSize, float bufferRatio) {
float ratio = bufferRatio <= 0.0F ? DEFAULT_BUFFER_RATIO : bufferRatio;
LZ4Factory factory = LZ4Factory.fastestInstance();
LZ4Compressor compressor = factory.fastCompressor();
int initBufferSize = Math.round(bytes.length / ratio);
BytesBuffer buf = new BytesBuffer(initBufferSize);
LZ4BlockOutputStream lz4Output = new LZ4BlockOutputStream(
buf, blockSize, compressor);
LZ4BlockOutputStream lz4Output = new LZ4BlockOutputStream(buf, blockSize, compressor);
try {
lz4Output.write(bytes);
lz4Output.close();
} catch (IOException e) {
throw new BackendException("Failed to compress", e);
}
/*
* If need to perform reading outside the method,
* remember to call forReadWritten()
*/
// If we need to perform reading outside the method, remember to call forReadWritten()
return buf;
}

public static BytesBuffer decompress(byte[] bytes, int blockSize) {
return decompress(bytes, blockSize, DEFAULT_BUFFER_RATIO);
}

public static BytesBuffer decompress(byte[] bytes, int blockSize,
float bufferRatio) {
public static BytesBuffer decompress(byte[] bytes, int blockSize, float bufferRatio) {
float ratio = bufferRatio <= 0.0F ? DEFAULT_BUFFER_RATIO : bufferRatio;
LZ4Factory factory = LZ4Factory.fastestInstance();
LZ4FastDecompressor decompressor = factory.fastDecompressor();
ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
int initBufferSize = Math.min(Math.round(bytes.length * ratio),
BytesBuffer.MAX_BUFFER_CAPACITY);
BytesBuffer buf = new BytesBuffer(initBufferSize);
LZ4BlockInputStream lzInput = new LZ4BlockInputStream(bais,
decompressor);
LZ4BlockInputStream lzInput = new LZ4BlockInputStream(bais, decompressor);
int count;
byte[] buffer = new byte[blockSize];
try {
Expand All @@ -84,10 +77,7 @@ public static BytesBuffer decompress(byte[] bytes, int blockSize,
} catch (IOException e) {
throw new BackendException("Failed to decompress", e);
}
/*
* If need to perform reading outside the method,
* remember to call forReadWritten()
*/
// If we need to perform reading outside the method, remember to call forReadWritten()
return buf;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,17 @@

package org.apache.hugegraph.util;

import java.io.UnsupportedEncodingException;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Base64;
import java.util.UUID;

import org.mindrot.jbcrypt.BCrypt;

import org.apache.hugegraph.HugeException;
import org.apache.hugegraph.backend.serializer.BytesBuffer;
import org.mindrot.jbcrypt.BCrypt;

import com.google.common.base.CharMatcher;

/**
Expand All @@ -49,7 +50,7 @@ public final class StringEncoding {
private static final Base64.Encoder BASE64_ENCODER = Base64.getEncoder();
private static final Base64.Decoder BASE64_DECODER = Base64.getDecoder();

// Similar to {@link StringSerializer}
/** Similar to {@link StringSerializer} */
public static int writeAsciiString(byte[] array, int offset, String value) {
E.checkArgument(CharMatcher.ascii().matchesAllOf(value),
"'%s' must be ASCII string", value);
Expand All @@ -65,7 +66,8 @@ public static int writeAsciiString(byte[] array, int offset, String value) {
assert c <= 127;
byte b = (byte) c;
if (++i == len) {
b |= 0x80; // End marker
// End marker
b |= 0x80;
}
array[offset++] = b;
} while (i < len);
Expand All @@ -75,7 +77,7 @@ public static int writeAsciiString(byte[] array, int offset, String value) {

public static String readAsciiString(byte[] array, int offset) {
StringBuilder sb = new StringBuilder();
int c = 0;
int c;
do {
c = 0xFF & array[offset++];
if (c != 0x80) {
Expand All @@ -92,33 +94,21 @@ public static int getAsciiByteLength(String value) {
}

public static byte[] encode(String value) {
try {
return value.getBytes("UTF-8");
} catch (UnsupportedEncodingException e) {
throw new HugeException("Failed to encode string", e);
}
return value.getBytes(StandardCharsets.UTF_8);
}

public static String decode(byte[] bytes) {
if (bytes.length == 0) {
return STRING_EMPTY;
}
try {
return new String(bytes, "UTF-8");
} catch (UnsupportedEncodingException e) {
throw new HugeException("Failed to decode string", e);
}
return new String(bytes, StandardCharsets.UTF_8);
}

public static String decode(byte[] bytes, int offset, int length) {
if (length == 0) {
return STRING_EMPTY;
}
try {
return new String(bytes, offset, length, "UTF-8");
} catch (UnsupportedEncodingException e) {
throw new HugeException("Failed to decode string", e);
}
return new String(bytes, offset, length, StandardCharsets.UTF_8);
}

public static String encodeBase64(byte[] bytes) {
Expand All @@ -137,26 +127,30 @@ public static byte[] compress(String value) {
}

public static byte[] compress(String value, float bufferRatio) {
BytesBuffer buf = LZ4Util.compress(encode(value), BLOCK_SIZE,
bufferRatio);
return buf.bytes();
try (BytesBuffer buf = LZ4Util.compress(encode(value), BLOCK_SIZE, bufferRatio)) {
imbajin marked this conversation as resolved.
Show resolved Hide resolved
return buf.bytes();
} catch (IOException e) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

seems don't need to catch IOException

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the IOException throws by LZ4Util.compress/decompress & we just auto release the BytesBuffer when it used up, so if we don't catch exception, we need to throws out? (so as the decompress

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes just remove the catch, since the LZ4Util.compress already transfer IOException to BackendException

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes just remove the catch, since the LZ4Util.compress already transfer IOException to BackendException

�so shall we close the ByteBuffer when it used up? if we remove catch with try(xx), we need throws the exception?

It means we just ignore the ByteBuffer close?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes the BytesBuffer don't need to close, it's just a memory buffer

Copy link
Member Author

@imbajin imbajin Mar 2, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes the BytesBuffer don't need to close, it's just a memory buffer

OK, my previous consideration is that if the buffer is large(like compress a big file), we should recycle it manually in time to reduce the pressure of GC & memory usage, but lack enough validation, so back off for now

throw new RuntimeException(e);
imbajin marked this conversation as resolved.
Show resolved Hide resolved
}
}

public static String decompress(byte[] value) {
return decompress(value, LZ4Util.DEFAULT_BUFFER_RATIO);
}

public static String decompress(byte[] value, float bufferRatio) {
BytesBuffer buf = LZ4Util.decompress(value, BLOCK_SIZE, bufferRatio);
return decode(buf.array(), 0, buf.position());
try (BytesBuffer buf = LZ4Util.decompress(value, BLOCK_SIZE, bufferRatio)) {
return decode(buf.array(), 0, buf.position());
} catch (IOException e) {
imbajin marked this conversation as resolved.
Show resolved Hide resolved
throw new RuntimeException(e);
}
}

public static String hashPassword(String password) {
return BCrypt.hashpw(password, BCrypt.gensalt(4));
}

public static boolean checkPassword(String candidatePassword,
String dbPassword) {
public static boolean checkPassword(String candidatePassword, String dbPassword) {
return BCrypt.checkpw(candidatePassword, dbPassword);
}

Expand All @@ -177,8 +171,7 @@ public static UUID uuid(String value) {
return UUID.fromString(value);
}
// UUID represented by hex string
E.checkArgument(value.length() == 32,
"Invalid UUID string: %s", value);
E.checkArgument(value.length() == 32, "Invalid UUID string: %s", value);
String high = value.substring(0, 16);
String low = value.substring(16);
return new UUID(Long.parseUnsignedLong(high, 16),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ public class ProcessBasicSuite extends AbstractGremlinSuite {
* A list of the minimum set of base tests that
* Gremlin flavors should implement to be compliant with Gremlin.
*/
private static final Class<?>[] TESTS_TO_ENFORCE = new Class<?>[] {
private static final Class<?>[] TESTS_TO_ENFORCE = new Class<?>[]{
// branch
BranchTest.class,
ChooseTest.class,
Expand Down Expand Up @@ -267,17 +267,14 @@ public class ProcessBasicSuite extends AbstractGremlinSuite {
};

public ProcessBasicSuite(final Class<?> klass,
final RunnerBuilder builder)
throws InitializationError {
final RunnerBuilder builder) throws InitializationError {
super(klass, builder, ALL_TESTS, TESTS_TO_ENFORCE, true,
TraversalEngine.Type.STANDARD);
RegisterUtil.registerBackends();
}

public ProcessBasicSuite(final Class<?> klass,
final RunnerBuilder builder,
final Class<?>[] testsToExecute)
throws InitializationError {
public ProcessBasicSuite(final Class<?> klass, final RunnerBuilder builder,
final Class<?>[] testsToExecute) throws InitializationError {
super(klass, builder, testsToExecute, TESTS_TO_ENFORCE, true,
TraversalEngine.Type.STANDARD);
RegisterUtil.registerBackends();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.apache.hugegraph.tinkerpop;

import org.apache.commons.configuration.ConfigurationException;
import org.apache.hugegraph.dist.RegisterUtil;
import org.apache.tinkerpop.gremlin.AbstractGremlinSuite;
import org.apache.tinkerpop.gremlin.GraphManager;
import org.apache.tinkerpop.gremlin.GraphProvider;
Expand Down Expand Up @@ -54,8 +54,6 @@
import org.junit.runners.model.RunnerBuilder;
import org.junit.runners.model.Statement;

import org.apache.hugegraph.dist.RegisterUtil;

/**
* Standard structure test suite for tinkerpop graph
*
Expand Down Expand Up @@ -100,8 +98,7 @@ public class StructureBasicSuite extends AbstractGremlinSuite {

public StructureBasicSuite(final Class<?> klass,
final RunnerBuilder builder)
throws InitializationError,
ConfigurationException {
throws InitializationError {
super(klass, builder, ALL_TESTS, null, true,
TraversalEngine.Type.STANDARD);

Expand Down