diff --git a/java/fury-core/src/main/java/io/fury/Config.java b/java/fury-core/src/main/java/io/fury/Config.java index aead95abfa..38470d327f 100644 --- a/java/fury-core/src/main/java/io/fury/Config.java +++ b/java/fury-core/src/main/java/io/fury/Config.java @@ -51,7 +51,9 @@ public class Config implements Serializable { private final Language language; private final boolean referenceTracking; private final boolean basicTypesReferenceIgnored; + private final boolean stringReferenceIgnored; private final boolean compressNumber; + private final boolean compressString; private final boolean secureModeEnabled; private final boolean classRegistrationRequired; private transient int configHash; @@ -60,7 +62,9 @@ public class Config implements Serializable { language = builder.language; referenceTracking = builder.referenceTracking; basicTypesReferenceIgnored = !referenceTracking || builder.basicTypesReferenceIgnored; + stringReferenceIgnored = !referenceTracking || builder.stringReferenceIgnored; compressNumber = builder.compressNumber; + compressString = builder.compressString; secureModeEnabled = builder.secureModeEnabled; classRegistrationRequired = builder.requireClassRegistration; } @@ -77,10 +81,18 @@ public boolean isBasicTypesReferenceIgnored() { return basicTypesReferenceIgnored; } + public boolean isStringReferenceIgnored() { + return stringReferenceIgnored; + } + public boolean compressNumber() { return compressNumber; } + public boolean compressString() { + return compressString; + } + public boolean isClassRegistrationRequired() { return classRegistrationRequired; } diff --git a/java/fury-core/src/main/java/io/fury/Fury.java b/java/fury-core/src/main/java/io/fury/Fury.java index d9b2a33ccc..2a97b9e517 100644 --- a/java/fury-core/src/main/java/io/fury/Fury.java +++ b/java/fury-core/src/main/java/io/fury/Fury.java @@ -34,6 +34,7 @@ import io.fury.serializer.OpaqueObjects; import io.fury.serializer.Serializer; import io.fury.serializer.SerializerFactory; +import 
io.fury.serializer.StringSerializer; import io.fury.type.Generics; import io.fury.type.Type; import io.fury.util.LoggerFactory; @@ -88,6 +89,7 @@ public final class Fury { private final MemoryBuffer buffer; private final List nativeObjects; + private final StringSerializer stringSerializer; private final Language language; private final boolean compressNumber; private final Generics generics; @@ -117,6 +119,7 @@ private Fury(FuryBuilder builder, ClassLoader classLoader) { buffer = MemoryUtils.buffer(32); nativeObjects = new ArrayList<>(); generics = new Generics(this); + stringSerializer = new StringSerializer(this); LOG.info("Created new fury {}", this); } @@ -462,7 +465,8 @@ private void writeData(MemoryBuffer buffer, ClassInfo classInfo, Object obj) { buffer.writeDouble((Double) obj); break; case ClassResolver.STRING_CLASS_ID: - throw new UnsupportedOperationException(); + stringSerializer.writeJavaString(buffer, (String) obj); + break; // TODO(add fastpath for other types) default: depth++; @@ -683,7 +687,7 @@ private Object readData(MemoryBuffer buffer, ClassInfo classInfo) { case ClassResolver.DOUBLE_CLASS_ID: return buffer.readDouble(); case ClassResolver.STRING_CLASS_ID: - throw new UnsupportedOperationException(); + return stringSerializer.readJavaString(buffer); // TODO(add fastpath for other types) default: depth++; @@ -855,6 +859,10 @@ public boolean trackingReference() { return referenceTracking; } + public boolean isStringReferenceIgnored() { + return config.isStringReferenceIgnored(); + } + public boolean isBasicTypesReferenceIgnored() { return config.isBasicTypesReferenceIgnored(); } @@ -863,6 +871,10 @@ public Config getConfig() { return config; } + public boolean compressString() { + return config.compressString(); + } + public boolean compressNumber() { return compressNumber; } @@ -897,6 +909,7 @@ public static final class FuryBuilder { boolean timeReferenceIgnored = true; ClassLoader classLoader; boolean compressNumber = false; + boolean 
compressString = true; boolean secureModeEnabled = true; boolean requireClassRegistration = true; @@ -917,12 +930,22 @@ public FuryBuilder ignoreBasicTypesReference(boolean ignoreBasicTypesReference) return this; } + public FuryBuilder ignoreStringReference(boolean ignoreStringReference) { + this.stringReferenceIgnored = ignoreStringReference; + return this; + } + /** Use variable length encoding for int/long. */ public FuryBuilder withNumberCompressed(boolean compressNumber) { this.compressNumber = compressNumber; return this; } + public FuryBuilder withStringCompressed(boolean compressString) { + this.compressString = compressString; + return this; + } + public FuryBuilder withClassLoader(ClassLoader classLoader) { this.classLoader = classLoader; return this; diff --git a/java/fury-core/src/main/java/io/fury/resolver/ClassResolver.java b/java/fury-core/src/main/java/io/fury/resolver/ClassResolver.java index 7bad8ec71d..7748f0fa45 100644 --- a/java/fury-core/src/main/java/io/fury/resolver/ClassResolver.java +++ b/java/fury-core/src/main/java/io/fury/resolver/ClassResolver.java @@ -31,6 +31,7 @@ import io.fury.serializer.Serializer; import io.fury.serializer.SerializerFactory; import io.fury.serializer.Serializers; +import io.fury.serializer.StringSerializer; import io.fury.type.TypeUtils; import io.fury.util.Functions; import io.fury.util.LoggerFactory; @@ -219,6 +220,8 @@ private void addDefaultSerializers() { addDefaultSerializer(Long.class, new Serializers.LongSerializer(fury, Long.class)); addDefaultSerializer(Float.class, new Serializers.FloatSerializer(fury, Float.class)); addDefaultSerializer(Double.class, new Serializers.DoubleSerializer(fury, Double.class)); + addDefaultSerializer(String.class, new StringSerializer(fury)); + addDefaultSerializer(String[].class, new Serializers.StringArraySerializer(fury)); addDefaultSerializer(Class.class, new Serializers.ClassSerializer(fury)); } diff --git 
a/java/fury-core/src/main/java/io/fury/serializer/Serializers.java b/java/fury-core/src/main/java/io/fury/serializer/Serializers.java index fd8e7d24cc..93852bf99d 100644 --- a/java/fury-core/src/main/java/io/fury/serializer/Serializers.java +++ b/java/fury-core/src/main/java/io/fury/serializer/Serializers.java @@ -358,6 +358,78 @@ public Double read(MemoryBuffer buffer) { } } + public static final class StringArraySerializer extends Serializer { + private final StringSerializer stringSerializer; + + public StringArraySerializer(Fury fury) { + super(fury, String[].class); + stringSerializer = new StringSerializer(fury); + } + + @Override + public short getCrossLanguageTypeId() { + return (short) -Type.FURY_STRING_ARRAY.getId(); + } + + @Override + public void write(MemoryBuffer buffer, String[] value) { + int len = value.length; + buffer.writeInt(len); + for (String elem : value) { + // TODO reference support + if (elem != null) { + buffer.writeByte(Fury.REF_VALUE_FLAG); + stringSerializer.writeJavaString(buffer, elem); + } else { + buffer.writeByte(Fury.NULL_FLAG); + } + } + } + + @Override + public String[] read(MemoryBuffer buffer) { + int numElements = buffer.readInt(); + String[] value = new String[numElements]; + fury.getReferenceResolver().reference(value); + for (int i = 0; i < numElements; i++) { + if (buffer.readByte() == Fury.REF_VALUE_FLAG) { + value[i] = stringSerializer.readJavaString(buffer); + } else { + value[i] = null; + } + } + return value; + } + + @Override + public void crossLanguageWrite(MemoryBuffer buffer, String[] value) { + int len = value.length; + buffer.writeInt(len); + for (String elem : value) { + if (elem != null) { + buffer.writeByte(Fury.REF_VALUE_FLAG); + stringSerializer.writeUTF8String(buffer, elem); + } else { + buffer.writeByte(Fury.NULL_FLAG); + } + } + } + + @Override + public String[] crossLanguageRead(MemoryBuffer buffer) { + int numElements = buffer.readInt(); + String[] value = new String[numElements]; + for (int i = 0; 
i < numElements; i++) { + if (buffer.readByte() == Fury.REF_VALUE_FLAG) { + value[i] = stringSerializer.readUTF8String(buffer); + } else { + value[i] = null; + } + } + return value; + } + } + public static final class ClassSerializer extends Serializer { private static final byte USE_CLASS_ID = 0; private static final byte USE_CLASSNAME = 1; diff --git a/java/fury-core/src/main/java/io/fury/serializer/StringSerializer.java b/java/fury-core/src/main/java/io/fury/serializer/StringSerializer.java new file mode 100644 index 0000000000..ea2da8795f --- /dev/null +++ b/java/fury-core/src/main/java/io/fury/serializer/StringSerializer.java @@ -0,0 +1,552 @@ +/* + * Copyright 2023 The Fury authors + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.fury.serializer; + +import com.google.common.base.Preconditions; +import io.fury.Fury; +import io.fury.memory.MemoryBuffer; +import io.fury.type.Type; +import io.fury.util.MathUtils; +import io.fury.util.Platform; +import io.fury.util.ReflectionUtils; +import java.lang.invoke.CallSite; +import java.lang.invoke.LambdaMetafactory; +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.MethodType; +import java.lang.reflect.Constructor; +import java.lang.reflect.Field; +import java.nio.charset.StandardCharsets; +import java.util.function.BiFunction; +import java.util.function.Function; + +/** + * String serializer based on {@link sun.misc.Unsafe} and {@link MethodHandle} for speed. + * + *

Note that string operations is very common in serialization, and jvm inline and branch + * elimination is not reliable even in c2 compiler, so we try to inline and avoid checks as we can + * manually. + * + * @author chaokunyang + */ +@SuppressWarnings("unchecked") +public final class StringSerializer extends Serializer { + private static final long STRING_CODER_FIELD_OFFSET; + private static final long STRING_VALUE_FIELD_OFFSET; + private static final boolean STRING_VALUE_FIELD_IS_CHARS; + private static final boolean STRING_VALUE_FIELD_IS_BYTES; + private static final long STRING_OFFSET_FIELD_OFFSET; + // String length field for android. + private static final long STRING_COUNT_FIELD_OFFSET; + private static final byte LATIN1 = 0; + private static final byte UTF16 = 1; + private static final int DEFAULT_BUFFER_SIZE = 1024; + // A long mask used to clear all-higher bits of char in a super-word way. + private static final long MULTI_CHARS_NON_ASCII_MASK; + + static { + Field valueField = ReflectionUtils.getFieldNullable(String.class, "value"); + // Java8 string + STRING_VALUE_FIELD_IS_CHARS = valueField != null && valueField.getType() == char[].class; + // Java11 string + STRING_VALUE_FIELD_IS_BYTES = valueField != null && valueField.getType() == byte[].class; + STRING_VALUE_FIELD_OFFSET = ReflectionUtils.getFieldOffset(String.class, "value"); + STRING_CODER_FIELD_OFFSET = ReflectionUtils.getFieldOffset(String.class, "coder"); + STRING_OFFSET_FIELD_OFFSET = ReflectionUtils.getFieldOffset(String.class, "offset"); + STRING_COUNT_FIELD_OFFSET = ReflectionUtils.getFieldOffset(String.class, "count"); + Preconditions.checkArgument(STRING_OFFSET_FIELD_OFFSET == -1, "Current jdk not supported"); + Preconditions.checkArgument(STRING_COUNT_FIELD_OFFSET == -1, "Current jdk not supported"); + if (Platform.IS_LITTLE_ENDIAN) { + // ascii chars will be 0xXX,0x00;0xXX,0x00 in byte order; + // Using 0x00,0xff(0xff00) to clear ascii bits. 
+ MULTI_CHARS_NON_ASCII_MASK = 0xff00ff00ff00ff00L; + } else { + // ascii chars will be 0x00,0xXX;0x00,0xXX in byte order; + // Using 0x00,0xff(0x00ff) to clear ascii bits. + MULTI_CHARS_NON_ASCII_MASK = 0x00ff00ff00ff00ffL; + } + } + + private final boolean compressString; + private byte[] byteArray = new byte[DEFAULT_BUFFER_SIZE]; + private int smoothByteArrayLength = DEFAULT_BUFFER_SIZE; + + public StringSerializer(Fury fury) { + super(fury, String.class, fury.trackingReference() && !fury.isStringReferenceIgnored()); + compressString = fury.compressString(); + } + + @Override + public short getCrossLanguageTypeId() { + return Type.STRING.getId(); + } + + @Override + public void write(MemoryBuffer buffer, String value) { + writeJavaString(buffer, value); + } + + @Override + public void crossLanguageWrite(MemoryBuffer buffer, String value) { + writeUTF8String(buffer, value); + } + + @Override + public String read(MemoryBuffer buffer) { + return readJavaString(buffer); + } + + @Override + public String crossLanguageRead(MemoryBuffer buffer) { + return readUTF8String(buffer); + } + + public void writeString(MemoryBuffer buffer, String value) { + if (isJava) { + writeJavaString(buffer, value); + } else { + writeUTF8String(buffer, value); + } + } + + public String readString(MemoryBuffer buffer) { + if (isJava) { + return readJavaString(buffer); + } else { + return readUTF8String(buffer); + } + } + + private byte[] getByteArray(int numElements) { + byte[] byteArray = this.byteArray; + if (byteArray.length < numElements) { + byteArray = new byte[numElements]; + this.byteArray = byteArray; + } + if (byteArray.length > DEFAULT_BUFFER_SIZE) { + smoothByteArrayLength = + Math.max(((int) (smoothByteArrayLength * 0.9 + numElements * 0.1)), DEFAULT_BUFFER_SIZE); + if (smoothByteArrayLength <= DEFAULT_BUFFER_SIZE) { + this.byteArray = new byte[DEFAULT_BUFFER_SIZE]; + } + } + return byteArray; + } + + // Invoked by fury JIT + public void writeJavaString(MemoryBuffer buffer, 
String value) { + if (STRING_VALUE_FIELD_IS_BYTES) { + writeJDK11String(buffer, value); + } else { + if (!STRING_VALUE_FIELD_IS_CHARS) { + throw new UnsupportedOperationException(); + } + final char[] chars = (char[]) Platform.getObject(value, STRING_VALUE_FIELD_OFFSET); + if (compressString) { + if (isAscii(chars)) { + writeJDK8Ascii(buffer, chars); + } else { + writeJDK8UTF16(buffer, chars); + } + } else { + int numBytes = MathUtils.doubleExact(value.length()); + buffer.writePrimitiveArrayWithSizeEmbedded(chars, Platform.CHAR_ARRAY_OFFSET, numBytes); + } + } + } + + public static boolean isAscii(char[] chars) { + int numChars = chars.length; + int vectorizedLen = numChars >> 2; + int vectorizedChars = vectorizedLen << 2; + int endOffset = Platform.CHAR_ARRAY_OFFSET + (vectorizedChars << 1); + boolean isAscii = true; + for (int offset = Platform.CHAR_ARRAY_OFFSET; offset < endOffset; offset += 8) { + // check 4 chars in a vectorized way, 4 times faster than scalar check loop. + // See benchmark in CompressStringSuite.asciiSuperWordCheck. + long multiChars = Platform.getLong(chars, offset); + if ((multiChars & MULTI_CHARS_NON_ASCII_MASK) != 0) { + isAscii = false; + break; + } + } + if (isAscii) { + for (int i = vectorizedChars; i < numChars; i++) { + if (chars[i] > 0xFF) { + isAscii = false; + break; + } + } + } + return isAscii; + } + + // Invoked by fury JIT + public String readJavaString(MemoryBuffer buffer) { + if (STRING_VALUE_FIELD_IS_BYTES) { + if (Platform.JAVA_VERSION >= 17) { + // Seems neither Unsafe.put nor MethodHandle are available in JDK17+, + // `Unsafe.put` doesn't work on IDE, but works on command. + // But `Unsafe.put` is 50% slower than `readStringChars`, so just inflate ant copy here. 
+ byte coder = buffer.readByte(); + if (coder == LATIN1) { + return new String(readAsciiChars(buffer)); + } else { + return new String(readUTF16Chars(buffer, coder)); + } + } else { + byte coder = buffer.readByte(); + byte[] value = buffer.readBytesWithSizeEmbedded(); + return newJava11StringByZeroCopy(value, coder); + } + } else { + if (!STRING_VALUE_FIELD_IS_CHARS) { + throw new UnsupportedOperationException(); + } + if (compressString) { + byte coder = buffer.readByte(); + if (coder == LATIN1) { + return newJava8StringByZeroCopy(readAsciiChars(buffer)); + } else { + return newJava8StringByZeroCopy(readUTF16Chars(buffer, coder)); + } + } else { + return newJava8StringByZeroCopy(buffer.readCharsWithSizeEmbedded()); + } + } + } + + static void writeJDK11String(MemoryBuffer buffer, String value) { + byte[] bytes = (byte[]) Platform.getObject(value, STRING_VALUE_FIELD_OFFSET); + byte coder = Platform.getByte(value, STRING_CODER_FIELD_OFFSET); + buffer.writeByte(coder); + buffer.writePrimitiveArrayWithSizeEmbedded(bytes, Platform.BYTE_ARRAY_OFFSET, bytes.length); + } + + private void writeJDK8Ascii(MemoryBuffer buffer, char[] chars) { + buffer.writeByte(LATIN1); + final int strLen = chars.length; + int writerIndex = buffer.writerIndex(); + // The `ensure` ensure next operations are safe without bound checks, + // and inner heap buffer doesn't change. + buffer.ensure(writerIndex + 5 + strLen); + final byte[] targetArray = buffer.getHeapMemory(); + writerIndex += buffer.unsafeWritePositiveVarInt(strLen); + if (targetArray != null) { + final int targetIndex = buffer.unsafeHeapWriterIndex(); + for (int i = 0; i < strLen; i++) { + targetArray[targetIndex + i] = (byte) chars[i]; + } + } else { + final byte[] tmpArray = getByteArray(strLen); + // Write to heap memory then copy is 60% faster than unsafe write to direct memory. 
+ for (int i = 0; i < strLen; i++) { + tmpArray[i] = (byte) chars[i]; + } + buffer.put(writerIndex, tmpArray, 0, strLen); + } + buffer.writerIndex(writerIndex + strLen); + } + + private void writeJDK8UTF16(MemoryBuffer buffer, char[] chars) { + buffer.writeByte(UTF16); + int strLen = chars.length; + int numBytes = MathUtils.doubleExact(strLen); + if (Platform.IS_LITTLE_ENDIAN) { + // FIXME JDK11 utf16 string uses little-endian order. + buffer.writePrimitiveArrayWithSizeEmbedded(chars, Platform.CHAR_ARRAY_OFFSET, numBytes); + } else { + // The `ensure` ensure next operations are safe without bound checks, + // and inner heap buffer doesn't change. + int writerIndex = buffer.writerIndex(); + buffer.ensure(writerIndex + 5 + numBytes); + byte[] targetArray = buffer.getHeapMemory(); + writerIndex += buffer.unsafeWritePositiveVarInt(numBytes); + if (targetArray != null) { + // Write to heap memory then copy is 250% faster than unsafe write to direct memory. + int charIndex = 0; + for (int i = buffer.unsafeHeapWriterIndex(), end = i + numBytes; i < end; i += 2) { + char c = chars[charIndex++]; + targetArray[i] = (byte) (c >> StringUTF16.HI_BYTE_SHIFT); + targetArray[i + 1] = (byte) (c >> StringUTF16.LO_BYTE_SHIFT); + } + } else { + byte[] tmpArray = getByteArray(numBytes); // two bytes are written per char, so size in bytes + int charIndex = 0; + for (int i = 0; i < numBytes; i += 2) { + char c = chars[charIndex++]; + tmpArray[i] = (byte) (c >> StringUTF16.HI_BYTE_SHIFT); + tmpArray[i + 1] = (byte) (c >> StringUTF16.LO_BYTE_SHIFT); + } + buffer.put(writerIndex, tmpArray, 0, numBytes); + } + buffer.writerIndex(writerIndex + numBytes); + } + } + + private char[] readAsciiChars(MemoryBuffer buffer) { + final int numBytes = buffer.readPositiveVarInt(); + char[] chars = new char[numBytes]; + byte[] targetArray = buffer.getHeapMemory(); + if (targetArray != null) { + int srcIndex = buffer.unsafeHeapReaderIndex(); + for (int i = 0; i < numBytes; i++) { + chars[i] = (char) (targetArray[srcIndex++] & 0xff); + } +
buffer.increaseReaderIndexUnsafe(numBytes); + } else { + byte[] byteArray = getByteArray(numBytes); + buffer.readBytes(byteArray, 0, numBytes); + for (int i = 0; i < numBytes; i++) { + chars[i] = (char) (byteArray[i] & 0xff); + } + } + return chars; + } + + private char[] readUTF16Chars(MemoryBuffer buffer, byte coder) { + if (coder != UTF16) { + throw new UnsupportedOperationException(String.format("Unsupported coder %s", coder)); + } + int numBytes = buffer.readPositiveVarInt(); + int strLen = numBytes >> 1; + char[] chars = new char[strLen]; + if (Platform.IS_LITTLE_ENDIAN) { + // FIXME JDK11 utf16 string uses little-endian order. + buffer.readChars(chars, Platform.CHAR_ARRAY_OFFSET, numBytes); + } else { + final byte[] targetArray = buffer.getHeapMemory(); + if (targetArray != null) { + buffer.checkReadableBytes(numBytes); + int charIndex = 0; + for (int i = buffer.unsafeHeapReaderIndex(), end = i + numBytes; i < end; i += 2) { + char c = + (char) + (((targetArray[i] & 0xff) << StringUTF16.HI_BYTE_SHIFT) // mask first: `<<` binds tighter than `&` + | ((targetArray[i + 1] & 0xff) << StringUTF16.LO_BYTE_SHIFT)); + chars[charIndex++] = c; + } + buffer.increaseReaderIndexUnsafe(numBytes); + } else { + final byte[] tmpArray = getByteArray(numBytes); + buffer.readBytes(tmpArray, 0, numBytes); + int charIndex = 0; + for (int i = 0; i < numBytes; i += 2) { + char c = + (char) + (((tmpArray[i] & 0xff) << StringUTF16.HI_BYTE_SHIFT) // mask first: `<<` binds tighter than `&` + | ((tmpArray[i + 1] & 0xff) << StringUTF16.LO_BYTE_SHIFT)); + chars[charIndex++] = c; + } + } + } + return chars; + } + + public static String newJava8StringByZeroCopy(char[] data) { + if (Platform.JAVA_VERSION != 8) { + throw new IllegalStateException( + String.format("Current java version is %s", Platform.JAVA_VERSION)); + } + try { + if (JAVA8_STRING_ZERO_COPY_CTR == null) { + // 1.
As documented in `Subsequent Modification of final Fields` in + // https://docs.oracle.com/javase/specs/jls/se8/html/jls-17.html#d5e34106 + // Maybe we can use `UNSAFE.putObject` to update String field to avoid reflection overhead. + // 2. `setAccessible` is an illegal-reflective-access because zero-copy String constructor + // isn't public, and `java.base/java.lang` isn't open to fury by default. + // 3. JavaLangAccess#newStringUnsafe is used by jdk internally and won't be available + // in jdk11 if `jdk.internal.misc` are not exported, so we don't use it. + // StringBuffer#toString is a synchronized method, so we don't use it to create String. + String str = Platform.newInstance(String.class); + Platform.putObject(str, STRING_VALUE_FIELD_OFFSET, data); + // unsafe is 800% faster than copy for length 230. + return str; + } else { + // 25% faster than unsafe put field, only 10% slower than `new String(str)` + return JAVA8_STRING_ZERO_COPY_CTR.apply(data, Boolean.TRUE); + } + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + public static String newJava11StringByZeroCopy(byte[] data, byte coder) { + if (Platform.JAVA_VERSION < 9) { + throw new IllegalStateException( + String.format("Current java version is %s", Platform.JAVA_VERSION)); + } + try { + if (coder == LATIN1) { + if (JAVA11_ASCII_STRING_ZERO_COPY_CTR == null) { + String str = Platform.newInstance(String.class); + // if --illegal-access=deny, this won't take effect, the reset will be empty. + Platform.putObject(str, STRING_VALUE_FIELD_OFFSET, data); + Platform.putByte(str, STRING_CODER_FIELD_OFFSET, coder); // coder is a primitive byte; putObject would store a boxed reference + return str; + } else { + // 700% faster than unsafe put field in java11, only 10% slower than `new String(str)` for + // string length 230. + // 50% faster than unsafe put field in java11 for string length 10.
+ return JAVA11_ASCII_STRING_ZERO_COPY_CTR.apply(data); + } + } else { + if (JAVA11_STRING_ZERO_COPY_CTR == null) { + String str = Platform.newInstance(String.class); + // if --illegal-access=deny, this won't take effect, the reset will be empty. + Platform.putObject(str, STRING_VALUE_FIELD_OFFSET, data); + Platform.putByte(str, STRING_CODER_FIELD_OFFSET, coder); // coder is a primitive byte; putObject would store a boxed reference + return str; + } else { + // 700% faster than unsafe put field in java11, only 10% slower than `new String(str)` for + // string length 230. + // 50% faster than unsafe put field in java11 for string length 10. + return (String) JAVA11_STRING_ZERO_COPY_CTR.invokeExact(data, coder); + } + } + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + private static final BiFunction<char[], Boolean, String> JAVA8_STRING_ZERO_COPY_CTR = + getJava8StringZeroCopyCtr(); + private static final MethodHandle JAVA11_STRING_ZERO_COPY_CTR = getJava11StringZeroCopyCtr(); + private static final Function<byte[], String> JAVA11_ASCII_STRING_ZERO_COPY_CTR = + getJava11AsciiStringZeroCopyCtr(); + + private static BiFunction<char[], Boolean, String> getJava8StringZeroCopyCtr() { + MethodHandles.Lookup lookup = getLookupByReflection(); + if (lookup == null) { + return null; + } + MethodHandle handle = getJavaStringZeroCopyCtrHandle(lookup); + if (handle == null) { + return null; + } + try { + // Faster than handle.invokeExact(data, boolean) + CallSite callSite = + LambdaMetafactory.metafactory( + lookup, + "apply", + MethodType.methodType(BiFunction.class), + handle.type().generic(), + handle, + handle.type()); + return (BiFunction) callSite.getTarget().invokeExact(); + } catch (Throwable e) { + return null; + } + } + + private static MethodHandle getJava11StringZeroCopyCtr() { + MethodHandles.Lookup lookup = getLookupByReflection(); + if (lookup == null) { + return null; + } + return getJavaStringZeroCopyCtrHandle(lookup); + } + + private static Function<byte[], String> getJava11AsciiStringZeroCopyCtr() { + MethodHandles.Lookup lookup = getLookupByReflection(); + if (lookup == null) { +
return null; + } + // Can't create callSite like java8, will get error: + // java.lang.invoke.LambdaConversionException: Type mismatch for instantiated parameter 1: + // byte is not a subtype of class java.lang.Object + try { + Class clazz = Class.forName("java.lang.StringCoding"); + MethodHandles.Lookup caller = lookup.in(clazz); + MethodHandle handle = + caller.findStatic( + clazz, "newStringLatin1", MethodType.methodType(String.class, byte[].class)); + // Faster than handle.invokeExact(data, byte) + CallSite callSite = + LambdaMetafactory.metafactory( + caller, + "apply", + MethodType.methodType(Function.class), + handle.type().generic(), + handle, + handle.type()); + return (Function) callSite.getTarget().invokeExact(); + } catch (Throwable e) { + return null; + } + } + + private static MethodHandle getJavaStringZeroCopyCtrHandle(MethodHandles.Lookup lookup) { + Preconditions.checkArgument(Platform.JAVA_VERSION >= 8); + if (Platform.JAVA_VERSION > 16) { + return null; + } + try { + if (Platform.JAVA_VERSION == 8) { + return lookup.findConstructor( + String.class, MethodType.methodType(void.class, char[].class, boolean.class)); + } else { + return lookup.findConstructor( + String.class, MethodType.methodType(void.class, byte[].class, byte.class)); + } + } catch (Exception e) { + return null; + } + } + + private static MethodHandles.Lookup getLookup() throws Exception { + // This can supress illegal-access and work even --illegal-access=deny for jdk16-. + // For JDK16+, this will fail at `lookupClass` field not found. + // This will produce unknown behaviour on some version of lombok. 
+ MethodHandles.Lookup lookup = ReflectionUtils.unsafeCopy(MethodHandles.lookup()); + long lookupClassOffset = + ReflectionUtils.getFieldOffset(MethodHandles.Lookup.class.getDeclaredField("lookupClass")); + long allowedModesOffset = + ReflectionUtils.getFieldOffset(MethodHandles.Lookup.class.getDeclaredField("allowedModes")); + Platform.putObject(lookup, lookupClassOffset, String.class); + Platform.putObject(lookup, allowedModesOffset, -1); + return lookup; + } + + private static MethodHandles.Lookup getLookupByReflection() { + try { + Constructor constructor = + MethodHandles.Lookup.class.getDeclaredConstructor(Class.class, int.class); + constructor.setAccessible(true); + return constructor.newInstance( + String.class, -1 // Lookup.TRUSTED + ); + } catch (Exception e) { + return null; + } + } + + public void writeUTF8String(MemoryBuffer buffer, String value) { + byte[] bytes = value.getBytes(StandardCharsets.UTF_8); + buffer.writePositiveVarInt(bytes.length); + buffer.writeBytes(bytes); + } + + public String readUTF8String(MemoryBuffer buffer) { + int len = buffer.readPositiveVarInt(); + byte[] bytes = buffer.readBytes(len); + return new String(bytes, StandardCharsets.UTF_8); + } +} diff --git a/java/fury-core/src/main/java/io/fury/serializer/StringUTF16.java b/java/fury-core/src/main/java/io/fury/serializer/StringUTF16.java new file mode 100644 index 0000000000..954e117908 --- /dev/null +++ b/java/fury-core/src/main/java/io/fury/serializer/StringUTF16.java @@ -0,0 +1,70 @@ +/* + * Copyright 2023 The Fury authors + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.fury.serializer; + +import java.nio.ByteOrder; + +/** + * UTF16 utils. + * + * @author chaokunyang + */ +class StringUTF16 { + static final int HI_BYTE_SHIFT; + static final int LO_BYTE_SHIFT; + static final boolean IS_BIG_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.BIG_ENDIAN; + + static { + if (IS_BIG_ENDIAN) { + HI_BYTE_SHIFT = 8; + LO_BYTE_SHIFT = 0; + } else { + HI_BYTE_SHIFT = 0; + LO_BYTE_SHIFT = 8; + } + } + + // Won't be faster than `putChars` + // static void putCharsSplit(char[] src, final int charLen, byte[] target, int targetOffset) { + // for (int i = 0; i < charLen; i++) { + // target[targetOffset+ i<<1] = (byte) (src[i] >> HI_BYTE_SHIFT); + // } + // for (int i = 0; i < charLen; i++) { + // target[targetOffset + i<<1 + 1] = (byte) (src[i] >> LO_BYTE_SHIFT); + // } + // } + + // static void putChars(char[] str, int off, byte[] val, int index, int end) { + // while (off < end) { + // putChar(val, index++, str[off++]); + // } + // } + + // static void putChar(byte[] val, int index, int c) { + // assert index >= 0 && index < length(val) : "Trusted caller missed bounds check"; + // index <<= 1; + // // FIXME JDK11 utf16 string uses little-endian order + // val[index++] = (byte) (c >> HI_BYTE_SHIFT); + // val[index] = (byte) (c >> LO_BYTE_SHIFT); + // } + + // static int length(byte[] value) { + // return value.length >> 1; + // } +} diff --git a/java/fury-core/src/main/java/io/fury/util/LoggerFactory.java b/java/fury-core/src/main/java/io/fury/util/LoggerFactory.java index 47a0b5737d..aee633b300 100644 --- 
a/java/fury-core/src/main/java/io/fury/util/LoggerFactory.java +++ b/java/fury-core/src/main/java/io/fury/util/LoggerFactory.java @@ -23,6 +23,8 @@ /** * A logger factory which can be used to disable fury logging more easily than configure logging. + * + * @author chaokunyang */ public class LoggerFactory { private static boolean disableLogging; diff --git a/java/fury-core/src/main/java/io/fury/util/MathUtils.java b/java/fury-core/src/main/java/io/fury/util/MathUtils.java new file mode 100644 index 0000000000..0814ddde96 --- /dev/null +++ b/java/fury-core/src/main/java/io/fury/util/MathUtils.java @@ -0,0 +1,35 @@ +/* + * Copyright 2023 The Fury authors + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.fury.util; + +/** + * Math utils. 
/**
 * Math utils.
 *
 * @author chaokunyang
 */
public class MathUtils {

  /**
   * Doubles an {@code int}, refusing to wrap around silently.
   *
   * @param x the value to double
   * @return {@code 2 * x}
   * @throws ArithmeticException if {@code 2 * x} does not fit in an {@code int}
   */
  public static int doubleExact(int x) {
    final int doubled = x << 1;
    // An arithmetic shift back restores x only when the left shift did not push a significant
    // bit out of the 32-bit range, i.e. when the doubling did not overflow.
    if ((doubled >> 1) != x) {
      throw new ArithmeticException("integer overflow");
    }
    return doubled;
  }
}
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.fury.serializer; + +import static io.fury.serializer.StringSerializer.newJava11StringByZeroCopy; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import com.google.common.base.Strings; +import io.fury.Fury; +import io.fury.FuryTestBase; +import io.fury.Language; +import io.fury.collection.Tuple2; +import io.fury.memory.MemoryBuffer; +import io.fury.memory.MemoryUtils; +import io.fury.util.MathUtils; +import io.fury.util.Platform; +import io.fury.util.ReflectionUtils; +import io.fury.util.StringUtils; +import java.lang.reflect.Field; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.ConcurrentLinkedQueue; +import org.testng.Assert; +import org.testng.SkipException; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +public class StringSerializerTest extends FuryTestBase { + @DataProvider(name = "stringCompress") + public static Object[][] stringCompress() { + return new Object[][] {{false}, {true}}; + } + + @Test + public void testJavaStringZeroCopy() { + if (Platform.JAVA_VERSION >= 17) { + throw new SkipException("Skip on jdk17+"); + } + // Ensure JavaStringZeroCopy work for CI and most development environments. 
+ MemoryBuffer buffer = MemoryBuffer.newHeapBuffer(32); + for (int i = 0; i < 32; i++) { + for (int j = 0; j < 1024; j++) { + String str = StringUtils.random(j); + Assert.assertTrue(writeJavaStringZeroCopy(buffer, str)); + String newStr = readJavaStringZeroCopy(buffer); + Assert.assertEquals(str, newStr, String.format("i %s j %s", i, j)); + } + } + } + + private static String readJavaStringZeroCopy(MemoryBuffer buffer) { + try { + Field valueIsBytesField = + StringSerializer.class.getDeclaredField("STRING_VALUE_FIELD_IS_BYTES"); + valueIsBytesField.setAccessible(true); + boolean STRING_VALUE_FIELD_IS_BYTES = (boolean) valueIsBytesField.get(null); + Field valueIsCharsField = + StringSerializer.class.getDeclaredField("STRING_VALUE_FIELD_IS_CHARS"); + valueIsCharsField.setAccessible(true); + boolean STRING_VALUE_FIELD_IS_CHARS = (Boolean) valueIsCharsField.get(null); + if (STRING_VALUE_FIELD_IS_BYTES) { + return readJDK11String(buffer); + } else if (STRING_VALUE_FIELD_IS_CHARS) { + return StringSerializer.newJava8StringByZeroCopy(buffer.readCharsWithSizeEmbedded()); + } + return null; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + static String readJDK11String(MemoryBuffer buffer) { + byte coder = buffer.readByte(); + byte[] value = buffer.readBytesWithSizeEmbedded(); + return newJava11StringByZeroCopy(value, coder); + } + + private static boolean writeJavaStringZeroCopy(MemoryBuffer buffer, String value) { + try { + Field valueIsBytesField = + StringSerializer.class.getDeclaredField("STRING_VALUE_FIELD_IS_BYTES"); + valueIsBytesField.setAccessible(true); + boolean STRING_VALUE_FIELD_IS_BYTES = (boolean) valueIsBytesField.get(null); + Field valueIsCharsField = + StringSerializer.class.getDeclaredField("STRING_VALUE_FIELD_IS_CHARS"); + valueIsCharsField.setAccessible(true); + boolean STRING_VALUE_FIELD_IS_CHARS = (Boolean) valueIsCharsField.get(null); + if (STRING_VALUE_FIELD_IS_BYTES) { + StringSerializer.writeJDK11String(buffer, value); + } 
else if (STRING_VALUE_FIELD_IS_CHARS) { + writeJDK8String(buffer, value); + } else { + return false; + } + return true; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + static void writeJDK8String(MemoryBuffer buffer, String value) { + final char[] chars = + (char[]) Platform.getObject(value, ReflectionUtils.getFieldOffset(String.class, "value")); + int numBytes = MathUtils.doubleExact(value.length()); + buffer.writePrimitiveArrayWithSizeEmbedded(chars, Platform.CHAR_ARRAY_OFFSET, numBytes); + } + + @Test + public void testJavaStringSimple() { + Fury fury = Fury.builder().withStringCompressed(true).disableSecureMode().build(); + MemoryBuffer buffer = MemoryUtils.buffer(32); + StringSerializer serializer = new StringSerializer(fury); + { + String str = "str"; + serializer.writeJavaString(buffer, str); + assertEquals(str, serializer.readJavaString(buffer)); + Assert.assertEquals(buffer.writerIndex(), buffer.readerIndex()); + } + { + String str = "你好, Fury"; + serializer.writeJavaString(buffer, str); + assertEquals(str, serializer.readJavaString(buffer)); + Assert.assertEquals(buffer.writerIndex(), buffer.readerIndex()); + } + } + + @Test(dataProvider = "stringCompress") + public void testJavaString(boolean stringCompress) { + Fury fury = Fury.builder().withStringCompressed(stringCompress).disableSecureMode().build(); + MemoryBuffer buffer = MemoryUtils.buffer(32); + StringSerializer serializer = new StringSerializer(fury); + + String longStr = Strings.repeat("abc", 50); + buffer.writerIndex(0); + buffer.readerIndex(0); + serializer.writeJavaString(buffer, longStr); + assertEquals(longStr, serializer.readJavaString(buffer)); + + serDe(fury, "你好, Fury" + StringUtils.random(64)); + serDe(fury, "你好, Fury" + StringUtils.random(64)); + serDe(fury, StringUtils.random(64)); + serDe( + fury, + new String[] {"你好, Fury" + StringUtils.random(64), "你好, Fury" + StringUtils.random(64)}); + } + + @Test(dataProvider = "stringCompress") + public void 
testJavaStringOffHeap(boolean stringCompress) { + Fury fury = Fury.builder().withStringCompressed(stringCompress).disableSecureMode().build(); + MemoryBuffer buffer = MemoryUtils.wrap(ByteBuffer.allocateDirect(1024)); + Object o1 = "你好, Fury" + StringUtils.random(64); + Object o2 = + new String[] {"你好, Fury" + StringUtils.random(64), "你好, Fury" + StringUtils.random(64)}; + fury.serialize(buffer, o1); + fury.serialize(buffer, o2); + assertEquals(fury.deserialize(buffer), o1); + assertEquals(fury.deserialize(buffer), o2); + } + + @Test + public void testJavaStringMemoryModel() { + BlockingQueue> dataQueue = new ArrayBlockingQueue<>(1024); + ConcurrentLinkedQueue> results = new ConcurrentLinkedQueue<>(); + Thread producer1 = new Thread(new DataProducer(dataQueue)); + Thread producer2 = new Thread(new DataProducer(dataQueue)); + Thread consumer1 = new Thread(new DataConsumer(dataQueue, results)); + Thread consumer2 = new Thread(new DataConsumer(dataQueue, results)); + Thread consumer3 = new Thread(new DataConsumer(dataQueue, results)); + Arrays.asList(producer1, producer2, consumer1, consumer2, consumer3).forEach(Thread::start); + int count = DataProducer.numItems * 2; + while (count > 0) { + Tuple2 item = results.poll(); + if (item != null) { + count--; + assertEquals(item.f0, item.f1); + } + } + Arrays.asList(producer1, producer2, consumer1, consumer2, consumer3).forEach(Thread::interrupt); + } + + public static class DataProducer implements Runnable { + static int numItems = 4 + 32 * 1024 * 2; + private final Fury fury; + private final BlockingQueue> dataQueue; + + public DataProducer(BlockingQueue> dataQueue) { + this.dataQueue = dataQueue; + this.fury = Fury.builder().withLanguage(Language.JAVA).disableSecureMode().build(); + } + + public void run() { + try { + dataQueue.put(Tuple2.of("", fury.serialize(""))); + dataQueue.put(Tuple2.of("a", fury.serialize("a"))); + dataQueue.put(Tuple2.of("ab", fury.serialize("ab"))); + dataQueue.put(Tuple2.of("abc", 
fury.serialize("abc"))); + for (int i = 0; i < 32; i++) { + for (int j = 0; j < 1024; j++) { + String str = StringUtils.random(j); + dataQueue.put(Tuple2.of(str, fury.serialize(str))); + str = String.valueOf(i); + dataQueue.put(Tuple2.of(str, fury.serialize(str))); + } + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + } + + public static class DataConsumer implements Runnable { + private final Fury fury; + private final BlockingQueue> dataQueue; + private final ConcurrentLinkedQueue> results; + + public DataConsumer( + BlockingQueue> dataQueue, + ConcurrentLinkedQueue> results) { + this.fury = Fury.builder().withLanguage(Language.JAVA).disableSecureMode().build(); + this.dataQueue = dataQueue; + this.results = results; + } + + @Override + public void run() { + try { + while (!Thread.currentThread().isInterrupted()) { + Tuple2 dataItem = dataQueue.take(); + String newStr = (String) fury.deserialize(dataItem.f1); + results.add(Tuple2.of(dataItem.f0, newStr)); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + } + + @Test + public void testCompressJava8String() { + if (Platform.JAVA_VERSION != 8) { + throw new SkipException("Java 8 only"); + } + Fury fury = Fury.builder().withStringCompressed(true).disableSecureMode().build(); + StringSerializer stringSerializer = + (StringSerializer) fury.getClassResolver().getSerializer(String.class); + + String utf16Str = "你好, Fury" + StringUtils.random(64); + char[] utf16StrChars = utf16Str.toCharArray(); + for (MemoryBuffer buffer : + new MemoryBuffer[] { + MemoryUtils.buffer(512), MemoryUtils.wrap(ByteBuffer.allocateDirect(512)), + }) { + stringSerializer.writeJavaString(buffer, utf16Str); + assertEquals(stringSerializer.readJavaString(buffer), utf16Str); + assertEquals(buffer.writerIndex(), buffer.readerIndex()); + + String latinStr = StringUtils.random(utf16StrChars.length, 0); + stringSerializer.writeJavaString(buffer, latinStr); + 
assertEquals(stringSerializer.readJavaString(buffer), latinStr); + assertEquals(buffer.writerIndex(), buffer.readerIndex()); + } + } + + @Test(dataProvider = "endian") + public void testVectorizedAsciiCheckAlgorithm(boolean endian) { + // assertTrue(isAscii("Fury".toCharArray(), endian)); + // assertTrue(isAscii(StringUtils.random(8 * 10).toCharArray(), endian)); + // test unaligned + assertTrue(isAscii((StringUtils.random(8 * 10) + "1").toCharArray(), endian)); + assertTrue(isAscii((StringUtils.random(8 * 10) + "12").toCharArray(), endian)); + assertTrue(isAscii((StringUtils.random(8 * 10) + "123").toCharArray(), endian)); + assertFalse(isAscii("你好, Fury".toCharArray(), endian)); + assertFalse(isAscii((StringUtils.random(8 * 10) + "你好").toCharArray(), endian)); + assertFalse(isAscii((StringUtils.random(8 * 10) + "1你好").toCharArray(), endian)); + } + + private boolean isAscii(char[] chars, boolean isLittle) { + boolean reverseBytes = + (Platform.IS_LITTLE_ENDIAN && !isLittle) || (!Platform.IS_LITTLE_ENDIAN && !isLittle); + if (reverseBytes) { + for (int i = 0; i < chars.length; i++) { + chars[i] = Character.reverseBytes(chars[i]); + } + } + long mask; + if (isLittle) { + // ascii chars will be 0xXX,0x00;0xXX,0x00 in byte order; + // Using 0x00,0xff(0xff00) to clear ascii bits. + mask = 0xff00ff00ff00ff00L; + } else { + // ascii chars will be 0x00,0xXX;0x00,0xXX in byte order; + // Using 0x00,0xff(0x00ff) to clear ascii bits. + mask = 0x00ff00ff00ff00ffL; + } + int numChars = chars.length; + int vectorizedLen = numChars >> 2; + int vectorizedChars = vectorizedLen << 2; + int endOffset = Platform.CHAR_ARRAY_OFFSET + (vectorizedChars << 1); + boolean isAscii = true; + for (int offset = Platform.CHAR_ARRAY_OFFSET; offset < endOffset; offset += 8) { + // check 4 chars in a vectorized way, 4 times faster than scalar check loop. 
+ long multiChars = Platform.getLong(chars, offset); + if ((multiChars & mask) != 0) { + isAscii = false; + break; + } + } + if (isAscii) { + for (int i = vectorizedChars; i < numChars; i++) { + char c = chars[i]; + if (reverseBytes) { + c = Character.reverseBytes(c); + } + if (c > 0xFF) { + isAscii = false; + break; + } + } + } + return isAscii; + } + + @Test + public void testAsciiCheck() { + assertTrue(StringSerializer.isAscii("Fury".toCharArray())); + assertTrue(StringSerializer.isAscii(StringUtils.random(8 * 10).toCharArray())); + // test unaligned + assertTrue(StringSerializer.isAscii((StringUtils.random(8 * 10) + "1").toCharArray())); + assertTrue(StringSerializer.isAscii((StringUtils.random(8 * 10) + "12").toCharArray())); + assertTrue(StringSerializer.isAscii((StringUtils.random(8 * 10) + "123").toCharArray())); + assertFalse(StringSerializer.isAscii("你好, Fury".toCharArray())); + assertFalse(StringSerializer.isAscii((StringUtils.random(8 * 10) + "你好").toCharArray())); + assertFalse(StringSerializer.isAscii((StringUtils.random(8 * 10) + "1你好").toCharArray())); + assertFalse(StringSerializer.isAscii((StringUtils.random(11) + "你").toCharArray())); + assertFalse(StringSerializer.isAscii((StringUtils.random(10) + "你好").toCharArray())); + assertFalse(StringSerializer.isAscii((StringUtils.random(9) + "性能好").toCharArray())); + assertFalse(StringSerializer.isAscii("\u1234".toCharArray())); + assertFalse(StringSerializer.isAscii("a\u1234".toCharArray())); + assertFalse(StringSerializer.isAscii("ab\u1234".toCharArray())); + assertFalse(StringSerializer.isAscii("abc\u1234".toCharArray())); + assertFalse(StringSerializer.isAscii("abcd\u1234".toCharArray())); + assertFalse(StringSerializer.isAscii("Javaone Keynote\u1234".toCharArray())); + } +}