Skip to content

Commit

Permalink
[SPARK-8301] Removed unnecessary copying of UTF8String. Added a private function startsWith(prefix, offset) to implement the check for startsWith, endsWith and contains.
Browse files Browse the repository at this point in the history
  • Loading branch information
tarekbecker committed Jun 13, 2015
1 parent d986fb9 commit b17909e
Showing 1 changed file with 15 additions and 8 deletions.
23 changes: 15 additions & 8 deletions unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
Original file line number Diff line number Diff line change
Expand Up @@ -131,24 +131,31 @@ public boolean contains(final UTF8String substring) {
}

for (int i = 0; i <= bytes.length - b.length; i++) {
// TODO: Avoid copying.
if (bytes[i] == b[0] && Arrays.equals(Arrays.copyOfRange(bytes, i, i + b.length), b)) {
if (bytes[i] == b[0] && startsWith(substring, i)) {
return true;
}
}
return false;
}

/**
 * Tests whether the bytes of {@code prefix} match this object's {@code bytes}
 * array beginning at position {@code offset}, comparing in place without
 * allocating a temporary array.
 *
 * @param prefix the byte sequence to look for
 * @param offset index into {@code bytes} at which the comparison starts
 * @return {@code true} if every byte of {@code prefix} equals the byte at the
 *         corresponding position from {@code offset}; {@code false} if
 *         {@code offset} is negative, if the prefix would extend past the end
 *         of {@code bytes}, or if any byte differs
 */
private boolean startsWith(final UTF8String prefix, int offset) {
  final byte[] needle = prefix.getBytes();
  // Guard: a negative start or a prefix that does not fit can never match.
  if (offset < 0 || needle.length + offset > bytes.length) {
    return false;
  }
  for (int k = 0; k < needle.length; k++) {
    if (needle[k] != bytes[offset + k]) {
      // First mismatch decides the result; no need to look further.
      return false;
    }
  }
  return true;
}

/**
 * Tests whether this string begins with the bytes of {@code prefix}.
 *
 * <p>Delegates to the offset-based {@code startsWith(prefix, offset)} helper
 * with an offset of 0, so the comparison happens in place instead of copying
 * a slice of {@code bytes} via {@code Arrays.copyOfRange}.
 *
 * @param prefix the byte sequence expected at the start of this string
 * @return {@code true} if {@code prefix} is no longer than this string and
 *         matches its leading bytes, {@code false} otherwise
 */
public boolean startsWith(final UTF8String prefix) {
  return startsWith(prefix, 0);
}

/**
 * Tests whether this string ends with the bytes of {@code suffix}.
 *
 * <p>Delegates to the offset-based {@code startsWith(prefix, offset)} helper,
 * starting the comparison where the suffix would have to begin. No temporary
 * array is created.
 *
 * @param suffix the byte sequence expected at the end of this string
 * @return {@code true} if {@code suffix} is no longer than this string and
 *         matches its trailing bytes, {@code false} otherwise
 */
public boolean endsWith(final UTF8String suffix) {
  // When the suffix is longer than this string the offset is negative,
  // which the helper rejects by returning false.
  final int offset = bytes.length - suffix.getBytes().length;
  return startsWith(suffix, offset);
}

public UTF8String toUpperCase() {
Expand Down

0 comments on commit b17909e

Please sign in to comment.