Skip to content
This repository has been archived by the owner on Nov 14, 2024. It is now read-only.

Optimize some cell and value extraction read paths #6073

Merged
merged 16 commits into from
Jun 8, 2022
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.MoreObjects;
import com.google.common.base.Preconditions;
import com.google.common.primitives.Bytes;
import com.google.common.primitives.UnsignedBytes;
import com.palantir.atlasdb.encoding.PtBytes;
import com.palantir.logsafe.Preconditions;
import com.palantir.logsafe.SafeArg;
import com.palantir.logsafe.UnsafeArg;
import com.palantir.logsafe.exceptions.SafeIllegalArgumentException;
import com.palantir.logsafe.logger.SafeLogger;
import com.palantir.logsafe.logger.SafeLoggerFactory;
import java.io.Serializable;
Expand All @@ -43,6 +44,8 @@ public final class Cell implements Serializable, Comparable<Cell> {

// Oracle has an upper bound on RAW types of 2000.
public static final int MAX_NAME_LENGTH = 1500;
private static final SafeArg<Integer> MAX_NAME_LENGTH_ARG = SafeArg.of("maxNameLength", MAX_NAME_LENGTH);

public static final Comparator<Cell> COLUMN_COMPARATOR = PtBytes.BYTES_COMPARATOR.onResultOf(Cell::getColumnName);

/**
Expand All @@ -57,25 +60,32 @@ public static boolean isNameValid(byte[] name) {
return name != null && name.length > 0 && name.length <= MAX_NAME_LENGTH;
}

private void validateNameValid(byte[] name) {
com.palantir.logsafe.Preconditions.checkNotNull(name, "name cannot be null");
com.palantir.logsafe.Preconditions.checkArgument(name.length > 0, "name must be non-empty");

try {
Preconditions.checkArgument(
name.length <= MAX_NAME_LENGTH, "name must be no longer than %s.", MAX_NAME_LENGTH);
} catch (IllegalArgumentException e) {
log.error(
"Cell name length exceeded. Name must be no longer than {}. "
+ "Cell creation that was attempted was: {}; since the vast majority of people "
+ "encountering this problem are using unbounded Strings as components, it may aid your "
+ "debugging to know the UTF-8 interpretation of the bad field was: [{}]",
SafeArg.of("max name length", MAX_NAME_LENGTH),
UnsafeArg.of("cell", this),
UnsafeArg.of("name", new String(name, StandardCharsets.UTF_8)),
e);
throw e;
private byte[] validateNameValid(byte[] name) {
if (isNameValid(name)) {
return name;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we want to keep this method small so that it gets JITed & inlined for the Cell.create happy path

}
throw invalidName(name);
}

/**
 * Builds the exception that reports an invalid Cell name, logging the details first.
 *
 * <p>This is the cold failure path, deliberately kept apart from the small validation
 * method that runs for valid names. A {@code null} name is rejected by the
 * {@code checkNotNull} precondition and an empty name by the {@code checkArgument}
 * precondition, so the logging below is only reached for a non-empty name.
 *
 * @param name the name that failed validation
 * @return the exception for the caller to throw
 */
private SafeIllegalArgumentException invalidName(byte[] name) {
    Preconditions.checkNotNull(name, "name cannot be null");
    Preconditions.checkArgument(name.length > 0, "name must be non-empty");

    SafeIllegalArgumentException nameTooLong =
            new SafeIllegalArgumentException("name length exceeds maximum", MAX_NAME_LENGTH_ARG);

    // The cell and the decoded name may contain user data, so both are passed as unsafe args.
    UnsafeArg<Cell> attemptedCell = UnsafeArg.of("cell", this);
    UnsafeArg<String> decodedName = UnsafeArg.of("name", new String(name, StandardCharsets.UTF_8));
    log.error(
            "Cell name length exceeded. Name must be no longer than {}. "
                    + "Cell creation that was attempted was: {}; since the vast majority of people "
                    + "encountering this problem are using unbounded Strings as components, it may aid your "
                    + "debugging to know the UTF-8 interpretation of the bad field was: [{}]",
            MAX_NAME_LENGTH_ARG,
            attemptedCell,
            decodedName,
            nameTooLong);
    return nameTooLong;
}

private final byte[] rowName;
Expand All @@ -85,11 +95,8 @@ private void validateNameValid(byte[] name) {
// NOTE: This constructor doesn't copy the arrays for performance reasons.
@JsonCreator
private Cell(@JsonProperty("rowName") byte[] rowName, @JsonProperty("columnName") byte[] columnName) {
this.rowName = rowName;
this.columnName = columnName;

validateNameValid(rowName);
validateNameValid(columnName);
this.rowName = validateNameValid(rowName);
this.columnName = validateNameValid(columnName);
}

/**
Expand Down Expand Up @@ -134,6 +141,11 @@ public int hashCode() {
* allow for benign data races.
*/
if (hashCode == 0) {
/*
* This hashCode() implementation has a rather unfortunate case where it is always 0 if the row name and
* the column name match. We deliberately left it unchanged in order to keep backwards compatibility.
* See {@link CellReference#goodHash()}
*/
hashCode = Arrays.hashCode(rowName) ^ Arrays.hashCode(columnName);
}
return hashCode;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
* (c) Copyright 2022 Palantir Technologies Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.palantir.atlasdb.keyvalue.api;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

import com.palantir.logsafe.exceptions.SafeIllegalArgumentException;
import com.palantir.logsafe.exceptions.SafeNullPointerException;
import java.nio.charset.StandardCharsets;
import org.junit.Test;

public final class CellTest {

@Test
public void create() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: can we prefix create, isNameValid, and compareTo with test?

Cell cell = Cell.create(bytes("row"), bytes("col"));
assertThat(cell.getRowName()).isEqualTo(bytes("row"));
assertThat(cell.getColumnName()).isEqualTo(bytes("col"));
assertThatThrownBy(() -> Cell.create(null, bytes("col"))).isInstanceOf(SafeNullPointerException.class);
assertThatThrownBy(() -> Cell.create(bytes("row"), null)).isInstanceOf(SafeNullPointerException.class);
assertThatThrownBy(() -> Cell.create(bytes(""), bytes(""))).isInstanceOf(SafeIllegalArgumentException.class);
assertThatThrownBy(() -> Cell.create(bytes("row"), bytes(""))).isInstanceOf(SafeIllegalArgumentException.class);
assertThatThrownBy(() -> Cell.create(bytes(""), bytes("col"))).isInstanceOf(SafeIllegalArgumentException.class);
assertThatThrownBy(() -> Cell.create(bytes("row"), bytes("x".repeat(Cell.MAX_NAME_LENGTH + 1))))
.isInstanceOf(SafeIllegalArgumentException.class);
assertThatThrownBy(() -> Cell.create(bytes("x".repeat(Cell.MAX_NAME_LENGTH + 1)), bytes("col")))
.isInstanceOf(SafeIllegalArgumentException.class);
}

/**
 * Verifies {@code Cell.isNameValid} for typical names, for null and empty input, and on
 * both sides of the {@code Cell.MAX_NAME_LENGTH} upper bound.
 */
@Test
@SuppressWarnings("ConstantConditions") // explicitly testing conditions
public void isNameValid() {
    assertThat(Cell.isNameValid(bytes("row"))).isTrue();
    assertThat(Cell.isNameValid(null)).isFalse();
    assertThat(Cell.isNameValid(new byte[0])).isFalse();
    assertThat(Cell.isNameValid(bytes("x"))).isTrue();
    // Boundary: the limit is inclusive, so a name of exactly MAX_NAME_LENGTH bytes is valid.
    assertThat(Cell.isNameValid(bytes("x".repeat(Cell.MAX_NAME_LENGTH))))
            .isTrue();
    assertThat(Cell.isNameValid(bytes("x".repeat(Cell.MAX_NAME_LENGTH + 1))))
            .isFalse();
}

@Test
public void compareTo() {
assertThat(Cell.create(bytes("row"), bytes("col")))
.isEqualByComparingTo(Cell.create(bytes("row"), bytes("col")));
assertThat(Cell.create(bytes("row"), bytes("col")))
.isNotEqualByComparingTo(Cell.create(bytes("row2"), bytes("col")));
assertThat(Cell.create(bytes("row"), bytes("col")))
.isNotEqualByComparingTo(Cell.create(bytes("row2"), bytes("col2")));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you're missing the case where row is the same and column is different

assertThat(Cell.create(bytes("row1"), bytes("col"))).isLessThan(Cell.create(bytes("row2"), bytes("col")));
assertThat(Cell.create(bytes("row1"), bytes("col"))).isGreaterThan(Cell.create(bytes("row0"), bytes("col")));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, the compareTo method has two branches, and this only tests the first (where the row differs); perhaps worth testing the compareTo part when row is the same and col is less or greater?

}

/**
 * Checks that cells with the same row and column names are equal, and that swapping
 * the row and column names yields a cell that is not equal.
 */
@Test
public void testEquals() {
    Cell original = Cell.create(bytes("row"), bytes("col"));
    Cell sameNames = Cell.create(bytes("row"), bytes("col"));
    assertThat(original).isEqualTo(sameNames);

    Cell swappedNames = Cell.create(bytes("col"), bytes("row"));
    assertThat(original).isNotEqualTo(swappedNames);
}

@Test
public void testHashCode() {
assertThat(Cell.create(bytes("row"), bytes("col")).hashCode()).isNotZero();
assertThat(Cell.create(bytes("row"), bytes("col")))
.describedAs("Cell unfortunately has a non-ideal hashCode where swapped "
+ "row and column values lead to the same hashCode and cannot be changed due "
+ "to backward compatibility. See goodHash")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: See CellReference's goodHash or CellReference#goodHash

.hasSameHashCodeAs(Cell.create(bytes("col"), bytes("row")));
}

private static byte[] bytes(String value) {
return value.getBytes(StandardCharsets.UTF_8);
}
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: newline

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

spotless is failing on this

Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,8 @@ void loadWithTs(
}
int totalPartitions = hostsAndCells.keySet().size();

if (log.isTraceEnabled()) {
final boolean isTraceEnabled = log.isTraceEnabled();
if (isTraceEnabled) {
log.trace(
"Loading {} cells from {} {}starting at timestamp {}, partitioned across {} nodes.",
SafeArg.of("cells", cells.size()),
Expand All @@ -120,12 +121,12 @@ void loadWithTs(
SafeArg.of("totalPartitions", totalPartitions));
}

List<Callable<Void>> tasks = new ArrayList<>();
List<Callable<Void>> tasks = new ArrayList<>(hostsAndCells.size());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While this is probably a better estimate, we may still exceed it (as each hostsAndCells element may add several elements). Still an improvement, so fine by me.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, this isn't fully optimal but should be a more reasonable estimate.

for (Map.Entry<CassandraServer, List<Cell>> hostAndCells : hostsAndCells.entrySet()) {
if (log.isTraceEnabled()) {
if (isTraceEnabled) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the general case I wouldn't normally pull log.isTraceEnabled() out to a local, but this showed up in a profile of a relatively hot loop where it will almost always be false, so I pulled it out into a local final boolean.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, that makes sense

log.trace(
"Requesting {} cells from {} {}starting at timestamp {} on {}",
SafeArg.of("cells", hostsAndCells.values().size()),
SafeArg.of("cells", hostAndCells.getValue().size()),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was a drive-by fix, as this seemed to log an incorrect value.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep - thanks!

LoggingArgs.tableRef(tableRef),
SafeArg.of("timestampClause", loadAllTs ? "for all timestamps " : ""),
SafeArg.of("startTs", startTs),
Expand Down Expand Up @@ -159,8 +160,9 @@ private List<Callable<Void>> getLoadWithTsTasksForSingleHost(
final CassandraKeyValueServices.ThreadSafeResultVisitor visitor,
final ConsistencyLevel consistency) {
final ColumnParent colFam = new ColumnParent(CassandraKeyValueServiceImpl.internalTableName(tableRef));
List<Callable<Void>> tasks = new ArrayList<>();
for (final List<Cell> partition : batcher.partitionIntoBatches(cells, cassandraServer, tableRef)) {
List<List<Cell>> batches = batcher.partitionIntoBatches(cells, cassandraServer, tableRef);
List<Callable<Void>> tasks = new ArrayList<>(batches.size());
for (final List<Cell> partition : batches) {
Comment on lines +163 to +165
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice!

Callable<Void> multiGetCallable = () -> clientPool.runWithRetryOnServer(
cassandraServer, new FunctionCheckedException<CassandraClient, Void, Exception>() {
@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,8 @@ public void internalExtractResult(
long startTs, ColumnSelection selection, byte[] row, byte[] col, byte[] val, long ts) {
if (ts < startTs && selection.contains(col)) {
Cell cell = Cell.create(row, col);
if (!collector.containsKey(cell)) {
collector.put(cell, Value.create(val, ts));
} else {
Value value = collector.computeIfAbsent(cell, _cell -> Value.create(val, ts));
if (value.getTimestamp() != ts) {
notLatestVisibleValueCellFilterCounter.inc();
}
} else {
Expand Down
5 changes: 5 additions & 0 deletions changelog/@unreleased/pr-6073.v2.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
type: improvement
improvement:
description: Optimize some cell and value extraction read paths
links:
- https://github.com/palantir/atlasdb/pull/6073