Skip to content
This repository has been archived by the owner on Nov 14, 2024. It is now read-only.

[TEX] Part 1b: TrackingKeyValueService: utilities for byte size (1) #6332

Merged
merged 12 commits into from
Nov 8, 2022
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@
*/
package com.palantir.atlasdb.keyvalue.api;

import com.palantir.atlasdb.util.Measurable;
import java.util.Collection;
import org.immutables.value.Value;

@Value.Immutable
public interface CandidateCellForSweeping {
public interface CandidateCellForSweeping extends Measurable {

Cell cell();

Expand All @@ -42,6 +43,7 @@ public interface CandidateCellForSweeping {
*/
boolean isLatestValueEmpty();

@Override
default long sizeInBytes() {
return cell().sizeInBytes() + ((long) sortedTimestamps().size()) * Long.BYTES;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import com.google.common.base.MoreObjects;
import com.google.common.primitives.UnsignedBytes;
import com.palantir.atlasdb.encoding.PtBytes;
import com.palantir.atlasdb.util.Measurable;
import com.palantir.logsafe.Preconditions;
import com.palantir.logsafe.SafeArg;
import com.palantir.logsafe.UnsafeArg;
Expand All @@ -39,7 +40,7 @@
* @see Value
* @see Bytes
*/
public final class Cell implements Serializable, Comparable<Cell> {
public final class Cell implements Serializable, Comparable<Cell>, Measurable {
private static final long serialVersionUID = 1L;
private static final SafeLogger log = SafeLoggerFactory.get(Cell.class);

Expand Down Expand Up @@ -116,6 +117,7 @@ public byte[] getColumnName() {
return columnName;
}

@Override
public long sizeInBytes() {
Sam-Kramer marked this conversation as resolved.
Show resolved Hide resolved
return Long.sum(rowName.length, columnName.length);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just curious, is the Long.sum method call to implicitly widen and avoid casting to long for addition (e.g. return ((long) rowName.length) + columnName.length;)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes - it also looked more succinct to my eyes

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,12 @@
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.palantir.atlasdb.util.Measurable;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
import org.apache.commons.lang3.StringUtils;

public final class TableReference {
public final class TableReference implements Measurable {
private final Namespace namespace;
private final String tableName;

Expand Down Expand Up @@ -147,6 +148,7 @@ public String toString() {
return getQualifiedName();
}

@Override
public long sizeInBytes() {
return stringSizeInBytes(tableName) + stringSizeInBytes(namespace.getName());
Sam-Kramer marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These aren't the only strings that are passed around, though. The size of AbstractKeyValueService.internalTableName() is probably closer to what you want.

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import com.google.common.base.Function;
import com.google.common.base.MoreObjects;
import com.palantir.atlasdb.encoding.PtBytes;
import com.palantir.atlasdb.util.Measurable;
import com.palantir.logsafe.Preconditions;
import java.io.Serializable;
import java.util.Arrays;
Expand All @@ -31,7 +32,7 @@
* Values are stored in {@link Cell}s.
* @see Cell
*/
public final class Value implements Serializable {
public final class Value implements Serializable, Measurable {
private static final long serialVersionUID = 1L;
public static final long INVALID_VALUE_TIMESTAMP = -1L;

Expand Down Expand Up @@ -82,6 +83,7 @@ private Value(byte[] contents, long timestamp) {

public static final Function<Value, byte[]> GET_VALUE = Value::getContents;

@Override
public long sizeInBytes() {
// one long added for the timestamp
return Long.sum(Long.BYTES, contents.length);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/*
* (c) Copyright 2022 Palantir Technologies Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.palantir.atlasdb.util;

public interface Measurable {
long sizeInBytes();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe trivial, but I would add javadocs.

}
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,7 @@
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.when;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
Expand All @@ -33,79 +30,61 @@

@RunWith(MockitoJUnitRunner.class)
public class CandidateCellForSweepingTest {
private static final byte BYTE = (byte) 0xa;
private static final int CELL_NAME_SIZE = 100;
private static final int TIMESTAMPS_COLLECTION_SIZE = 200;
private static final long TIMESTAMP = 1977;
private static final ImmutableSet<Integer> THREE_CELL_NAME_SIZES =
ImmutableSet.of(1, Cell.MAX_NAME_LENGTH / 2, Cell.MAX_NAME_LENGTH);
private static final ImmutableList<Cell> EXAMPLE_CELLS =
Sets.cartesianProduct(THREE_CELL_NAME_SIZES, THREE_CELL_NAME_SIZES).stream()
.map(pair -> Cell.create(spawnBytes(pair.get(0)), spawnBytes(pair.get(1))))
.collect(ImmutableList.toImmutableList());

private static final ImmutableList<Integer> SORTED_TIMESTAMPS_SIZES = ImmutableList.of(0, 1, 2, 100, 1000);
private static final Cell CELL = Cell.create(spawnBytes(), spawnBytes());
private static final List<Long> TIMESTAMPS = Collections.nCopies(TIMESTAMPS_COLLECTION_SIZE, TIMESTAMP);

@Mock
private List<Long> MOCK_TIMESTAMPS;

@Test
public void candidateCellSizeWithLargerTimestampCollectionIsBigger() {
EXAMPLE_CELLS.forEach(cell -> {
CandidateCellForSweeping withOneTimestamp = createCandidateCell(cell, ImmutableSet.of(TIMESTAMP), true);
CandidateCellForSweeping withTwoTimestamps =
createCandidateCell(cell, ImmutableSet.of(TIMESTAMP, TIMESTAMP + 1), false);
assertThat(withOneTimestamp.sizeInBytes()).isLessThan(withTwoTimestamps.sizeInBytes());
});
public void candidateCellHasCorrectSizeForEmptyTimestampCollection() {
CandidateCellForSweeping candidate = createCandidateCell(ImmutableSet.of(), false);
assertThat(candidate.sizeInBytes()).isEqualTo(CELL.sizeInBytes());
}

@Test
public void candidateCellSizeIsCorrectForDifferentSortedTimestampSizes() {
SORTED_TIMESTAMPS_SIZES.forEach(sortedTimestampsSize -> {
for (CandidateCellForSweeping candidate : createCandidateCells(sortedTimestampsSize)) {
assertThat(candidate.sizeInBytes())
.isEqualTo(Long.sum(candidate.cell().sizeInBytes(), (long) sortedTimestampsSize * Long.BYTES));
}
});
public void candidateCellHasCorrectSizeForOneTimestamp() {
CandidateCellForSweeping candidate = createCandidateCell(ImmutableSet.of(TIMESTAMP), false);
assertThat(candidate.sizeInBytes()).isEqualTo(Long.sum(CELL.sizeInBytes(), Long.BYTES));
}

@Test
public void candidateCellSizeHasCorrectSizeForMultipleTimestamps() {
CandidateCellForSweeping candidate = createCandidateCell(TIMESTAMPS, false);
assertThat(candidate.sizeInBytes())
.isEqualTo(Long.sum(CELL.sizeInBytes(), Long.BYTES * ((long) TIMESTAMPS_COLLECTION_SIZE)));
}

@Test
public void noOverflowFromCollectionSize() {
// Mocking because otherwise we OOM.
when(MOCK_TIMESTAMPS.size()).thenReturn(Integer.MAX_VALUE);
Cell exampleCell = EXAMPLE_CELLS.get(0);
for (boolean isLatestValueEmpty : new boolean[] {true, false}) {
assertThat(createCandidateCell(exampleCell, MOCK_TIMESTAMPS, isLatestValueEmpty)
.sizeInBytes())
.isEqualTo(Long.sum(Integer.MAX_VALUE * 8L, exampleCell.sizeInBytes()));
}
assertThat(createCandidateCell(MOCK_TIMESTAMPS, false).sizeInBytes())
.isEqualTo(Long.sum(Integer.MAX_VALUE * 8L, CELL.sizeInBytes()));
}

private static ImmutableSet<CandidateCellForSweeping> createCandidateCells(int sortedTimestampsSize) {
ImmutableSet.Builder<CandidateCellForSweeping> builder = ImmutableSet.<CandidateCellForSweeping>builder();
for (boolean isLatestValueEmpty : new boolean[] {true, false}) {
builder.addAll(EXAMPLE_CELLS.stream()
.map(cell -> createCandidateCell(
cell, spawnCollectionOfTimestamps(sortedTimestampsSize), isLatestValueEmpty))
.iterator());
}
return builder.build();
@Test
public void candidateCellSizeIsEqualRegardlessOfLatestValueEmpty() {
CandidateCellForSweeping candidateWithLatestValueNonEmpty = createCandidateCell(TIMESTAMPS, false);
CandidateCellForSweeping candidateWithLatestValueEmpty = createCandidateCell(TIMESTAMPS, true);
assertThat(candidateWithLatestValueNonEmpty.sizeInBytes())
.isEqualTo(candidateWithLatestValueEmpty.sizeInBytes());
}

private static CandidateCellForSweeping createCandidateCell(
Cell cell, Collection<Long> sortedTimestamps, boolean isLatestValueEmpty) {
Collection<Long> sortedTimestamps, boolean isLatestValueEmpty) {
return ImmutableCandidateCellForSweeping.builder()
.cell(cell)
.cell(CELL)
.sortedTimestamps(sortedTimestamps)
.isLatestValueEmpty(isLatestValueEmpty)
.build();
}

private static List<Long> spawnCollectionOfTimestamps(int size) {
return Collections.nCopies(size, TIMESTAMP);
}

private static byte[] spawnBytes(int size) {
byte[] bytes = new byte[size];
Arrays.fill(bytes, BYTE);
return bytes;
private static byte[] spawnBytes() {
return new byte[CELL_NAME_SIZE];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tbh I'd just make this a static variable at this point

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,13 @@
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import com.palantir.logsafe.Preconditions;
import com.palantir.logsafe.exceptions.SafeIllegalArgumentException;
import com.palantir.logsafe.exceptions.SafeNullPointerException;
import com.palantir.util.Pair;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Set;
import java.util.stream.Collectors;
import org.junit.Test;

public final class CellTest {
private static final ImmutableSet<Integer> THREE_CELL_NAME_SIZES =
ImmutableSet.of(1, Cell.MAX_NAME_LENGTH / 2, Cell.MAX_NAME_LENGTH);
private static final ImmutableSet<Byte> TWO_BYTES = ImmutableSet.of((byte) 0xa, (byte) 0xb);

@Test
public void testCreate() {
Cell cell = Cell.create(bytes("row"), bytes("col"));
Expand Down Expand Up @@ -91,27 +82,18 @@ public void testHashCode() {

@Test
public void testSizeInBytes() {
for (Pair<Integer, Integer> sizes : allPairs(THREE_CELL_NAME_SIZES)) {
for (Pair<Byte, Byte> bytes : allPairs(TWO_BYTES)) {
assertThat(Cell.create(
spawnBytes(sizes.getLhSide(), bytes.getLhSide()),
spawnBytes(sizes.getRhSide(), bytes.getRhSide()))
.sizeInBytes())
.isEqualTo(sizes.getLhSide() + sizes.getRhSide());
}
}
assertThat(createCellWithByteSize(2).sizeInBytes()).isEqualTo(2);
assertThat(createCellWithByteSize(63).sizeInBytes()).isEqualTo(63);
assertThat(createCellWithByteSize(256).sizeInBytes()).isEqualTo(256);
}

private static <T> Set<Pair<T, T>> allPairs(Set<T> set) {
return Sets.cartesianProduct(set, set).stream()
.map(list -> Pair.create(list.get(0), list.get(1)))
.collect(Collectors.toSet());
private static Cell createCellWithByteSize(int size) {
Preconditions.checkArgument(size > 1, "Size should be at least 2");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: since the assertion is at least 2, no need to rely on the discrete domain - just write size >= 2?

return Cell.create(spawnBytes(size / 2), spawnBytes(size - (size / 2)));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe I'm being a bit thick, but shouldn't size / 2 == size - (size / 2) the vast majority of the time?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They are equal when n is even

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do you want them to not be equal when n is odd?

Copy link
Contributor

@jeremyk-91 jeremyk-91 Nov 2, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think @ergo14 wants the cell to have exactly the right size (e.g., otherwise createCellWithByteSize(7) returns a cell that has row and column of size 3, i.e. an overall size of 6) - it'll be one byte smaller if we just divide when size is odd

}

private static byte[] spawnBytes(int size, byte element) {
byte[] bytes = new byte[size];
Arrays.fill(bytes, element);
return bytes;
private static byte[] spawnBytes(int size) {
return new byte[size];
}

private static byte[] bytes(String value) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,28 +69,13 @@ public void sizeInBytesForTableReferenceWithEmptyNamespaceIsSizeOfAsciiTableName
public void sizeInBytesForTableReferenceWithAsciiNamespaceAndTableNameIsCorrect() {
assertThat(TableReference.create(Namespace.create("FOO"), "").sizeInBytes())
.isEqualTo(3 * Character.BYTES);
assertThat(TableReference.create(Namespace.create("FO"), "BAR").sizeInBytes())
.isEqualTo(5 * Character.BYTES);
assertThat(TableReference.create(Namespace.create("FOO"), "BAR").sizeInBytes())
.isEqualTo(6 * Character.BYTES);
assertThat(TableReference.create(Namespace.create("FOO"), "BABAZ").sizeInBytes())
.isEqualTo(8 * Character.BYTES);
assertThat(TableReference.create(Namespace.create("FOOBAR"), "BAZ").sizeInBytes())
.isEqualTo(9 * Character.BYTES);
}

@Test
public void orderOfSizeInBytesOfValuesWithSameNamespaceFollowsTableNameSizeOrder() {
Namespace namespace = Namespace.create("TestNameSpace");
assertThat(TableReference.create(namespace, "smallerTableName").sizeInBytes())
.isLessThan(TableReference.create(namespace, "largerTableNamePadding")
.sizeInBytes());
}

@Test
public void orderOfSizeInBytesOfValuesWithSameTableNameFollowsNamespaceOrder() {
String tableName = "tableName";
assertThat(TableReference.create(Namespace.create("smallerNamespace"), tableName)
.sizeInBytes())
.isLessThan(TableReference.create(Namespace.create("largerNamespacePadding"), tableName)
.sizeInBytes());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,33 +18,33 @@

import static org.assertj.core.api.Assertions.assertThat;

import com.palantir.atlasdb.encoding.PtBytes;
import java.util.Arrays;
import org.junit.Test;

public class ValueTest {
private static final int SMALLER = 100;
private static final int LARGER = 200;
private static final byte BYTE = (byte) 0xa;
private static final int CONTENTS_SIZE_1 = 100;
private static final int CONTENTS_SIZE_2 = 200;

@Test
public void sizeInBytesOfValueWithNoContentsIsSizeOfLong() {
assertThat(Value.create(PtBytes.EMPTY_BYTE_ARRAY, Value.INVALID_VALUE_TIMESTAMP)
.sizeInBytes())
.isEqualTo(Long.BYTES);
assertThat(createValue(0).sizeInBytes()).isEqualTo(Long.BYTES);
}

@Test
public void sizeInBytesOfValueOrderFollowsContentsSizeOrder() {
assertThat(Value.create(spawnBytes(SMALLER), Value.INVALID_VALUE_TIMESTAMP)
.sizeInBytes())
.isLessThan(Value.create(spawnBytes(LARGER), Value.INVALID_VALUE_TIMESTAMP)
.sizeInBytes());
public void sizeInBytesIsCorrectForOneByteContentsArray() {
assertThat(createValue(1).sizeInBytes()).isEqualTo(1L + Long.BYTES);
}

@Test
public void sizeInBytesIsCorrectForMultipleBytesArray() {
assertThat(createValue(CONTENTS_SIZE_1).sizeInBytes()).isEqualTo(Long.sum(CONTENTS_SIZE_1, Long.BYTES));
assertThat(createValue(CONTENTS_SIZE_2).sizeInBytes()).isEqualTo(Long.sum(CONTENTS_SIZE_2, Long.BYTES));
}

private static Value createValue(int contentsSize) {
return Value.create(spawnBytes(contentsSize), Value.INVALID_VALUE_TIMESTAMP);
}

private static byte[] spawnBytes(int size) {
Copy link
Contributor

@Sam-Kramer Sam-Kramer Nov 2, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

seeing as this method just creates a new byte[size], and is only used in one place, consider removing this creation method. although this is a very stylistic nit, so totally feel free to ignore :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: also for this and the others, we should name it createBytes rather than spawnBytes to match codebase style

byte[] bytes = new byte[size];
Arrays.fill(bytes, BYTE);
return bytes;
return new byte[size];
}
}