Skip to content
This repository has been archived by the owner on Nov 14, 2024. It is now read-only.

[TEX] Part 1b: TrackingKeyValueService: utilities for byte size (1) #6332

Merged
merged 12 commits into from
Nov 8, 2022
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,8 @@ public interface CandidateCellForSweeping {
* Otherwise, the return value is undefined and depends on the implementation.
*/
boolean isLatestValueEmpty();

/**
 * Estimates the size of this candidate in bytes: the size reported by {@code cell()} plus
 * {@link Long#BYTES} for every element of {@code sortedTimestamps()}.
 *
 * @return the estimated size in bytes
 */
default long sizeInBytes() {
    long cellBytes = cell().sizeInBytes();
    // Hold the element count in a long so the multiplication below is done in 64-bit arithmetic.
    long timestampCount = sortedTimestamps().size();
    return cellBytes + timestampCount * Long.BYTES;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,10 @@ public byte[] getColumnName() {
return columnName;
}

public long sizeInBytes() {
Sam-Kramer marked this conversation as resolved.
Show resolved Hide resolved
return Long.sum(rowName.length, columnName.length);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just curious, is the Long.sum method call to implicitly widen and avoid casting to long for addition (e.g. return ((long) rowName.length) + columnName.length;)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes - it also looked more succinct to my eyes

}

@Override
public int compareTo(Cell other) {
int cmp = UnsignedBytes.lexicographicalComparator().compare(rowName, other.rowName);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ public byte[] getRowName() {
return row.clone();
}

public long getRowNameSize() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume this is needed as we capture the row name size separately from the columns?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added only because getRowName copies data (& also because the parameter type T in row result won't necessarily implement Measurable so we can't implement this in-class)

return row.length;
}

/**
 * Returns the column map held by this row result. The map is returned as-is; no copy is made here.
 *
 * @return the columns of this row
 */
public NavigableMap<byte[], T> getColumns() {
    return this.columns;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
import org.apache.commons.lang3.StringUtils;

Expand Down Expand Up @@ -146,6 +147,14 @@ public String toString() {
return getQualifiedName();
}

public long sizeInBytes() {
return stringSizeInBytes(tableName) + stringSizeInBytes(namespace.getName());
Sam-Kramer marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These aren't the only strings that are passed around, though. The size of AbstractKeyValueService.internalTableName() is probably closer to what you want.

}

/**
 * Returns the number of bytes taken by the UTF-16 code units of {@code string}, i.e.
 * {@link Character#BYTES} for each {@code char} reported by {@link String#length()}.
 *
 * <p>The count is derived from the string's length rather than from its UTF-8 encoding: multiplying a
 * UTF-8 byte count by the size of a UTF-16 code unit mixes two encodings and over-counts any character
 * outside ASCII, and encoding the string allocates a temporary array just to read its length. For
 * ASCII-only input the result is the same either way.
 *
 * @param string the string to measure; must not be null
 * @return the size of the string's characters in bytes
 */
private static long stringSizeInBytes(String string) {
    // Widen before multiplying so the product is computed as a long.
    return Character.BYTES * ((long) string.length());
}

public static TableReference fromString(String tableReferenceAsString) {
int dotCount = StringUtils.countMatches(tableReferenceAsString, ".");
if (dotCount == 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ private Value(byte[] contents, long timestamp) {

public static final Function<Value, byte[]> GET_VALUE = Value::getContents;

/**
 * Estimates the size of this value in bytes: the length of the contents array plus
 * {@link Long#BYTES} to account for the timestamp.
 *
 * @return the estimated size in bytes
 */
public long sizeInBytes() {
    long contentsBytes = contents.length;
    // The extra long covers the timestamp.
    return Long.BYTES + contentsBytes;
}

@Override
public boolean equals(Object obj) {
if (this == obj) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/*
* (c) Copyright 2022 Palantir Technologies Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.palantir.atlasdb.keyvalue.api;

import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.when;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.junit.MockitoJUnitRunner;

@RunWith(MockitoJUnitRunner.class)
public class CandidateCellForSweepingTest {
private static final byte BYTE = (byte) 0xa;
private static final long TIMESTAMP = 1977;
private static final ImmutableSet<Integer> THREE_CELL_NAME_SIZES =
ImmutableSet.of(1, Cell.MAX_NAME_LENGTH / 2, Cell.MAX_NAME_LENGTH);
private static final ImmutableList<Cell> EXAMPLE_CELLS =
Sets.cartesianProduct(THREE_CELL_NAME_SIZES, THREE_CELL_NAME_SIZES).stream()
.map(pair -> Cell.create(spawnBytes(pair.get(0)), spawnBytes(pair.get(1))))
.collect(ImmutableList.toImmutableList());

private static final ImmutableList<Integer> SORTED_TIMESTAMPS_SIZES = ImmutableList.of(0, 1, 2, 100, 1000);

@Mock
private List<Long> MOCK_TIMESTAMPS;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is only used in the overflow test from what I can see; I'd suggest making it a local to that method.

Also, nit: case is wrong: fields use camelCase (so mockTimestamps)


@Test
public void candidateCellSizeWithLargerTimestampCollectionIsBigger() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think your testing here is very thorough, but I think we can relax it a bit in favor of readability.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's also "best practice" to try and test a single concept per unit test. So for this, I'd just make a couple like:
void candidateCellSizeHasCorrectSizeForOneTimestamp, void candidateCellSizeHasCorrectSizeForMultipleTimestamps, void candidateCellSizeIsEqualRegardlessOfLatestValueEmpty

EXAMPLE_CELLS.forEach(cell -> {
CandidateCellForSweeping withOneTimestamp = createCandidateCell(cell, ImmutableSet.of(TIMESTAMP), true);
CandidateCellForSweeping withTwoTimestamps =
createCandidateCell(cell, ImmutableSet.of(TIMESTAMP, TIMESTAMP + 1), false);
assertThat(withOneTimestamp.sizeInBytes()).isLessThan(withTwoTimestamps.sizeInBytes());
});
}

@Test
public void candidateCellSizeIsCorrectForDifferentSortedTimestampSizes() {
    // For every timestamp count, each candidate must report its cell size plus one long per timestamp.
    for (int timestampCount : SORTED_TIMESTAMPS_SIZES) {
        long expectedTimestampBytes = (long) timestampCount * Long.BYTES;
        createCandidateCells(timestampCount).forEach(candidate -> assertThat(candidate.sizeInBytes())
                .isEqualTo(candidate.cell().sizeInBytes() + expectedTimestampBytes));
    }
}

@Test
public void noOverflowFromCollectionSize() {
    // A real collection of this size would run out of memory, so only size() is stubbed on a mock.
    when(MOCK_TIMESTAMPS.size()).thenReturn(Integer.MAX_VALUE);
    Cell cell = EXAMPLE_CELLS.get(0);
    long expectedSize = cell.sizeInBytes() + (long) Integer.MAX_VALUE * Long.BYTES;
    for (boolean latestValueEmpty : ImmutableList.of(true, false)) {
        CandidateCellForSweeping candidate = createCandidateCell(cell, MOCK_TIMESTAMPS, latestValueEmpty);
        assertThat(candidate.sizeInBytes()).isEqualTo(expectedSize);
    }
}

/**
 * Builds one candidate per example cell and per value of the latest-value-empty flag, each carrying
 * {@code sortedTimestampsSize} timestamps.
 */
private static ImmutableSet<CandidateCellForSweeping> createCandidateCells(int sortedTimestampsSize) {
    ImmutableSet.Builder<CandidateCellForSweeping> candidates = ImmutableSet.builder();
    for (boolean latestValueEmpty : new boolean[] {true, false}) {
        for (Cell cell : EXAMPLE_CELLS) {
            candidates.add(createCandidateCell(
                    cell, spawnCollectionOfTimestamps(sortedTimestampsSize), latestValueEmpty));
        }
    }
    return candidates.build();
}

/**
 * Assembles a candidate through the {@code ImmutableCandidateCellForSweeping} builder from the given
 * cell, timestamps and latest-value-empty flag.
 */
private static CandidateCellForSweeping createCandidateCell(
        Cell cell, Collection<Long> sortedTimestamps, boolean isLatestValueEmpty) {
    return ImmutableCandidateCellForSweeping.builder()
            .isLatestValueEmpty(isLatestValueEmpty)
            .sortedTimestamps(sortedTimestamps)
            .cell(cell)
            .build();
}

/** Returns a list holding {@code size} copies of the shared test timestamp. */
private static List<Long> spawnCollectionOfTimestamps(int size) {
    List<Long> repeatedTimestamps = Collections.nCopies(size, TIMESTAMP);
    return repeatedTimestamps;
}

private static byte[] spawnBytes(int size) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Out of curiosity, why do we need to fill the array with our default byte? I think all we really care about is size, no?

byte[] bytes = new byte[size];
Arrays.fill(bytes, BYTE);
return bytes;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,21 @@
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import com.palantir.logsafe.exceptions.SafeIllegalArgumentException;
import com.palantir.logsafe.exceptions.SafeNullPointerException;
import com.palantir.util.Pair;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Set;
import java.util.stream.Collectors;
import org.junit.Test;

public final class CellTest {
private static final ImmutableSet<Integer> THREE_CELL_NAME_SIZES =
ImmutableSet.of(1, Cell.MAX_NAME_LENGTH / 2, Cell.MAX_NAME_LENGTH);
private static final ImmutableSet<Byte> TWO_BYTES = ImmutableSet.of((byte) 0xa, (byte) 0xb);

@Test
public void testCreate() {
Expand Down Expand Up @@ -80,6 +89,31 @@ public void testHashCode() {
.hasSameHashCodeAs(Cell.create(bytes("col"), bytes("row")));
}

@Test
public void testSizeInBytes() {
    // The reported size must equal row length plus column length, whatever bytes fill the names.
    for (Pair<Integer, Integer> sizes : allPairs(THREE_CELL_NAME_SIZES)) {
        int rowLength = sizes.getLhSide();
        int columnLength = sizes.getRhSide();
        for (Pair<Byte, Byte> fill : allPairs(TWO_BYTES)) {
            Cell cell = Cell.create(
                    spawnBytes(rowLength, fill.getLhSide()), spawnBytes(columnLength, fill.getRhSide()));
            assertThat(cell.sizeInBytes()).isEqualTo(rowLength + columnLength);
        }
    }
}

/** Returns every ordered pair that can be formed from the elements of {@code set}, including self-pairs. */
private static <T> Set<Pair<T, T>> allPairs(Set<T> set) {
    return Sets.cartesianProduct(set, set).stream()
            .map(combination -> {
                T left = combination.get(0);
                T right = combination.get(1);
                return Pair.create(left, right);
            })
            .collect(Collectors.toSet());
}

/** Creates an array of {@code size} bytes in which every position holds {@code element}. */
private static byte[] spawnBytes(int size, byte element) {
    byte[] filled = new byte[size];
    for (int index = 0; index < filled.length; index++) {
        filled[index] = element;
    }
    return filled;
}

/** Encodes {@code value} as UTF-8 and returns the resulting bytes. */
private static byte[] bytes(String value) {
    byte[] encoded = value.getBytes(StandardCharsets.UTF_8);
    return encoded;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,4 +56,41 @@ public int hashCode() {
})
.doesNotHaveSameHashCodeAs(TableReference.create(Namespace.create("table"), "test"));
}

@Test
public void sizeInBytesForTableReferenceWithEmptyNamespaceIsSizeOfAsciiTableName() {
assertThat(TableReference.createWithEmptyNamespace("").sizeInBytes()).isEqualTo(0);
Copy link
Contributor

@jeremyk-91 jeremyk-91 Nov 2, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

non actionable: I'm not sure if this is a realistic case 😬

assertThat(TableReference.createWithEmptyNamespace("FOO").sizeInBytes()).isEqualTo(3 * Character.BYTES);
assertThat(TableReference.createWithEmptyNamespace("FOOBA").sizeInBytes())
.isEqualTo(5 * Character.BYTES);
}

@Test
public void sizeInBytesForTableReferenceWithAsciiNamespaceAndTableNameIsCorrect() {
    // Expected size is Character.BYTES per character of namespace and table name combined.
    Namespace foo = Namespace.create("FOO");
    TableReference emptyTableName = TableReference.create(foo, "");
    TableReference shortTableName = TableReference.create(foo, "BAR");
    TableReference longerTableName = TableReference.create(foo, "BABAZ");
    TableReference longerNamespace = TableReference.create(Namespace.create("FOOBAR"), "BAZ");

    assertThat(emptyTableName.sizeInBytes()).isEqualTo(3 * Character.BYTES);
    assertThat(shortTableName.sizeInBytes()).isEqualTo(6 * Character.BYTES);
    assertThat(longerTableName.sizeInBytes()).isEqualTo(8 * Character.BYTES);
    assertThat(longerNamespace.sizeInBytes()).isEqualTo(9 * Character.BYTES);
}

@Test
public void orderOfSizeInBytesOfValuesWithSameNamespaceFollowsTableNameSizeOrder() {
    // With the namespace fixed, the longer table name must yield the larger size.
    Namespace sharedNamespace = Namespace.create("TestNameSpace");
    TableReference shorter = TableReference.create(sharedNamespace, "smallerTableName");
    TableReference longer = TableReference.create(sharedNamespace, "largerTableNamePadding");
    assertThat(shorter.sizeInBytes()).isLessThan(longer.sizeInBytes());
}

@Test
public void orderOfSizeInBytesOfValuesWithSameTableNameFollowsNamespaceOrder() {
    // With the table name fixed, the longer namespace must yield the larger size.
    String sharedTableName = "tableName";
    TableReference shorter = TableReference.create(Namespace.create("smallerNamespace"), sharedTableName);
    TableReference longer =
            TableReference.create(Namespace.create("largerNamespacePadding"), sharedTableName);
    assertThat(shorter.sizeInBytes()).isLessThan(longer.sizeInBytes());
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* (c) Copyright 2022 Palantir Technologies Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.palantir.atlasdb.keyvalue.api;

import static org.assertj.core.api.Assertions.assertThat;

import com.palantir.atlasdb.encoding.PtBytes;
import java.util.Arrays;
import org.junit.Test;

public class ValueTest {
private static final int SMALLER = 100;
private static final int LARGER = 200;
private static final byte BYTE = (byte) 0xa;

@Test
public void sizeInBytesOfValueWithNoContentsIsSizeOfLong() {
    // With empty contents only the timestamp's long contributes to the size.
    Value emptyValue = Value.create(PtBytes.EMPTY_BYTE_ARRAY, Value.INVALID_VALUE_TIMESTAMP);
    assertThat(emptyValue.sizeInBytes()).isEqualTo(Long.BYTES);
}

@Test
public void sizeInBytesOfValueOrderFollowsContentsSizeOrder() {
    // Same timestamp, different content lengths: the larger contents must give the larger size.
    Value smallerValue = Value.create(spawnBytes(SMALLER), Value.INVALID_VALUE_TIMESTAMP);
    Value largerValue = Value.create(spawnBytes(LARGER), Value.INVALID_VALUE_TIMESTAMP);
    assertThat(smallerValue.sizeInBytes()).isLessThan(largerValue.sizeInBytes());
}

private static byte[] spawnBytes(int size) {
Copy link
Contributor

@Sam-Kramer Sam-Kramer Nov 2, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

seeing as this method just creates a new byte[size], and is only used in one place, consider removing this creation method. although this is a very stylistic nit, so totally feel free to ignore :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: also for this and the others, we should name it createBytes rather than spawnBytes to match codebase style

byte[] bytes = new byte[size];
Arrays.fill(bytes, BYTE);
return bytes;
}
}