Merge branch 'main' into log-estimated-pending-compaction-bytes

ajkr authored Jan 22, 2024
2 parents 44c09e4 + bc95cdd commit b69389d
Showing 53 changed files with 1,068 additions and 637 deletions.
21 changes: 11 additions & 10 deletions .circleci/config.yml
@@ -146,9 +146,12 @@ commands:
install-maven:
steps:
- run:
name: Install maven
name: Install Maven
command: |
sudo apt-get update -y && sudo apt-get install -y maven
wget --no-check-certificate https://dlcdn.apache.org/maven/maven-3/3.9.6/binaries/apache-maven-3.9.6-bin.tar.gz
tar zxf apache-maven-3.9.6-bin.tar.gz
echo "export M2_HOME=$(pwd)/apache-maven-3.9.6" >> $BASH_ENV
echo 'export PATH=$M2_HOME/bin:$PATH' >> $BASH_ENV
setup-folly:
steps:
@@ -231,6 +234,7 @@ executors:
- image: zjay437/rocksdb:0.6
linux-java-docker:
docker:
# This is the Docker Image used for building RocksJava releases, see: https://github.com/evolvedbinary/docker-rocksjava
- image: evolvedbinary/rocksjava:centos6_x64-be

jobs:
@@ -623,7 +627,7 @@ jobs:
- windows-build-steps

build-linux-java:
executor: linux-docker
executor: linux-java-docker
resource_class: large
steps:
- pre-steps
@@ -636,17 +640,13 @@ jobs:
which javac && javac -version
- run:
name: "Test RocksDBJava"
command: make V=1 J=8 -j8 jtest
command: scl enable devtoolset-7 'make V=1 J=8 -j8 jtest'
- post-steps

build-linux-java-pmd:
machine:
image: ubuntu-2004:202111-02
executor: linux-java-docker
resource_class: large
environment:
JAVA_HOME: /usr/lib/jvm/java-8-openjdk-amd64
steps:
- install-maven
- pre-steps
- run:
name: "Set Java Environment"
@@ -655,9 +655,10 @@ jobs:
echo 'export PATH=$JAVA_HOME/bin:$PATH' >> $BASH_ENV
which java && java -version
which javac && javac -version
- install-maven
- run:
name: "PMD RocksDBJava"
command: make V=1 J=8 -j8 jpmd
command: scl enable devtoolset-7 'make V=1 J=8 -j8 jpmd'
- post-pmd-steps

build-linux-java-static:
8 changes: 6 additions & 2 deletions .github/actions/install-maven/action.yml
@@ -2,6 +2,10 @@ name: install-maven
runs:
using: composite
steps:
- name: Install maven
run: sudo apt-get update -y && sudo apt-get install -y maven
- name: Install Maven
run: |
wget --no-check-certificate https://dlcdn.apache.org/maven/maven-3/3.9.6/binaries/apache-maven-3.9.6-bin.tar.gz
tar zxf apache-maven-3.9.6-bin.tar.gz
echo "export M2_HOME=$(pwd)/apache-maven-3.9.6" >> $GITHUB_ENV
echo "$(pwd)/apache-maven-3.9.6/bin" >> $GITHUB_PATH
shell: bash
7 changes: 5 additions & 2 deletions .github/actions/post-benchmarks/action.yml
@@ -2,9 +2,12 @@ name: post-benchmarks
runs:
using: composite
steps:
- uses: actions/[email protected]
- name: Upload Benchmark Results artifact
uses: actions/[email protected]
with:
path: "${{ runner.temp }}/benchmark-results"
name: benchmark-results
path: "${{ runner.temp }}/benchmark-results/**"
if-no-files-found: error
- name: Send benchmark report to visualisation
run: |-
set +e
10 changes: 0 additions & 10 deletions .github/actions/post-pmd-steps/action.yml

This file was deleted.

41 changes: 27 additions & 14 deletions .github/actions/post-steps/action.yml
@@ -1,25 +1,38 @@
name: post-steps
description: Steps that are taken after a RocksDB job
inputs:
artifact-prefix:
description: Prefix to append to the name of artifacts that are uploaded
required: true
default: "${{ github.job }}"
runs:
using: composite
steps:
- uses: actions/[email protected]
- name: Upload Test Results artifact
uses: actions/[email protected]
with:
path: "${{ runner.temp }}/test-results"
- uses: actions/[email protected]
name: "${{ inputs.artifact-prefix }}-test-results"
path: "${{ runner.temp }}/test-results/**"
- name: Upload DB LOG file artifact
uses: actions/[email protected]
with:
name: "${{ inputs.artifact-prefix }}-db-log-file"
path: LOG
- name: Compress Test Logs
run: tar -cvzf t.tar.gz t
- name: Copy Test Logs (on Failure)
if: ${{ failure() }}
run: |
mkdir -p ${{ runner.temp }}/failure-test-logs
cp -r t/* ${{ runner.temp }}/failure-test-logs
shell: bash
- uses: actions/[email protected]
- name: Upload Test Logs (on Failure) artifact
uses: actions/[email protected]
with:
path: t.tar.gz
- run: |-
mkdir -p ${{ runner.temp }}/core_dumps
cp core.* ${{ runner.temp }}/core_dumps
if: ${{ failure() }}
shell: bash
- uses: actions/[email protected]
name: "${{ inputs.artifact-prefix }}-failure-test-logs"
path: ${{ runner.temp }}/failure-test-logs/**
if-no-files-found: ignore
- name: Upload Core Dumps artifact
uses: actions/[email protected]
with:
path: "${{ runner.temp }}/core_dumps"
name: "${{ inputs.artifact-prefix }}-core-dumps"
path: "core.*"
if-no-files-found: ignore
38 changes: 29 additions & 9 deletions .github/workflows/pr-jobs.yml
@@ -324,8 +324,9 @@ jobs:
- name: compress test report
run: tar -cvzf scan_build_report.tar.gz scan_build_report
if: failure()
- uses: actions/upload-artifact@v3.1.3
- uses: actions/upload-artifact@v4.0.0
with:
name: scan-build-report
path: scan_build_report.tar.gz
build-linux-unity-and-headers:
if: ${{ github.repository_owner == 'facebook' }}
@@ -461,19 +462,29 @@ jobs:
runs-on:
labels: 4-core-ubuntu
container:
image: zjay437/rocksdb:0.6
image: evolvedbinary/rocksjava:centos6_x64-be
options: --shm-size=16gb
steps:
- uses: actions/[email protected]
# The docker image is intentionally based on an OS that has an older GLIBC version.
# That GLIBC is incompatible with GitHub's actions/checkout, so we implement a manual checkout step.
- name: Checkout
env:
GH_TOKEN: ${{ github.token }}
run: |
chown `whoami` . || true
git clone --no-checkout https://oath2:${GH_TOKEN}@github.com/${{ github.repository }}.git .
git -c protocol.version=2 fetch --update-head-ok --no-tags --prune --no-recurse-submodules --depth=1 origin +${{ github.sha }}:${{ github.ref }}
git checkout --progress --force ${{ github.ref }}
git log -1 --format='%H'
- uses: "./.github/actions/pre-steps"
- name: Set Java Environment
run: |-
echo "JAVA_HOME=${JAVA_HOME}"
which java && java -version
which javac && javac -version
- name: Test RocksDBJava
run: make V=1 J=8 -j8 jtest
- uses: "./.github/actions/post-steps"
run: scl enable devtoolset-7 'make V=1 J=8 -j8 jtest'
# NOTE: post-steps skipped because of compatibility issues with the docker image
build-linux-java-static:
if: ${{ github.repository_owner == 'facebook' }}
runs-on:
@@ -482,9 +493,8 @@ jobs:
image: evolvedbinary/rocksjava:centos6_x64-be
options: --shm-size=16gb
steps:
# The docker image is based on such an old OS that it has a GLIBC
# incompatibility with actions/checkout and other actions. Thus we
# implement a manual checkout step.
# The docker image is intentionally based on an OS that has an older GLIBC version.
# That GLIBC is incompatible with GitHub's actions/checkout, so we implement a manual checkout step.
- name: Checkout
env:
GH_TOKEN: ${{ github.token }}
@@ -574,6 +584,9 @@ jobs:
if: ${{ github.repository_owner == 'facebook' }}
runs-on:
labels: 4-core-ubuntu
container:
image: evolvedbinary/rocksjava:rockylinux8_x64-be
options: --shm-size=16gb
steps:
- uses: actions/[email protected]
- uses: "./.github/actions/install-maven"
@@ -585,4 +598,11 @@ jobs:
which javac && javac -version
- name: PMD RocksDBJava
run: make V=1 J=8 -j8 jpmd
- uses: "./.github/actions/post-pmd-steps"
- uses: actions/[email protected]
with:
name: pmd-report
path: "${{ github.workspace }}/java/target/pmd.xml"
- uses: actions/[email protected]
with:
name: maven-site
path: "${{ github.workspace }}/java/target/site"
25 changes: 25 additions & 0 deletions HISTORY.md
@@ -1,6 +1,31 @@
# Rocksdb Change Log
> NOTE: Entries for next release do not go here. Follow instructions in `unreleased_history/README.txt`
## 8.11.0 (01/19/2024)
### New Features
* Add new statistics: `rocksdb.sst.write.micros` measures the time of each write to an SST file; `rocksdb.file.write.{flush|compaction|db.open}.micros` measure the time of each write to an SST table (currently only the block-based table format) or blob file during flush, compaction, and DB open.

### Public API Changes
* Added another enumerator `kVerify` to enum class `FileOperationType` in listener.h. Update your `switch` statements as needed.
* Add `CompressionOptions` to the `CompressedSecondaryCacheOptions` structure to allow users to specify library-specific options when creating the compressed secondary cache.
* Deprecated several options: `level_compaction_dynamic_file_size`, `ignore_max_compaction_bytes_for_input`, `check_flush_compaction_key_order`, `flush_verify_memtable_count`, `compaction_verify_record_count`, `fail_if_options_file_error`, and `enforce_single_del_contracts`.
* Exposed the `ttl` option via the C API.

### Behavior Changes
* `rocksdb.blobdb.blob.file.write.micros` now also measures the time spent writing the header and footer, so COUNT may be higher and individual values may be smaller than before. For stacked BlobDB, it no longer measures the time spent explicitly flushing the blob file.
* Files (except those on the last level) will be compacted to the next level if their data age exceeds `periodic_compaction_seconds`.
* Reduced the compaction debt ratio trigger for scheduling parallel compactions.
* For leveled compaction with the default compaction pri (`kMinOverlappingRatio`), files marked for compaction are prioritized over unmarked files when picking a file from a level for compaction.

### Bug Fixes
* Fixed a bug in `auto_readahead_size` where, combined with `IndexType::kBinarySearchWithFirstKey`, reads could fail or the iterator could land on a wrong key.
* Fixed some cases in which DB file corruption was detected but ignored when creating a backup with `BackupEngine`.
* Fixed bugs where `rocksdb.blobdb.blob.file.synced` included blob files that failed to sync and `rocksdb.blobdb.blob.file.bytes.written` included blob bytes that failed to be written.
* Fixed a possible memory leak or crash on a failure (such as an I/O error) during automatic atomic flush of multiple column families.
* Fixed some cases of in-memory data corruption when using mmap reads with `BackupEngine`, `sst_dump`, or `ldb`.
* Fixed issues with the experimental `preclude_last_level_data_seconds` option that could interfere with expected data tiering.
* Fixed the handling of the edge case when all existing blob files become unreferenced. Such files are now correctly deleted.

## 8.10.0 (12/15/2023)
### New Features
* Provide support for async_io to trim readahead_size by doing block cache lookup
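Aside: a minimal C++ sketch (not part of this commit) of exercising the new write-timing statistics from the 8.11.0 notes above. It assumes the histogram enum `SST_WRITE_MICROS` corresponds to `rocksdb.sst.write.micros`.

// Minimal sketch, assuming SST_WRITE_MICROS is the histogram enum for
// "rocksdb.sst.write.micros": enable statistics, force an SST write via
// flush, then read the histogram.
#include <cassert>
#include <iostream>

#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/statistics.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.statistics = rocksdb::CreateDBStatistics();

  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/stats_demo", &db);
  assert(s.ok());

  s = db->Put(rocksdb::WriteOptions(), "key", "value");
  assert(s.ok());
  s = db->Flush(rocksdb::FlushOptions());  // Forces an SST write.
  assert(s.ok());

  rocksdb::HistogramData hist;
  options.statistics->histogramData(rocksdb::SST_WRITE_MICROS, &hist);
  std::cout << "rocksdb.sst.write.micros p99: " << hist.percentile99 << "\n";

  delete db;
  return 0;
}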
13 changes: 8 additions & 5 deletions db/builder.cc
@@ -36,6 +36,7 @@
#include "rocksdb/iterator.h"
#include "rocksdb/options.h"
#include "rocksdb/table.h"
#include "seqno_to_time_mapping.h"
#include "table/block_based/block_based_table_builder.h"
#include "table/format.h"
#include "table/internal_iterator.h"
@@ -299,12 +300,14 @@ Status BuildTable(
if (!s.ok() || empty) {
builder->Abandon();
} else {
std::string seqno_to_time_mapping_str;
seqno_to_time_mapping.Encode(
seqno_to_time_mapping_str, meta->fd.smallest_seqno,
meta->fd.largest_seqno, meta->file_creation_time);
SeqnoToTimeMapping relevant_mapping;
relevant_mapping.CopyFromSeqnoRange(seqno_to_time_mapping,
meta->fd.smallest_seqno,
meta->fd.largest_seqno);
relevant_mapping.SetCapacity(kMaxSeqnoTimePairsPerSST);
relevant_mapping.Enforce(tboptions.file_creation_time);
builder->SetSeqnoTimeTableProperties(
seqno_to_time_mapping_str,
relevant_mapping,
ioptions.compaction_style == CompactionStyle::kCompactionStyleFIFO
? meta->file_creation_time
: meta->oldest_ancester_time);
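Aside: the builder change above stops string-encoding the full mapping and instead copies only the seqno range covered by the output file into a capacity-capped mapping. A self-contained sketch of that copy-then-cap pattern follows; the types and names are illustrative stand-ins, not RocksDB's internal `SeqnoToTimeMapping` API.

// Illustrative stand-in for the copy-then-cap pattern; not RocksDB's
// internal API.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <iterator>
#include <map>

using SeqnoTimeMap = std::map<uint64_t, uint64_t>;  // seqno -> unix time

// Copy only the entries relevant to [smallest, largest], then enforce a
// capacity by dropping every other interior entry until the map fits.
SeqnoTimeMap CopyRangeWithCapacity(const SeqnoTimeMap& full, uint64_t smallest,
                                   uint64_t largest, std::size_t capacity) {
  SeqnoTimeMap out;
  for (auto it = full.lower_bound(smallest);
       it != full.end() && it->first <= largest; ++it) {
    out.insert(*it);
  }
  while (out.size() > capacity && out.size() > 2) {
    auto it = std::next(out.begin());  // Always keep the first and last entry.
    while (it != out.end() && std::next(it) != out.end()) {
      it = out.erase(it);  // Drop this interior entry...
      if (it != out.end() && std::next(it) != out.end()) {
        ++it;  // ...and keep the next one, halving interior density.
      }
    }
  }
  return out;
}

int main() {
  SeqnoTimeMap full{{10, 100}, {20, 200}, {30, 300}, {40, 400}, {50, 500}};
  for (const auto& [seqno, time] : CopyRangeWithCapacity(full, 15, 50, 3)) {
    std::cout << seqno << " -> " << time << "\n";  // prints 20, 40, 50
  }
  return 0;
}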
10 changes: 10 additions & 0 deletions db/c.cc
@@ -4005,6 +4005,16 @@ rocksdb_ratelimiter_t* rocksdb_ratelimiter_create_auto_tuned(
return rate_limiter;
}

rocksdb_ratelimiter_t* rocksdb_ratelimiter_create_with_mode(
int64_t rate_bytes_per_sec, int64_t refill_period_us, int32_t fairness,
int mode, bool auto_tuned) {
rocksdb_ratelimiter_t* rate_limiter = new rocksdb_ratelimiter_t;
rate_limiter->rep.reset(
NewGenericRateLimiter(rate_bytes_per_sec, refill_period_us, fairness,
static_cast<RateLimiter::Mode>(mode), auto_tuned));
return rate_limiter;
}

void rocksdb_ratelimiter_destroy(rocksdb_ratelimiter_t* limiter) {
delete limiter;
}
5 changes: 5 additions & 0 deletions db/c_test.c
@@ -718,6 +718,11 @@ int main(int argc, char** argv) {
rocksdb_options_set_ratelimiter(options, rate_limiter);
rocksdb_ratelimiter_destroy(rate_limiter);

rate_limiter = rocksdb_ratelimiter_create_with_mode(1000 * 1024 * 1024,
100 * 1000, 10, 0, true);
rocksdb_options_set_ratelimiter(options, rate_limiter);
rocksdb_ratelimiter_destroy(rate_limiter);

roptions = rocksdb_readoptions_create();
rocksdb_readoptions_set_verify_checksums(roptions, 1);
rocksdb_readoptions_set_fill_cache(roptions, 1);
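Aside: the new C function above is a thin wrapper over the existing C++ factory, casting the integer `mode` to `RateLimiter::Mode`. A sketch of the equivalent C++ call, mirroring the c_test.c invocation; the assumption that mode 0 maps to `RateLimiter::Mode::kReadsOnly` follows from the cast in `rocksdb_ratelimiter_create_with_mode`.

// Sketch of the C++ factory call that rocksdb_ratelimiter_create_with_mode
// wraps; mirrors the c_test.c call above. Mode 0 is assumed to be kReadsOnly.
#include <memory>

#include "rocksdb/options.h"
#include "rocksdb/rate_limiter.h"

int main() {
  rocksdb::Options options;
  options.rate_limiter.reset(rocksdb::NewGenericRateLimiter(
      /*rate_bytes_per_sec=*/1000 * 1024 * 1024,
      /*refill_period_us=*/100 * 1000,
      /*fairness=*/10,
      rocksdb::RateLimiter::Mode::kReadsOnly,
      /*auto_tuned=*/true));
  // `options` can now be used to open a DB whose reads are rate-limited,
  // with auto-tuning adjusting the effective limit below rate_bytes_per_sec.
  return 0;
}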
40 changes: 24 additions & 16 deletions db/compaction/compaction_job.cc
@@ -288,39 +288,37 @@ void CompactionJob::Prepare() {

if (preserve_time_duration > 0) {
const ReadOptions read_options(Env::IOActivity::kCompaction);
// setup seqno_to_time_mapping_
seqno_to_time_mapping_.SetMaxTimeDuration(preserve_time_duration);
// Setup seqno_to_time_mapping_ with relevant time range.
seqno_to_time_mapping_.SetMaxTimeSpan(preserve_time_duration);
for (const auto& each_level : *c->inputs()) {
for (const auto& fmd : each_level.files) {
std::shared_ptr<const TableProperties> tp;
Status s =
cfd->current()->GetTableProperties(read_options, &tp, fmd, nullptr);
if (s.ok()) {
seqno_to_time_mapping_.Add(tp->seqno_to_time_mapping)
.PermitUncheckedError();
seqno_to_time_mapping_.Add(fmd->fd.smallest_seqno,
fmd->oldest_ancester_time);
s = seqno_to_time_mapping_.DecodeFrom(tp->seqno_to_time_mapping);
}
if (!s.ok()) {
ROCKS_LOG_WARN(
db_options_.info_log,
"Problem reading or processing seqno-to-time mapping: %s",
s.ToString().c_str());
}
}
}

auto status = seqno_to_time_mapping_.Sort();
if (!status.ok()) {
ROCKS_LOG_WARN(db_options_.info_log,
"Invalid sequence number to time mapping: Status: %s",
status.ToString().c_str());
}
int64_t _current_time = 0;
status = db_options_.clock->GetCurrentTime(&_current_time);
if (!status.ok()) {
Status s = db_options_.clock->GetCurrentTime(&_current_time);
if (!s.ok()) {
ROCKS_LOG_WARN(db_options_.info_log,
"Failed to get current time in compaction: Status: %s",
status.ToString().c_str());
s.ToString().c_str());
// preserve all time information
preserve_time_min_seqno_ = 0;
preclude_last_level_min_seqno_ = 0;
seqno_to_time_mapping_.Enforce();
} else {
seqno_to_time_mapping_.TruncateOldEntries(_current_time);
seqno_to_time_mapping_.Enforce(_current_time);
uint64_t preserve_time =
static_cast<uint64_t>(_current_time) > preserve_time_duration
? _current_time - preserve_time_duration
@@ -344,6 +342,16 @@ void CompactionJob::Prepare() {
1;
}
}
// For accuracy of the GetProximalSeqnoBeforeTime queries above, we only
// limit the capacity after them.
// If we set the capacity to the per-SST limit, we could be throwing away
// fidelity when a compaction output file has a narrower seqno range than
// all the inputs. If we only limit capacity for each compaction output, we
// could be doing a lot of unnecessary recomputation in a large compaction
// (up to quadratic in the number of files). Thus, we do something in the
// middle: enforce a reasonably large constant size limit substantially
// larger than kMaxSeqnoTimePairsPerSST.
seqno_to_time_mapping_.SetCapacity(kMaxSeqnoToTimeEntries);
}
}

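Aside: in the Prepare() changes above, `SetMaxTimeSpan` plus `Enforce(now)` bound how far back the mapping reaches. A rough stand-in illustration of the max-time-span part follows; again, this is not the internal `SeqnoToTimeMapping` implementation.

// Stand-in illustration of enforcing a max time span on a seqno->time map.
// Entries older than (now - max_span) are dropped, except the newest such
// entry, which still bounds the age of the oldest preserved seqnos.
#include <cstdint>
#include <iostream>
#include <iterator>
#include <map>

using SeqnoTimeMap = std::map<uint64_t, uint64_t>;  // seqno -> unix time

void EnforceMaxTimeSpan(SeqnoTimeMap* m, uint64_t now, uint64_t max_span) {
  const uint64_t cutoff = now > max_span ? now - max_span : 0;
  auto it = m->begin();
  while (it != m->end()) {
    auto next = std::next(it);
    // Erase this entry only if the next one is also older than the cutoff,
    // so the newest too-old entry survives as a lower bound.
    if (next != m->end() && it->second < cutoff && next->second < cutoff) {
      it = m->erase(it);
    } else {
      break;
    }
  }
}

int main() {
  SeqnoTimeMap m{{10, 100}, {20, 200}, {30, 300}, {40, 400}};
  EnforceMaxTimeSpan(&m, /*now=*/450, /*max_span=*/200);  // cutoff = 250
  for (const auto& [seqno, time] : m) {
    std::cout << seqno << " -> " << time << "\n";  // 20, 30, 40 remain
  }
  return 0;
}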