Skip to content

Commit

Permalink
Merge #36424 #36488
Browse files Browse the repository at this point in the history
36424: libroach: bump up rocks write-backpressure triggers significantly r=dt a=dt

We belive these triggers are unlikely to be hit in normal operation.
However they are easy to hit during bulk ingestion of SSTs, as that can quickly
add large amounts of data or numbers of files to any given level. When Rocks
assumes levels got that way via regular writes getting ahead of the compactor,
these triggers then start slowing those writes, affecting foreground traffic
that, in reality, is not what hit those triggers.

We considered dynamically raising these only durion bulk ingestion, but given
that we don't believe they are currently being hit, simply raising them should
achieve the same effect without any added complexity.

Release note (performance improvement): avoid rocksdb slowing down write traffic during bulk-ingestions.

36488: roachprod: enable dns_hostnames in VPC and re-enable terraform config r=ajwerner a=ajwerner

It turns out the missing configuration option from the terraform configuration
was using dns hostnames. Now that it's enabled the terraform-created VPCs and
associated configuration works with roachtest. This PR fixes the terraform
config (which has been applied and tested) and sets it back to the default.

Also fixes a broken reference to a renamed terraform output. 

Release note: None

Co-authored-by: David Taylor <[email protected]>
Co-authored-by: Andrew Werner <[email protected]>
  • Loading branch information
3 people committed Apr 3, 2019
3 parents 762a6a5 + fee236d + 7600a2e commit 5323f4d
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 12 deletions.
20 changes: 17 additions & 3 deletions c-deps/libroach/options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -215,13 +215,27 @@ rocksdb::Options DBMakeOptions(DBOptions db_opts) {
// amplification.
options.level0_file_num_compaction_trigger = 2;
// Soft limit on number of L0 files. Writes are slowed down when
// this number is reached.
options.level0_slowdown_writes_trigger = 20;
// this number is reached. Bulk-ingestion can add lots of files
// suddenly, so setting this much higher should avoid spurious
// slowdowns to writes.
// TODO(dt): if/when we dynamically tune for bulk-ingestion, we
// could leave this at 20 and only raise it during ingest jobs.
options.level0_slowdown_writes_trigger = 200;
// Maximum number of L0 files. Writes are stopped at this
// point. This is set significantly higher than
// level0_slowdown_writes_trigger to avoid completely blocking
// writes.
options.level0_stop_writes_trigger = 32;
// TODO(dt): if/when we dynamically tune for bulk-ingestion, we
// could leave this at 30 and only raise it during ingest.
options.level0_stop_writes_trigger = 400;
// Maximum estimated pending compaction bytes before slowing writes.
// Default is 64gb but that can be hit during bulk-ingestion since it
// is based on assumptions about relative level sizes that do not hold
// during bulk-ingestion.
// TODO(dt): if/when we dynamically tune for bulk-ingestion, we
// could leave these as-is and only raise / disable them during ingest.
options.soft_pending_compaction_bytes_limit = 256 * 1073741824ull;
options.hard_pending_compaction_bytes_limit = 512 * 1073741824ull;
// Flush write buffers to L0 as soon as they are full. A higher
// value could be beneficial if there are duplicate records in each
// of the individual write buffers, but perf testing hasn't shown
Expand Down
4 changes: 3 additions & 1 deletion pkg/cmd/roachprod/vm/aws/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,17 +89,19 @@ const (

var defaultConfig = func() (cfg *awsConfig) {
cfg = new(awsConfig)
if err := json.Unmarshal(MustAsset("old.json"), cfg); err != nil {
if err := json.Unmarshal(MustAsset("config.json"), cfg); err != nil {
panic(errors.Wrap(err, "failed to embedded configuration"))
}
return cfg
}()

var defaultZones = []string{
"us-east-2a",
"us-east-2b",
"us-east-2c",
"us-west-2a",
"us-west-2b",
"us-west-2c",
"eu-west-2a",
"eu-west-2b",
"eu-west-2c",
Expand Down
9 changes: 8 additions & 1 deletion pkg/cmd/roachprod/vm/aws/keys.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"fmt"
"io/ioutil"
"os"
"strings"

"github.com/pkg/errors"
)
Expand Down Expand Up @@ -71,7 +72,13 @@ func (p *Provider) sshKeyImport(keyName string, region string) error {
"--key-name", keyName,
"--public-key-material", string(keyBytes),
}
return p.runJSONCommand(args, &data)
err = p.runJSONCommand(args, &data)
// If two roachprod instances run at the same time with the same key, they may
// race to upload the key pair.
if strings.Contains(err.Error(), "InvalidKeyPair.Duplicate") {
return nil
}
return err
}

// sshKeyName computes the name of the ec2 ssh key that we'll store the local user's public key in
Expand Down
8 changes: 5 additions & 3 deletions pkg/cmd/roachprod/vm/aws/support.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
package aws

import (
"bytes"
"encoding/json"
"io/ioutil"
"log"
Expand Down Expand Up @@ -144,15 +145,16 @@ func (p *Provider) runCommand(args []string) ([]byte, error) {
if p.opts.Profile != "" {
args = append(args[:len(args):len(args)], "--profile", p.opts.Profile)
}

var stderrBuf bytes.Buffer
cmd := exec.Command("aws", args...)

cmd.Stderr = &stderrBuf
output, err := cmd.Output()
if err != nil {
if exitErr, ok := err.(*exec.ExitError); ok {
log.Println(string(exitErr.Stderr))
}
return nil, errors.Wrapf(err, "failed to run: aws %s", strings.Join(args, " "))
return nil, errors.Wrapf(err, "failed to run: aws %s: stderr: %v",
strings.Join(args, " "), stderrBuf.String())
}
return output, nil
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/cmd/roachprod/vm/aws/terraform/aws-region/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ output "vpc_info" {
}
}

output "extended_vpc_info" {
output "region_info" {
value = {
"region" = "${var.region}"
"security_group" = "${aws_security_group.region_security_group.id}"
Expand Down
6 changes: 3 additions & 3 deletions pkg/cmd/roachprod/vm/aws/terraform/aws-region/network.tf
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,10 @@ data "aws_availability_zone" "zone_detail" {

# One VPC per region, with CIDR 10.<region ID>.0.0/8.
resource "aws_vpc" "region_vpc" {
cidr_block = "${cidrsubnet("10.0.0.0/8", 8, local.region_number[var.region])}"

cidr_block = "${cidrsubnet("10.0.0.0/8", 8, local.region_number[var.region])}"
enable_dns_hostnames = true
tags {
Name = "${var.label}-vpc-${var.region}"
Name = "${var.label}-vpc-${var.region}"
}
}

Expand Down

0 comments on commit 5323f4d

Please sign in to comment.