From a85e2074e2b0a8516be858849446536eba91aea9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrej=20Buko=C5=A1ek?= Date: Tue, 11 Aug 2020 11:56:00 +0200 Subject: [PATCH 1/3] go/extra/conbench: Add consensus benchmark command --- .changelog/3183.internal.md | 1 + go/.gitignore | 1 + go/Makefile | 3 +- go/extra/conbench/cmd/conbench.go | 573 ++++++++++++++++++++++++ go/extra/conbench/cmd/root.go | 53 +++ go/extra/conbench/conbench-plot.sh | 312 +++++++++++++ go/extra/conbench/conbench-test.sh | 71 +++ go/extra/conbench/main.go | 10 + go/oasis-net-runner/fixtures/default.go | 8 +- go/oasis-node/cmd/common/pprof/pprof.go | 3 + go/oasis-test-runner/oasis/entity.go | 4 +- go/oasis-test-runner/oasis/fixture.go | 3 + go/oasis-test-runner/oasis/validator.go | 6 +- 13 files changed, 1041 insertions(+), 7 deletions(-) create mode 100644 .changelog/3183.internal.md create mode 100644 go/extra/conbench/cmd/conbench.go create mode 100644 go/extra/conbench/cmd/root.go create mode 100755 go/extra/conbench/conbench-plot.sh create mode 100755 go/extra/conbench/conbench-test.sh create mode 100644 go/extra/conbench/main.go diff --git a/.changelog/3183.internal.md b/.changelog/3183.internal.md new file mode 100644 index 00000000000..4b3409fc7d0 --- /dev/null +++ b/.changelog/3183.internal.md @@ -0,0 +1 @@ +go/extra/conbench: Add consensus benchmark subcommand diff --git a/go/.gitignore b/go/.gitignore index 22ff16ef970..e7230e12e5a 100644 --- a/go/.gitignore +++ b/go/.gitignore @@ -14,3 +14,4 @@ registry/gen_vectors/gen_vectors staking/gen_vectors/gen_vectors extra/extract-metrics/extract-metrics +extra/conbench/conbench diff --git a/go/Makefile b/go/Makefile index 8cdf045262a..ec9e73cf452 100644 --- a/go/Makefile +++ b/go/Makefile @@ -17,7 +17,8 @@ generate: # Build. # List of Go binaries to build. go-binaries := oasis-node oasis-test-runner oasis-net-runner oasis-remote-signer \ - extra/extract-metrics oasis-test-runner/scenario/pluginsigner/example_signer_plugin + oasis-test-runner/scenario/pluginsigner/example_signer_plugin \ + extra/extract-metrics extra/conbench $(go-binaries): @$(ECHO) "$(MAGENTA)*** Building $@...$(OFF)" diff --git a/go/extra/conbench/cmd/conbench.go b/go/extra/conbench/cmd/conbench.go new file mode 100644 index 00000000000..00dcda1d7cf --- /dev/null +++ b/go/extra/conbench/cmd/conbench.go @@ -0,0 +1,573 @@ +package cmd + +import ( + "context" + "crypto" + "fmt" + "math" + "math/rand" + "sort" + "strings" + "sync/atomic" + "time" + + "github.com/spf13/cobra" + flag "github.com/spf13/pflag" + "github.com/spf13/viper" + + "github.com/oasisprotocol/oasis-core/go/common/crypto/drbg" + "github.com/oasisprotocol/oasis-core/go/common/crypto/mathrand" + "github.com/oasisprotocol/oasis-core/go/common/crypto/signature" + memorySigner "github.com/oasisprotocol/oasis-core/go/common/crypto/signature/signers/memory" + "github.com/oasisprotocol/oasis-core/go/common/entity" + "github.com/oasisprotocol/oasis-core/go/common/logging" + "github.com/oasisprotocol/oasis-core/go/common/quantity" + consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" + "github.com/oasisprotocol/oasis-core/go/consensus/api/transaction" + "github.com/oasisprotocol/oasis-core/go/control/api" + "github.com/oasisprotocol/oasis-core/go/oasis-node/cmd/common" + cmdFlags "github.com/oasisprotocol/oasis-core/go/oasis-node/cmd/common/flags" + cmdGrpc "github.com/oasisprotocol/oasis-core/go/oasis-node/cmd/common/grpc" + cmdSigner "github.com/oasisprotocol/oasis-core/go/oasis-node/cmd/common/signer" + staking "github.com/oasisprotocol/oasis-core/go/staking/api" +) + +const ( + // Number of test accounts to create. + // This also controls the number of parallel transfers. + CfgNumAccounts = "num_accounts" + + // Number of samples (transfers) per account. + CfgNumSamples = "num_samples" + + // Timeout for SubmitTx. + CfgSubmitTxTimeout = "submit_timeout" + + // Use test entity for funding? + CfgUseTestEntity = "use_test_entity" + + // Gas price (should be set to the minimum gas price of validators). + CfgGasPrice = "gas_price" + + // Only fund accounts and exit (useful for multiple runs of the benchmark, + // since funding is the slowest part). + CfgFundAndExit = "fund_and_exit" + + // Only refund funding account and exit (counterpart of the above). + CfgRefundAndExit = "refund_and_exit" + + // Skip funding accounts. + CfgSkipFunding = "skip_funding" + + // Seed to use for the DRBG. + CfgSeed = "seed" + + // Placeholder value for cachedNonce and cachedGas in localAccount struct + // when they haven't been initialized yet. + notYetCached = uint64(math.MaxUint64) +) + +var ( + logger = logging.GetLogger("cmd/conbench") + conbenchCmd = &cobra.Command{ + Use: "conbench", + Short: "benchmark consensus layer", + Long: "Runs a consensus layer benchmark.", + RunE: doRun, + } +) + +type localAccount struct { + signer signature.Signer + addr staking.Address + cachedNonce uint64 + cachedGas uint64 +} + +func transfer(ctx context.Context, cc consensus.ClientBackend, from *localAccount, toAddr staking.Address, amount uint64, noCache bool) error { + var err error + + // Get sender's nonce if not yet cached (or if we're ignoring cache). + nonce := from.cachedNonce + if nonce == notYetCached || noCache { + nonce, err = cc.GetSignerNonce(ctx, &consensus.GetSignerNonceRequest{ + AccountAddress: from.addr, + Height: consensus.HeightLatest, + }) + if err != nil { + return fmt.Errorf("unable to get sender's nonce: %w", err) + } + atomic.StoreUint64(&from.cachedNonce, nonce) + } + + // Construct transfer transaction. + transfer := staking.Transfer{ + To: toAddr, + } + if err = transfer.Amount.FromUint64(amount); err != nil { + return fmt.Errorf("unable to convert given amount from uint64: %w", err) + } + + var fee transaction.Fee + tx := staking.NewTransferTx(nonce, &fee, &transfer) + + // Estimate gas if not yet cached (or if we're ignoring cache). + gas := from.cachedGas + if gas == notYetCached || noCache { + estGas, grr := cc.EstimateGas(ctx, &consensus.EstimateGasRequest{ + Signer: from.signer.Public(), + Transaction: tx, + }) + if grr != nil { + return fmt.Errorf("unable to estimate gas: %w", grr) + } + gas = uint64(estGas) + atomic.StoreUint64(&from.cachedGas, gas) + } + + tx.Fee.Gas = transaction.Gas(gas) + if err = tx.Fee.Amount.FromUint64(gas * viper.GetUint64(CfgGasPrice)); err != nil { + return fmt.Errorf("unable to convert fee amount from uint64: %w", err) + } + + signedTx, err := transaction.Sign(from.signer, tx) + if err != nil { + return fmt.Errorf("unable to sign transfer transaction: %w", err) + } + + // Increment cached nonce. + atomic.AddUint64(&from.cachedNonce, 1) + + // Submit with timeout to avoid blocking forever if the client node + // is skipping CheckTx checks. The timeout should be set large enough + // for the network to handle the submission. + timeout := viper.GetDuration(CfgSubmitTxTimeout) + submissionCtx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + if err = cc.SubmitTx(submissionCtx, signedTx); err != nil { + return err + } + return nil +} + +func refund(ctx context.Context, cc consensus.ClientBackend, sc staking.Backend, from *localAccount, toAddr staking.Address) error { + // Fetch account info. + acct, err := sc.Account(ctx, &staking.OwnerQuery{ + Height: consensus.HeightLatest, + Owner: from.addr, + }) + if err != nil { + return fmt.Errorf("unable to fetch account balance: %w", err) + } + + // Since we're dealing with tiny amounts, we can afford this hack. + amount := acct.General.Balance.ToBigInt().Uint64() + + if amount == 0 { + // Nothing to refund. + return nil + } + + // We don't want refunds to fail, so disable caching. + if err = transfer(ctx, cc, from, toAddr, amount, true); err != nil { + return fmt.Errorf("unable to refund from account: %w", err) + } + + return nil +} + +func refundMultiple(ctx context.Context, cc consensus.ClientBackend, sc staking.Backend, account []localAccount, toAddr staking.Address) { + // Do the refunds in parallel. + doneCh := make(chan bool, len(account)) + for a := range account { + go func(a int) { + if err := refund(ctx, cc, sc, &account[a], toAddr); err != nil { + // Tough luck. + logger.Error("unable to refund from account", + "account_address", account[a].addr, + "err", err, + ) + } + doneCh <- true + }(a) + } + + // Wait for all goroutines to finish. + for range account { + <-doneCh + } +} + +func doRun(cmd *cobra.Command, args []string) error { // nolint: gocyclo + cmd.SilenceUsage = true + + if err := common.Init(); err != nil { + common.EarlyLogAndExit(err) + } + + numAccounts := viper.GetUint64(CfgNumAccounts) + numSamples := viper.GetUint64(CfgNumSamples) + + if numAccounts < 1 { + return fmt.Errorf("number of accounts must be >= 1") + } + if numSamples < 3 { + return fmt.Errorf("number of samples must be >= 3") + } + + fundAndExit := viper.GetBool(CfgFundAndExit) + refundAndExit := viper.GetBool(CfgRefundAndExit) + skipFunding := viper.GetBool(CfgSkipFunding) + + if fundAndExit && refundAndExit { + return fmt.Errorf("cannot both fund and refund at the same time") + } + if fundAndExit && skipFunding { + return fmt.Errorf("exiting") + } + if refundAndExit && skipFunding { + return fmt.Errorf("--skip_funding has no effect with --refund_and_exit") + } + + ctx := context.Background() + + // Connect to node. + logger.Debug("dialing node", "addr", viper.GetString(cmdGrpc.CfgAddress)) + conn, err := cmdGrpc.NewClient(cmd) + if err != nil { + return fmt.Errorf("unable to connect to node: %w", err) + } + defer conn.Close() + + cc := consensus.NewConsensusClient(conn) + sc := staking.NewStakingClient(conn) + ncc := api.NewNodeControllerClient(conn) + + // Set chain context from genesis document obtained from the node. + genDoc, err := cc.GetGenesisDocument(ctx) + if err != nil { + return fmt.Errorf("unable to obtain genesis document from node: %w", err) + } + genDoc.SetChainContext() + + // Create new DRBG. + src, err := drbg.New(crypto.SHA512, []byte(viper.GetString(CfgSeed)), nil, []byte("consensus benchmark")) + if err != nil { + return fmt.Errorf("unable to create deterministic random generator: %w", err) + } + rng := rand.New(mathrand.New(src)) + + // Wait for the node to sync. + logger.Debug("waiting for node sync") + if err = ncc.WaitSync(context.Background()); err != nil { + return fmt.Errorf("unable to wait for node sync: %w", err) + } + logger.Debug("node synced") + + // Create multiple accounts. + account := make([]localAccount, numAccounts) + msf := memorySigner.NewFactory() + for a := range account { + signer, grr := msf.Generate(signature.SignerEntity, rng) + if grr != nil { + return fmt.Errorf("unable to generate account %d: %w", a, grr) + } + account[a].signer = signer + account[a].addr = staking.NewAddress(signer.Public()) + account[a].cachedNonce = notYetCached + account[a].cachedGas = notYetCached + } + + var fundingSigner signature.Signer + + if !skipFunding { + if viper.GetBool(CfgUseTestEntity) { + // Use test entity for funding. + _, fundingSigner, _ = entity.TestEntity() + } else { + // Use given signer for funding. + signerDir, grr := cmdSigner.CLIDirOrPwd() + if grr != nil { + return fmt.Errorf("failed to retrieve signer dir: %w", grr) + } + signerFactory, grr := cmdSigner.NewFactory(cmdSigner.Backend(), signerDir, signature.SignerEntity) + if grr != nil { + return fmt.Errorf("failed to create signer factory: %w", grr) + } + fundingSigner, grr = signerFactory.Load(signature.SignerEntity) + if grr != nil { + return fmt.Errorf("failed to load signer: %w", grr) + } + } + } else { + // We won't need a signer, since we're not funding, fake it + // with the test entity instead. + _, fundingSigner, _ = entity.TestEntity() + } + + fundingAddr := staking.NewAddress(fundingSigner.Public()) + + if refundAndExit { + logger.Info("refunding money") + refundMultiple(ctx, cc, sc, account, fundingAddr) + logger.Info("money refunded") + return nil + } + + if !skipFunding { + fundingAcct := localAccount{ + signer: fundingSigner, + addr: fundingAddr, + cachedNonce: notYetCached, + cachedGas: notYetCached, + } + + // Check if funding account has enough funds. + logger.Debug("checking if funding account has enough funds") + var fundingAcctInfo *staking.Account + fundingAcctInfo, err = sc.Account(ctx, &staking.OwnerQuery{ + Height: consensus.HeightLatest, + Owner: fundingAddr, + }) + if err != nil { + return fmt.Errorf("unable to fetch funding account balance: %w", err) + } + // Estimate gas. + dummyXfer := &staking.Transfer{To: fundingAddr} + if err = dummyXfer.Amount.FromUint64(1); err != nil { + return fmt.Errorf("unable to convert uint64 to amount: %w", err) + } + var estGas transaction.Gas + estGas, err = cc.EstimateGas(ctx, &consensus.EstimateGasRequest{ + Signer: fundingSigner.Public(), + Transaction: staking.NewTransferTx(fundingAcctInfo.General.Nonce, nil, dummyXfer), + }) + if err != nil { + return fmt.Errorf("unable to estimate gas: %w", err) + } + // Each account needs additional tokens for fees. + // An additional fee is allocated for the refund at the end. + perAccountFunds := numSamples + (numSamples+1)*(viper.GetUint64(CfgGasPrice)*uint64(estGas)) + requiredFunds := quantity.NewFromUint64(numAccounts * perAccountFunds) + availableFunds := fundingAcctInfo.General.Balance + if availableFunds.Cmp(requiredFunds) < 0 { + return fmt.Errorf("funding account has insufficient funds (%s required, %s available)", requiredFunds.String(), availableFunds.String()) + } + logger.Debug("funding account has enough funds", + "required", requiredFunds.String(), + "available", availableFunds.String(), + ) + + // Fund all accounts from the funding account. + logger.Info("funding test accounts", + "num_accounts", numAccounts, + ) + for a := range account { + // Populate cached gas estimates. + account[a].cachedGas = uint64(estGas) + + // Each account gets perAccountFunds tokens. + if errr := transfer(ctx, cc, &fundingAcct, account[a].addr, perAccountFunds, true); errr != nil { + // An error has happened while funding, make sure to refund the + // funding account from the accounts funded until this point. + logger.Error("error while funding, attempting to refund account") + refundMultiple(ctx, cc, sc, account[0:a], fundingAddr) + return fmt.Errorf("unable to fund account %d: %w", a, errr) + } + } + if fundAndExit { + return nil + } + } + + logger.Info("starting benchmark", "num_accounts", numAccounts) + startStatus, err := cc.GetStatus(ctx) + if err != nil { + if !skipFunding { + logger.Info("refunding money") + refundMultiple(ctx, cc, sc, account, fundingAddr) + logger.Info("money refunded") + } + return fmt.Errorf("unable to get status: %w", err) + } + benchmarkStartHeight := startStatus.LatestHeight + benchmarkStartT := time.Now() + + // Submit time is the time required to submit the transaction and + // wait for it to be included in a block. + var ( + totalSubmitTimeNs uint64 + numSubmitSamples uint64 + numSubmitErrors uint64 + ) + + // Perform benchmark in parallel, one goroutine per account. + doneCh := make(chan bool, numAccounts*numSamples) + for a := range account { + go func(idx uint64) { + var noCache bool + for s := uint64(0); s < numSamples; s++ { + fromIdx := idx + toIdx := idx + toAddr := account[toIdx].addr + + startT := time.Now() + if err = transfer(ctx, cc, &account[fromIdx], toAddr, 1, noCache); err != nil { + atomic.AddUint64(&numSubmitErrors, 1) + // Disable cache for the next sample, just in case + // we messed up the nonce or if the gas cost changed. + noCache = true + doneCh <- true + continue + } + atomic.AddUint64(&totalSubmitTimeNs, uint64(time.Since(startT).Nanoseconds())) + atomic.AddUint64(&numSubmitSamples, 1) + doneCh <- true + noCache = false + } + }(uint64(a)) + } + + // Wait for all goroutines to finish. + for i := uint64(0); i < numAccounts*numSamples; i++ { + <-doneCh + } + + benchmarkDuration := time.Since(benchmarkStartT) + stopStatus, err := cc.GetStatus(ctx) + if err != nil { + if !skipFunding { + logger.Info("refunding money") + refundMultiple(ctx, cc, sc, account, fundingAddr) + logger.Info("money refunded") + } + return fmt.Errorf("unable to get status: %w", err) + } + benchmarkStopHeight := stopStatus.LatestHeight + + // Go through all transactions from benchmarkStartHeight to + // benchmarkStopHeight and calculate the average number of + // transactions per second and other stats. + // Note that we count all transactions, not just the ones made + // by this benchmark. + var totalTxs uint64 + var maxTxs uint64 + minTxs := uint64(18446744073709551615) + txsPerBlock := make([]uint64, 0) + txBytesPerBlock := make([]uint64, 0) + blockDeltaT := make([]float64, 0) + var prevBlockT time.Time + for height := benchmarkStartHeight; height <= benchmarkStopHeight; height++ { + // Count number of transactions. + txs, grr := cc.GetTransactions(ctx, height) + if grr != nil { + logger.Error("GetTransactions failed", "err", grr, "height", height) + continue + } + lenTxs := uint64(len(txs)) + totalTxs += lenTxs + txsPerBlock = append(txsPerBlock, lenTxs) + if lenTxs > maxTxs { + maxTxs = lenTxs + } + if lenTxs < minTxs { + minTxs = lenTxs + } + + // Count size of transactions in bytes. + var blkSizeBytes uint64 + for _, tx := range txs { + blkSizeBytes += uint64(len(tx)) + } + txBytesPerBlock = append(txBytesPerBlock, blkSizeBytes) + + // Calculate time between blocks. + blk, grr := cc.GetBlock(ctx, height) + if grr != nil { + logger.Error("GetBlock failed", "err", grr, "height", height) + continue + } + if prevBlockT.IsZero() { + prevBlockT = blk.Time + } + blockDeltaT = append(blockDeltaT, blk.Time.Sub(prevBlockT).Seconds()) + prevBlockT = blk.Time + } + + tps := float64(totalTxs) / benchmarkDuration.Seconds() + + // Calculate median number of transactions. + sort.Slice(txsPerBlock, func(i, j int) bool { return txsPerBlock[i] < txsPerBlock[j] }) + medianTxs := txsPerBlock[len(txsPerBlock)/2] + + avgSubmitTimeNs := float64(totalSubmitTimeNs) / float64(numSubmitSamples) + + logger.Info("benchmark finished", + // Number of accounts involved in benchmark (level of parallelism). + "num_accounts", numAccounts, + // Average time (in seconds) required to submit a transaction and wait + // for it to be included in a block. + "avg_submit_time_s", avgSubmitTimeNs/1.0e9, + // Transactions per second (this includes all transactions that + // appeared on the network during the time of the benchmark). + "transactions_per_second", tps, + // Number of successful SubmitTx calls (i.e. transfer transactions). + "submit_samples", numSubmitSamples, + // Number of unsuccessful SubmitTx calls. + "submit_errors", numSubmitErrors, + // Duration of the entire benchmark (in seconds). + "bench_duration_s", benchmarkDuration.Seconds(), + // Number of blocks seen on the network during the benchmark. + "num_blocks", len(txsPerBlock), + // Minimum number of transactions per block (during the benchmark). + "min_txs_per_block", minTxs, + // Maximum number of transactions per block (during the benchmark). + "max_txs_per_block", maxTxs, + // Average number of transactions per block (during the benchmark). + "avg_txs_per_block", float64(totalTxs)/float64(len(txsPerBlock)), + // Median number of transactions per block (during the benchmark). + "median_txs_per_block", medianTxs, + // Total number of transactions observed during the benchmark. + "total_txs", totalTxs, + // Number of transactions in each block (block size). + "block_sizes", strings.Trim(fmt.Sprint(txsPerBlock), "[]"), + // Size of all transactions in each block (in bytes). + "block_sizes_bytes", strings.Trim(fmt.Sprint(txBytesPerBlock), "[]"), + // Time delta between blocks (in seconds). + "block_delta_t_s", strings.Trim(fmt.Sprint(blockDeltaT), "[]"), + ) + + // Refund money into original funding account. + if !skipFunding { + logger.Info("refunding money") + refundMultiple(ctx, cc, sc, account, fundingAddr) + logger.Info("money refunded") + } + + return nil +} + +// Register registers the conbench sub-command. +func RegisterConbenchCmd(parentCmd *cobra.Command) { + parentCmd.AddCommand(conbenchCmd) +} + +func init() { + fs := flag.NewFlagSet("", flag.ContinueOnError) + fs.Uint64(CfgNumAccounts, 10, "Number of accounts to create for benchmarking (also level of parallelism)") + fs.Uint64(CfgNumSamples, 30, "Number of samples (transfers) per account") + fs.Duration(CfgSubmitTxTimeout, 10*time.Second, "Timeout for SubmitTx (set this based on network parameters)") + fs.Bool(CfgUseTestEntity, false, "Use test entity for funding (only for testing)") + fs.Uint64(CfgGasPrice, 1, "Gas price (should be set to the minimum gas price of validators)") + fs.Bool(CfgFundAndExit, false, "Only fund accounts and exit") + fs.Bool(CfgRefundAndExit, false, "Only refund funding account and exit") + fs.Bool(CfgSkipFunding, false, "Skip funding accounts") + fs.String(CfgSeed, "consensus benchmark random seeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeed", "Seed to use for the DRBG (change this if you're running multiple benchmarks in parallel)") + _ = viper.BindPFlags(fs) + conbenchCmd.Flags().AddFlagSet(fs) + + conbenchCmd.Flags().AddFlagSet(cmdGrpc.ClientFlags) + conbenchCmd.Flags().AddFlagSet(cmdFlags.DebugTestEntityFlags) + conbenchCmd.Flags().AddFlagSet(cmdFlags.DebugDontBlameOasisFlag) + conbenchCmd.Flags().AddFlagSet(cmdSigner.CLIFlags) +} diff --git a/go/extra/conbench/cmd/root.go b/go/extra/conbench/cmd/root.go new file mode 100644 index 00000000000..299a7fa82a2 --- /dev/null +++ b/go/extra/conbench/cmd/root.go @@ -0,0 +1,53 @@ +// Root command for the consensus benchmark. +package cmd + +import ( + "fmt" + + "github.com/spf13/cobra" + flag "github.com/spf13/pflag" + "github.com/spf13/viper" + + "github.com/oasisprotocol/oasis-core/go/common/logging" + "github.com/oasisprotocol/oasis-core/go/common/version" + "github.com/oasisprotocol/oasis-core/go/oasis-node/cmd/common" +) + +const cfgLogLevel = "log.level" + +var ( + rootCmd = &cobra.Command{ + Use: "conbench", + Short: "Consensus benchmark", + Version: version.SoftwareVersion, + } + + rootFlags = flag.NewFlagSet("", flag.ContinueOnError) +) + +// RootCommand returns the root (top level) cobra.Command. +func RootCommand() *cobra.Command { + return rootCmd +} + +// Execute spawns the main entry point after handling the command line arguments. +func Execute() { + var logLevel logging.Level + if err := logLevel.Set(viper.GetString(cfgLogLevel)); err != nil { + common.EarlyLogAndExit(fmt.Errorf("root: failed to set log level: %w", err)) + } + + if err := rootCmd.Execute(); err != nil { + common.EarlyLogAndExit(err) + } +} + +func init() { + logLevel := logging.LevelInfo + rootFlags.Var(&logLevel, cfgLogLevel, "log level") + _ = viper.BindPFlags(rootFlags) + rootCmd.PersistentFlags().AddFlagSet(rootFlags) + + // Register all of the sub-commands. + RegisterConbenchCmd(rootCmd) +} diff --git a/go/extra/conbench/conbench-plot.sh b/go/extra/conbench/conbench-plot.sh new file mode 100755 index 00000000000..11e1b2ab79a --- /dev/null +++ b/go/extra/conbench/conbench-plot.sh @@ -0,0 +1,312 @@ +#!/usr/bin/env bash +# +# Run consensus benchmark and plot results. +# +# Outputs are saved to the current working directory: +# conbench-data.TS.txt -- raw output with benchmark results from conbench +# conbench-tps.TS.png -- transactions per second +# conbench-avg-submit-time.TS.png -- average time required to submit txn +# conbench-both.TS.png -- both TPS and avg time on same graph +# conbench-block-size.TS.png -- block sizes (min/avg/max) +# conbench-block-sizes.TS.png -- 3D graph of block sizes per num of accounts +# conbench-block-sizes-bytes.TS.png -- 3D graph of block sizes in bytes +# conbench-block-times.TS.png -- 3D graph of time between blocks per #accts +# conbench-N.prof -- if profiling enabled, profile output for N accounts +# conbench-N.block.prof -- if profiling enabled, blocking profile output +# conbench-N.mutex.prof -- if profiling enabled, mutex contention profile +# +# The TS in filenames above represents the timestamp when the script was run. +# +# Most interesting output files are probably conbench-both.TS.png, +# conbench-block-size.TS.png, and conbench-block-sizes.TS.png. +# +# The environment variable OASIS_NODE_GRPC_ADDR should be set to the node's +# GRPC address, e.g. "unix:/tmp/foo/net-runner/network/client-0/internal.sock". +# +# If you want to run this script with a net runner network: +# conbench-plot.sh --use_test_entity +# Alternatively, you can run the "conbench-test.sh" script, which also sets up +# a default net runner network for you. +# +# If you want to run this script on a real network: +# conbench-plot.sh --signer.dir /path/to/your/entity/files/dir +# +# If you want to profile runs, edit the PROF variable below and make sure the +# node is run with the additional argument `--pprof.bind 127.0.0.1:10101`. +# If using the test runner and the conbench-test.sh, add the argument to the +# list in consensusValidator() in go/oasis-test-runner/oasis/args.go. +# + +set -o errexit -o nounset -o pipefail +trap "exit 1" INT + +# Output file names. +NOW=`date +%Y%m%d-%H%M%S` +RAW_DATA="conbench-data.${NOW}.txt" +TPS_PLOT="conbench-tps.${NOW}.png" +ST_PLOT="conbench-avg-submit-time.${NOW}.png" +BOTH_PLOT="conbench-both.${NOW}.png" +BS_PLOT="conbench-block-size.${NOW}.png" +BSS_PLOT="conbench-block-sizes.${NOW}.png" +BSSB_PLOT="conbench-block-sizes-bytes.${NOW}.png" +BTS_PLOT="conbench-block-times.${NOW}.png" + +# Get the root directory of the repository. +ROOT="$(cd $(dirname $0)/../../../; pwd -P)" + +# ANSI escape codes to brighten up the output. +RED=$'\e[31;1m' +GRN=$'\e[32;1m' +OFF=$'\e[0m' + + +CONBENCH="${ROOT}/go/extra/conbench/conbench" + +# Check if we have all the tools we need. +if [[ "$(which gnuplot)" == "" ]]; then + printf "${RED}ERROR: gnuplot not installed. Install it and try again.${OFF}\n" + exit 1 +fi +if [[ ! -x "${CONBENCH}" ]]; then + printf "${RED}ERROR: conbench command isn't built. Run 'make' in '${ROOT}/go' and try again.${OFF}\n" +fi + +TPS_DATA_FILE="$(mktemp -t oasis-conbench-tps-plot-XXXXXXXXXX)" +ST_DATA_FILE="$(mktemp -t oasis-conbench-st-plot-XXXXXXXXXX)" +BS_DATA_FILE="$(mktemp -t oasis-conbench-bs-plot-XXXXXXXXXX)" +BSS_DATA_FILE="$(mktemp -t oasis-conbench-bss-plot-XXXXXXXXXX)" +BSSB_DATA_FILE="$(mktemp -t oasis-conbench-bssb-plot-XXXXXXXXXX)" +BTS_DATA_FILE="$(mktemp -t oasis-conbench-bts-plot-XXXXXXXXXX)" + +ARGS="$@" + +rm -f "${RAW_DATA}" +touch "${RAW_DATA}" + +conbench() { + ${CONBENCH} conbench conbench \ + --address ${OASIS_NODE_GRPC_ADDR} \ + ${ARGS} \ + --log.level INFO \ + --num_samples 30 \ + $@ +} + +run_bench() { + local num_accounts=$1 + local no_plot=$2 + local output="$(mktemp -t oasis-conbench-output-${num_accounts}-XXXXXXXXXX)" + + # Run benchmark. + printf "${GRN}*** Running benchmark for ${num_accounts} accounts...${OFF}\n" + conbench --skip_funding --num_accounts ${num_accounts} > "${output}" + + local results=$(fgrep 'msg="benchmark finished"' "${output}") + echo "${results}" | tee -a "${RAW_DATA}" + + local tps=$(echo "${results}" | sed -r 's/[[:alnum:]_]+=/\n&/g' | awk -F= '$1=="transactions_per_second"{print $2}') + + local st=$(echo "${results}" | sed -r 's/[[:alnum:]_]+=/\n&/g' | awk -F= '$1=="avg_submit_time_s"{print $2}') + + local min_bs=$(echo "${results}" | sed -r 's/[[:alnum:]_]+=/\n&/g' | awk -F= '$1=="min_txs_per_block"{print $2}') + local avg_bs=$(echo "${results}" | sed -r 's/[[:alnum:]_]+=/\n&/g' | awk -F= '$1=="avg_txs_per_block"{print $2}') + local max_bs=$(echo "${results}" | sed -r 's/[[:alnum:]_]+=/\n&/g' | awk -F= '$1=="max_txs_per_block"{print $2}') + + local bss=$(echo "${results}" | sed -r 's/[[:alnum:]_]+=/\n&/g' | awk -F= '$1=="block_sizes"{print $2}' | tr -d '"') + + local bssb=$(echo "${results}" | sed -r 's/[[:alnum:]_]+=/\n&/g' | awk -F= '$1=="block_sizes_bytes"{print $2}' | tr -d '"') + + local bts=$(echo "${results}" | sed -r 's/[[:alnum:]_]+=/\n&/g' | awk -F= '$1=="block_delta_t_s"{print $2}' | tr -d '"') + + rm "${output}" + + if [[ "${no_plot}" == "no_plot" ]]; then + return + fi + + echo "${num_accounts} ${tps}" >> "${TPS_DATA_FILE}" + echo "${num_accounts} ${st}" >> "${ST_DATA_FILE}" + echo "${num_accounts} ${min_bs} ${avg_bs} ${max_bs}" >> "${BS_DATA_FILE}" + + local blk=0 + for bs in ${bss} + do + echo "${num_accounts} ${blk} ${bs}" >> "${BSS_DATA_FILE}" + blk=$((blk+1)) + done + + blk=0 + for bsb in ${bssb} + do + echo "${num_accounts} ${blk} ${bsb}" >> "${BSSB_DATA_FILE}" + blk=$((blk+1)) + done + + blk=0 + for bt in ${bts} + do + echo "${num_accounts} ${blk} ${bt}" >> "${BTS_DATA_FILE}" + blk=$((blk+1)) + done +} + +ACCT="10, 50, 100, 175, 250, 325, 425, 500, 650, 800, 900" + +ACCTN=$(echo "${ACCT}" | tr -d ',') +MAX_ACCTS=$(echo "${ACCTN}" | tr ' ' '\n' | sort -nr | head -1) +NUM_ACCT_RUNS=$(echo "${ACCTN}" | wc -w) + +# Set this to a list of runs you wish to profile (e.g. "175 500 800") +# or to the string "no" to disable. +PROF="no" +if [[ "${PROF}" != "no" ]]; then + NUM_PROF_RUNS=$(echo "${PROF}" | wc -w) +else + NUM_PROF_RUNS=0 +fi + +NUM_RUNS=$(( NUM_ACCT_RUNS + NUM_PROF_RUNS )) + +printf "${GRN}*** Funding ${MAX_ACCTS} accounts for ${NUM_RUNS} runs...${OFF}\n" +# The gas price is artificially inflated to provide enough tokens to pay fees +# for all the runs. +conbench --num_accounts ${MAX_ACCTS} --gas_price ${NUM_RUNS} --fund_and_exit + +for a in ${ACCTN} +do + run_bench $a plot + if grep -Fow "$a" <<< "${PROF}"; then + # Go's pprof server doesn't seem to have a way to start/stop profiling, + # it can only do a N second run, which is unfortunate. + # + # To work around this, we do a normal run first, then parse how many + # seconds it needed, round it up to the nearest integer, and finally + # do a second run, profiling for that number of seconds. Ugh. + + seconds=$(fgrep 'msg="benchmark finished" num_accounts='$a "${RAW_DATA}" | sed -r 's/[[:alnum:]_]+=/\n&/g' | awk -F= '$1=="bench_duration_s"{print $2}' | awk '{print int($1+0.5)}') + + printf "${GRN}*** Re-running benchmark for ${a} accounts with profiling (${seconds} s)...${OFF}\n" + + curl -so conbench-$a.prof 'http://127.0.0.1:10101/debug/pprof/profile?seconds='${seconds} & + run_bench $a no_plot + curl -so conbench-$a.block.prof 'http://127.0.0.1:10101/debug/pprof/block' + go tool pprof -png -lines -noinlines conbench-$a.block.prof > conbench-$a.block.png + curl -so conbench-$a.mutex.prof 'http://127.0.0.1:10101/debug/pprof/mutex' + go tool pprof -png -lines -noinlines conbench-$a.mutex.prof > conbench-$a.mutex.png + fi +done + + +# Plot TPS graph. +gnuplot <<- EOF +set title "Transactions per second" +set xlabel "Number of parallel accounts" +set xtics (${ACCT}) +set ylabel "transactions/s" textcolor lt 1 +set autoscale y +set grid +set term png +set output "${TPS_PLOT}" +plot '${TPS_DATA_FILE}' using 1:2 with linespoint notitle +EOF + +# Plot avg submit time graph. +gnuplot <<- EOF +set title "Average SubmitTx time" +set xlabel "Number of parallel accounts" +set xtics (${ACCT}) +set ylabel "SubmitTx time [s]" textcolor lt 1 +set autoscale y +set grid +set term png +set output "${ST_PLOT}" +plot '${ST_DATA_FILE}' using 1:2 with linespoint notitle +EOF + +# Plot both on a single graph. +gnuplot <<- EOF +set title "Transactions per second and average SubmitTx time" +set xlabel "Number of parallel accounts" +set xtics (${ACCT}) +set ylabel "transactions/s" textcolor lt 1 +set y2label "SubmitTx time [s]" textcolor lt 2 +set y2tics nomirror +set autoscale y +set autoscale y2 +set grid +set term png +set output "${BOTH_PLOT}" +plot '${TPS_DATA_FILE}' using 1:2 axes x1y1 with linespoint notitle, '${ST_DATA_FILE}' using 1:2 axes x1y2 with linespoint notitle +EOF + +# Plot block sizes (min/avg/max). +gnuplot <<- EOF +set title "Block size" +set xlabel "Number of parallel accounts" +set xtics (${ACCT}) +set ylabel "Block size [number of transactions]" +set autoscale y +set grid +set key left top +set term png +set output "${BS_PLOT}" +plot '${BS_DATA_FILE}' using 1:2 with linespoint title "min", '${BS_DATA_FILE}' using 1:3 with linespoint title "avg", '${BS_DATA_FILE}' using 1:4 with linespoint title "max" +EOF + +# Plot block sizes (number of transactions per block per number of accounts). +gnuplot <<- EOF +set title "Block size" +set xlabel "Number of parallel accounts" rotate parallel offset 0,-2,0 +set xtics (${ACCT}) offset 0,-1,0 +set ylabel "Block number" rotate parallel offset 0,-1,0 +set zlabel "Block size [number of transactions]" rotate parallel offset 1,0,0 +set ticslevel 0 +set autoscale y +set autoscale z +set grid +set term png +set output "${BSS_PLOT}" +set palette defined (0 "red", 1 "yellow", 2 "cyan", 3 "blue", 4 "dark-violet") +splot '${BSS_DATA_FILE}' with impulses lw 2 lc palette notitle +EOF + +# Plot block sizes in bytes. +gnuplot <<- EOF +set title "Block size in bytes" +set xlabel "Number of parallel accounts" rotate parallel offset 0,-2,0 +set xtics (${ACCT}) offset 0,-1,0 +set ylabel "Block number" rotate parallel offset 0,-1,0 +set zlabel "Block size [bytes]" rotate parallel +set ticslevel 0 +set autoscale y +set autoscale z +set grid +set term png +set output "${BSSB_PLOT}" +set palette defined (0 "red", 1 "yellow", 2 "cyan", 3 "blue", 4 "dark-violet") +splot '${BSSB_DATA_FILE}' with impulses lw 2 lc palette notitle +EOF + +# Plot time between blocks. +gnuplot <<- EOF +set title "Time between blocks" +set xlabel "Number of parallel accounts" rotate parallel offset 0,-2,0 +set xtics (${ACCT}) offset 0,-1,0 +set ylabel "Block number" rotate parallel offset 0,-1,0 +set zlabel "Time delta to previous block [s]" rotate parallel offset 1,0,0 +set ticslevel 0 +set autoscale y +set autoscale z +set grid +set term png +set output "${BTS_PLOT}" +set palette defined (0 "dark-violet", 1 "blue", 2 "cyan", 3 "yellow", 4 "red") +splot '${BTS_DATA_FILE}' with impulses lw 2 lc palette notitle +EOF + +rm "${TPS_DATA_FILE}" "${ST_DATA_FILE}" "${BS_DATA_FILE}" "${BSS_DATA_FILE}" "${BSSB_DATA_FILE}" "${BTS_DATA_FILE}" + +printf "${GRN}*** Refunding original funding account...${OFF}\n" +conbench --num_accounts ${MAX_ACCTS} --refund_and_exit + +printf "${GRN}*** Benchmarks completed.${OFF}\n" diff --git a/go/extra/conbench/conbench-test.sh b/go/extra/conbench/conbench-test.sh new file mode 100755 index 00000000000..270bdffced9 --- /dev/null +++ b/go/extra/conbench/conbench-test.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash +# +# Set-up the default oasis-net-runner network and run conbench-plot on it. +# +# You might want to set the cpufreq governor to 'performance' before running +# any benchmarks: +# +# sudo sh -c 'for cpu in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor; do echo "performance" > $cpu; done' +# ./conbench-test.sh +# sudo sh -c 'for cpu in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor; do echo "powersave" > $cpu; done' +# + +set -o nounset -o pipefail -o errexit +trap "exit 1" INT + +# Get the root directory of the repository. +ROOT="$(cd $(dirname $0)/../../../; pwd -P)" + +# ANSI escape codes to brighten up the output. +GRN=$'\e[32;1m' +OFF=$'\e[0m' + +# Paths to various binaries and config files that we need. +OASIS_NET_RUNNER="${ROOT}/go/oasis-net-runner/oasis-net-runner" +OASIS_NODE="${ROOT}/go/oasis-node/oasis-node" + +# Kill all dangling processes on exit. +cleanup() { + printf "${OFF}" + pkill -P $$ || true + wait || true +} +trap "cleanup" EXIT + +# The base directory for all the node and test env cruft. +# Note: We don't make this under /tmp to prevent running out of RAM. +# Note2: There's a dumb limit to the path length for the UNIX socket, so run +# this script somewhere close to the root of the filesystem. +TEST_BASE_DIR=$(cd `mktemp -p . -d oasis-conbench-XXXXXXXXXX`; pwd -P) + +# The oasis-node binary must be in the path for the oasis-net-runner to find it. +export PATH="${PATH}:${ROOT}/go/oasis-node" + +# Make sure the open file limit is big enough. +ulimit -n 10240 + +printf "${GRN}### Starting the test network...${OFF}\n" +${OASIS_NET_RUNNER} \ + --fixture.default.setup_runtimes=false \ + --fixture.default.num_entities=1 \ + --fixture.default.disable_supplementary_sanity_checks=true \ + --fixture.default.timeout_commit=1ms \ + --basedir.no_temp_dir \ + --basedir "${TEST_BASE_DIR}" & + +export OASIS_NODE_GRPC_ADDR="unix:${TEST_BASE_DIR}/net-runner/network/validator-0/internal.sock" + +printf "${GRN}### Waiting for all nodes to register...${OFF}\n" +${OASIS_NODE} debug control wait-nodes \ + --address ${OASIS_NODE_GRPC_ADDR} \ + --nodes 1 \ + --wait + +printf "${GRN}### Running benchmark...${OFF}\n" +${ROOT}/go/extra/conbench/conbench-plot.sh --use_test_entity + +# Clean up after a successful run. +cleanup +rm -rf "${TEST_BASE_DIR}" + +printf "${GRN}### Tests finished.${OFF}\n" diff --git a/go/extra/conbench/main.go b/go/extra/conbench/main.go new file mode 100644 index 00000000000..56404c82b37 --- /dev/null +++ b/go/extra/conbench/main.go @@ -0,0 +1,10 @@ +// Consensus benchmark. +package main + +import ( + "github.com/oasisprotocol/oasis-core/go/extra/conbench/cmd" +) + +func main() { + cmd.Execute() +} diff --git a/go/oasis-net-runner/fixtures/default.go b/go/oasis-net-runner/fixtures/default.go index 817f5580c57..ec393e25880 100644 --- a/go/oasis-net-runner/fixtures/default.go +++ b/go/oasis-net-runner/fixtures/default.go @@ -28,6 +28,8 @@ const ( cfgRuntimeLoader = "fixture.default.runtime.loader" cfgSetupRuntimes = "fixture.default.setup_runtimes" cfgTEEHardware = "fixture.default.tee_hardware" + cfgDisableSupSanityChecks = "fixture.default.disable_supplementary_sanity_checks" + cfgTimeoutCommit = "fixture.default.timeout_commit" ) var ( @@ -57,7 +59,7 @@ func newDefaultFixture() (*oasis.NetworkFixture, error) { RuntimeSGXLoaderBinary: viper.GetString(cfgRuntimeLoader), Consensus: consensusGenesis.Genesis{ Parameters: consensusGenesis.Parameters{ - TimeoutCommit: 1 * time.Second, + TimeoutCommit: viper.GetDuration(cfgTimeoutCommit), }, }, EpochtimeMock: viper.GetBool(cfgEpochtimeMock), @@ -73,7 +75,7 @@ func newDefaultFixture() (*oasis.NetworkFixture, error) { {IsDebugTestEntity: true}, }, Validators: []oasis.ValidatorFixture{ - {Entity: 1}, + {Entity: 1, Consensus: oasis.ConsensusFixture{DisableSupplementarySanityChecks: viper.GetBool(cfgDisableSupSanityChecks)}}, }, Seeds: []oasis.SeedFixture{{}}, } @@ -152,6 +154,7 @@ func init() { DefaultFixtureFlags.Bool(cfgFundEntities, false, "fund all entities in genesis") DefaultFixtureFlags.Bool(cfgEpochtimeMock, false, "use mock epochtime") DefaultFixtureFlags.Bool(cfgSetupRuntimes, true, "initialize the network with runtimes and runtime nodes") + DefaultFixtureFlags.Bool(cfgDisableSupSanityChecks, false, "disable supplementary sanity checks") DefaultFixtureFlags.Int(cfgNumEntities, 1, "number of (non debug) entities in genesis") DefaultFixtureFlags.String(cfgKeymanagerBinary, "simple-keymanager", "path to the keymanager runtime") DefaultFixtureFlags.String(cfgNodeBinary, "oasis-node", "path to the oasis-node binary") @@ -160,6 +163,7 @@ func init() { DefaultFixtureFlags.String(cfgRuntimeLoader, "oasis-core-runtime-loader", "path to the runtime loader") DefaultFixtureFlags.String(cfgTEEHardware, "", "TEE hardware to use") DefaultFixtureFlags.Uint64(cfgHaltEpoch, math.MaxUint64, "halt epoch height") + DefaultFixtureFlags.Duration(cfgTimeoutCommit, 1*time.Second, "consensus timeout commit parameter") _ = viper.BindPFlags(DefaultFixtureFlags) diff --git a/go/oasis-node/cmd/common/pprof/pprof.go b/go/oasis-node/cmd/common/pprof/pprof.go index b863a7c0cbe..b2e9af85bb6 100644 --- a/go/oasis-node/cmd/common/pprof/pprof.go +++ b/go/oasis-node/cmd/common/pprof/pprof.go @@ -71,6 +71,9 @@ func (p *pprofService) Start() error { return err } + runtime.SetBlockProfileRate(1) + runtime.SetMutexProfileFraction(1) + // Create a new mux just for the pprof endpoints to avoid using the // global multiplexer where pprof's init function registers by default. mux := http.NewServeMux() diff --git a/go/oasis-test-runner/oasis/entity.go b/go/oasis-test-runner/oasis/entity.go index f610e75ac8d..f8ddff7416c 100644 --- a/go/oasis-test-runner/oasis/entity.go +++ b/go/oasis-test-runner/oasis/entity.go @@ -38,8 +38,8 @@ type Entity struct { // EntityCfg is the Oasis entity provisioning configuration. type EntityCfg struct { - IsDebugTestEntity bool - Restore bool + IsDebugTestEntity bool `json:"is_debug_test_entity"` + Restore bool `json:"restore"` } // Inner returns the actual Oasis entity and it's signer. diff --git a/go/oasis-test-runner/oasis/fixture.go b/go/oasis-test-runner/oasis/fixture.go index 67bb0b00fc1..314ef52e676 100644 --- a/go/oasis-test-runner/oasis/fixture.go +++ b/go/oasis-test-runner/oasis/fixture.go @@ -143,6 +143,9 @@ type ConsensusFixture struct { // nolint: maligned // EnableConsensusRPCWorker enables the public consensus RPC services worker. EnableConsensusRPCWorker bool `json:"enable_consensusrpc_worker,omitempty"` + + // DisableSupplementarySanityChecks disables the supplementary sanity checks. + DisableSupplementarySanityChecks bool `json:"disable_supplementary_sanity_checks,omitempty"` } // TEEFixture is a TEE configuration fixture. diff --git a/go/oasis-test-runner/oasis/validator.go b/go/oasis-test-runner/oasis/validator.go index 4c49ad49e55..a73c1906953 100644 --- a/go/oasis-test-runner/oasis/validator.go +++ b/go/oasis-test-runner/oasis/validator.go @@ -103,8 +103,10 @@ func (val *Validator) startNode() error { workerConsensusRPCEnabled() } - if len(val.net.validators) >= 1 && val == val.net.validators[0] { - args = args.tendermintSupplementarySanityEnabled() + if !val.consensus.DisableSupplementarySanityChecks { + if len(val.net.validators) >= 1 && val == val.net.validators[0] { + args = args.tendermintSupplementarySanityEnabled() + } } if err := val.net.startOasisNode(&val.Node, nil, args); err != nil { From cd127b2074e324625608128433c8c6eded2ea0b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrej=20Buko=C5=A1ek?= Date: Fri, 2 Oct 2020 16:52:44 +0200 Subject: [PATCH 2/3] w1p: Add no_wait mode to conbench --- go/extra/conbench/cmd/conbench.go | 84 ++++++++++++++++++++++++++---- go/extra/conbench/conbench-plot.sh | 24 ++++++++- 2 files changed, 97 insertions(+), 11 deletions(-) diff --git a/go/extra/conbench/cmd/conbench.go b/go/extra/conbench/cmd/conbench.go index 00dcda1d7cf..3e045671440 100644 --- a/go/extra/conbench/cmd/conbench.go +++ b/go/extra/conbench/cmd/conbench.go @@ -46,6 +46,10 @@ const ( // Use test entity for funding? CfgUseTestEntity = "use_test_entity" + // CfgNoWait uses SubmitTxNoWait instead of SubmitTx, submits txns for the + // given amount of time, then stops. + CfgNoWait = "no_wait" + // Gas price (should be set to the minimum gas price of validators). CfgGasPrice = "gas_price" @@ -84,7 +88,7 @@ type localAccount struct { cachedGas uint64 } -func transfer(ctx context.Context, cc consensus.ClientBackend, from *localAccount, toAddr staking.Address, amount uint64, noCache bool) error { +func transfer(ctx context.Context, cc consensus.ClientBackend, from *localAccount, toAddr staking.Address, amount uint64, noCache, noWait bool) error { var err error // Get sender's nonce if not yet cached (or if we're ignoring cache). @@ -138,6 +142,12 @@ func transfer(ctx context.Context, cc consensus.ClientBackend, from *localAccoun // Increment cached nonce. atomic.AddUint64(&from.cachedNonce, 1) + if noWait { + // Submit transaction, but don't wait for it to be included in a block. + return cc.SubmitTxNoWait(ctx, signedTx) + } + + // Otherwise, submit and wait for the txn to be included in a block. // Submit with timeout to avoid blocking forever if the client node // is skipping CheckTx checks. The timeout should be set large enough // for the network to handle the submission. @@ -169,7 +179,7 @@ func refund(ctx context.Context, cc consensus.ClientBackend, sc staking.Backend, } // We don't want refunds to fail, so disable caching. - if err = transfer(ctx, cc, from, toAddr, amount, true); err != nil { + if err = transfer(ctx, cc, from, toAddr, amount, true, false); err != nil { return fmt.Errorf("unable to refund from account: %w", err) } @@ -367,7 +377,7 @@ func doRun(cmd *cobra.Command, args []string) error { // nolint: gocyclo account[a].cachedGas = uint64(estGas) // Each account gets perAccountFunds tokens. - if errr := transfer(ctx, cc, &fundingAcct, account[a].addr, perAccountFunds, true); errr != nil { + if errr := transfer(ctx, cc, &fundingAcct, account[a].addr, perAccountFunds, true, false); errr != nil { // An error has happened while funding, make sure to refund the // funding account from the accounts funded until this point. logger.Error("error while funding, attempting to refund account") @@ -380,6 +390,9 @@ func doRun(cmd *cobra.Command, args []string) error { // nolint: gocyclo } } + noWait := viper.IsSet(CfgNoWait) + noWaitDuration := viper.GetDuration(CfgNoWait) + logger.Info("starting benchmark", "num_accounts", numAccounts) startStatus, err := cc.GetStatus(ctx) if err != nil { @@ -399,6 +412,7 @@ func doRun(cmd *cobra.Command, args []string) error { // nolint: gocyclo totalSubmitTimeNs uint64 numSubmitSamples uint64 numSubmitErrors uint64 + gottaStopFast uint32 ) // Perform benchmark in parallel, one goroutine per account. @@ -407,30 +421,48 @@ func doRun(cmd *cobra.Command, args []string) error { // nolint: gocyclo go func(idx uint64) { var noCache bool for s := uint64(0); s < numSamples; s++ { + if atomic.LoadUint32(&gottaStopFast) > 0 { + // Terminate. + return + } + if noWait { + // Send transactions until terminated. + s = 0 + } + fromIdx := idx toIdx := idx toAddr := account[toIdx].addr startT := time.Now() - if err = transfer(ctx, cc, &account[fromIdx], toAddr, 1, noCache); err != nil { + if err = transfer(ctx, cc, &account[fromIdx], toAddr, 1, noCache, noWait); err != nil { atomic.AddUint64(&numSubmitErrors, 1) // Disable cache for the next sample, just in case // we messed up the nonce or if the gas cost changed. noCache = true - doneCh <- true + if !noWait { + doneCh <- true + } continue } atomic.AddUint64(&totalSubmitTimeNs, uint64(time.Since(startT).Nanoseconds())) atomic.AddUint64(&numSubmitSamples, 1) - doneCh <- true noCache = false + if !noWait { + doneCh <- true + } } }(uint64(a)) } - // Wait for all goroutines to finish. - for i := uint64(0); i < numAccounts*numSamples; i++ { - <-doneCh + if !noWait { + // Wait for all goroutines to finish. + for i := uint64(0); i < numAccounts*numSamples; i++ { + <-doneCh + } + } else { + time.Sleep(noWaitDuration) + atomic.StoreUint32(&gottaStopFast, 1) } benchmarkDuration := time.Since(benchmarkStartT) @@ -450,13 +482,17 @@ func doRun(cmd *cobra.Command, args []string) error { // nolint: gocyclo // transactions per second and other stats. // Note that we count all transactions, not just the ones made // by this benchmark. + // + // In addition, do a sliding window for the max avg tps. var totalTxs uint64 var maxTxs uint64 minTxs := uint64(18446744073709551615) txsPerBlock := make([]uint64, 0) txBytesPerBlock := make([]uint64, 0) blockDeltaT := make([]float64, 0) + blockT := make([]time.Time, 0) var prevBlockT time.Time + for height := benchmarkStartHeight; height <= benchmarkStopHeight; height++ { // Count number of transactions. txs, grr := cc.GetTransactions(ctx, height) @@ -492,6 +528,7 @@ func doRun(cmd *cobra.Command, args []string) error { // nolint: gocyclo } blockDeltaT = append(blockDeltaT, blk.Time.Sub(prevBlockT).Seconds()) prevBlockT = blk.Time + blockT = append(blockT, blk.Time) } tps := float64(totalTxs) / benchmarkDuration.Seconds() @@ -502,6 +539,32 @@ func doRun(cmd *cobra.Command, args []string) error { // nolint: gocyclo avgSubmitTimeNs := float64(totalSubmitTimeNs) / float64(numSubmitSamples) + // Do a sliding window over the block size array to get the max avg tps. + var bestAvgTps float64 + for slidingWindowSize := 1; slidingWindowSize <= 32; slidingWindowSize++ { + for i := range txsPerBlock { + var curAvgTps float64 + j := i + // Gather transactions from up to slidingWindowSize blocks or + // up to as many blocks as needed for the block timestamp to change. + // The block timestamp has a granularity of only 1s, so this can be + // an issue with fast CommitTimeouts (e.g. less than 1s), as it + // can cause a divide by zero in the average tps calculation below + // (since the blocks are too close together). + // Increasing the window size to encompass blocks with different + // times fixes this. + for ; j < len(txsPerBlock) && (blockT[j] == blockT[i] || j < i+slidingWindowSize); j++ { + curAvgTps += float64(txsPerBlock[j]) + } + curAvgTps /= blockT[j-1].Sub(blockT[i]).Seconds() + // Despite the workaround above, the above can still divide by zero + // at the very end of the run, so make sure we don't count that. + if curAvgTps > bestAvgTps && !math.IsInf(curAvgTps, 0) { + bestAvgTps = curAvgTps + } + } + } + logger.Info("benchmark finished", // Number of accounts involved in benchmark (level of parallelism). "num_accounts", numAccounts, @@ -535,6 +598,8 @@ func doRun(cmd *cobra.Command, args []string) error { // nolint: gocyclo "block_sizes_bytes", strings.Trim(fmt.Sprint(txBytesPerBlock), "[]"), // Time delta between blocks (in seconds). "block_delta_t_s", strings.Trim(fmt.Sprint(blockDeltaT), "[]"), + // Maximum average tps over a sliding window. + "max_avg_tps", bestAvgTps, ) // Refund money into original funding account. @@ -557,6 +622,7 @@ func init() { fs.Uint64(CfgNumAccounts, 10, "Number of accounts to create for benchmarking (also level of parallelism)") fs.Uint64(CfgNumSamples, 30, "Number of samples (transfers) per account") fs.Duration(CfgSubmitTxTimeout, 10*time.Second, "Timeout for SubmitTx (set this based on network parameters)") + fs.Duration(CfgNoWait, 10*time.Second, "Use SubmitTxNoWait instead of SubmitTx (spam transactions) for given amount of time") fs.Bool(CfgUseTestEntity, false, "Use test entity for funding (only for testing)") fs.Uint64(CfgGasPrice, 1, "Gas price (should be set to the minimum gas price of validators)") fs.Bool(CfgFundAndExit, false, "Only fund accounts and exit") diff --git a/go/extra/conbench/conbench-plot.sh b/go/extra/conbench/conbench-plot.sh index 11e1b2ab79a..f17cfe3811c 100755 --- a/go/extra/conbench/conbench-plot.sh +++ b/go/extra/conbench/conbench-plot.sh @@ -50,6 +50,7 @@ BS_PLOT="conbench-block-size.${NOW}.png" BSS_PLOT="conbench-block-sizes.${NOW}.png" BSSB_PLOT="conbench-block-sizes-bytes.${NOW}.png" BTS_PLOT="conbench-block-times.${NOW}.png" +MATPS_PLOT="conbench-max-avg-tps.${NOW}.png" # Get the root directory of the repository. ROOT="$(cd $(dirname $0)/../../../; pwd -P)" @@ -77,6 +78,7 @@ BS_DATA_FILE="$(mktemp -t oasis-conbench-bs-plot-XXXXXXXXXX)" BSS_DATA_FILE="$(mktemp -t oasis-conbench-bss-plot-XXXXXXXXXX)" BSSB_DATA_FILE="$(mktemp -t oasis-conbench-bssb-plot-XXXXXXXXXX)" BTS_DATA_FILE="$(mktemp -t oasis-conbench-bts-plot-XXXXXXXXXX)" +MATPS_DATA_FILE="$(mktemp -t oasis-conbench-max-avg-tps-plot-XXXXXXXXXX)" ARGS="$@" @@ -99,7 +101,7 @@ run_bench() { # Run benchmark. printf "${GRN}*** Running benchmark for ${num_accounts} accounts...${OFF}\n" - conbench --skip_funding --num_accounts ${num_accounts} > "${output}" + conbench --skip_funding --no_wait=60s --num_accounts ${num_accounts} > "${output}" local results=$(fgrep 'msg="benchmark finished"' "${output}") echo "${results}" | tee -a "${RAW_DATA}" @@ -118,6 +120,8 @@ run_bench() { local bts=$(echo "${results}" | sed -r 's/[[:alnum:]_]+=/\n&/g' | awk -F= '$1=="block_delta_t_s"{print $2}' | tr -d '"') + local matps=$(echo "${results}" | sed -r 's/[[:alnum:]_]+=/\n&/g' | awk -F= '$1=="max_avg_tps"{print $2}') + rm "${output}" if [[ "${no_plot}" == "no_plot" ]]; then @@ -148,6 +152,8 @@ run_bench() { echo "${num_accounts} ${blk} ${bt}" >> "${BTS_DATA_FILE}" blk=$((blk+1)) done + + echo "${num_accounts} ${matps}" >> "${MATPS_DATA_FILE}" } ACCT="10, 50, 100, 175, 250, 325, 425, 500, 650, 800, 900" @@ -304,7 +310,21 @@ set palette defined (0 "dark-violet", 1 "blue", 2 "cyan", 3 "yellow", 4 "red") splot '${BTS_DATA_FILE}' with impulses lw 2 lc palette notitle EOF -rm "${TPS_DATA_FILE}" "${ST_DATA_FILE}" "${BS_DATA_FILE}" "${BSS_DATA_FILE}" "${BSSB_DATA_FILE}" "${BTS_DATA_FILE}" +# Plot max avg TPS graph. +gnuplot <<- EOF +set title "Maximum average transactions per second" +set xlabel "Number of parallel accounts" +set xtics (${ACCT}) +set ylabel "transactions/s" textcolor lt 1 +set autoscale y +set grid +set term png +set output "${MATPS_PLOT}" +plot '${MATPS_DATA_FILE}' using 1:2 with linespoint notitle +EOF + + +rm "${TPS_DATA_FILE}" "${ST_DATA_FILE}" "${BS_DATA_FILE}" "${BSS_DATA_FILE}" "${BSSB_DATA_FILE}" "${BTS_DATA_FILE}" "${MATPS_DATA_FILE}" printf "${GRN}*** Refunding original funding account...${OFF}\n" conbench --num_accounts ${MAX_ACCTS} --refund_and_exit From b487942f76edb9e9e7e0459c82bdbcc8019ec45c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrej=20Buko=C5=A1ek?= Date: Wed, 7 Oct 2020 15:15:23 +0200 Subject: [PATCH 3/3] w1p: Multiple validators --- go/extra/conbench/cmd/conbench.go | 46 +++++++++++++++++++++++-- go/extra/conbench/conbench-test.sh | 3 +- go/oasis-net-runner/fixtures/default.go | 8 +++++ 3 files changed, 53 insertions(+), 4 deletions(-) diff --git a/go/extra/conbench/cmd/conbench.go b/go/extra/conbench/cmd/conbench.go index 3e045671440..ca81cc8d2d5 100644 --- a/go/extra/conbench/cmd/conbench.go +++ b/go/extra/conbench/cmd/conbench.go @@ -86,6 +86,7 @@ type localAccount struct { addr staking.Address cachedNonce uint64 cachedGas uint64 + errorCount map[error]uint64 } func transfer(ctx context.Context, cc consensus.ClientBackend, from *localAccount, toAddr staking.Address, amount uint64, noCache, noWait bool) error { @@ -99,6 +100,7 @@ func transfer(ctx context.Context, cc consensus.ClientBackend, from *localAccoun Height: consensus.HeightLatest, }) if err != nil { + from.errorCount[err]++ return fmt.Errorf("unable to get sender's nonce: %w", err) } atomic.StoreUint64(&from.cachedNonce, nonce) @@ -109,6 +111,7 @@ func transfer(ctx context.Context, cc consensus.ClientBackend, from *localAccoun To: toAddr, } if err = transfer.Amount.FromUint64(amount); err != nil { + from.errorCount[err]++ return fmt.Errorf("unable to convert given amount from uint64: %w", err) } @@ -123,6 +126,7 @@ func transfer(ctx context.Context, cc consensus.ClientBackend, from *localAccoun Transaction: tx, }) if grr != nil { + from.errorCount[grr]++ return fmt.Errorf("unable to estimate gas: %w", grr) } gas = uint64(estGas) @@ -131,11 +135,13 @@ func transfer(ctx context.Context, cc consensus.ClientBackend, from *localAccoun tx.Fee.Gas = transaction.Gas(gas) if err = tx.Fee.Amount.FromUint64(gas * viper.GetUint64(CfgGasPrice)); err != nil { + from.errorCount[err]++ return fmt.Errorf("unable to convert fee amount from uint64: %w", err) } signedTx, err := transaction.Sign(from.signer, tx) if err != nil { + from.errorCount[err]++ return fmt.Errorf("unable to sign transfer transaction: %w", err) } @@ -144,7 +150,11 @@ func transfer(ctx context.Context, cc consensus.ClientBackend, from *localAccoun if noWait { // Submit transaction, but don't wait for it to be included in a block. - return cc.SubmitTxNoWait(ctx, signedTx) + grr := cc.SubmitTxNoWait(ctx, signedTx) + if grr != nil { + from.errorCount[grr]++ + } + return grr } // Otherwise, submit and wait for the txn to be included in a block. @@ -155,6 +165,7 @@ func transfer(ctx context.Context, cc consensus.ClientBackend, from *localAccoun submissionCtx, cancel := context.WithTimeout(ctx, timeout) defer cancel() if err = cc.SubmitTx(submissionCtx, signedTx); err != nil { + from.errorCount[err]++ return err } return nil @@ -286,6 +297,7 @@ func doRun(cmd *cobra.Command, args []string) error { // nolint: gocyclo account[a].addr = staking.NewAddress(signer.Public()) account[a].cachedNonce = notYetCached account[a].cachedGas = notYetCached + account[a].errorCount = make(map[error]uint64) } var fundingSigner signature.Signer @@ -427,7 +439,9 @@ func doRun(cmd *cobra.Command, args []string) error { // nolint: gocyclo } if noWait { // Send transactions until terminated. + // Ignore cache because it results in too many errors. s = 0 + noCache = true } fromIdx := idx @@ -439,16 +453,16 @@ func doRun(cmd *cobra.Command, args []string) error { // nolint: gocyclo atomic.AddUint64(&numSubmitErrors, 1) // Disable cache for the next sample, just in case // we messed up the nonce or if the gas cost changed. - noCache = true if !noWait { + noCache = true doneCh <- true } continue } atomic.AddUint64(&totalSubmitTimeNs, uint64(time.Since(startT).Nanoseconds())) atomic.AddUint64(&numSubmitSamples, 1) - noCache = false if !noWait { + noCache = false doneCh <- true } } @@ -565,6 +579,29 @@ func doRun(cmd *cobra.Command, args []string) error { // nolint: gocyclo } } + // Collect number of transfer errors from all accounts. + errCounts := make(map[string]uint64) + for a := range account { + for e, c := range account[a].errorCount { + er := strings.ReplaceAll(e.Error(), " ", "_") + er = strings.ReplaceAll(er, "\"", "'") + errCounts[er] += c + } + } + // Output the results sorted by key. + keys := make([]string, 0, len(errCounts)) + for k := range errCounts { + keys = append(keys, k) + } + sort.Strings(keys) + errCountsString := "" + for _, k := range keys { + if errCountsString != "" { + errCountsString += " " + } + errCountsString += k + "#" + fmt.Sprintf("%v", errCounts[k]) + } + logger.Info("benchmark finished", // Number of accounts involved in benchmark (level of parallelism). "num_accounts", numAccounts, @@ -600,6 +637,9 @@ func doRun(cmd *cobra.Command, args []string) error { // nolint: gocyclo "block_delta_t_s", strings.Trim(fmt.Sprint(blockDeltaT), "[]"), // Maximum average tps over a sliding window. "max_avg_tps", bestAvgTps, + // Map of errors that occurred during benchmarking (if any). + // These are sorted by key. + "error_counts", errCountsString, ) // Refund money into original funding account. diff --git a/go/extra/conbench/conbench-test.sh b/go/extra/conbench/conbench-test.sh index 270bdffced9..f58b854bfa2 100755 --- a/go/extra/conbench/conbench-test.sh +++ b/go/extra/conbench/conbench-test.sh @@ -48,8 +48,9 @@ printf "${GRN}### Starting the test network...${OFF}\n" ${OASIS_NET_RUNNER} \ --fixture.default.setup_runtimes=false \ --fixture.default.num_entities=1 \ + --fixture.default.num_validators=4 \ --fixture.default.disable_supplementary_sanity_checks=true \ - --fixture.default.timeout_commit=1ms \ + --fixture.default.timeout_commit=100ms \ --basedir.no_temp_dir \ --basedir "${TEST_BASE_DIR}" & diff --git a/go/oasis-net-runner/fixtures/default.go b/go/oasis-net-runner/fixtures/default.go index ec393e25880..d45638b3f02 100644 --- a/go/oasis-net-runner/fixtures/default.go +++ b/go/oasis-net-runner/fixtures/default.go @@ -23,6 +23,7 @@ const ( cfgKeymanagerBinary = "fixture.default.keymanager.binary" cfgNodeBinary = "fixture.default.node.binary" cfgNumEntities = "fixture.default.num_entities" + cfgNumValidators = "fixture.default.num_validators" cfgRuntimeBinary = "fixture.default.runtime.binary" cfgRuntimeGenesisState = "fixture.default.runtime.genesis_state" cfgRuntimeLoader = "fixture.default.runtime.loader" @@ -84,6 +85,12 @@ func newDefaultFixture() (*oasis.NetworkFixture, error) { fixture.Entities = append(fixture.Entities, oasis.EntityCfg{}) } + for i := 0; i < viper.GetInt(cfgNumValidators); i++ { + fixture.Validators = append(fixture.Validators, oasis.ValidatorFixture{ + Entity: 1, Consensus: oasis.ConsensusFixture{DisableSupplementarySanityChecks: viper.GetBool(cfgDisableSupSanityChecks)}, + }) + } + if viper.GetBool(cfgSetupRuntimes) { fixture.Runtimes = []oasis.RuntimeFixture{ // Key manager runtime. @@ -156,6 +163,7 @@ func init() { DefaultFixtureFlags.Bool(cfgSetupRuntimes, true, "initialize the network with runtimes and runtime nodes") DefaultFixtureFlags.Bool(cfgDisableSupSanityChecks, false, "disable supplementary sanity checks") DefaultFixtureFlags.Int(cfgNumEntities, 1, "number of (non debug) entities in genesis") + DefaultFixtureFlags.Int(cfgNumValidators, 1, "number of validator nodes") DefaultFixtureFlags.String(cfgKeymanagerBinary, "simple-keymanager", "path to the keymanager runtime") DefaultFixtureFlags.String(cfgNodeBinary, "oasis-node", "path to the oasis-node binary") DefaultFixtureFlags.String(cfgRuntimeBinary, "simple-keyvalue", "path to the runtime binary")