From 2fe07797b378f6a3d37a73aeb7507d07e278943a Mon Sep 17 00:00:00 2001
From: Marcelo Diop-Gonzalez <marcelo827@gmail.com>
Date: Thu, 2 Nov 2023 23:25:45 -0400
Subject: [PATCH] feat(mocknet): add a script that runs the locust load test
 (#9444)

The mocknet scripts currently allow sending mirrored mainnet traffic,
but there are no easy scripts to send traffic generated by the locust
load test scripts. This adds a locust.py file that sets up an
environment for running this load test, so that we can also send this
traffic to mocknet nodes that have large mainnet state.

This is just a first version, which doesn't take care of the issue of
getting a load testing account and key to use for the locust tests.
Right now this is actually possible by just picking a mainnet account
with lots of NEAR, since on the mocknet chain, access keys are rewritten
so that we have access to every account. But finding an account to use
and finding a valid key are relatively involved steps, so this is the
obvious next thing to make easier.
---
 pytest/tests/mocknet/README.md    |   3 +
 pytest/tests/mocknet/cmd_utils.py |  30 ++++
 pytest/tests/mocknet/locust.py    | 237 ++++++++++++++++++++++++++++++
 pytest/tests/mocknet/mirror.py    |  40 ++---
 4 files changed, 281 insertions(+), 29 deletions(-)
 create mode 100644 pytest/tests/mocknet/cmd_utils.py
 create mode 100644 pytest/tests/mocknet/locust.py

diff --git a/pytest/tests/mocknet/README.md b/pytest/tests/mocknet/README.md
index bd816d03038..7af3eb170c5 100644
--- a/pytest/tests/mocknet/README.md
+++ b/pytest/tests/mocknet/README.md
@@ -9,3 +9,6 @@ Mirror transactions from a given network into a custom mocktest network and add
 - See metrics on grafana mocknet https://grafana.near.org/d/jHbiNgSnz/mocknet?orgId=1&refresh=30s&var-chain_id=All&var-node_id=.*unique_id.*&var-account_id=All replacing the "unique_id" with the value from earlier
 
 If there's ever a problem with the neard runners on each node, for example if you get a connection error running the `status` command, run the `restart-neard-runner` command to restart them, which should be safe to do.
+
+To run a locust load test on the mocknet network, run `python3 tests/mocknet/locust.py init --instance-names {instance_name0,instance_name1,etc...}`, where
+the instance names are VMs that have been prepared for this purpose, and then run `python3 tests/mocknet/locust.py run --master {master_instance_name} --workers {worker_instance_name0,worker_instance_name1,etc...} --funding-key {key.json} --node-ip-port {mocknet_node_ip}:3030`, where `mocknet_node_ip` is the IP address of a node that has been set up by the mirror.py script, and `key.json` is the key file of an account that holds lots of NEAR for this load test. TODO: add extra accounts for load testing purposes during the mocknet setup step
\ No newline at end of file
diff --git a/pytest/tests/mocknet/cmd_utils.py b/pytest/tests/mocknet/cmd_utils.py
new file mode 100644
index 00000000000..dd654eee05a
--- /dev/null
+++ b/pytest/tests/mocknet/cmd_utils.py
@@ -0,0 +1,30 @@
+import sys
+
+LOG_DIR = '/home/ubuntu/logs'
+STATUS_DIR = '/home/ubuntu/logs/status'
+
+
+def run_cmd(node, cmd):
+    r = node.machine.run(cmd)
+    if r.exitcode != 0:
+        sys.exit(
+            f'failed running {cmd} on {node.instance_name}:\nstdout: {r.stdout}\nstderr: {r.stderr}'
+        )
+    return r
+
+
+def run_in_background(node, cmd, log_filename, env='', pre_cmd=None):
+    setup_cmd = f'truncate --size 0 {STATUS_DIR}/{log_filename} '
+    setup_cmd += f'&& for i in {{8..0}}; do if [ -f {LOG_DIR}/{log_filename}.$i ]; then mv {LOG_DIR}/{log_filename}.$i {LOG_DIR}/{log_filename}.$((i+1)); fi done'
+    if pre_cmd is not None:
+        pre_cmd += ' && '
+    else:
+        pre_cmd = ''
+    run_cmd(
+        node,
+        f'( {pre_cmd}{setup_cmd} && {env} nohup {cmd} > {LOG_DIR}/{log_filename}.0 2>&1; nohup echo "$?" ) > {STATUS_DIR}/{log_filename} 2>&1 &'
+    )
+
+
+def init_node(node):
+    run_cmd(node, f'mkdir -p {LOG_DIR} && mkdir -p {STATUS_DIR}')
diff --git a/pytest/tests/mocknet/locust.py b/pytest/tests/mocknet/locust.py
new file mode 100644
index 00000000000..08b962fc4bc
--- /dev/null
+++ b/pytest/tests/mocknet/locust.py
@@ -0,0 +1,237 @@
+import argparse
+import cmd_utils
+import pathlib
+from rc import pmap, run
+import sys
+
+sys.path.append(str(pathlib.Path(__file__).resolve().parents[2] / 'lib'))
+
+import mocknet
+
+from configured_logger import logger
+
+
+def init_locust_node(instance_name):
+    node = mocknet.get_node(instance_name)
+    if node is None:
+        sys.exit(f'could not find node {instance_name}')
+    cmd_utils.init_node(node)
+    commands = [
+        'sudo apt update',
+        'sudo apt-get install -y git virtualenv build-essential python3-dev',
+        'git clone https://github.com/near/nearcore /home/ubuntu/nearcore',
+        'mkdir /home/ubuntu/locust',
+        'cd /home/ubuntu/locust && python3 -m virtualenv venv -p $(which python3)',
+        './venv/bin/pip install -r /home/ubuntu/nearcore/pytest/requirements.txt',
+        './venv/bin/pip install locust',
+    ]
+    init_command = ' && '.join(commands)
+    cmd_utils.run_cmd(node, init_command)
+
+
+def init_cmd(args):
+    nodes = [x for x in args.instance_names.split(',') if len(x) > 0]
+    pmap(init_locust_node, nodes)
+
+
+def parse_instsance_names(args):
+    if args.master is None:
+        print('master instance name?: ')
+        args.master = sys.stdin.readline().strip()
+
+    if args.workers is None:
+        print('''
+worker instance names? Give a comma separated list. It is also valid to
+have the machine used for the master process in this list as well:''')
+        args.workers = sys.stdin.readline().strip()
+
+    master = mocknet.get_node(args.master)
+    if master is None:
+        sys.exit(f'could not find node {args.master}')
+
+    worker_names = [x for x in args.workers.split(',') if len(x) > 0]
+    workers = [
+        mocknet.get_node(instance_name) for instance_name in worker_names
+    ]
+    for (name, node) in zip(worker_names, workers):
+        if node is None:
+            sys.exit(f'could not find node {name}')
+
+    return master, workers
+
+
+def upload_key(node, filename):
+    """Upload the local key file `filename` to the fixed funding key path on `node`."""
+    dest = '/home/ubuntu/locust/funding_key.json'
+    node.machine.upload(filename, dest, switch_user='ubuntu')
+
+
+def run_master(args, node, num_workers):
+    upload_key(node, args.funding_key)
+    cmd = f'/home/ubuntu/locust/venv/bin/python3 -m locust --web-port 3030 --master-bind-port 3000 -H {args.node_ip_port} -f locustfiles/{args.locustfile} --shard-layout-chain-id mainnet --funding-key=/home/ubuntu/locust/funding_key.json --max-workers {args.max_workers} --master'
+    if args.num_users is not None:
+        cmd += f' --users {args.num_users}'
+    if args.run_time is not None:
+        cmd += f' --run-time {args.run_time}'
+    if not args.web_ui:
+        cmd += f' --headless --expect-workers {num_workers}'
+
+    logger.info(f'running "{cmd}" on master node {node.instance_name}')
+    cmd_utils.run_in_background(
+        node,
+        cmd,
+        'locust-master.txt',
+        pre_cmd=
+        'ulimit -S -n 100000 && cd /home/ubuntu/nearcore/pytest/tests/loadtest/locust'
+    )
+
+
+def wait_locust_inited(node, log_filename):
+    # We want to wait for the locust process to finish the initialization steps. Is there a better way than
+    # just waiting for the string "Starting Locust" to appear in the logs?
+    cmd_utils.run_cmd(
+        node,
+        f'tail -f {cmd_utils.LOG_DIR}/{log_filename}.0 | grep --line-buffered -m 1 -q "Starting Locust"'
+    )
+
+
+def wait_master_inited(node):
+    wait_locust_inited(node, 'locust-master.txt')
+    logger.info(f'master locust node {node.instance_name} initialized')
+
+
+def wait_worker_inited(node):
+    wait_locust_inited(node, 'locust-worker.txt')
+    logger.info(f'worker locust node {node.instance_name} initialized')
+
+
+def run_worker(args, node, master_ip):
+    cmd = f'/home/ubuntu/locust/venv/bin/python3 -m locust --web-port 3030 -H {args.node_ip_port} -f locustfiles/{args.locustfile} --shard-layout-chain-id mainnet --funding-key=/home/ubuntu/locust/funding_key.json --worker --master-port 3000'
+    if master_ip != node.machine.ip:
+        # if this node is also the master node, the key has already been uploaded
+        upload_key(node, args.funding_key)
+        cmd += f' --master-host {master_ip}'
+    logger.info(f'running "{cmd}" on worker node {node.instance_name}')
+    cmd_utils.run_in_background(
+        node,
+        cmd,
+        'locust-worker.txt',
+        pre_cmd=
+        'ulimit -S -n 100000 && cd /home/ubuntu/nearcore/pytest/tests/loadtest/locust'
+    )
+
+
+def run_cmd(args):
+    if not args.web_ui and args.num_users is None:
+        sys.exit('unless you pass --web-ui, --num-users must be set')
+
+    master, workers = parse_instsance_names(args)
+
+    run_master(args, master, len(workers))
+    if args.web_ui:
+        wait_master_inited(master)
+    pmap(lambda n: run_worker(args, n, master.machine.ip), workers)
+    if args.web_ui:
+        pmap(wait_worker_inited, workers)
+        logger.info(
+            f'All locust workers initialized. Visit http://{master.machine.ip}:3030/ to start and control the test'
+        )
+    else:
+        logger.info('All workers started.')
+
+
+def stop_cmd(args):
+    master, workers = parse_instsance_names(args)
+    # TODO: this is imprecise and heavy-handed: we kill every python3 process on the master whose ps line contains "locust",
+    # instead of remembering the PID of the process we started. NOTE: `workers` is parsed but nothing is run on the worker nodes here.
+    cmd_utils.run_cmd(
+        master,
+        'pids=$(ps -C python3 -o pid=,cmd= | grep "locust" | cut -d " " -f 2) && if [ ! -z "$pids" ]; then kill $pids; fi'
+    )
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Run a locust load test')
+
+    subparsers = parser.add_subparsers(title='subcommands',
+                                       description='valid subcommands',
+                                       help='additional help')
+
+    init_parser = subparsers.add_parser('init',
+                                        help='''
+    Sets up the python environment and downloads the code on each node.
+    ''')
+    init_parser.add_argument('--instance-names', type=str)
+    init_parser.set_defaults(func=init_cmd)
+
+    run_parser = subparsers.add_parser('run',
+                                       help='''
+    Runs the locust load test on each node.
+    ''')
+    run_parser.add_argument('--master',
+                            type=str,
+                            required=True,
+                            help='instance name of master node')
+    run_parser.add_argument(
+        '--workers',
+        type=str,
+        required=True,
+        help='comma-separated list of instance names of worker nodes')
+    run_parser.add_argument(
+        '--node-ip-port',
+        type=str,
+        required=True,
+        help='IP address and port of a node in the network under test')
+    run_parser.add_argument(
+        '--funding-key',
+        type=str,
+        required=True,
+        help=
+        'local path to a key file for the base account to be used in the test')
+    run_parser.add_argument(
+        '--locustfile',
+        type=str,
+        default='ft.py',
+        help=
+        'locustfile name in nearcore/pytest/tests/loadtest/locust/locustfiles')
+    run_parser.add_argument(
+        '--max-workers',
+        type=int,
+        default=16,
+        help='max number of workers the test should support')
+    run_parser.add_argument(
+        '--web-ui',
+        action='store_true',
+        help=
+        'if given, sets up a web UI to control the test, otherwise starts automatically'
+    )
+    run_parser.add_argument(
+        '--num-users',
+        type=int,
+        help=
+        'number of users to run the test with. Required unless --web-ui is given.'
+    )
+    run_parser.add_argument(
+        '--run-time',
+        type=str,
+        help=
+        'A string specifying the total run time of the test, passed to the locust --run-time argument. e.g. (300s, 20m, 3h, 1h30m, etc.)'
+    )
+    run_parser.set_defaults(func=run_cmd)
+
+    stop_parser = subparsers.add_parser('stop',
+                                        help='''
+    Stops the locust load test on each node.
+    ''')
+    stop_parser.add_argument('--master',
+                             type=str,
+                             help='instance name of master node')
+    stop_parser.add_argument(
+        '--workers',
+        type=str,
+        help='comma-separated list of instance names of worker nodes')
+    stop_parser.set_defaults(func=stop_cmd)
+
+    args = parser.parse_args()
+
+    args.func(args)
diff --git a/pytest/tests/mocknet/mirror.py b/pytest/tests/mocknet/mirror.py
index d96455ed339..fe8ce145755 100755
--- a/pytest/tests/mocknet/mirror.py
+++ b/pytest/tests/mocknet/mirror.py
@@ -3,6 +3,7 @@
 
 """
 from argparse import ArgumentParser, BooleanOptionalAction
+import cmd_utils
 import pathlib
 import json
 import random
@@ -42,28 +43,6 @@ def get_nodes(args):
     return traffic_generator, nodes
 
 
-def run_cmd(node, cmd):
-    r = node.machine.run(cmd)
-    if r.exitcode != 0:
-        sys.exit(
-            f'failed running {cmd} on {node.instance_name}:\nstdout: {r.stdout}\nstderr: {r.stderr}'
-        )
-    return r
-
-
-LOG_DIR = '/home/ubuntu/logs'
-STATUS_DIR = '/home/ubuntu/logs/status'
-
-
-def run_in_background(node, cmd, log_filename, env=''):
-    setup_cmd = f'truncate --size 0 {STATUS_DIR}/{log_filename} '
-    setup_cmd += f'&& for i in {{8..0}}; do if [ -f {LOG_DIR}/{log_filename}.$i ]; then mv {LOG_DIR}/{log_filename}.$i {LOG_DIR}/{log_filename}.$((i+1)); fi done'
-    run_cmd(
-        node,
-        f'( {setup_cmd} && {env} nohup {cmd} > {LOG_DIR}/{log_filename}.0 2>&1; nohup echo "$?" ) > {STATUS_DIR}/{log_filename} 2>&1 &'
-    )
-
-
 def wait_node_up(node):
     while True:
         try:
@@ -104,7 +83,7 @@ def prompt_setup_flags(args):
 
 
 def start_neard_runner(node):
-    run_in_background(node, f'/home/ubuntu/neard-runner/venv/bin/python /home/ubuntu/neard-runner/neard_runner.py ' \
+    cmd_utils.run_in_background(node, f'/home/ubuntu/neard-runner/venv/bin/python /home/ubuntu/neard-runner/neard_runner.py ' \
         '--home /home/ubuntu/neard-runner --neard-home /home/ubuntu/.near ' \
         '--neard-logs /home/ubuntu/neard-logs --port 3000', 'neard-runner.txt')
 
@@ -120,16 +99,19 @@ def upload_neard_runner(node):
 
 def init_neard_runner(node, config, remove_home_dir=False):
     stop_neard_runner(node)
-    rm_cmd = 'rm -rf /home/ubuntu/neard-runner && ' if remove_home_dir else ''
-    run_cmd(
-        node,
-        f'{rm_cmd}mkdir -p {LOG_DIR} && mkdir -p {STATUS_DIR} && mkdir -p /home/ubuntu/neard-runner'
-    )
+    cmd_utils.init_node(node)
+    if remove_home_dir:
+        cmd_utils.run_cmd(
+            node,
+            'rm -rf /home/ubuntu/neard-runner && mkdir -p /home/ubuntu/neard-runner'
+        )
+    else:
+        cmd_utils.run_cmd(node, 'mkdir -p /home/ubuntu/neard-runner')
     upload_neard_runner(node)
     mocknet.upload_json(node, '/home/ubuntu/neard-runner/config.json', config)
     cmd = 'cd /home/ubuntu/neard-runner && python3 -m virtualenv venv -p $(which python3)' \
     ' && ./venv/bin/pip install -r requirements.txt'
-    run_cmd(node, cmd)
+    cmd_utils.run_cmd(node, cmd)
     start_neard_runner(node)