diff --git a/output.tf b/output.tf index 3fd5b22..5746584 100644 --- a/output.tf +++ b/output.tf @@ -4,4 +4,29 @@ output "PublicIP" { output "CQL_PASS" { value = data.scylladbcloud_cql_auth.scylla.password sensitive = true -} \ No newline at end of file +} + +# Output the VPC peering connection ID +output "scylladbcloud_vpc_peering_connection_id" { + value = scylladbcloud_vpc_peering.scylladbcloud.connection_id +} + +// Output the private IP addresses of the nodes +output "scylladbcloud_cluster_ips" { + value = scylladbcloud_cluster.scylladbcloud.node_private_ips +} + +// Output the DNS names of the nodes +output "scylladbcloud_dns_names" { + value = scylladbcloud_cluster.scylladbcloud.node_dns_names +} + +output "scylladbcloud_datacenter" { + value = scylladbcloud_cluster.scylladbcloud.datacenter +} + +// Output the CQL password +output "scylladbcloud_cql_password" { + value = data.scylladbcloud_cql_auth.scylla.password # Get the CQL password for the cluster + sensitive = true # Mark the output as sensitive so it won't be shown in logs or output +} diff --git a/profile/stress-0.yml b/profile/stress-0.yml index a4c8049..46146c3 100644 --- a/profile/stress-0.yml +++ b/profile/stress-0.yml @@ -3,7 +3,7 @@ keyspace: pet_store_iot # Create the keyspace keyspace_definition: | - CREATE KEYSPACE pet_store_iot WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1}; + CREATE KEYSPACE pet_store_iot WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1}; # The name of the table table: sensor_data @@ -21,10 +21,10 @@ table_definition: | AND compaction = { 'class': 'TimeWindowCompactionStrategy', 'compaction_window_size': '1', - 'compaction_window_unit': 'DAYS', - 'max_sstable_age_days': '365', - 'timestamp_resolution': 'MICROSECONDS' - }; + 'compaction_window_unit': 'DAYS', + 'timestamp_resolution': 'MICROSECONDS' + } + AND speculative_retry = 'NONE'; # Define the columns for the table columnspec: @@ -63,4 +63,4 @@ queries: fields: 
samerow add_sensor_data: cql: INSERT INTO sensor_data (device_id, sensor_type, bucket, timestamp, value) VALUES (?, ?, ?, ?, ?); - fields: samerow \ No newline at end of file + fields: samerow diff --git a/profile/stress-1.yml b/profile/stress-1.yml index a4c8049..10f3bf3 100644 --- a/profile/stress-1.yml +++ b/profile/stress-1.yml @@ -3,7 +3,7 @@ keyspace: pet_store_iot # Create the keyspace keyspace_definition: | - CREATE KEYSPACE pet_store_iot WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1}; + CREATE KEYSPACE pet_store_iot WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1}; # The name of the table table: sensor_data @@ -22,9 +22,9 @@ table_definition: | 'class': 'TimeWindowCompactionStrategy', 'compaction_window_size': '1', 'compaction_window_unit': 'DAYS', - 'max_sstable_age_days': '365', 'timestamp_resolution': 'MICROSECONDS' - }; + } + AND speculative_retry = 'NONE'; # Define the columns for the table columnspec: @@ -63,4 +63,4 @@ queries: fields: samerow add_sensor_data: cql: INSERT INTO sensor_data (device_id, sensor_type, bucket, timestamp, value) VALUES (?, ?, ?, ?, ?); - fields: samerow \ No newline at end of file + fields: samerow diff --git a/profile/stress-2.yml b/profile/stress-2.yml index a4c8049..7ab6e14 100644 --- a/profile/stress-2.yml +++ b/profile/stress-2.yml @@ -3,7 +3,7 @@ keyspace: pet_store_iot # Create the keyspace keyspace_definition: | - CREATE KEYSPACE pet_store_iot WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1}; + CREATE KEYSPACE pet_store_iot WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1}; # The name of the table table: sensor_data @@ -21,10 +21,10 @@ table_definition: | AND compaction = { 'class': 'TimeWindowCompactionStrategy', 'compaction_window_size': '1', - 'compaction_window_unit': 'DAYS', - 'max_sstable_age_days': '365', + 'compaction_window_unit': 'DAYS', 'timestamp_resolution': 'MICROSECONDS' - }; + } + AND 
speculative_retry = 'NONE'; # Define the columns for the table columnspec: @@ -63,4 +63,4 @@ queries: fields: samerow add_sensor_data: cql: INSERT INTO sensor_data (device_id, sensor_type, bucket, timestamp, value) VALUES (?, ?, ?, ?, ?); - fields: samerow \ No newline at end of file + fields: samerow diff --git a/profile/stress-3.yml b/profile/stress-3.yml new file mode 100644 index 0000000..7ab6e14 --- /dev/null +++ b/profile/stress-3.yml @@ -0,0 +1,66 @@ +# The name of the keyspace +keyspace: pet_store_iot + +# Create the keyspace +keyspace_definition: | + CREATE KEYSPACE pet_store_iot WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1}; + +# The name of the table +table: sensor_data + +# The CQL command to create the table +table_definition: | + CREATE TABLE sensor_data ( + device_id text, + sensor_type text, + bucket timestamp, + timestamp timestamp, + value double, + PRIMARY KEY ((device_id, sensor_type, bucket), timestamp) + ) WITH CLUSTERING ORDER BY (timestamp DESC) + AND compaction = { + 'class': 'TimeWindowCompactionStrategy', + 'compaction_window_size': '1', + 'compaction_window_unit': 'DAYS', + 'timestamp_resolution': 'MICROSECONDS' + } + AND speculative_retry = 'NONE'; + +# Define the columns for the table +columnspec: + - name: device_id + size: FIXED(10) + population: uniform(1..10000) + - name: sensor_type + size: FIXED(10) + population: uniform(1..10) + - name: bucket + size: FIXED(10) + # Set the population of the bucket column to a sequence running from 1640995200 to 1683013974 + population: seq(1640995200..1683013974) + - name: timestamp + size: FIXED(10) + # Set the population of the timestamp column to a sequence running from 1640995200 to 1683013974 + population: seq(1640995200..1683013974) + - name: value + size: FIXED(10) + # Set the population of the value column to a Gaussian distribution over the range 10..50 + population: gaussian(10..50) + +# Define the 
insertion profile +insert: + partitions: fixed(1) + batchtype: UNLOGGED + select: fixed(1)/1 + +# Define the CQL queries for the stress test +queries: + get_sensor_data: + cql: SELECT * FROM sensor_data WHERE device_id = ? AND sensor_type = ? AND bucket = ? AND timestamp >= ? AND timestamp <= ? LIMIT 100; + fields: samerow + get_latest_sensor_data: + cql: SELECT * FROM sensor_data WHERE device_id = ? AND sensor_type = ? AND bucket = ? LIMIT 1; + fields: samerow + add_sensor_data: + cql: INSERT INTO sensor_data (device_id, sensor_type, bucket, timestamp, value) VALUES (?, ?, ?, ?, ?); + fields: samerow diff --git a/provider.tf b/provider.tf index ace31bb..feb7022 100644 --- a/provider.tf +++ b/provider.tf @@ -1,5 +1,5 @@ provider "aws" { region = var.scylla_cloud_region shared_credentials_files = ["${var.aws_creds}"] - profile = "DeveloperAccessRole" -} \ No newline at end of file + profile = "${var.aws_profile}" +} diff --git a/scylladb-cloud.tf b/scylladb-cloud.tf index 1b864df..280b7b6 100644 --- a/scylladb-cloud.tf +++ b/scylladb-cloud.tf @@ -39,24 +39,9 @@ resource "scylladbcloud_vpc_peering" "scylladbcloud" { cluster_id = scylladbcloud_cluster.scylladbcloud.id datacenter = scylladbcloud_cluster.scylladbcloud.datacenter peer_vpc_id = aws_vpc.custom_vpc.id - peer_cidr_block = var.custom_vpc + peer_cidr_blocks = [var.custom_vpc] peer_region = data.aws_region.current.name peer_account_id = data.aws_caller_identity.current.account_id allow_cql = true } -# Output the VPC peering connection ID -output "scylladbcloud_vpc_peering_connection_id" { - value = scylladbcloud_vpc_peering.scylladbcloud.connection_id -} - -// Output the private IP addresses of the nodes -output "scylladbcloud_cluster_ips" { - value = scylladbcloud_cluster.scylladbcloud.node_private_ips -} - -// Output the CQL password -output "scylladbcloud_cql_password" { - value = data.scylladbcloud_cql_auth.scylla.password # Get the CQL password for the cluster - sensitive = true # Mark the output as 
sensitive so it won't be shown in logs or output -} diff --git a/scylladb-loaders.tf b/scylladb-loaders.tf index 1a200b9..d17f7ee 100644 --- a/scylladb-loaders.tf +++ b/scylladb-loaders.tf @@ -2,7 +2,7 @@ # Create tags to identify the instances and sets timeouts for creating the instances. resource "aws_instance" "instance" { - count = length(aws_subnet.public_subnet.*.id) + count = var.loader_node_count ami = var.ami_id instance_type = var.instance_type subnet_id = element(aws_subnet.public_subnet.*.id, count.index) @@ -41,8 +41,9 @@ resource "aws_instance" "instance" { inline = [ "sudo systemctl stop scylla-server |tee scylla.log", "echo '/usr/bin/cassandra-stress user profile=./stress.yml n=${var.num_of_ops} cl=local_quorum no-warmup \"ops(insert=1)\" -rate threads=${var.num_threads} fixed=450000/s -mode native cql3 user=${var.scylla_user} password=${local.scylla_pass} -log file=populating.log -node ${local.scylla_ips}' > start.sh", - "echo '/usr/bin/cassandra-stress user profile=./stress.yml duration=24h no-warmup cl=local_quorum \"ops(insert=4,simple1=2)\" -rate threads=${var.num_threads} fixed=${var.throttle} -mode native cql3 user=${var.scylla_user} password=${local.scylla_pass} -log file=benchmarking.log -node ${local.scylla_ips}' > benchmark.sh", + "echo '/usr/bin/cassandra-stress user profile=./stress.yml duration=24h no-warmup cl=local_quorum \"ops(add_sensor_data=1,get_sensor_data=3)\" -rate threads=${var.num_threads} fixed=${var.throttle} -mode native cql3 user=${var.scylla_user} password=${local.scylla_pass} -log file=benchmarking.log -node ${local.scylla_ips}' > benchmark.sh", "sudo chmod +x start.sh benchmark.sh", + "echo '/home/scyllaadm/benchmark.sh' >> /home/scyllaadm/start.sh", "sudo mv /home/scyllaadm/cassandra-stress.service /etc/systemd/system/cassandra-stress.service ", "sudo mv /home/scyllaadm/cassandra-stress-benchmark.service /etc/systemd/system/cassandra-stress-benchmark.service ", "sudo systemctl daemon-reload ", "sudo systemctl start 
cassandra-stress.service", @@ -67,7 +68,7 @@ resource "aws_eip" "eip" { count = length(aws_instance.instance.*.id) # Create an Elastic IP for each EC2 instance instance = element(aws_instance.instance.*.id, count.index) # Associate the Elastic IP with the current EC2 instance public_ipv4_pool = "amazon" # Use the Amazon pool for public IPv4 addresses - vpc = true # Create a VPC Elastic IP address + domain = "vpc" # Create a VPC Elastic IP address tags = { # Add tags to the Elastic IP resource "Name" = "${var.custom_name}-EIP-${count.index}" diff --git a/service/cassandra-stress.service b/service/cassandra-stress.service index c4183a9..c316550 100644 --- a/service/cassandra-stress.service +++ b/service/cassandra-stress.service @@ -4,7 +4,7 @@ Description="Start Cassandra-stress to populate ScyllaDB" [Service] User=scyllaadm WorkingDirectory=/home/scyllaadm/ -ExecStart=/usr/bin/bash /home/scyllaadm/start.sh & wait && /usr/bin/bash /home/scyllaadm/benchmark.sh +ExecStart=/usr/bin/bash /home/scyllaadm/start.sh Type=simple [Install] -WantedBy=multi-user.target \ No newline at end of file +WantedBy=multi-user.target diff --git a/variables.tf b/variables.tf index c3ab4af..a779f71 100644 --- a/variables.tf +++ b/variables.tf @@ -36,6 +36,12 @@ variable "aws_creds" { default = "/home/user/.aws/credentials" } +# AWS Profile to Use +variable "aws_profile" { + description = "AWS Profile to Use" + type = string + default = "DeveloperAccessRole" +} ################################################ # @@ -54,7 +60,7 @@ variable "num_threads" { variable "num_of_ops" { description = "Total number of operations to run" type = string - default = "1000M" + default = "5M" } # Throttling for the Cassandra stress tool @@ -71,9 +77,16 @@ variable "instance_type" { default = "i4i.8xlarge" } +# Number of Loader instances to create +variable "loader_node_count" { + description = "Number of Loader instances to create" + type = string + default = "3" +} + # ScyllaDB Cloud instance type variable 
"scylla_node_type" { - description = "Type of ScyllaDB Cloud instance" + description = "Type of ScyllaDB Cloud instance (3,6,9,12,15,18,21)" type = string default = "i4i.4xlarge" } @@ -121,6 +134,6 @@ variable "scylla_node_count" { } locals { - scylla_ips = (join(",", [for s in scylladbcloud_cluster.scylladbcloud.node_private_ips : format("%s", s)])) + scylla_ips = (join(",", [for s in scylladbcloud_cluster.scylladbcloud.node_dns_names : format("%s", s)])) scylla_pass = data.scylladbcloud_cql_auth.scylla.password }