From 228664b113aeeaf5fdf98d498dd79844e3f46dc9 Mon Sep 17 00:00:00 2001 From: viivek46 Date: Thu, 10 Oct 2019 20:19:51 -0300 Subject: [PATCH 01/11] added code to make settings in cassandra-yaml.yml configurable --- operator/params.yaml | 450 ++++++++++++++++++++++++- operator/templates/cassandra-yaml.yaml | 233 ++++++------- operator/templates/service.yaml | 2 + operator/templates/stateful-set.yaml | 2 + 4 files changed, 567 insertions(+), 120 deletions(-) diff --git a/operator/params.yaml b/operator/params.yaml index 1b70d81b..a6a1aa4f 100644 --- a/operator/params.yaml +++ b/operator/params.yaml @@ -20,23 +20,23 @@ NODE_MEM_LIMIT: default: 4096 STORAGE_PORT: - description: "TODO" + description: "The port for inter-node communication." default: "7000" SSL_STORAGE_PORT: - description: "TODO" + description: "The port for inter-node communication over SSL." default: "7001" NATIVE_TRANSPORT_PORT: - description: "TODO" + description: "The port for CQL communication." default: "9042" RPC_PORT: - description: "TODO" + description: "The port for Thrift RPC communication." default: "9160" JMX_PORT: - description: "TODO" + description: "The JMX port that will be used to interface with the Cassandra application." default: "7199" DISK_SIZE: @@ -50,3 +50,443 @@ STORAGE_CLASS: PERSISTENT_STORAGE: description: "If false, ephemeral storage is used. Not recommended for production use." default: "true" + +CLUSTER_NAME: + description: "The name of the cluster managed by the Service" + default: "cassandra" + +SEED_PROVIDER_CLASS: + description: "The class within Cassandra that handles the seed logic." + default: "org.apache.cassandra.locator.SimpleSeedProvider" + +NUM_TOKENS: + description: "The number of tokens assigned to each node." + default: 256 + +HINTED_HANDOFF_ENABLED: + description: "If true, hinted handoff is enabled for the cluster." 
+ default: true + +MAX_HINT_WINDOW_IN_MS: + description: "The maximum amount of time, in ms, that hints are generated for an unresponsive node." + default: 10800000 + +HINTED_HANDOFF_THROTTLE_IN_KB: + description: "The maximum throttle per delivery thread in kilobytes per second." + default: 1024 + +MAX_HINTS_DELIVERY_THREADS: + description: "The maximum number of delivery threads for hinted handoff." + default: 2 + +BATCHLOG_REPLAY_THROTTLE_IN_KB: + description: "The total throttle for log replay in KB per second." + default: 1024 + +PARTITIONER: + description: "The partitioner used to distribute rows across the cluster. Murmur3Partitioner is the recommended setting. RandomPartitioner and ByteOrderedPartitioner are supported for legacy applications." + default: "org.apache.cassandra.dht.Murmur3Partitioner" + +KEY_CACHE_SAVE_PERIOD: + description: "The duration in seconds that keys are saved in cache. Saved caches greatly improve cold-start speeds and have relatively little effect on I/O." + default: 14400 + +ROW_CACHE_SIZE_IN_MB: + description: "Maximum size of the row cache in memory. Row cache can save more time than key_cache_size_in_mb, but is space-intensive because it contains the entire row. Use the row cache only for hot rows or static rows. 0 disables the row cache." + default: 0 + +ROW_CACHE_SAVE_PERIOD: + description: "Duration in seconds that rows are saved in cache. 0 disables caching." + default: 0 + +COMMITLOG_SYNC_PERIOD_IN_MS: + description: "The number of milliseconds between disk fsync calls." + default: 10000 + +COMMITLOG_SYNC_BATCH_WINDOW_IN_MS: + description: "Time to wait between batch fsyncs, if commitlog_sync is in batch mode then default value should be: 2" + default: "" + +COMMITLOG_SEGMENT_SIZE_IN_MB: + description: "The size of each commit log segment in Mb." + default: 32 + +CONCURRENT_READS: + description: "For workloads with more data than can fit in memory, the bottleneck is reads fetching data from disk. 
Setting to (16 times the number of drives) allows operations to queue low enough in the stack so that the OS and drives can reorder them." + default: 32 + +CONCURRENT_WRITES: + description: "Writes in Cassandra are rarely I/O bound, so the ideal number of concurrent writes depends on the number of CPU cores in your system. The recommended value is 8 times the number of cpu cores." + default: 32 + +CONCURRENT_COUNTER_WRITES: + description: "Counter writes read the current values before incrementing and writing them back. The recommended value is (16 times the number of drives)." + default: 32 + +MEMTABLE_ALLOCATION_TYPE: + description: "The type of allocations for the Cassandra memtable. heap_buffers keep all data on the JVM heap. offheap_buffers may reduce heap utilization for large string or binary values. offheap_objects may improve heap size for small integers or UUIDs as well. Both off heap options will increase read latency." + default: "heap_buffers" + +INDEX_SUMMARY_RESIZE_INTERVAL_IN_MINUTES: + description: "How frequently index summaries should be re-sampled in minutes. This is done periodically to redistribute memory from the fixed-size pool to SSTables proportional to their recent read rates." + default: 60 + +START_NATIVE_TRANSPORT: + description: "If true CQL is enabled." + default: true + +START_RPC: + description: "If true Thrift RPC is enabled. This is deprecated but may be necessary for legacy applications." + default: false + +RPC_KEEPALIVE: + description: "Enables or disables TCP keepalive for RPC connections." + default: true + +THRIFT_FRAMED_TRANSPORT_SIZE_IN_MB: + description: "Frame size (maximum field length) for Thrift." + default: 15 + +TOMBSTONE_WARN_THRESHOLD: + description: "The maximum number of tombstones a query can scan before warning." + default: 1000 + +TOMBSTONE_FAILURE_THRESHOLD: + description: "The maximum number of tombstones a query can scan before aborting." 
+ default: 100000 + +COLUMN_INDEX_SIZE_IN_KB: + description: "The granularity of the index of rows within a partition. For huge rows, decrease this setting to improve seek time. If you use key cache, be careful not to make this setting too large because key cache will be overwhelmed." + default: 64 + +BATCH_SIZE_WARN_THRESHOLD_IN_KB: + description: "Warn the operator on a batch size exceeding this value in kilobytes. Caution should be taken on increasing the size of this threshold as it can lead to node instability." + default: 5 + +BATCH_SIZE_FAIL_THRESHOLD_IN_KB: + description: "Fail batch sizes exceeding this value in kilobytes. Caution should be taken on increasing the size of this threshold as it can lead to node instability." + default: 50 + +COMPACTION_THROUGHPUT_MB_PER_SEC: + description: "Throttles compaction to the specified total throughput across the node. Compaction frequency varies with direct proportion to write throughput and is necessary to limit the SSTable size. The recommended value is 16 to 32 times the rate of write throughput (in MB/second)." + default: 16 + +SSTABLE_PREEMPTIVE_OPEN_INTERVAL_IN_MB: + description: "When compacting, the replacement opens SSTables before they are completely written and uses them in place of the prior SSTables for any range previously written. This setting helps to smoothly transfer reads between the SSTables by reducing page cache churn and keeps hot rows hot." + default: 50 + +READ_REQUEST_TIMEOUT_IN_MS: + description: "The time that the coordinator waits for read operations to complete in ms." + default: 5000 + +RANGE_REQUEST_TIMEOUT_IN_MS: + description: "The time that the coordinator waits for range scans to complete in ms." + default: 10000 + +WRITE_REQUEST_TIMEOUT_IN_MS: + description: "The time that the coordinator waits for write operations to complete in ms." 
+ default: 2000 + +COUNTER_WRITE_REQUEST_TIMEOUT_IN_MS: + description: "The time that the coordinator waits for counter write operations to complete in ms." + default: 5000 + +CAS_CONTENTION_TIMEOUT_IN_MS: + description: "The time for which the coordinator will retry CAS operations on the same row in ms." + default: 1000 + +TRUNCATE_REQUEST_TIMEOUT_IN_MS: + description: "The time that the coordinator waits for truncate operations to complete in ms." + default: 60000 + +REQUEST_TIMEOUT_IN_MS: + description: "The default timeout for all other requests in ms." + default: 10000 + +DYNAMIC_SNITCH_UPDATE_INTERVAL_IN_MS: + description: "The time, in ms, the snitch will wait before updating node scores." + default: 100 + +DYNAMIC_SNITCH_RESET_INTERVAL_IN_MS: + description: "The time, in ms, the snitch will wait before resetting node scores allowing bad nodes to recover." + default: 600000 + +DYNAMIC_SNITCH_BADNESS_THRESHOLD: + description: "Sets the performance threshold for dynamically routing client requests away from a poorly performing node." + default: 0.1 + +INTERNODE_COMPRESSION: + description: "Controls whether traffic between nodes is compressed. all compresses all traffic. none compresses no traffic. dc compresses between datacenters." + default: "all" + +MAX_HINTS_FILE_SIZE_IN_MB: + description: "The maximum size of the hints file in Mb." + default: 128 + +HINTS_FLUSH_PERIOD_IN_MS: + description: "The time, in ms, for the period in which hints are flushed to disk." + default: 10000 + +CONCURRENT_MATERIALIZED_VIEW_WRITES: + description: "The maximum number of concurrent writes to materialized views." + default: 32 + +COMMITLOG_TOTAL_SPACE_IN_MB: + description: "The total size of the commit log in Mb." 
+ default: 8192 + +AUTO_SNAPSHOT: + description: "Take a snapshot of the data before truncating a keyspace or dropping a table" + default: true + +KEY_CACHE_KEYS_TO_SAVE: + description: "The number of keys from the key cache to save" + default: 100 + +ROW_CACHE_KEYS_TO_SAVE: + description: "The number of keys from the row cache to save" + default: 100 + +COUNTER_CACHE_KEYS_TO_SAVE: + description: "The number of keys from the counter cache to save" + default: 100 + +FILE_CACHE_SIZE_IN_MB: + description: "The total memory to use for SSTable-reading buffers" + default: 512 + +MEMTABLE_HEAP_SPACE_IN_MB: + description: "The amount of on-heap memory allocated for memtables" + default: 2048 + +MEMTABLE_OFFHEAP_SPACE_IN_MB: + description: "The total amount of off-heap memory allocated for memtables" + default: 2048 + +MEMTABLE_CLEANUP_THRESHOLD: + description: "The ratio used for automatic memtable flush" + default: 0.11 + +MEMTABLE_FLUSH_WRITERS: + description: "The number of memtable flush writer threads" + default: 2 + +LISTEN_ON_BROADCAST_ADDRESS: + description: "Listen on the address set in broadcast_address property" + default: false + +INTERNODE_AUTHENTICATOR: + description: "The internode authentication backend" + default: "org.apache.cassandra.auth.AllowAllInternodeAuthenticator" + +NATIVE_TRANSPORT_MAX_THREADS: + description: "The maximum number of thread handling requests" + default: 128 + +NATIVE_TRANSPORT_MAX_FRAME_SIZE_IN_MB: + description: "The maximum allowed size of a frame" + default: 256 + +NATIVE_TRANSPORT_MAX_CONCURRENT_CONNECTIONS: + description: "The maximum number of concurrent client connections" + default: -1 + +NATIVE_TRANSPORT_MAX_CONCURRENT_CONNECTIONS_PER_IP: + description: "The maximum number of concurrent client connections per source IP address" + default: -1 + +RPC_MIN_THREADS: + description: "The minimum thread pool size for remote procedure calls" + default: 16 + +RPC_MAX_THREADS: + description: "The maximum thread pool size for remote 
procedure calls" + default: 2048 + +RPC_SEND_BUFF_SIZE_IN_BYTES: + description: "The sending socket buffer size in bytes for remote procedure calls" + default: 16384 + +RPC_RECV_BUFF_SIZE_IN_BYTES: + description: "The receiving socket buffer size for remote procedure calls" + default: 16384 + +CONCURRENT_COMPACTORS: + description: "The number of concurrent compaction processes allowed to run simultaneously on a node" + default: 1 + +STREAM_THROUGHPUT_OUTBOUND_MEGABITS_PER_SEC: + description: "The maximum throughput of all outbound streaming file transfers on a node" + default: 200 + +INTER_DC_STREAM_THROUGHPUT_OUTBOUND_MEGABITS_PER_SEC: + description: "The maximum throughput of all streaming file transfers between datacenters" + default: 200 + +STREAMING_KEEP_ALIVE_PERIOD_IN_SECS: + description: "Interval to send keep-alive messages. The stream session fails when a keep-alive message is not received for 2 keep-alive cycles." + default: 300 + +PHI_CONVICT_THRESHOLD: + description: "The sensitivity of the failure detector on an exponential scale" + default: 8 + +BUFFER_POOL_USE_HEAP_IF_EXHAUSTED: + description: "Allocate on-heap memory when the SSTable buffer pool is exhausted" + default: true + +DISK_OPTIMIZATION_STRATEGY: + description: "The strategy for optimizing disk reads" + default: "ssd" + +MAX_VALUE_SIZE_IN_MB: + description: "The maximum size of any value in SSTables" + default: 256 + +OTC_COALESCING_STRATEGY: + description: "The strategy to use for coalescing network messages" + default: "DISABLED" + +UNLOGGED_BATCH_ACROSS_PARTITIONS_WARN_THRESHOLD: + description: "Causes Cassandra to log a WARN message on any batches not of type LOGGED that span across more partitions than this limit." + default: 10 + +COMPACTION_LARGE_PARTITION_WARNING_THRESHOLD_MB: + description: "Cassandra logs a warning when compacting partitions larger than the set value." 
+ default: 100 + +REQUEST_SCHEDULER: + description: "The scheduler to handle incoming client requests according to a defined policy. This scheduler is useful for throttling client requests in single clusters containing multiple keyspaces." + default: "org.apache.cassandra.scheduler.NoScheduler" + +INTER_DC_TCP_NODELAY: + description: "Enable this property for inter-datacenter communication." + default: false + +TRACETYPE_QUERY_TTL: + description: "TTL for different trace types used during logging of the query process." + default: 86400 + +TRACETYPE_REPAIR_TTL: + description: "TTL for different trace types used during logging of the repair process." + default: 604800 + +GC_WARN_THRESHOLD_IN_MS: + description: "Any GC pause longer than this interval is logged at the WARN level." + default: 1000 + +WINDOWS_TIMER_INTERVAL: + description: "The default Windows kernel timer and scheduling resolution is 15.6ms for power conservation. Lowering this value on Windows can provide much tighter latency and better throughput, however some virtualized environments may see a negative performance impact from changing this setting below their system default." + default: 1 + +COUNTER_CACHE_SAVE_PERIOD: + description: "the amount of time after which Cassandra saves the counter cache (keys only)." + default: 7200 + +TRICKLE_FSYNC_INTERVAL_IN_KB: + description: "The size of the fsync in kilobytes." + default: 10240 + +TRICKLE_FSYNC: + description: "When set to true, causes fsync to force the operating system to flush the dirty buffers at the set interval " + default: false + +INCREMENTAL_BACKUPS: + description: "Backs up data updated since the last snapshot was taken. When enabled, Cassandra creates a hard link to each SSTable flushed or streamed locally in a backups subdirectory of the keyspace data." + default: false + +SNAPSHOT_BEFORE_COMPACTION: + description: "Enables or disables taking a snapshot before each compaction. 
A snapshot is useful to back up data when there is a data format change." + default: false + +CROSS_NODE_TIMEOUT: + description: "Enables operation timeout information exchange between nodes (to accurately measure request timeouts)." + default: false + +COMMIT_FAILURE_POLICY: + description: "Policy for commit disk failures." + default: "stop" + +KEY_CACHE_SIZE_IN_MB: + description: "A global cache setting for the maximum size of the key cache in memory (for all tables)." + default: "" + +COUNTER_CACHE_SIZE_IN_MB: + description: "When no value is set, Cassandra uses the smaller of 2.5% of the heap or 50MB." + default: "" + +COMMITLOG_SYNC: + description: "The method that Cassandra uses to acknowledge writes: periodic or batch." + default: "periodic" + +INDEX_SUMMARY_CAPACITY_IN_MB: + description: "Fixed memory pool size in MB for SSTable index summaries." + default: "" + +RPC_SERVER_TYPE: + description: "Cassandra provides three options for the RPC server. sync and hsha performance is about the same, but hsha uses less memory." + default: "sync" + +ENDPOINT_SNITCH: + description: "Set to a class that implements the IEndpointSnitch interface. Cassandra uses the snitch to locate nodes and route requests." + default: "SimpleSnitch" + +DISK_FAILURE_POLICY: + description: "The policy for how Cassandra responds to disk failure" + default: "stop" + +ENABLE_USER_DEFINED_FUNCTIONS: + description: "User defined functions (UDFs) present a security risk, since they are executed on the server side. UDFs are executed in a sandbox to contain the execution of malicious code." + default: false + +ENABLE_SCRIPTED_USER_DEFINED_FUNCTIONS: + description: "Java UDFs are always enabled, if enable_user_defined_functions is true. Enable this option to use UDFs with language javascript or any custom JSR-223 provider. This option has no effect if enable_user_defined_functions is false" + default: false + +CDC_ENABLED: + description: "Enable / disable CDC functionality on a per-node basis. 
This modifies the logic used for write path allocation rejection" + default: false + +CDC_TOTAL_SPACE_IN_MB: + description: "Total space to use for change-data-capture (CDC) logs on disk. " + default: 4096 + +CDC_FREE_SPACE_CHECK_INTERVAL_MS: + description: "Interval between checks for new available space for CDC-tracked tables when the cdc_total_space_in_mb threshold is reached and the CDCCompactor is running behind or experiencing back pressure." + default: 250 + +PREPARED_STATEMENTS_CACHE_SIZE_MB: + description: "Maximum size of the native protocol prepared statement cache" + default: "" + +THRIFT_PREPARED_STATEMENTS_CACHE_SIZE_MB: + description: "Maximum size of the Thrift prepared statement cache. Leave empty if you do not use Thrift." + default: "" + +COLUMN_INDEX_CACHE_SIZE_IN_KB: + description: "A threshold for the total size of all index entries for a partition that the database stores in the partition key cache." + default: 2 + +SLOW_QUERY_LOG_TIMEOUT_IN_MS: + description: "How long before a node logs slow queries. Select queries that exceed this value generate an aggregated log message to identify slow queries. To disable, set to 0." + default: 500 + +BACK_PRESSURE_ENABLED: + description: "Enable for the coordinator to apply the specified back pressure strategy to each mutation that is sent to replicas." + default: false + +BACK_PRESSURE_STRATEGY_CLASS: + description: "The back-pressure strategy applied. The default implementation, RateBasedBackPressure, takes three arguments: high ratio, factor, and flow type, and uses the ratio between incoming mutation responses and outgoing mutation requests." + default: "org.apache.cassandra.net.RateBasedBackPressure" + +BACK_PRESSURE_HIGH_RATIO: + description: "When outgoing mutations are below this value, they are rate limited according to the incoming rate decreased by the factor. When above this value, the rate limiting is increased by the factor." 
+ default: 0.9 + +BACK_PRESSURE_FACTOR: + description: "A number between 1 and 10. Increases or decreases rate limiting." + default: 5 + +BACK_PRESSURE_FLOW: + description: "The flow speed to apply rate limiting: FAST - rate limited to the speed of the fastest replica. SLOW - rate limit to the speed of the slowest replica." + default: "FAST" \ No newline at end of file diff --git a/operator/templates/cassandra-yaml.yaml b/operator/templates/cassandra-yaml.yaml index dfdce547..c4c157d0 100644 --- a/operator/templates/cassandra-yaml.yaml +++ b/operator/templates/cassandra-yaml.yaml @@ -28,7 +28,7 @@ data: # # If you already have a cluster with 1 token per node, and wish to migrate to # multiple tokens per node, see http://wiki.apache.org/cassandra/Operations - num_tokens: 256 + num_tokens: {{ .Params.NUM_TOKENS }} # Triggers automatic allocation of num_tokens tokens for this node. The allocation # algorithm attempts to choose tokens in a way that optimizes replicated load over @@ -49,7 +49,7 @@ data: # See http://wiki.apache.org/cassandra/HintedHandoff # May either be "true" or "false" to enable globally - hinted_handoff_enabled: true + hinted_handoff_enabled: {{ .Params.HINTED_HANDOFF_ENABLED }} # When hinted_handoff_enabled is true, a black list of data centers that will not # perform hinted handoff @@ -60,19 +60,19 @@ data: # this defines the maximum amount of time a dead host will have hints # generated. After it has been dead this long, new hints for it will not be # created until it has been seen alive and gone down again. - max_hint_window_in_ms: 10800000 # 3 hours + max_hint_window_in_ms: {{ .Params.MAX_HINT_WINDOW_IN_MS }} # 3 hours # Maximum throttle in KBs per second, per delivery thread. This will be # reduced proportionally to the number of nodes in the cluster. 
(If there # are two nodes in the cluster, each delivery thread will use the maximum # rate; if there are three, each will throttle to half of the maximum, # since we expect two nodes to be delivering hints simultaneously.) - hinted_handoff_throttle_in_kb: 1024 + hinted_handoff_throttle_in_kb: {{ .Params.HINTED_HANDOFF_THROTTLE_IN_KB }} # Number of threads with which to deliver hints; # Consider increasing this number when you have multi-dc deployments, since # cross-dc handoff tends to be slower - max_hints_delivery_threads: 2 + max_hints_delivery_threads: {{ .Params.MAX_HINTS_DELIVERY_THREADS }} # Directory where Cassandra should store hints. # If not set, the default directory is $CASSANDRA_HOME/data/hints. @@ -80,10 +80,10 @@ data: # How often hints should be flushed from the internal buffers to disk. # Will *not* trigger fsync. - hints_flush_period_in_ms: 10000 + hints_flush_period_in_ms: {{ .Params.HINTS_FLUSH_PERIOD_IN_MS }} # Maximum size for a single hints file, in megabytes. - max_hints_file_size_in_mb: 128 + max_hints_file_size_in_mb: {{ .Params.MAX_HINTS_FILE_SIZE_IN_MB }} # Compression to apply to the hint files. If omitted, hints files # will be written uncompressed. LZ4, Snappy, and Deflate compressors @@ -95,7 +95,7 @@ data: # Maximum throttle in KBs per second, total. This will be # reduced proportionally to the number of nodes in the cluster. - batchlog_replay_throttle_in_kb: 1024 + batchlog_replay_throttle_in_kb: {{ .Params.BATCHLOG_REPLAY_THROTTLE_IN_KB }} # Authentication backend, implementing IAuthenticator; used to identify users # Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthenticator, @@ -187,7 +187,7 @@ data: # compatibility include RandomPartitioner, ByteOrderedPartitioner, and # OrderPreservingPartitioner. # - partitioner: org.apache.cassandra.dht.Murmur3Partitioner + partitioner: {{ .Params.PARTITIONER }} # Directories where Cassandra should store data on disk. 
Cassandra # will spread data evenly across them, subject to the granularity of @@ -204,7 +204,7 @@ data: # Enable / disable CDC functionality on a per-node basis. This modifies the logic used # for write path allocation rejection (standard: never reject. cdc: reject Mutation # containing a CDC-enabled table if at space limit in cdc_raw_directory). - cdc_enabled: false + cdc_enabled: {{ .Params.CDC_ENABLED }} # CommitLogSegments are moved to this directory on flush if cdc_enabled: true and the # segment contains mutations for a CDC-enabled table. This should be placed on a @@ -233,7 +233,7 @@ data: # # ignore # ignore fatal errors and let requests fail, as in pre-1.2 Cassandra - disk_failure_policy: stop + disk_failure_policy: {{ .Params.DISK_FAILURE_POLICY }} # Policy for commit disk failures: # @@ -250,7 +250,7 @@ data: # # ignore # ignore fatal errors and let the batches fail - commit_failure_policy: stop + commit_failure_policy: {{ .Params.COMMIT_FAILURE_POLICY }} # Maximum size of the native protocol prepared statement cache # @@ -269,7 +269,7 @@ data: # Constantly re-preparing statements is a performance penalty. # # Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater - prepared_statements_cache_size_mb: + prepared_statements_cache_size_mb: {{ .Params.PREPARED_STATEMENTS_CACHE_SIZE_MB }} # Maximum size of the Thrift prepared statement cache # @@ -278,7 +278,7 @@ data: # See description of 'prepared_statements_cache_size_mb' above for more information. # # Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater - thrift_prepared_statements_cache_size_mb: + thrift_prepared_statements_cache_size_mb: {{ .Params.THRIFT_PREPARED_STATEMENTS_CACHE_SIZE_MB }} # Maximum size of the key cache in memory. # @@ -292,7 +292,7 @@ data: # NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. # # Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache. 
- key_cache_size_in_mb: + key_cache_size_in_mb: {{ .Params.KEY_CACHE_SIZE_IN_MB }} # Duration in seconds after which Cassandra should # save the key cache. Caches are saved to saved_caches_directory as @@ -303,11 +303,11 @@ data: # has limited use. # # Default is 14400 or 4 hours. - key_cache_save_period: 14400 + key_cache_save_period: {{ .Params.KEY_CACHE_SAVE_PERIOD }} # Number of keys from the key cache to save # Disabled by default, meaning all keys are going to be saved - # key_cache_keys_to_save: 100 + key_cache_keys_to_save: {{ .Params.KEY_CACHE_KEYS_TO_SAVE }} # Row cache implementation class name. Available implementations: # @@ -327,7 +327,7 @@ data: # headroom for OS block level cache. Do never allow your system to swap. # # Default value is 0, to disable row caching. - row_cache_size_in_mb: 0 + row_cache_size_in_mb: {{ .Params.ROW_CACHE_SIZE_IN_MB }} # Duration in seconds after which Cassandra should save the row cache. # Caches are saved to saved_caches_directory as specified in this configuration file. @@ -337,11 +337,11 @@ data: # has limited use. # # Default is 0 to disable saving the row cache. - row_cache_save_period: 0 + row_cache_save_period: {{ .Params.ROW_CACHE_SAVE_PERIOD }} # Number of keys from the row cache to save. # Specify 0 (which is the default), meaning all keys are going to be saved - # row_cache_keys_to_save: 100 + row_cache_keys_to_save: {{ .Params.ROW_CACHE_KEYS_TO_SAVE }} # Maximum size of the counter cache in memory. # @@ -356,18 +356,18 @@ data: # # Default value is empty to make it "auto" (min(2.5% of Heap (in MB), 50MB)). Set to 0 to disable counter cache. # NOTE: if you perform counter deletes and rely on low gcgs, you should disable the counter cache. - counter_cache_size_in_mb: + counter_cache_size_in_mb: {{ .Params.COUNTER_CACHE_SIZE_IN_MB }} # Duration in seconds after which Cassandra should # save the counter cache (keys only). Caches are saved to saved_caches_directory as # specified in this configuration file. 
# # Default is 7200 or 2 hours. - counter_cache_save_period: 7200 + counter_cache_save_period: {{ .Params.COUNTER_CACHE_SAVE_PERIOD }} # Number of keys from the counter cache to save # Disabled by default, meaning all keys are going to be saved - # counter_cache_keys_to_save: 100 + counter_cache_keys_to_save: {{ .Params.COUNTER_CACHE_KEYS_TO_SAVE }} # saved caches # If not set, the default directory is $CASSANDRA_HOME/data/saved_caches. @@ -383,13 +383,15 @@ data: # concurrent_writes for the same reason.) # # commitlog_sync: batch - # commitlog_sync_batch_window_in_ms: 2 + {{ if .Params.COMMITLOG_SYNC_BATCH_WINDOW_IN_MS }} + commitlog_sync_batch_window_in_ms: {{ .Params.COMMITLOG_SYNC_BATCH_WINDOW_IN_MS }} + {{ end }} # # the other option is "periodic" where writes may be acked immediately # and the CommitLog is simply synced every commitlog_sync_period_in_ms # milliseconds. - commitlog_sync: periodic - commitlog_sync_period_in_ms: 10000 + commitlog_sync: {{ .Params.COMMITLOG_SYNC }} + {{ if ne .Params.COMMITLOG_SYNC "batch" }}commitlog_sync_period_in_ms: {{ .Params.COMMITLOG_SYNC_PERIOD_IN_MS }}{{ end }} # The size of the individual commitlog file segments. A commitlog # segment may be archived, deleted, or recycled once all the data @@ -407,7 +409,7 @@ data: # NOTE: If max_mutation_size_in_kb is set explicitly then commitlog_segment_size_in_mb must # be set to at least twice the size of max_mutation_size_in_kb / 1024 # - commitlog_segment_size_in_mb: 32 + commitlog_segment_size_in_mb: {{ .Params.COMMITLOG_SEGMENT_SIZE_IN_MB }} # Compression to apply to the commit log. If omitted, the commit log # will be written uncompressed. LZ4, Snappy, and Deflate compressors @@ -424,7 +426,7 @@ data: # Cassandra nodes use this list of hosts to find each other and learn # the topology of the ring. You must change this if you are running # multiple nodes! 
- - class_name: org.apache.cassandra.locator.SimpleSeedProvider + - class_name: {{ .Params.SEED_PROVIDER_CLASS }} parameters: # Here we follow the advice from DataStax and make the first 3 # nodes in a DC the seed nodes. @@ -446,13 +448,13 @@ data: # On the other hand, since writes are almost never IO bound, the ideal # number of "concurrent_writes" is dependent on the number of cores in # your system; (8 * number_of_cores) is a good rule of thumb. - concurrent_reads: 32 - concurrent_writes: 32 - concurrent_counter_writes: 32 + concurrent_reads: {{ .Params.CONCURRENT_READS }} + concurrent_writes: {{ .Params.CONCURRENT_WRITES }} + concurrent_counter_writes: {{ .Params.CONCURRENT_COUNTER_WRITES }} # For materialized view writes, as there is a read involved, so this should # be limited by the less of concurrent reads or concurrent writes. - concurrent_materialized_view_writes: 32 + concurrent_materialized_view_writes: {{ .Params.CONCURRENT_MATERIALIZED_VIEW_WRITES }} # Maximum memory to use for sstable chunk cache and buffer pooling. # 32MB of this are reserved for pooling buffers, the rest is used as an @@ -462,26 +464,26 @@ data: # overhead which is roughly 128 bytes per chunk (i.e. 0.2% of the reserved size # if the default 64k chunk size is used). # Memory is only allocated when needed. - # file_cache_size_in_mb: 512 + file_cache_size_in_mb: {{ .Params.FILE_CACHE_SIZE_IN_MB }} # Flag indicating whether to allocate on or off heap when the sstable buffer # pool is exhausted, that is when it has exceeded the maximum memory # file_cache_size_in_mb, beyond which it will not cache buffers but allocate on request. 
- # buffer_pool_use_heap_if_exhausted: true + buffer_pool_use_heap_if_exhausted: {{ .Params.BUFFER_POOL_USE_HEAP_IF_EXHAUSTED }} # The strategy for optimizing disk read # Possible values are: # ssd (for solid state disks, the default) # spinning (for spinning disks) - # disk_optimization_strategy: ssd + disk_optimization_strategy: {{ .Params.DISK_OPTIMIZATION_STRATEGY }} # Total permitted memory to use for memtables. Cassandra will stop # accepting writes when the limit is exceeded until a flush completes, # and will trigger a flush based on memtable_cleanup_threshold # If omitted, Cassandra will set both to 1/4 the size of the heap. - # memtable_heap_space_in_mb: 2048 - # memtable_offheap_space_in_mb: 2048 + memtable_heap_space_in_mb: {{ .Params.MEMTABLE_HEAP_SPACE_IN_MB }} + memtable_offheap_space_in_mb: {{ .Params.MEMTABLE_OFFHEAP_SPACE_IN_MB }} # memtable_cleanup_threshold is deprecated. The default calculation # is the only reasonable choice. See the comments on memtable_flush_writers @@ -494,7 +496,7 @@ data: # under heavy write load. # # memtable_cleanup_threshold defaults to 1 / (memtable_flush_writers + 1) - # memtable_cleanup_threshold: 0.11 + memtable_cleanup_threshold: {{ .Params.MEMTABLE_CLEANUP_THRESHOLD }} # Specify the way Cassandra allocates and manages memtable memory. # Options are: @@ -507,7 +509,7 @@ data: # # offheap_objects # off heap objects - memtable_allocation_type: heap_buffers + memtable_allocation_type: {{ .Params.MEMTABLE_ALLOCATION_TYPE }} # Total space to use for commit logs on disk. # @@ -518,7 +520,7 @@ data: # The default value is the smaller of 8192, and 1/4 of the total space # of the commitlog volume. # - # commitlog_total_space_in_mb: 8192 + commitlog_total_space_in_mb: {{ .Params.COMMITLOG_TOTAL_SPACE_IN_MB }} # This sets the number of memtable flush writer threads per disk # as well as the total number of memtables that can be flushed concurrently. @@ -547,7 +549,7 @@ data: # and flush size and frequency. 
More is not better you just need enough flush writers # to never stall waiting for flushing to free memory. # - #memtable_flush_writers: 2 + memtable_flush_writers: {{ .Params.MEMTABLE_FLUSH_WRITERS }} # Total space to use for change-data-capture logs on disk. # @@ -557,12 +559,12 @@ data: # # The default value is the min of 4096 mb and 1/8th of the total space # of the drive where cdc_raw_directory resides. - # cdc_total_space_in_mb: 4096 + cdc_total_space_in_mb: {{ .Params.CDC_TOTAL_SPACE_IN_MB }} # When we hit our cdc_raw limit and the CDCCompactor is either running behind # or experiencing backpressure, we check at the following interval to see if any # new space for cdc-tracked tables has been made available. Default to 250ms - # cdc_free_space_check_interval_ms: 250 + cdc_free_space_check_interval_ms: {{ .Params.CDC_FREE_SPACE_CHECK_INTERVAL_MS }} # A fixed memory pool size in MB for for SSTable index summaries. If left # empty, this will default to 5% of the heap size. If the memory usage of @@ -570,30 +572,30 @@ data: # shrink their index summaries in order to meet this limit. However, this # is a best-effort process. In extreme conditions Cassandra may need to use # more than this amount of memory. - index_summary_capacity_in_mb: + index_summary_capacity_in_mb: {{ .Params.INDEX_SUMMARY_CAPACITY_IN_MB }} # How frequently index summaries should be resampled. This is done # periodically to redistribute memory from the fixed-size pool to sstables # proportional their recent read rates. Setting to -1 will disable this # process, leaving existing index summaries at their current sampling level. - index_summary_resize_interval_in_minutes: 60 + index_summary_resize_interval_in_minutes: {{ .Params.INDEX_SUMMARY_RESIZE_INTERVAL_IN_MINUTES }} # Whether to, when doing sequential writing, fsync() at intervals in # order to force the operating system to flush the dirty # buffers. Enable this to avoid sudden dirty buffer flushing from # impacting read latencies. 
Almost always a good idea on SSDs; not # necessarily on platters. - trickle_fsync: false - trickle_fsync_interval_in_kb: 10240 + trickle_fsync: {{ .Params.TRICKLE_FSYNC }} + trickle_fsync_interval_in_kb: {{ .Params.TRICKLE_FSYNC_INTERVAL_IN_KB }} # TCP port, for commands and data # For security reasons, you should not expose this port to the internet. Firewall it if needed. - storage_port: 7000 + storage_port: {{ .Params.STORAGE_PORT }} # SSL port, for encrypted communication. Unused unless enabled in # encryption_options # For security reasons, you should not expose this port to the internet. Firewall it if needed. - ssl_storage_port: 7001 + ssl_storage_port: {{ .Params.SSL_STORAGE_PORT }} # Address or interface to bind to and tell other Cassandra nodes to connect to. # You _must_ change this if you want multiple nodes to be able to communicate! @@ -630,19 +632,19 @@ data: # interfaces. # Ignore this property if the network configuration automatically # routes between the public and private networks such as EC2. - # listen_on_broadcast_address: false + listen_on_broadcast_address: {{ .Params.LISTEN_ON_BROADCAST_ADDRESS }} # Internode authentication backend, implementing IInternodeAuthenticator; # used to allow/disallow connections from peer nodes. - # internode_authenticator: org.apache.cassandra.auth.AllowAllInternodeAuthenticator + internode_authenticator: {{ .Params.INTERNODE_AUTHENTICATOR }} # Whether to start the native transport server. # Please note that the address on which the native transport is bound is the # same as the rpc_address. The port however is different and specified below. - start_native_transport: true + start_native_transport: {{ .Params.START_NATIVE_TRANSPORT }} # port for the CQL native transport to listen for clients on # For security reasons, you should not expose this port to the internet. Firewall it if needed. 
- native_transport_port: 9042 + native_transport_port: {{ .Params.NATIVE_TRANSPORT_PORT }} # Enabling native transport encryption in client_encryption_options allows you to either use # encryption for the standard port or to use a dedicated, additional port along with the unencrypted # standard native_transport_port. @@ -655,23 +657,23 @@ data: # This is similar to rpc_max_threads though the default differs slightly (and # there is no native_transport_min_threads, idle threads will always be stopped # after 30 seconds). - # native_transport_max_threads: 128 + native_transport_max_threads: {{ .Params.NATIVE_TRANSPORT_MAX_THREADS }} # # The maximum size of allowed frame. Frame (requests) larger than this will # be rejected as invalid. The default is 256MB. If you're changing this parameter, # you may want to adjust max_value_size_in_mb accordingly. This should be positive and less than 2048. - # native_transport_max_frame_size_in_mb: 256 + native_transport_max_frame_size_in_mb: {{ .Params.NATIVE_TRANSPORT_MAX_FRAME_SIZE_IN_MB }} # The maximum number of concurrent client connections. # The default is -1, which means unlimited. - # native_transport_max_concurrent_connections: -1 + native_transport_max_concurrent_connections: {{ .Params.NATIVE_TRANSPORT_MAX_CONCURRENT_CONNECTIONS }} # The maximum number of concurrent client connections per source ip. # The default is -1, which means unlimited. - # native_transport_max_concurrent_connections_per_ip: -1 + native_transport_max_concurrent_connections_per_ip: {{ .Params.NATIVE_TRANSPORT_MAX_CONCURRENT_CONNECTIONS_PER_IP }} # Whether to start the thrift rpc server. - start_rpc: false + start_rpc: {{ .Params.START_RPC }} # The address or interface to bind the Thrift RPC service and native transport # server to. 
@@ -699,7 +701,7 @@ data: # rpc_interface_prefer_ipv6: false # port for Thrift to listen for clients on - rpc_port: 9160 + rpc_port: {{ .Params.RPC_PORT }} # RPC address to broadcast to drivers and other Cassandra nodes. This cannot # be set to 0.0.0.0. If left blank, this will be set to the value of @@ -708,7 +710,7 @@ data: # broadcast_rpc_address: 1.2.3.4 # enable or disable keepalive on rpc/native connections - rpc_keepalive: true + rpc_keepalive: {{ .Params.RPC_KEEPALIVE }} # Cassandra provides two out-of-the-box options for the RPC Server: # @@ -730,7 +732,7 @@ data: # # Alternatively, can provide your own RPC server by providing the fully-qualified class name # of an o.a.c.t.TServerFactory that can create an instance of it. - rpc_server_type: sync + rpc_server_type: {{ .Params.RPC_SERVER_TYPE }} # Uncomment rpc_min|max_thread to set request pool size limits. # @@ -742,12 +744,12 @@ data: # encouraged to set a maximum that makes sense for you in production, but do keep in mind that # rpc_max_threads represents the maximum number of client requests this server may execute concurrently. # - # rpc_min_threads: 16 - # rpc_max_threads: 2048 + rpc_min_threads: {{ .Params.RPC_MIN_THREADS }} + rpc_max_threads: {{ .Params.RPC_MAX_THREADS }} # uncomment to set socket buffer sizes on rpc connections - # rpc_send_buff_size_in_bytes: - # rpc_recv_buff_size_in_bytes: + rpc_send_buff_size_in_bytes: {{ .Params.RPC_SEND_BUFF_SIZE_IN_BYTES }} + rpc_recv_buff_size_in_bytes: {{ .Params.RPC_RECV_BUFF_SIZE_IN_BYTES }} # Uncomment to set socket buffer size for internode communication # Note that when setting this, the buffer size is limited by net.core.wmem_max @@ -766,25 +768,25 @@ data: # internode_recv_buff_size_in_bytes: # Frame size for thrift (maximum message length). 
- thrift_framed_transport_size_in_mb: 15 + thrift_framed_transport_size_in_mb: {{ .Params.THRIFT_FRAMED_TRANSPORT_SIZE_IN_MB }} # Set to true to have Cassandra create a hard link to each sstable # flushed or streamed locally in a backups/ subdirectory of the # keyspace data. Removing these links is the operator's # responsibility. - incremental_backups: false + incremental_backups: {{ .Params.INCREMENTAL_BACKUPS }} # Whether or not to take a snapshot before each compaction. Be # careful using this option, since Cassandra won't clean up the # snapshots for you. Mostly useful if you're paranoid when there # is a data format change. - snapshot_before_compaction: false + snapshot_before_compaction: {{ .Params.SNAPSHOT_BEFORE_COMPACTION }} # Whether or not a snapshot is taken of the data before keyspace truncation # or dropping of column families. The STRONGLY advised default of true # should be used to provide data safety. If you set this flag to false, you will # lose data on truncation or drop. - auto_snapshot: true + auto_snapshot: {{ .Params.AUTO_SNAPSHOT }} # Granularity of the collation index of rows within a partition. # Increase if your rows are large, or if you have a very large @@ -796,7 +798,7 @@ data: # - but, Cassandra will keep the collation index in memory for hot # rows (as part of the key cache), so a larger granularity means # you can cache more hot rows - column_index_size_in_kb: 64 + column_index_size_in_kb: {{ .Params.COLUMN_INDEX_SIZE_IN_KB }} # Per sstable indexed key cache entries (the collation index in memory # mentioned above) exceeding this size will not be held on heap. @@ -805,7 +807,7 @@ data: # # Note that this size refers to the size of the # serialized index information and not the size of the partition. - column_index_cache_size_in_kb: 2 + column_index_cache_size_in_kb: {{ .Params.COLUMN_INDEX_CACHE_SIZE_IN_KB }} # Number of simultaneous compactions to allow, NOT including # validation "compactions" for anti-entropy repair. 
Simultaneous @@ -821,7 +823,7 @@ data: # # If your data directories are backed by SSD, you should increase this # to the number of cores. - #concurrent_compactors: 1 + concurrent_compactors: {{ .Params.CONCURRENT_COMPACTORS }} # Throttles compaction to the given total throughput across the entire # system. The faster you insert data, the faster you need to compact in @@ -829,50 +831,50 @@ data: # 16 to 32 times the rate you are inserting data is more than sufficient. # Setting this to 0 disables throttling. Note that this account for all types # of compaction, including validation compaction. - compaction_throughput_mb_per_sec: 16 + compaction_throughput_mb_per_sec: {{ .Params.COMPACTION_THROUGHPUT_MB_PER_SEC }} # When compacting, the replacement sstable(s) can be opened before they # are completely written, and used in place of the prior sstables for # any range that has been written. This helps to smoothly transfer reads # between the sstables, reducing page cache churn and keeping hot rows hot - sstable_preemptive_open_interval_in_mb: 50 + sstable_preemptive_open_interval_in_mb: {{ .Params.SSTABLE_PREEMPTIVE_OPEN_INTERVAL_IN_MB }} # Throttles all outbound streaming file transfers on this node to the # given total throughput in Mbps. This is necessary because Cassandra does # mostly sequential IO when streaming data during bootstrap or repair, which # can lead to saturating the network connection and degrading rpc performance. # When unset, the default is 200 Mbps or 25 MB/s. 
- # stream_throughput_outbound_megabits_per_sec: 200 + # stream_throughput_outbound_megabits_per_sec: {{ .Params.STREAM_THROUGHPUT_OUTBOUND_MEGABITS_PER_SEC }} # Throttles all streaming file transfer between the datacenters, # this setting allows users to throttle inter dc stream throughput in addition # to throttling all network stream traffic as configured with # stream_throughput_outbound_megabits_per_sec # When unset, the default is 200 Mbps or 25 MB/s - # inter_dc_stream_throughput_outbound_megabits_per_sec: 200 + # inter_dc_stream_throughput_outbound_megabits_per_sec: {{ .Params.INTER_DC_STREAM_THROUGHPUT_OUTBOUND_MEGABITS_PER_SEC }} # How long the coordinator should wait for read operations to complete - read_request_timeout_in_ms: 5000 + read_request_timeout_in_ms: {{ .Params.READ_REQUEST_TIMEOUT_IN_MS }} # How long the coordinator should wait for seq or index scans to complete - range_request_timeout_in_ms: 10000 + range_request_timeout_in_ms: {{ .Params.RANGE_REQUEST_TIMEOUT_IN_MS }} # How long the coordinator should wait for writes to complete - write_request_timeout_in_ms: 2000 + write_request_timeout_in_ms: {{ .Params.WRITE_REQUEST_TIMEOUT_IN_MS }} # How long the coordinator should wait for counter writes to complete - counter_write_request_timeout_in_ms: 5000 + counter_write_request_timeout_in_ms: {{ .Params.COUNTER_WRITE_REQUEST_TIMEOUT_IN_MS }} # How long a coordinator should continue to retry a CAS operation # that contends with other proposals for the same row - cas_contention_timeout_in_ms: 1000 + cas_contention_timeout_in_ms: {{ .Params.CAS_CONTENTION_TIMEOUT_IN_MS }} # How long the coordinator should wait for truncates to complete # (This can be much longer, because unless auto_snapshot is disabled # we need to flush first so we can snapshot before removing the data.) 
- truncate_request_timeout_in_ms: 60000 + truncate_request_timeout_in_ms: {{ .Params.TRUNCATE_REQUEST_TIMEOUT_IN_MS }} # The default timeout for other, miscellaneous operations - request_timeout_in_ms: 10000 + request_timeout_in_ms: {{ .Params.REQUEST_TIMEOUT_IN_MS }} # How long before a node logs slow queries. Select queries that take longer than # this timeout to execute, will generate an aggregated log message, so that slow queries # can be identified. Set this value to zero to disable slow query logging. - slow_query_log_timeout_in_ms: 500 + slow_query_log_timeout_in_ms: {{ .Params.SLOW_QUERY_LOG_TIMEOUT_IN_MS }} # Enable operation timeout information exchange between nodes to accurately # measure request timeouts. If disabled, replicas will assume that requests @@ -882,7 +884,7 @@ data: # # Warning: before enabling this property make sure to ntp is installed # and the times are synchronized between the nodes. - cross_node_timeout: false + cross_node_timeout: {{ .Params.CROSS_NODE_TIMEOUT }} # Set keep-alive period for streaming # This node will send a keep-alive message periodically with this period. @@ -890,11 +892,11 @@ data: # 2 keep-alive cycles the stream session times out and fail # Default value is 300s (5 minutes), which means stalled stream # times out in 10 minutes by default - # streaming_keep_alive_period_in_secs: 300 + streaming_keep_alive_period_in_secs: {{ .Params.STREAMING_KEEP_ALIVE_PERIOD_IN_SECS }} # phi value that must be reached for a host to be marked down. # most users should never need to adjust this. - # phi_convict_threshold: 8 + phi_convict_threshold: {{ .Params.PHI_CONVICT_THRESHOLD }} # endpoint_snitch -- Set this to a class that implements # IEndpointSnitch. The snitch has two functions: @@ -959,14 +961,14 @@ data: # # You can use a custom Snitch by setting this to the full class name # of the snitch, which will be assumed to be on your classpath. 
- endpoint_snitch: SimpleSnitch + endpoint_snitch: {{ .Params.ENDPOINT_SNITCH }} # controls how often to perform the more expensive part of host score # calculation - dynamic_snitch_update_interval_in_ms: 100 + dynamic_snitch_update_interval_in_ms: {{ .Params.DYNAMIC_SNITCH_UPDATE_INTERVAL_IN_MS }} # controls how often to reset all host scores, allowing a bad host to # possibly recover - dynamic_snitch_reset_interval_in_ms: 600000 + dynamic_snitch_reset_interval_in_ms: {{ .Params.DYNAMIC_SNITCH_RESET_INTERVAL_IN_MS }} # if set greater than zero and read_repair_chance is < 1.0, this will allow # 'pinning' of replicas to hosts in order to increase cache capacity. # The badness threshold will control how much worse the pinned host has to be @@ -974,7 +976,7 @@ data: # expressed as a double which represents a percentage. Thus, a value of # 0.2 means Cassandra would continue to prefer the static snitch values # until the pinned host was 20% worse than the fastest. - dynamic_snitch_badness_threshold: 0.1 + dynamic_snitch_badness_threshold: {{ .Params.DYNAMIC_SNITCH_BADNESS_THRESHOLD }} # request_scheduler -- Set this to a class that implements # RequestScheduler, which will schedule incoming client requests @@ -987,7 +989,7 @@ data: # client requests to a node with a separate queue for each # request_scheduler_id. The scheduler is further customized by # request_scheduler_options as described below. - request_scheduler: org.apache.cassandra.scheduler.NoScheduler + request_scheduler: {{ .Params.REQUEST_SCHEDULER }} # Scheduler Options vary based on the type of scheduler # @@ -1084,17 +1086,17 @@ data: # # none # nothing is compressed. - internode_compression: dc + internode_compression: {{ .Params.INTERNODE_COMPRESSION }} # Enable or disable tcp_nodelay for inter-dc communication. 
# Disabling it will result in larger (but fewer) network packets being sent, # reducing overhead from the TCP protocol itself, at the cost of increasing # latency if you block for cross-datacenter responses. - inter_dc_tcp_nodelay: false + inter_dc_tcp_nodelay: {{ .Params.INTER_DC_TCP_NODELAY }} # TTL for different trace types used during logging of the repair process. - tracetype_query_ttl: 86400 - tracetype_repair_ttl: 604800 + tracetype_query_ttl: {{ .Params.TRACETYPE_QUERY_TTL }} + tracetype_repair_ttl: {{ .Params.TRACETYPE_REPAIR_TTL }} # By default, Cassandra logs GC Pauses greater than 200 ms at INFO level # This threshold can be adjusted to minimize logging if necessary @@ -1104,24 +1106,24 @@ data: # INFO level # UDFs (user defined functions) are disabled by default. # As of Cassandra 3.0 there is a sandbox in place that should prevent execution of evil code. - enable_user_defined_functions: false + enable_user_defined_functions: {{ .Params.ENABLE_USER_DEFINED_FUNCTIONS }} # Enables scripted UDFs (JavaScript UDFs). # Java UDFs are always enabled, if enable_user_defined_functions is true. # Enable this option to be able to use UDFs with "language javascript" or any custom JSR-223 provider. # This option has no effect, if enable_user_defined_functions is false. - enable_scripted_user_defined_functions: false + enable_scripted_user_defined_functions: {{ .Params.ENABLE_SCRIPTED_USER_DEFINED_FUNCTIONS }} # Enables materialized view creation on this node. # Materialized views are considered experimental and are not recommended for production use. - enable_materialized_views: true + #enable_materialized_views: true # The default Windows kernel timer and scheduling resolution is 15.6ms for power conservation. # Lowering this value on Windows can provide much tighter latency and better throughput, however # some virtualized environments may see a negative performance impact from changing this setting # below their system default. 
The sysinternals 'clockres' tool can confirm your system's default # setting. - windows_timer_interval: 1 + windows_timer_interval: {{ .Params.WINDOWS_TIMER_INTERVAL }} # Enables encrypting data at-rest (on disk). Different key providers can be plugged in, but the default reads from @@ -1165,36 +1167,36 @@ data: # Adjust the thresholds here if you understand the dangers and want to # scan more tombstones anyway. These thresholds may also be adjusted at runtime # using the StorageService mbean. - tombstone_warn_threshold: 1000 - tombstone_failure_threshold: 100000 + tombstone_warn_threshold: {{ .Params.TOMBSTONE_WARN_THRESHOLD }} + tombstone_failure_threshold: {{ .Params.TOMBSTONE_FAILURE_THRESHOLD }} # Log WARN on any multiple-partition batch size exceeding this value. 5kb per batch by default. # Caution should be taken on increasing the size of this threshold as it can lead to node instability. - batch_size_warn_threshold_in_kb: 5 + batch_size_warn_threshold_in_kb: {{ .Params.BATCH_SIZE_WARN_THRESHOLD_IN_KB }} # Fail any multiple-partition batch exceeding this value. 50kb (10x warn threshold) by default. 
- batch_size_fail_threshold_in_kb: 50 + batch_size_fail_threshold_in_kb: {{ .Params.BATCH_SIZE_FAIL_THRESHOLD_IN_KB }} # Log WARN on any batches not of type LOGGED than span across more partitions than this limit - unlogged_batch_across_partitions_warn_threshold: 10 + unlogged_batch_across_partitions_warn_threshold: {{ .Params.UNLOGGED_BATCH_ACROSS_PARTITIONS_WARN_THRESHOLD }} # Log a warning when compacting partitions larger than this value - compaction_large_partition_warning_threshold_mb: 100 + compaction_large_partition_warning_threshold_mb: {{ .Params.COMPACTION_LARGE_PARTITION_WARNING_THRESHOLD_MB }} # GC Pauses greater than gc_warn_threshold_in_ms will be logged at WARN level # Adjust the threshold based on your application throughput requirement # By default, Cassandra logs GC Pauses greater than 200 ms at INFO level - gc_warn_threshold_in_ms: 1000 + gc_warn_threshold_in_ms: {{ .Params.GC_WARN_THRESHOLD_IN_MS }} # Maximum size of any value in SSTables. Safety measure to detect SSTable corruption # early. Any value size larger than this threshold will result into marking an SSTable # as corrupted. This should be positive and less than 2048. - # max_value_size_in_mb: 256 + max_value_size_in_mb: {{ .Params.MAX_VALUE_SIZE_IN_MB }} # Back-pressure settings # # If enabled, the coordinator will apply the back-pressure strategy specified below to each mutation # sent to replicas, with the aim of reducing pressure on overloaded replicas. - back_pressure_enabled: false + back_pressure_enabled: {{ .Params.BACK_PRESSURE_ENABLED }} # The back-pressure strategy applied. # The default implementation, RateBasedBackPressure, takes three arguments: # high ratio, factor, and flow type, and uses the ratio between incoming mutation responses and outgoing mutation requests. @@ -1206,13 +1208,14 @@ data: # if SLOW at the speed of the slowest one. # New strategies can be added. 
Implementors need to implement org.apache.cassandra.net.BackpressureStrategy and # provide a public constructor accepting a Map. + {{ if .Params.BACK_PRESSURE_ENABLED }} back_pressure_strategy: - - class_name: org.apache.cassandra.net.RateBasedBackPressure + - class_name: {{ .Params.BACK_PRESSURE_STRATEGY_CLASS }} parameters: - - high_ratio: 0.90 - factor: 5 - flow: FAST - + - high_ratio: {{ .Params.BACK_PRESSURE_HIGH_RATIO }} + factor: {{ .Params.BACK_PRESSURE_FACTOR }} + flow: {{ .Params.BACK_PRESSURE_FLOW }} + {{ end }} # Coalescing Strategies # # Coalescing multiples messages turns out to significantly boost message processing throughput (think doubling or more). # On bare metal, the floor for packet processing throughput is high enough that many applications won't notice, but in @@ -1229,7 +1232,7 @@ data: # Strategy to use for coalescing messages in OutboundTcpConnection. # Can be fixed, movingaverage, timehorizon, disabled (default). # You can also specify a subclass of CoalescingStrategies.CoalescingStrategy by name. - # otc_coalescing_strategy: DISABLED + # otc_coalescing_strategy: {{ .Params.OTC_COALESCING_STRATEGY }} # How many microseconds to wait for coalescing. For fixed strategy this is the amount of time after the first # message is received before it will be sent with any accompanying messages. 
For moving average this is the diff --git a/operator/templates/service.yaml b/operator/templates/service.yaml index 4494341d..c1131232 100644 --- a/operator/templates/service.yaml +++ b/operator/templates/service.yaml @@ -10,8 +10,10 @@ spec: name: ssl-storage - port: {{ .Params.NATIVE_TRANSPORT_PORT }} name: native-transport + {{ if .Params.START_RPC }} - port: {{ .Params.RPC_PORT }} name: rpc + {{ end }} - port: {{ .Params.JMX_PORT }} name: jmx clusterIP: None diff --git a/operator/templates/stateful-set.yaml b/operator/templates/stateful-set.yaml index 5239ee0a..978cf17d 100644 --- a/operator/templates/stateful-set.yaml +++ b/operator/templates/stateful-set.yaml @@ -112,8 +112,10 @@ spec: name: ssl-storage - containerPort: {{ .Params.NATIVE_TRANSPORT_PORT }} name: native + {{ if .Params.START_RPC }} - containerPort: {{ .Params.RPC_PORT }} name: rpc + {{ end }} - containerPort: {{ .Params.JMX_PORT }} name: jmx volumeMounts: From 72dba5dae444c0b855fb2e3fe739f0c57b2e31e5 Mon Sep 17 00:00:00 2001 From: viivek46 Date: Thu, 10 Oct 2019 21:15:02 -0300 Subject: [PATCH 02/11] made few more settings in cassandra-yaml.yml configurable --- operator/params.yaml | 24 ++++++++++++++++++++++++ operator/templates/cassandra-yaml.yaml | 12 ++++++------ 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/operator/params.yaml b/operator/params.yaml index a6a1aa4f..644b7168 100644 --- a/operator/params.yaml +++ b/operator/params.yaml @@ -83,6 +83,30 @@ BATCHLOG_REPLAY_THROTTLE_IN_KB: description: "The total throttle for log replay in Kpbs." default: 1024 +ROLES_VALIDITY_IN_MS: + description: "Validity period for roles cache; set to 0 to disable" + default: 2000 + +ROLES_UPDATE_INTERVAL_IN_MS: + description: "After this interval, cache entries become eligible for refresh. Upon next access, Cassandra schedules an async reload, and returns the old value until the reload completes. If roles_validity_in_ms is non-zero, then this must be also." 
+ default: 2000 + +CREDENTIALS_VALIDITY_IN_MS: + description: " This cache is tightly coupled to the provided PasswordAuthenticator implementation of IAuthenticator. If another IAuthenticator implementation is configured, Cassandra does not use this cache, and these settings have no effect. Set to 0 to disable." + default: 2000 + +CREDENTIALS_UPDATE_INTERVAL_IN_MS: + description: "After this interval, cache entries become eligible for refresh. The next time the cache is accessed, the system schedules an asynchronous reload of the cache. Until this cache reload is complete, the cache returns the old values. If credentials_validity_in_ms is nonzero, this property must also be nonzero." + default: 2000 + +PERMISSIONS_VALIDITY_IN_MS: + description: "How many milliseconds permissions in cache remain valid. Fetching permissions can be resource intensive. To disable the cache, set this to 0." + default: 2000 + +PERMISSIONS_UPDATE_INTERVAL_IN_MS: + description: "If enabled, sets refresh interval for the permissions cache. After this interval, cache entries become eligible for refresh. On next access, Cassandra schedules an async reload and returns the old value until the reload completes. If permissions_validity_in_ms is nonzero, permissions_update_interval_in_ms must also be non-zero." + default: 2000 + PARTITIONER: description: "The partitioner used to distribute rows across the cluster. Murmur3Partitioner is the recommended setting. RandomPartitioner and ByteOrderedPartitioner are supported for legacy applications." default: "org.apache.cassandra.dht.Murmur3Partitioner" diff --git a/operator/templates/cassandra-yaml.yaml b/operator/templates/cassandra-yaml.yaml index c4c157d0..687a8cb5 100644 --- a/operator/templates/cassandra-yaml.yaml +++ b/operator/templates/cassandra-yaml.yaml @@ -134,7 +134,7 @@ data: # after the period specified here, become eligible for (async) reload. # Defaults to 2000, set to 0 to disable caching entirely. 
# Will be disabled automatically for AllowAllAuthenticator. - roles_validity_in_ms: 2000 + roles_validity_in_ms: {{ .Params.ROLES_VALIDITY_IN_MS }} # Refresh interval for roles cache (if enabled). # After this interval, cache entries become eligible for refresh. Upon next @@ -142,13 +142,13 @@ data: # completes. If roles_validity_in_ms is non-zero, then this must be # also. # Defaults to the same value as roles_validity_in_ms. - # roles_update_interval_in_ms: 2000 + roles_update_interval_in_ms: {{ .Params.ROLES_UPDATE_INTERVAL_IN_MS }} # Validity period for permissions cache (fetching permissions can be an # expensive operation depending on the authorizer, CassandraAuthorizer is # one example). Defaults to 2000, set to 0 to disable. # Will be disabled automatically for AllowAllAuthorizer. - permissions_validity_in_ms: 2000 + permissions_validity_in_ms: {{ .Params.PERMISSIONS_VALIDITY_IN_MS }} # Refresh interval for permissions cache (if enabled). # After this interval, cache entries become eligible for refresh. Upon next @@ -156,7 +156,7 @@ data: # completes. If permissions_validity_in_ms is non-zero, then this must be # also. # Defaults to the same value as permissions_validity_in_ms. - # permissions_update_interval_in_ms: 2000 + permissions_update_interval_in_ms: {{ .Params.PERMISSIONS_UPDATE_INTERVAL_IN_MS }} # Validity period for credentials cache. This cache is tightly coupled to # the provided PasswordAuthenticator implementation of IAuthenticator. If @@ -167,7 +167,7 @@ data: # underlying table, it may not bring a significant reduction in the # latency of individual authentication attempts. # Defaults to 2000, set to 0 to disable credentials caching. - credentials_validity_in_ms: 2000 + credentials_validity_in_ms: {{ .Params.CREDENTIALS_VALIDITY_IN_MS }} # Refresh interval for credentials cache (if enabled). # After this interval, cache entries become eligible for refresh. Upon next @@ -175,7 +175,7 @@ data: # completes. 
If credentials_validity_in_ms is non-zero, then this must be # also. # Defaults to the same value as credentials_validity_in_ms. - # credentials_update_interval_in_ms: 2000 + credentials_update_interval_in_ms: {{ .Params.CREDENTIALS_UPDATE_INTERVAL_IN_MS }} # The partitioner is responsible for distributing groups of rows (by # partition key) across nodes in the cluster. You should leave this From 03be7932c4d8ed32b9dbc71ddaddc664042ac4d7 Mon Sep 17 00:00:00 2001 From: viivek46 Date: Thu, 10 Oct 2019 23:22:23 -0300 Subject: [PATCH 03/11] added code to make settings in jvm-options.yaml configurable --- operator/params.yaml | 8 ++++++++ operator/templates/jvm-options.yaml | 6 +++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/operator/params.yaml b/operator/params.yaml index 644b7168..1aaf5bff 100644 --- a/operator/params.yaml +++ b/operator/params.yaml @@ -55,6 +55,14 @@ CLUSTER_NAME: description: "The name of the cluster managed by the Service" default: "cassandra" +CASSANDRA_HEAP_SIZE_MB: + description: "The amount of JVM heap, in MB, allocated to the Cassandra process." + default: 2048 + +CASSANDRA_HEAP_NEW_MB: + description: "The amount of JVM new generation heap, in MB, allocated to the Cassandra process." + default: 100 + SEED_PROVIDER_CLASS: description: "The class within Cassandra that handles the seed logic." default: "org.apache.cassandra.locator.SimpleSeedProvider" diff --git a/operator/templates/jvm-options.yaml b/operator/templates/jvm-options.yaml index 957a4bc8..4b590a23 100644 --- a/operator/templates/jvm-options.yaml +++ b/operator/templates/jvm-options.yaml @@ -170,8 +170,8 @@ data: # the same value to avoid stop-the-world GC pauses during resize, and # so that we can lock the heap in memory on startup to prevent any # of it from being swapped out. 
- -Xms{{ max (min 8192 (div .Params.NODE_MEM 4)) (min 1024 (div .Params.NODE_MEM 2)) }}m - -Xmx{{ max (min 8192 (div .Params.NODE_MEM 4)) (min 1024 (div .Params.NODE_MEM 2)) }}m + -Xms{{ .Params.CASSANDRA_HEAP_SIZE_MB }}M + -Xmx{{ .Params.CASSANDRA_HEAP_SIZE_MB }}M # Young generation size is automatically calculated by cassandra-env # based on this formula: min(100 * num_cores, 1/4 * heap size) @@ -187,7 +187,7 @@ data: # The example below assumes a modern 8-core+ machine for decent # times. If in doubt, and if you do not particularly want to tweak, go # 100 MB per physical CPU core. - #-Xmn800M + -Xmn{{ .Params.CASSANDRA_HEAP_NEW_MB }}M ################################### # EXPIRATION DATE OVERFLOW POLICY # From 4aae807a0ddb2c7f4e066ba07a7ab7d91a3f7b03 Mon Sep 17 00:00:00 2001 From: viivek46 Date: Thu, 10 Oct 2019 23:49:13 -0300 Subject: [PATCH 04/11] added line at the end of params.yaml --- operator/params.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operator/params.yaml b/operator/params.yaml index 1aaf5bff..7fef9df9 100644 --- a/operator/params.yaml +++ b/operator/params.yaml @@ -521,4 +521,4 @@ BACK_PRESSURE_FACTOR: BACK_PRESSURE_FLOW: description: "The flow speed to apply rate limiting: FAST - rate limited to the speed of the fastest replica. SLOW - rate limit to the speed of the slowest replica." 
- default: "FAST" \ No newline at end of file + default: "FAST" From 1790aaaee1645552c08671abe7e454ee74f10d2a Mon Sep 17 00:00:00 2001 From: viivek46 <39732335+viivek46@users.noreply.github.com> Date: Mon, 14 Oct 2019 21:25:47 -0300 Subject: [PATCH 05/11] Apply suggestions from code review Co-Authored-By: Sam Tran --- operator/params.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/operator/params.yaml b/operator/params.yaml index 7fef9df9..3c04940e 100644 --- a/operator/params.yaml +++ b/operator/params.yaml @@ -76,11 +76,11 @@ HINTED_HANDOFF_ENABLED: default: true MAX_HINT_WINDOW_IN_MS: - description: "The maximum amount of time, in ms, that hints are generates hints for an unresponsive node." + description: "The maximum amount of time, in ms, that hints are generated for an unresponsive node." default: 10800000 HINTED_HANDOFF_THROTTLE_IN_KB: - description: "The maximum throttle per delivery thread in kilobytes per second." + description: "The maximum throttle per delivery thread in KBs per second." default: 1024 MAX_HINTS_DELIVERY_THREADS: @@ -88,7 +88,7 @@ MAX_HINTS_DELIVERY_THREADS: default: 2 BATCHLOG_REPLAY_THROTTLE_IN_KB: - description: "The total throttle for log replay in Kpbs." + description: "The total maximum throttle for replaying failed logged batches in KBs per second." default: 1024 ROLES_VALIDITY_IN_MS: @@ -164,15 +164,15 @@ INDEX_SUMMARY_RESIZE_INTERVAL_IN_MINUTES: default: 60 START_NATIVE_TRANSPORT: - description: "If true CQL is enabled." + description: "If true, CQL is enabled." default: true START_RPC: - description: "If true Thrift RPC is enabled. This is deprecated but may be necessary for legacy applications." + description: "If true, Thrift RPC is enabled. This is deprecated but may be necessary for legacy applications." default: false RPC_KEEPALIVE: - description: "Enables or TCP keepalive for RPC connections." + description: "Enables or disables keepalive on client connections (RPC or native)." 
default: true THRIFT_FRAMED_TRANSPORT_SIZE_IN_MB: @@ -376,7 +376,7 @@ MAX_VALUE_SIZE_IN_MB: default: 256 OTC_COALESCING_STRATEGY: - description: "The strategy to use for coalescing network messages" + description: "The strategy to use for coalescing network messages. Values can be: fixed, movingaverage, timehorizon, disabled (default)" default: "DISABLED" UNLOGGED_BATCH_ACROSS_PARTITIONS_WARN_THRESHOLD: From a8aaf6ff60eb9113d47d4a3b59472c8623b2ce1c Mon Sep 17 00:00:00 2001 From: viivek46 Date: Tue, 15 Oct 2019 04:13:10 -0300 Subject: [PATCH 06/11] added conditional block around originally commented settings for all settings that are originally commented out in the official configuration file, added a conditional block around it --- operator/params.yaml | 74 +++++++++++----------- operator/templates/cassandra-yaml.yaml | 85 +++++++++++++++++++++++--- operator/templates/jvm-options.yaml | 7 +++ 3 files changed, 121 insertions(+), 45 deletions(-) diff --git a/operator/params.yaml b/operator/params.yaml index 3c04940e..d83cfd58 100644 --- a/operator/params.yaml +++ b/operator/params.yaml @@ -57,11 +57,11 @@ CLUSTER_NAME: CASSANDRA_HEAP_SIZE_MB: description: "The amount of JVM heap, in MB, allocated to the Cassandra process." - default: 2048 + default: CASSANDRA_HEAP_NEW_MB: description: "The amount of JVM new generation heap, in MB, allocated to the Cassandra process." - default: 100 + default: SEED_PROVIDER_CLASS: description: "The class within Cassandra that handles the seed logic." @@ -97,7 +97,7 @@ ROLES_VALIDITY_IN_MS: ROLES_UPDATE_INTERVAL_IN_MS: description: "After this interval, cache entries become eligible for refresh. Upon next access, Cassandra schedules an async reload, and returns the old value until the reload completes. If roles_validity_in_ms is non-zero, then this must be also." - default: 2000 + default: CREDENTIALS_VALIDITY_IN_MS: description: " This cache is tightly coupled to the provided PasswordAuthenticator implementation of IAuthenticator. 
If another IAuthenticator implementation is configured, Cassandra does not use this cache, and these settings have no effect. Set to 0 to disable." @@ -105,7 +105,7 @@ CREDENTIALS_VALIDITY_IN_MS: CREDENTIALS_UPDATE_INTERVAL_IN_MS: description: "After this interval, cache entries become eligible for refresh. The next time the cache is accessed, the system schedules an asynchronous reload of the cache. Until this cache reload is complete, the cache returns the old values. If credentials_validity_in_ms is nonzero, this property must also be nonzero." - default: 2000 + default: PERMISSIONS_VALIDITY_IN_MS: description: "How many milliseconds permissions in cache remain valid. Fetching permissions can be resource intensive. To disable the cache, set this to 0." @@ -113,7 +113,7 @@ PERMISSIONS_VALIDITY_IN_MS: PERMISSIONS_UPDATE_INTERVAL_IN_MS: description: "If enabled, sets refresh interval for the permissions cache. After this interval, cache entries become eligible for refresh. On next access, Cassandra schedules an async reload and returns the old value until the reload completes. If permissions_validity_in_ms is nonzero, permissions_update_interval_in_ms must also be non-zero." - default: 2000 + default: PARTITIONER: description: "The partitioner used to distribute rows across the cluster. Murmur3Partitioner is the recommended setting. RandomPartitioner and ByteOrderedPartitioner are supported for legacy applications." @@ -265,7 +265,7 @@ CONCURRENT_MATERIALIZED_VIEW_WRITES: COMMITLOG_TOTAL_SPACE_IN_MB: description: "The total size of the commit log in Mb." 
- default: 8192 + default: AUTO_SNAPSHOT: description: "Take a snapshot of the data before truncating a keyspace or dropping a table" @@ -273,111 +273,111 @@ AUTO_SNAPSHOT: KEY_CACHE_KEYS_TO_SAVE: description: "The number of keys from the key cache to save" - default: 100 + default: ROW_CACHE_KEYS_TO_SAVE: description: "The number of keys from the row cache to save" - default: 100 + default: COUNTER_CACHE_KEYS_TO_SAVE: description: "The number of keys from the counter cache to save" - default: 100 + default: FILE_CACHE_SIZE_IN_MB: description: "The total memory to use for SSTable-reading buffers" - default: 512 + default: MEMTABLE_HEAP_SPACE_IN_MB: description: "The amount of on-heap memory allocated for memtables" - default: 2048 + default: MEMTABLE_OFFHEAP_SPACE_IN_MB: description: "The total amount of off-heap memory allocated for memtables" - default: 2048 + default: MEMTABLE_CLEANUP_THRESHOLD: description: "The ratio used for automatic memtable flush" - default: 0.11 + default: MEMTABLE_FLUSH_WRITERS: description: "The number of memtable flush writer threads" - default: 2 + default: LISTEN_ON_BROADCAST_ADDRESS: description: "Listen on the address set in broadcast_address property" - default: false + default: INTERNODE_AUTHENTICATOR: description: "The internode authentication backend" - default: "org.apache.cassandra.auth.AllowAllInternodeAuthenticator" + default: "" NATIVE_TRANSPORT_MAX_THREADS: description: "The maximum number of thread handling requests" - default: 128 + default: NATIVE_TRANSPORT_MAX_FRAME_SIZE_IN_MB: description: "The maximum allowed size of a frame" - default: 256 + default: NATIVE_TRANSPORT_MAX_CONCURRENT_CONNECTIONS: description: "The maximum number of concurrent client connections" - default: -1 + default: NATIVE_TRANSPORT_MAX_CONCURRENT_CONNECTIONS_PER_IP: description: "The maximum number of concurrent client connections per source IP address" - default: -1 + default: RPC_MIN_THREADS: description: "The minimum thread pool size for 
remote procedure calls" - default: 16 + default: RPC_MAX_THREADS: description: "The maximum thread pool size for remote procedure calls" - default: 2048 + default: RPC_SEND_BUFF_SIZE_IN_BYTES: description: "The sending socket buffer size in bytes for remote procedure calls" - default: 16384 + default: RPC_RECV_BUFF_SIZE_IN_BYTES: description: "The receiving socket buffer size for remote procedure calls" - default: 16384 + default: CONCURRENT_COMPACTORS: description: "The number of concurrent compaction processes allowed to run simultaneously on a node" - default: 1 + default: STREAM_THROUGHPUT_OUTBOUND_MEGABITS_PER_SEC: description: "The maximum throughput of all outbound streaming file transfers on a node" - default: 200 + default: INTER_DC_STREAM_THROUGHPUT_OUTBOUND_MEGABITS_PER_SEC: description: "The maximum throughput of all streaming file transfers between datacenters" - default: 200 + default: STREAMING_KEEP_ALIVE_PERIOD_IN_SECS: description: "Interval to send keep-alive messages. The stream session fails when a keep-alive message is not received for 2 keep-alive cycles." - default: 300 + default: PHI_CONVICT_THRESHOLD: description: "The sensitivity of the failure detector on an exponential scale" - default: 8 + default: BUFFER_POOL_USE_HEAP_IF_EXHAUSTED: description: "Allocate on-heap memory when the SSTable buffer pool is exhausted" - default: true + default: DISK_OPTIMIZATION_STRATEGY: description: "The strategy for optimizing disk reads" - default: "ssd" + default: "" MAX_VALUE_SIZE_IN_MB: description: "The maximum size of any value in SSTables" - default: 256 + default: OTC_COALESCING_STRATEGY: description: "The strategy to use for coalescing network messages. Values can be: fixed, movingaverage, timehorizon, disabled (default)" - default: "DISABLED" + default: "" UNLOGGED_BATCH_ACROSS_PARTITIONS_WARN_THRESHOLD: description: "Causes Cassandra to log a WARN message on any batches not of type LOGGED that span across more partitions than this limit." 
@@ -475,17 +475,21 @@ ENABLE_SCRIPTED_USER_DEFINED_FUNCTIONS: description: "Java UDFs are always enabled, if enable_user_defined_functions is true. Enable this option to use UDFs with language javascript or any custom JSR-223 provider. This option has no effect if enable_user_defined_functions is false" default: false +ENABLE_MATERIALIZED_VIEWS: + description: "Enables materialized view creation on this node. Materialized views are considered experimental and are not recommended for production use." + default: false + CDC_ENABLED: description: "Enable / disable CDC functionality on a per-node basis. This modifies the logic used for write path allocation rejection" default: false CDC_TOTAL_SPACE_IN_MB: description: "Total space to use for change-data-capture (CDC) logs on disk. " - default: 4096 + default: CDC_FREE_SPACE_CHECK_INTERVAL_MS: description: "Interval between checks for new available space for CDC-tracked tables when the cdc_total_space_in_mb threshold is reached and the CDCCompactor is running behind or experiencing back pressure." - default: 250 + default: PREPARED_STATEMENTS_CACHE_SIZE_MB: description: "Maximum size of the native protocol prepared statement cache" diff --git a/operator/templates/cassandra-yaml.yaml b/operator/templates/cassandra-yaml.yaml index 687a8cb5..8301f8bd 100644 --- a/operator/templates/cassandra-yaml.yaml +++ b/operator/templates/cassandra-yaml.yaml @@ -142,7 +142,9 @@ data: # completes. If roles_validity_in_ms is non-zero, then this must be # also. # Defaults to the same value as roles_validity_in_ms. + {{ if .Params.ROLES_UPDATE_INTERVAL_IN_MS }} roles_update_interval_in_ms: {{ .Params.ROLES_UPDATE_INTERVAL_IN_MS }} + {{ end }} # Validity period for permissions cache (fetching permissions can be an # expensive operation depending on the authorizer, CassandraAuthorizer is @@ -156,7 +158,9 @@ data: # completes. If permissions_validity_in_ms is non-zero, then this must be # also. 
# Defaults to the same value as permissions_validity_in_ms. + {{ if .Params.PERMISSIONS_UPDATE_INTERVAL_IN_MS }} permissions_update_interval_in_ms: {{ .Params.PERMISSIONS_UPDATE_INTERVAL_IN_MS }} + {{ end }} # Validity period for credentials cache. This cache is tightly coupled to # the provided PasswordAuthenticator implementation of IAuthenticator. If @@ -175,7 +179,9 @@ data: # completes. If credentials_validity_in_ms is non-zero, then this must be # also. # Defaults to the same value as credentials_validity_in_ms. + {{ if .Params.CREDENTIALS_UPDATE_INTERVAL_IN_MS }} credentials_update_interval_in_ms: {{ .Params.CREDENTIALS_UPDATE_INTERVAL_IN_MS }} + {{ end }} # The partitioner is responsible for distributing groups of rows (by # partition key) across nodes in the cluster. You should leave this @@ -307,7 +313,9 @@ data: # Number of keys from the key cache to save # Disabled by default, meaning all keys are going to be saved + {{ if .Params.KEY_CACHE_KEYS_TO_SAVE }} key_cache_keys_to_save: {{ .Params.KEY_CACHE_KEYS_TO_SAVE }} + {{ end }} # Row cache implementation class name. Available implementations: # @@ -341,7 +349,9 @@ data: # Number of keys from the row cache to save. # Specify 0 (which is the default), meaning all keys are going to be saved + {{ if .Params.ROW_CACHE_KEYS_TO_SAVE }} row_cache_keys_to_save: {{ .Params.ROW_CACHE_KEYS_TO_SAVE }} + {{ end }} # Maximum size of the counter cache in memory. # @@ -367,7 +377,9 @@ data: # Number of keys from the counter cache to save # Disabled by default, meaning all keys are going to be saved + {{ if .Params.COUNTER_CACHE_KEYS_TO_SAVE }} counter_cache_keys_to_save: {{ .Params.COUNTER_CACHE_KEYS_TO_SAVE }} + {{ end }} # saved caches # If not set, the default directory is $CASSANDRA_HOME/data/saved_caches. @@ -464,26 +476,36 @@ data: # overhead which is roughly 128 bytes per chunk (i.e. 0.2% of the reserved size # if the default 64k chunk size is used). # Memory is only allocated when needed. 
+ {{ if .Params.FILE_CACHE_SIZE_IN_MB }} file_cache_size_in_mb: {{ .Params.FILE_CACHE_SIZE_IN_MB }} + {{ end }} # Flag indicating whether to allocate on or off heap when the sstable buffer # pool is exhausted, that is when it has exceeded the maximum memory # file_cache_size_in_mb, beyond which it will not cache buffers but allocate on request. - # buffer_pool_use_heap_if_exhausted: {{ .Params.BUFFER_POOL_USE_HEAP_IF_EXHAUSTED }} + {{ if .Params.BUFFER_POOL_USE_HEAP_IF_EXHAUSTED }} + buffer_pool_use_heap_if_exhausted: {{ .Params.BUFFER_POOL_USE_HEAP_IF_EXHAUSTED }} + {{ end }} # The strategy for optimizing disk read # Possible values are: # ssd (for solid state disks, the default) # spinning (for spinning disks) - # disk_optimization_strategy: {{ .Params.DISK_OPTIMIZATION_STRATEGY }} + {{ if .Params.DISK_OPTIMIZATION_STRATEGY }} + disk_optimization_strategy: {{ .Params.DISK_OPTIMIZATION_STRATEGY }} + {{ end }} # Total permitted memory to use for memtables. Cassandra will stop # accepting writes when the limit is exceeded until a flush completes, # and will trigger a flush based on memtable_cleanup_threshold # If omitted, Cassandra will set both to 1/4 the size of the heap. + {{ if .Params.MEMTABLE_HEAP_SPACE_IN_MB }} memtable_heap_space_in_mb: {{ .Params.MEMTABLE_HEAP_SPACE_IN_MB }} + {{ end }} + {{ if .Params.MEMTABLE_OFFHEAP_SPACE_IN_MB }} memtable_offheap_space_in_mb: {{ .Params.MEMTABLE_OFFHEAP_SPACE_IN_MB }} + {{ end }} # memtable_cleanup_threshold is deprecated. The default calculation # is the only reasonable choice. See the comments on memtable_flush_writers @@ -496,7 +518,10 @@ data: # under heavy write load. # # memtable_cleanup_threshold defaults to 1 / (memtable_flush_writers + 1) + {{ if .Params.MEMTABLE_CLEANUP_THRESHOLD }} memtable_cleanup_threshold: {{ .Params.MEMTABLE_CLEANUP_THRESHOLD }} + {{ end }} + # Specify the way Cassandra allocates and manages memtable memory. 
# Options are: @@ -520,7 +545,9 @@ data: # The default value is the smaller of 8192, and 1/4 of the total space # of the commitlog volume. # + {{ if .Params.COMMITLOG_TOTAL_SPACE_IN_MB }} commitlog_total_space_in_mb: {{ .Params.COMMITLOG_TOTAL_SPACE_IN_MB }} + {{ end }} # This sets the number of memtable flush writer threads per disk # as well as the total number of memtables that can be flushed concurrently. @@ -549,7 +576,9 @@ data: # and flush size and frequency. More is not better you just need enough flush writers # to never stall waiting for flushing to free memory. # + {{ if .Params.MEMTABLE_FLUSH_WRITERS }} memtable_flush_writers: {{ .Params.MEMTABLE_FLUSH_WRITERS }} + {{ end }} # Total space to use for change-data-capture logs on disk. # @@ -559,12 +588,16 @@ data: # # The default value is the min of 4096 mb and 1/8th of the total space # of the drive where cdc_raw_directory resides. + {{ if .Params.CDC_TOTAL_SPACE_IN_MB }} cdc_total_space_in_mb: {{ .Params.CDC_TOTAL_SPACE_IN_MB }} + {{ end }} # When we hit our cdc_raw limit and the CDCCompactor is either running behind # or experiencing backpressure, we check at the following interval to see if any # new space for cdc-tracked tables has been made available. Default to 250ms + {{ if .Params.CDC_FREE_SPACE_CHECK_INTERVAL_MS }} cdc_free_space_check_interval_ms: {{ .Params.CDC_FREE_SPACE_CHECK_INTERVAL_MS }} + {{ end }} # A fixed memory pool size in MB for for SSTable index summaries. If left # empty, this will default to 5% of the heap size. If the memory usage of @@ -632,11 +665,15 @@ data: # interfaces. # Ignore this property if the network configuration automatically # routes between the public and private networks such as EC2. + {{ if .Params.LISTEN_ON_BROADCAST_ADDRESS }} listen_on_broadcast_address: {{ .Params.LISTEN_ON_BROADCAST_ADDRESS }} + {{ end }} # Internode authentication backend, implementing IInternodeAuthenticator; # used to allow/disallow connections from peer nodes. 
+ {{ if .Params.INTERNODE_AUTHENTICATOR }} internode_authenticator: {{ .Params.INTERNODE_AUTHENTICATOR }} + {{ end }} # Whether to start the native transport server. # Please note that the address on which the native transport is bound is the @@ -657,20 +694,28 @@ data: # This is similar to rpc_max_threads though the default differs slightly (and # there is no native_transport_min_threads, idle threads will always be stopped # after 30 seconds). + {{ if .Params.NATIVE_TRANSPORT_MAX_THREADS }} native_transport_max_threads: {{ .Params.NATIVE_TRANSPORT_MAX_THREADS }} + {{ end }} # # The maximum size of allowed frame. Frame (requests) larger than this will # be rejected as invalid. The default is 256MB. If you're changing this parameter, # you may want to adjust max_value_size_in_mb accordingly. This should be positive and less than 2048. + {{ if .Params.NATIVE_TRANSPORT_MAX_FRAME_SIZE_IN_MB }} native_transport_max_frame_size_in_mb: {{ .Params.NATIVE_TRANSPORT_MAX_FRAME_SIZE_IN_MB }} + {{ end }} # The maximum number of concurrent client connections. # The default is -1, which means unlimited. + {{ if .Params.NATIVE_TRANSPORT_MAX_CONCURRENT_CONNECTIONS }} native_transport_max_concurrent_connections: {{ .Params.NATIVE_TRANSPORT_MAX_CONCURRENT_CONNECTIONS }} + {{ end }} # The maximum number of concurrent client connections per source ip. # The default is -1, which means unlimited. + {{ if .Params.NATIVE_TRANSPORT_MAX_CONCURRENT_CONNECTIONS_PER_IP }} native_transport_max_concurrent_connections_per_ip: {{ .Params.NATIVE_TRANSPORT_MAX_CONCURRENT_CONNECTIONS_PER_IP }} + {{ end }} # Whether to start the thrift rpc server. start_rpc: {{ .Params.START_RPC }} @@ -744,12 +789,20 @@ data: # encouraged to set a maximum that makes sense for you in production, but do keep in mind that # rpc_max_threads represents the maximum number of client requests this server may execute concurrently.
# + {{ if .Params.RPC_MIN_THREADS }} rpc_min_threads: {{ .Params.RPC_MIN_THREADS }} + {{ end }} + {{ if .Params.RPC_MAX_THREADS }} rpc_max_threads: {{ .Params.RPC_MAX_THREADS }} + {{ end }} # uncomment to set socket buffer sizes on rpc connections + {{ if .Params.RPC_SEND_BUFF_SIZE_IN_BYTES }} rpc_send_buff_size_in_bytes: {{ .Params.RPC_SEND_BUFF_SIZE_IN_BYTES }} - rpc_recv_buff_size_in_bytes: {{ .Params.RPC_RECV_BUFF_SIZE_IN_BYTES }} + {{ end }} + {{ if .Params.RPC_RECV_BUFF_SIZE_IN_BYTES }} + rpc_recv_buff_size_in_bytes: {{ .Params.RPC_RECV_BUFF_SIZE_IN_BYTES }} + {{ end }} # Uncomment to set socket buffer size for internode communication # Note that when setting this, the buffer size is limited by net.core.wmem_max @@ -823,7 +876,9 @@ data: # # If your data directories are backed by SSD, you should increase this # to the number of cores. + {{ if .Params.CONCURRENT_COMPACTORS }} concurrent_compactors: {{ .Params.CONCURRENT_COMPACTORS }} + {{ end }} # Throttles compaction to the given total throughput across the entire # system. The faster you insert data, the faster you need to compact in @@ -844,14 +899,18 @@ data: # mostly sequential IO when streaming data during bootstrap or repair, which # can lead to saturating the network connection and degrading rpc performance. # When unset, the default is 200 Mbps or 25 MB/s. 
- # stream_throughput_outbound_megabits_per_sec: {{ .Params.STREAM_THROUGHPUT_OUTBOUND_MEGABITS_PER_SEC }} + {{ if .Params.STREAM_THROUGHPUT_OUTBOUND_MEGABITS_PER_SEC }} + stream_throughput_outbound_megabits_per_sec: {{ .Params.STREAM_THROUGHPUT_OUTBOUND_MEGABITS_PER_SEC }} + {{ end }} # Throttles all streaming file transfer between the datacenters, # this setting allows users to throttle inter dc stream throughput in addition # to throttling all network stream traffic as configured with # stream_throughput_outbound_megabits_per_sec - # When unset, the default is 200 Mbps or 25 MB/s - # inter_dc_stream_throughput_outbound_megabits_per_sec: {{ .Params.INTER_DC_STREAM_THROUGHPUT_OUTBOUND_MEGABITS_PER_SEC }} + # When unset, the default is 200 Mbps or 25 MB/s + {{ if .Params.INTER_DC_STREAM_THROUGHPUT_OUTBOUND_MEGABITS_PER_SEC }} + inter_dc_stream_throughput_outbound_megabits_per_sec: {{ .Params.INTER_DC_STREAM_THROUGHPUT_OUTBOUND_MEGABITS_PER_SEC }} + {{ end }} # How long the coordinator should wait for read operations to complete read_request_timeout_in_ms: {{ .Params.READ_REQUEST_TIMEOUT_IN_MS }} @@ -892,11 +951,15 @@ data: # 2 keep-alive cycles the stream session times out and fail # Default value is 300s (5 minutes), which means stalled stream # times out in 10 minutes by default + {{ if .Params.STREAMING_KEEP_ALIVE_PERIOD_IN_SECS }} streaming_keep_alive_period_in_secs: {{ .Params.STREAMING_KEEP_ALIVE_PERIOD_IN_SECS }} + {{ end }} # phi value that must be reached for a host to be marked down. # most users should never need to adjust this. + {{ if .Params.PHI_CONVICT_THRESHOLD }} phi_convict_threshold: {{ .Params.PHI_CONVICT_THRESHOLD }} + {{ end }} # endpoint_snitch -- Set this to a class that implements # IEndpointSnitch. The snitch has two functions: @@ -1116,7 +1179,7 @@ data: # Enables materialized view creation on this node. # Materialized views are considered experimental and are not recommended for production use. 
- #enable_materialized_views: true + enable_materialized_views: {{ .Params.ENABLE_MATERIALIZED_VIEWS }} # The default Windows kernel timer and scheduling resolution is 15.6ms for power conservation. # Lowering this value on Windows can provide much tighter latency and better throughput, however @@ -1191,7 +1254,9 @@ data: # Maximum size of any value in SSTables. Safety measure to detect SSTable corruption # early. Any value size larger than this threshold will result into marking an SSTable # as corrupted. This should be positive and less than 2048. + {{ if .Params.MAX_VALUE_SIZE_IN_MB }} max_value_size_in_mb: {{ .Params.MAX_VALUE_SIZE_IN_MB }} + {{ end }} # Back-pressure settings # # If enabled, the coordinator will apply the back-pressure strategy specified below to each mutation @@ -1208,14 +1273,12 @@ data: # if SLOW at the speed of the slowest one. # New strategies can be added. Implementors need to implement org.apache.cassandra.net.BackpressureStrategy and # provide a public constructor accepting a Map. - {{ if .Params.BACK_PRESSURE_ENABLED }} back_pressure_strategy: - class_name: {{ .Params.BACK_PRESSURE_STRATEGY_CLASS }} parameters: - high_ratio: {{ .Params.BACK_PRESSURE_HIGH_RATIO }} factor: {{ .Params.BACK_PRESSURE_FACTOR }} flow: {{ .Params.BACK_PRESSURE_FLOW }} - {{ end }} # Coalescing Strategies # # Coalescing multiples messages turns out to significantly boost message processing throughput (think doubling or more). # On bare metal, the floor for packet processing throughput is high enough that many applications won't notice, but in @@ -1232,7 +1295,9 @@ data: # Strategy to use for coalescing messages in OutboundTcpConnection. # Can be fixed, movingaverage, timehorizon, disabled (default). # You can also specify a subclass of CoalescingStrategies.CoalescingStrategy by name. 
- # otc_coalescing_strategy: {{ .Params.OTC_COALESCING_STRATEGY }} + {{ if .Params.OTC_COALESCING_STRATEGY }} + otc_coalescing_strategy: {{ .Params.OTC_COALESCING_STRATEGY }} + {{ end }} # How many microseconds to wait for coalescing. For fixed strategy this is the amount of time after the first # message is received before it will be sent with any accompanying messages. For moving average this is the diff --git a/operator/templates/jvm-options.yaml b/operator/templates/jvm-options.yaml index 4b590a23..00cfb39b 100644 --- a/operator/templates/jvm-options.yaml +++ b/operator/templates/jvm-options.yaml @@ -170,8 +170,13 @@ data: # the same value to avoid stop-the-world GC pauses during resize, and # so that we can lock the heap in memory on startup to prevent any # of it from being swapped out. + {{ if .Params.CASSANDRA_HEAP_SIZE_MB }} -Xms{{ .Params.CASSANDRA_HEAP_SIZE_MB }}M -Xmx{{ .Params.CASSANDRA_HEAP_SIZE_MB }}M + {{ else }} + -Xms{{ max (min 8192 (div .Params.NODE_MEM 4)) (min 1024 (div .Params.NODE_MEM 2)) }}m + -Xmx{{ max (min 8192 (div .Params.NODE_MEM 4)) (min 1024 (div .Params.NODE_MEM 2)) }}m + {{ end }} # Young generation size is automatically calculated by cassandra-env # based on this formula: min(100 * num_cores, 1/4 * heap size) @@ -187,7 +192,9 @@ data: # The example below assumes a modern 8-core+ machine for decent # times. If in doubt, and if you do not particularly want to tweak, go # 100 MB per physical CPU core. 
+ {{ if .Params.CASSANDRA_HEAP_NEW_MB }} -Xmn{{ .Params.CASSANDRA_HEAP_NEW_MB }}M + {{ end }} ################################### # EXPIRATION DATE OVERFLOW POLICY # From 8e9559ee73ff5ba187fabbaf2dc8252448fc95ea Mon Sep 17 00:00:00 2001 From: viivek46 Date: Tue, 15 Oct 2019 17:17:18 -0300 Subject: [PATCH 07/11] added more settings in cassandra-yaml.yml configurable --- operator/params.yaml | 60 +++++++++++++++++++++++- operator/templates/cassandra-yaml.yaml | 63 +++++++++++++++++++------- 2 files changed, 105 insertions(+), 18 deletions(-) diff --git a/operator/params.yaml b/operator/params.yaml index d83cfd58..87abf3a7 100644 --- a/operator/params.yaml +++ b/operator/params.yaml @@ -53,7 +53,7 @@ PERSISTENT_STORAGE: CLUSTER_NAME: description: "The name of the cluster managed by the Service" - default: "cassandra" + default: "" CASSANDRA_HEAP_SIZE_MB: description: "The amount of JVM heap, in MB, allocated to the Cassandra process." @@ -91,6 +91,10 @@ BATCHLOG_REPLAY_THROTTLE_IN_KB: description: "The total maximum throttle for replaying failed logged batches in KBs per second." default: 1024 +ROLE_MANAGER: + description: "Part of the Authentication & Authorization backend that implements IRoleManager to maintain grants and memberships between roles, By default, the value set is Apache Cassandra's out of the box Role Manager: CassandraRoleManager" + default: "CassandraRoleManager" + ROLES_VALIDITY_IN_MS: description: "Validity period for roles cache; set to 0 to disable" default: 2000 @@ -249,7 +253,7 @@ DYNAMIC_SNITCH_BADNESS_THRESHOLD: INTERNODE_COMPRESSION: description: "Controls whether traffic between nodes is compressed. all compresses all traffic. none compresses no traffic. dc compresses between datacenters." - default: "all" + default: "dc" MAX_HINTS_FILE_SIZE_IN_MB: description: "The maximum size of the hints file in Mb."
@@ -526,3 +530,55 @@ BACK_PRESSURE_FACTOR: BACK_PRESSURE_FLOW: description: "The flow speed to apply rate limiting: FAST - rate limited to the speed of the fastest replica. SLOW - rate limit to the speed of the slowest replica." default: "FAST" + +ALLOCATE_TOKENS_FOR_KEYSPACE: + description: "Triggers automatic allocation of num_tokens tokens for this node. The allocation algorithm attempts to choose tokens in a way that optimizes replicated load over the nodes in the datacenter for the replication strategy used by the specified keyspace." + default: "" + +INITIAL_TOKEN: + description: "initial_token allows you to specify tokens manually." + default: + +HINTS_DIRECTORY: + description: "Directory where Cassandra should store hints." + default: "" + +COMMITLOG_DIRECTORY: + description: "Directory where Cassandra should store the commit log. When running on magnetic HDD, this should be a separate spindle than the data directories. If not set, the default directory is $CASSANDRA_HOME/data/commitlog." + default: "" + +CDC_RAW_DIRECTORY: + description: "CommitLogSegments are moved to this directory on flush if cdc_enabled: true and the segment contains mutations for a CDC-enabled table" + default: "" + +ROW_CACHE_CLASS_NAME: + description: "Row cache implementation class name." + default: "" + +SAVED_CACHES_DIRECTORY: + description: "Directory where Cassandra should store saved caches. If not set, the default directory is $CASSANDRA_HOME/data/saved_caches."
+ default: "" + +INTERNODE_SEND_BUFF_SIZE_IN_BYTES: + description: "Set the send socket buffer size for internode communication. Note that when setting this, the buffer size is limited by net.core.wmem_max, and when not setting it, it is defined by net.ipv4.tcp_wmem." + default: + +INTERNODE_RECV_BUFF_SIZE_IN_BYTES: + description: "Set the receive socket buffer size for internode communication. Note that when setting this, the buffer size is limited by net.core.rmem_max, and when not setting it, it is defined by net.ipv4.tcp_rmem." + default: + +GC_LOG_THRESHOLD_IN_MS: + description: "GC pauses longer than this threshold, in ms, are logged at INFO level (Cassandra's default is 200). This threshold can be adjusted to minimize logging if necessary." + default: + +OTC_COALESCING_WINDOW_US: + description: "How many microseconds to wait for coalescing." + default: + +OTC_COALESCING_ENOUGH_COALESCED_MESSAGES: + description: "Do not try to coalesce messages if we already got that many messages. This should be more than 2 and less than 128." + default: + +OTC_BACKLOG_EXPIRATION_INTERVAL_MS: + description: "How many milliseconds to wait between two expiration runs on the backlog (queue) of the OutboundTcpConnection." + default: diff --git a/operator/templates/cassandra-yaml.yaml b/operator/templates/cassandra-yaml.yaml index 8301f8bd..2ee6071b 100644 --- a/operator/templates/cassandra-yaml.yaml +++ b/operator/templates/cassandra-yaml.yaml @@ -13,7 +13,11 @@ data: # The name of the cluster. This is mainly used to prevent machines in # one logical cluster from joining another. + {{ if .Params.CLUSTER_NAME }} + cluster_name: '{{ .Params.CLUSTER_NAME }}' + {{ else }} cluster_name: '{{ .Name }}' + {{ end }} # This defines the number of tokens randomly assigned to this node on the ring # The more tokens, relative to other nodes, the larger the proportion of data @@ -39,13 +43,17 @@ data: # vnodes. # # Only supported with the Murmur3Partitioner.
- # allocate_tokens_for_keyspace: KEYSPACE + {{ if .Params.ALLOCATE_TOKENS_FOR_KEYSPACE }} + allocate_tokens_for_keyspace: {{ .Params.ALLOCATE_TOKENS_FOR_KEYSPACE }} + {{ end }} # initial_token allows you to specify tokens manually. While you can use it with # vnodes (num_tokens > 1, above) -- in which case you should provide a # comma-separated list -- it's primarily used when adding nodes to legacy clusters # that do not have vnodes enabled. - # initial_token: + {{ if .Params.INITIAL_TOKEN }} + initial_token: {{ .Params.INITIAL_TOKEN }} + {{ end }} # See http://wiki.apache.org/cassandra/HintedHandoff # May either be "true" or "false" to enable globally @@ -76,7 +84,9 @@ data: # Directory where Cassandra should store hints. # If not set, the default directory is $CASSANDRA_HOME/data/hints. - # hints_directory: /var/lib/cassandra/hints + {{ if .Params.HINTS_DIRECTORY }} + hints_directory: {{ .Params.HINTS_DIRECTORY }} + {{ end }} # How often hints should be flushed from the internal buffers to disk. # Will *not* trigger fsync. @@ -126,7 +136,7 @@ data: # # - CassandraRoleManager stores role data in the system_auth keyspace. Please # increase system_auth keyspace replication factor if you use this role manager. - role_manager: CassandraRoleManager + role_manager: {{ .Params.ROLE_MANAGER }} # Validity period for roles cache (fetching granted roles can be an expensive # operation depending on the role manager, CassandraRoleManager is one example) @@ -204,8 +214,9 @@ data: # commit log. when running on magnetic HDD, this should be a # separate spindle than the data directories. - # If not set, the default directory is $CASSANDRA_HOME/data/commitlog. - # commitlog_directory: /var/lib/cassandra/commitlog + {{ if .Params.COMMITLOG_DIRECTORY }} + commitlog_directory: {{ .Params.COMMITLOG_DIRECTORY }} + {{ end }} # Enable / disable CDC functionality on a per-node basis. This modifies the logic used # for write path allocation rejection (standard: never reject. 
cdc: reject Mutation @@ -216,7 +227,9 @@ data: # segment contains mutations for a CDC-enabled table. This should be placed on a # separate spindle than the data directories. If not set, the default directory is # $CASSANDRA_HOME/data/cdc_raw. - # cdc_raw_directory: /var/lib/cassandra/cdc_raw + {{ if .Params.CDC_RAW_DIRECTORY }} + cdc_raw_directory: {{ .Params.CDC_RAW_DIRECTORY }} + {{ end }} # Policy for data disk failures: # @@ -324,8 +337,11 @@ data: # # org.apache.cassandra.cache.SerializingCacheProvider # This is the row cache implementation availabile - # in previous releases of Cassandra. - # row_cache_class_name: org.apache.cassandra.cache.OHCProvider + # in previous releases of Cassandra. + {{ if .Params.ROW_CACHE_CLASS_NAME }} + row_cache_class_name: {{ .Params.ROW_CACHE_CLASS_NAME }} + {{ end }} + # Maximum size of the row cache in memory. # Please note that OHC cache implementation requires some additional off-heap memory to manage @@ -383,7 +399,10 @@ data: # saved caches # If not set, the default directory is $CASSANDRA_HOME/data/saved_caches. - # saved_caches_directory: /var/lib/cassandra/saved_caches + # saved_caches_directory: /var/lib/cassandra/saved_caches + {{ if .Params.SAVED_CACHES_DIRECTORY }} + saved_caches_directory: {{ .Params.SAVED_CACHES_DIRECTORY }} + {{ end }} # commitlog_sync may be either "periodic" or "batch." 
# @@ -813,12 +832,16 @@ data: # /proc/sys/net/ipv4/tcp_wmem # /proc/sys/net/ipv4/tcp_wmem # and 'man tcp' - # internode_send_buff_size_in_bytes: + {{ if .Params.INTERNODE_SEND_BUFF_SIZE_IN_BYTES }} + internode_send_buff_size_in_bytes: {{ .Params.INTERNODE_SEND_BUFF_SIZE_IN_BYTES }} + {{ end }} # Uncomment to set socket buffer size for internode communication # Note that when setting this, the buffer size is limited by net.core.wmem_max # and when not setting it it is defined by net.ipv4.tcp_wmem - # internode_recv_buff_size_in_bytes: + {{ if .Params.INTERNODE_RECV_BUFF_SIZE_IN_BYTES }} + internode_recv_buff_size_in_bytes: {{ .Params.INTERNODE_RECV_BUFF_SIZE_IN_BYTES }} + {{ end }} # Frame size for thrift (maximum message length). thrift_framed_transport_size_in_mb: {{ .Params.THRIFT_FRAMED_TRANSPORT_SIZE_IN_MB }} @@ -1163,7 +1186,9 @@ data: # By default, Cassandra logs GC Pauses greater than 200 ms at INFO level # This threshold can be adjusted to minimize logging if necessary - # gc_log_threshold_in_ms: 200 + {{ if .Params.GC_LOG_THRESHOLD_IN_MS }} + gc_log_threshold_in_ms: {{ .Params.GC_LOG_THRESHOLD_IN_MS }} + {{ end }} # If unset, all GC Pauses greater than gc_log_threshold_in_ms will log at # INFO level @@ -1303,10 +1328,14 @@ data: # message is received before it will be sent with any accompanying messages. For moving average this is the # maximum amount of time that will be waited as well as the interval at which messages must arrive on average # for coalescing to be enabled. - # otc_coalescing_window_us: 200 + {{ if .Params.OTC_COALESCING_WINDOW_US }} + otc_coalescing_window_us: {{ .Params.OTC_COALESCING_WINDOW_US }} + {{ end }} # Do not try to coalesce messages if we already got that many messages. This should be more than 2 and less than 128. 
- # otc_coalescing_enough_coalesced_messages: 8 + {{ if .Params.OTC_COALESCING_ENOUGH_COALESCED_MESSAGES }} + otc_coalescing_enough_coalesced_messages: {{ .Params.OTC_COALESCING_ENOUGH_COALESCED_MESSAGES }} + {{ end }} # How many milliseconds to wait between two expiration runs on the backlog (queue) of the OutboundTcpConnection. # Expiration is done if messages are piling up in the backlog. Droppable messages are expired to free the memory @@ -1315,4 +1344,6 @@ data: # time and queue contention while iterating the backlog of messages. # An interval of 0 disables any wait time, which is the behavior of former Cassandra versions. # - # otc_backlog_expiration_interval_ms: 200 + {{ if .Params.OTC_BACKLOG_EXPIRATION_INTERVAL_MS }} + otc_backlog_expiration_interval_ms: {{ .Params.OTC_BACKLOG_EXPIRATION_INTERVAL_MS }} + {{ end }} From 10da9fd43037cfa5f6a98db5a36ef31a54eeda29 Mon Sep 17 00:00:00 2001 From: viivek46 Date: Wed, 16 Oct 2019 00:44:53 -0300 Subject: [PATCH 08/11] added more settings jvm-options.yml configurable --- operator/params.yaml | 105 ++++++++++++++++++++++++++++ operator/templates/jvm-options.yaml | 96 ++++++++++++++++++------- 2 files changed, 175 insertions(+), 26 deletions(-) diff --git a/operator/params.yaml b/operator/params.yaml index 87abf3a7..78ee2c09 100644 --- a/operator/params.yaml +++ b/operator/params.yaml @@ -582,3 +582,108 @@ OTC_COALESCING_ENOUGH_COALESCED_MESSAGES: OTC_BACKLOG_EXPIRATION_INTERVAL_MS: description: "How many milliseconds to wait between two expiration runs on the backlog (queue) of the OutboundTcpConnection." default: + +#JVM OPTIONS PARAMS +AVAILABLE_PROCESSORS: + description: "In a multi-instance deployment, multiple Cassandra instances will independently assume that all CPU processors are available to it. This setting allows you to specify a smaller set of processors and perhaps have affinity." + default: + +JOIN_RING: + description: "Set to false to start Cassandra on a node but not have the node join the cluster." 
+ default: + +LOAD_RING_STATE: + description: "Set to false to clear all gossip state for the node on restart. Use when you have changed node information in cassandra.yaml (such as listen_address)." + default: + +REPLAYLIST: + description: "Allow restoring specific tables from an archived commit log." + default: "" + +RING_DELAY_MS: + description: "Allows overriding of the default RING_DELAY (30000ms), which is the amount of time a node waits before joining the ring." + default: + +TRIGGERS_DIR: + description: "Set the default location for the trigger JARs. (Default: conf/triggers)" + default: "" + +WRITE_SURVEY: + description: "For testing new compaction and compression strategies. It allows you to experiment with different strategies and benchmark write performance differences without affecting the production workload." + default: + +DISABLE_AUTH_CACHES_REMOTE_CONFIGURATION: + description: "To disable configuration via JMX of auth caches (such as those for credentials, permissions and roles). This will mean those config options can only be set (persistently) in cassandra.yaml and will require a restart for new values to take effect." + default: + +FORCE_DEFAULT_INDEXING_PAGE_SIZE: + description: "To disable dynamic calculation of the page size used when indexing an entire partition (during initial index build/rebuild). If set to true, the page size will be fixed to the default of 10000 rows per page." + default: + +PREFERIPV4STACK: + description: "Prefer binding to IPv4 network intefaces (when net.ipv6.bindv6only=1). See http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6342561 (short version: comment out this entry to enable IPv6 support)." + default: true + +EXPIRATION_DATE_OVERFLOW_POLICY: + description: "Defines how to handle INSERT requests with TTL exceeding the maximum supported expiration date." 
+ default: + +THREADPRIORITYPOLICY: + description: "allows lowering thread priority without being root on linux - probably not necessary on Windows but doesn't harm anything." + default: 42 + +PER_THREAD_STACK_SIZE: + description: "Per-thread stack size." + default: "256k" + +STRINGTABLESIZE: + description: "Larger interned string table, for gossip's benefit (CASSANDRA-6410)" + default: 1000003 + +SURVIVORRATIO: + description: "CMS Settings: SurvivorRatio" + default: 8 + +MAXTENURINGTHRESHOLD: + description: "CMS Settings: MaxTenuringThreshold" + default: 1 + +CMSINITIATINGOCCUPANCYFRACTION: + description: "CMS Settings: CMSInitiatingOccupancyFraction" + default: 75 + +CMSWAITDURATION: + description: "CMS Settings: CMSWaitDuration" + default: 10000 + +NUMBEROFGCLOGFILES: + description: "GC logging options: NumberOfGCLogFiles" + default: 10 + +GCLOGFILESIZE: + description: "GC logging options: GCLOGFILESIZE" + default: "10M" + +GC_LOG_DIRECTORY: + description: "GC logging options: GC_LOG_DIRECTORY" + default: "" + +PRINTFLSSTATISTICS: + description: "GC logging options: PrintFLSStatistics" + default: "" + +CONCGCTHREADS: + description: "By default, ConcGCThreads is 1/4 of ParallelGCThreads. Setting both to the same value can reduce STW durations." + default: "" + +INITIATINGHEAPOCCUPANCYPERCENT: + description: "Save CPU time on large (>= 16GB) heaps by delaying region scanning until the heap is 70% full. The default in Hotspot 8u40 is 40%." + default: + +MAXGCPAUSEMILLIS: + description: "Main G1GC tunable: lowering the pause target will lower throughput and vise versa." + default: + +G1RSETUPDATINGPAUSETIMEPERCENT: + description: "Have the JVM do less remembered set work during STW, instead preferring concurrent GC. Reduces p99.9 latency." 
+ default: diff --git a/operator/templates/jvm-options.yaml b/operator/templates/jvm-options.yaml index 00cfb39b..3e6a2723 100644 --- a/operator/templates/jvm-options.yaml +++ b/operator/templates/jvm-options.yaml @@ -23,7 +23,9 @@ data: # In a multi-instance deployment, multiple Cassandra instances will independently assume that all # CPU processors are available to it. This setting allows you to specify a smaller set of processors # and perhaps have affinity. - #-Dcassandra.available_processors=number_of_processors + {{ if .Params.AVAILABLE_PROCESSORS }} + -Dcassandra.available_processors={{ .Params.AVAILABLE_PROCESSORS }} + {{ end }} # The directory location of the cassandra.yaml file. #-Dcassandra.config=directory @@ -32,11 +34,15 @@ data: #-Dcassandra.initial_token=token # Set to false to start Cassandra on a node but not have the node join the cluster. - #-Dcassandra.join_ring=true|false + {{ if .Params.JOIN_RING }} + -Dcassandra.join_ring={{ .Params.JOIN_RING }} + {{ end }} # Set to false to clear all gossip state for the node on restart. Use when you have changed node # information in cassandra.yaml (such as listen_address). - #-Dcassandra.load_ring_state=true|false + {{ if .Params.LOAD_RING_STATE }} + -Dcassandra.load_ring_state={{ .Params.LOAD_RING_STATE }} + {{ end }} # Enable pluggable metrics reporter. See Pluggable metrics reporting in Cassandra 2.0.2. #-Dcassandra.metricsReporterConfigFile=file @@ -53,11 +59,15 @@ data: #-Dcassandra.replace_address=listen_address or broadcast_address of dead node # Allow restoring specific tables from an archived commit log. - #-Dcassandra.replayList=table + {{ if .Params.REPLAYLIST }} + -Dcassandra.replayList={{ .Params.REPLAYLIST }} + {{ end }} # Allows overriding of the default RING_DELAY (30000ms), which is the amount of time a node waits # before joining the ring. 
- #-Dcassandra.ring_delay_ms=ms + {{ if .Params.RING_DELAY_MS }} + -Dcassandra.ring_delay_ms={{ .Params.RING_DELAY_MS }} + {{ end }} # Set the port for the Thrift RPC service, which is used for client connections. (Default: 9160) #-Dcassandra.rpc_port=port @@ -75,21 +85,29 @@ data: #-Dcassandra.storage_port=port # Set the default location for the trigger JARs. (Default: conf/triggers) - #-Dcassandra.triggers_dir=directory + {{ if .Params.TRIGGERS_DIR }} + -Dcassandra.triggers_dir={{ .Params.TRIGGERS_DIR }} + {{ end }} # For testing new compaction and compression strategies. It allows you to experiment with different # strategies and benchmark write performance differences without affecting the production workload. - #-Dcassandra.write_survey=true + {{ if .Params.WRITE_SURVEY }} + -Dcassandra.write_survey={{ .Params.WRITE_SURVEY }} + {{ end }} # To disable configuration via JMX of auth caches (such as those for credentials, permissions and # roles). This will mean those config options can only be set (persistently) in cassandra.yaml # and will require a restart for new values to take effect. - #-Dcassandra.disable_auth_caches_remote_configuration=true + {{ if .Params.DISABLE_AUTH_CACHES_REMOTE_CONFIGURATION }} + -Dcassandra.disable_auth_caches_remote_configuration={{ .Params.DISABLE_AUTH_CACHES_REMOTE_CONFIGURATION }} + {{ end }} # To disable dynamic calculation of the page size used when indexing an entire partition (during # initial index build/rebuild). If set to true, the page size will be fixed to the default of # 10000 rows per page. - #-Dcassandra.force_default_indexing_page_size=true + {{ if .Params.FORCE_DEFAULT_INDEXING_PAGE_SIZE }} + -Dcassandra.force_default_indexing_page_size={{ .Params.FORCE_DEFAULT_INDEXING_PAGE_SIZE }} + {{ end }} ######################## # GENERAL JVM SETTINGS # @@ -105,16 +123,16 @@ data: # allows lowering thread priority without being root on linux - probably # not necessary on Windows but doesn't harm anything. 
# see http://tech.stolsvik.com/2010/01/linux-java-thread-priorities-workar - -XX:ThreadPriorityPolicy=42 + -XX:ThreadPriorityPolicy={{ .Params.THREADPRIORITYPOLICY }} # Enable heap-dump if there's an OOM -XX:+HeapDumpOnOutOfMemoryError # Per-thread stack size. - -Xss256k + -Xss{{ .Params.PER_THREAD_STACK_SIZE }} # Larger interned string table, for gossip's benefit (CASSANDRA-6410) - -XX:StringTableSize=1000003 + -XX:StringTableSize={{ .Params.STRINGTABLESIZE }} # Make sure all memory is faulted and zeroed on startup. # This helps prevent soft faults in containers and makes @@ -136,7 +154,7 @@ data: # Prefer binding to IPv4 network intefaces (when net.ipv6.bindv6only=1). See # http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6342561 (short version: # comment out this entry to enable IPv6 support). - -Djava.net.preferIPv4Stack=true + -Djava.net.preferIPv4Stack={{ .Params.PREFERIPV4STACK }} ### Debug options @@ -205,7 +223,9 @@ data: # * CAP: any insert with TTL expiring after 2038-01-19T03:14:06+00:00 will expire on 2038-01-19T03:14:06+00:00 and the client will receive a warning. # * CAP_NOWARN: same as previous, except that the client warning will not be emitted. 
# - #-Dcassandra.expiration_date_overflow_policy=REJECT + {{ if .Params.EXPIRATION_DATE_OVERFLOW_POLICY }} + -Dcassandra.expiration_date_overflow_policy={{ .Params.EXPIRATION_DATE_OVERFLOW_POLICY }} + {{ end }} ################# # GC SETTINGS # @@ -216,11 +236,19 @@ data: -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled - -XX:SurvivorRatio=8 - -XX:MaxTenuringThreshold=1 - -XX:CMSInitiatingOccupancyFraction=75 + {{ if .Params.SURVIVORRATIO }} + -XX:SurvivorRatio={{ .Params.SURVIVORRATIO }} + {{ end }} + {{ if .Params.MAXTENURINGTHRESHOLD }} + -XX:MaxTenuringThreshold={{ .Params.MAXTENURINGTHRESHOLD }} + {{ end }} + {{ if .Params.CMSINITIATINGOCCUPANCYFRACTION }} + -XX:CMSInitiatingOccupancyFraction={{ .Params.CMSINITIATINGOCCUPANCYFRACTION }} + {{ end }} -XX:+UseCMSInitiatingOccupancyOnly - -XX:CMSWaitDuration=10000 + {{ if .Params.CMSWAITDURATION }} + -XX:CMSWaitDuration={{ .Params.CMSWAITDURATION }} + {{ end }} -XX:+CMSParallelInitialMarkEnabled -XX:+CMSEdenChunksRecordAlways # some JVMs will fill up their heap when accessed via JMX, see CASSANDRA-6541 @@ -233,18 +261,24 @@ data: # ## Have the JVM do less remembered set work during STW, instead ## preferring concurrent GC. Reduces p99.9 latency. - #-XX:G1RSetUpdatingPauseTimePercent=5 + {{ if .Params.G1RSETUPDATINGPAUSETIMEPERCENT }} + -XX:G1RSetUpdatingPauseTimePercent={{ .Params.G1RSETUPDATINGPAUSETIMEPERCENT }} + {{ end }} # ## Main G1GC tunable: lowering the pause target will lower throughput and vise versa. ## 200ms is the JVM default and lowest viable setting ## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml. - #-XX:MaxGCPauseMillis=500 + {{ if .Params.MAXGCPAUSEMILLIS }} + -XX:MaxGCPauseMillis={{ .Params.MAXGCPAUSEMILLIS }} + {{ end }} ## Optional G1 Settings # Save CPU time on large (>= 16GB) heaps by delaying region scanning # until the heap is 70% full. The default in Hotspot 8u40 is 40%. 
- #-XX:InitiatingHeapOccupancyPercent=70 + {{ if .Params.INITIATINGHEAPOCCUPANCYPERCENT }} + -XX:InitiatingHeapOccupancyPercent={{ .Params.INITIATINGHEAPOCCUPANCYPERCENT }} + {{ end }} # For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores. # Otherwise equal to the number of cores when 8 or less. @@ -252,7 +286,9 @@ data: #-XX:ParallelGCThreads=16 # By default, ConcGCThreads is 1/4 of ParallelGCThreads. # Setting both to the same value can reduce STW durations. - #-XX:ConcGCThreads=16 + {{ if .Params.CONCGCTHREADS }} + -XX:ConcGCThreads={{ .Params.CONCGCTHREADS }} + {{ end }} ### GC logging options -- uncomment to enable @@ -262,11 +298,19 @@ data: -XX:+PrintTenuringDistribution -XX:+PrintGCApplicationStoppedTime -XX:+PrintPromotionFailure - #-XX:PrintFLSStatistics=1 - #-Xloggc:/var/log/cassandra/gc.log + {{ if .Params.PRINTFLSSTATISTICS }} + -XX:PrintFLSStatistics={{ .Params.PRINTFLSSTATISTICS }} + {{ end }} + {{ if .Params.GC_LOG_DIRECTORY }} + -Xloggc:{{ .Params.GC_LOG_DIRECTORY }} + {{ end }} -XX:+UseGCLogFileRotation - -XX:NumberOfGCLogFiles=10 - -XX:GCLogFileSize=10M + {{ if .Params.SURVIVORRATIO }} + -XX:NumberOfGCLogFiles={{ .Params.NUMBEROFGCLOGFILES }} + {{ end }} + {{ if .Params.SURVIVORRATIO }} + -XX:GCLogFileSize={{ .Params.GCLOGFILESIZE }} + {{ end }} ### Allow the JVM to read CGgroup memory information. This is JDK 8/9 ### specific and deprecated on JDK 10. 
It will have to be removed for From 9111b0536e68164436379875ef9a422ab1f5b530 Mon Sep 17 00:00:00 2001 From: viivek46 Date: Wed, 16 Oct 2019 15:01:58 -0300 Subject: [PATCH 09/11] Apply suggestions from code review --- operator/params.yaml | 57 +++++++++-------- operator/templates/jvm-options.yaml | 96 ++++++++++++++--------------- 2 files changed, 78 insertions(+), 75 deletions(-) diff --git a/operator/params.yaml b/operator/params.yaml index 78ee2c09..05dba2a5 100644 --- a/operator/params.yaml +++ b/operator/params.yaml @@ -583,107 +583,110 @@ OTC_BACKLOG_EXPIRATION_INTERVAL_MS: description: "How many milliseconds to wait between two expiration runs on the backlog (queue) of the OutboundTcpConnection." default: -#JVM OPTIONS PARAMS -AVAILABLE_PROCESSORS: + +################################################################################ +############################ JVM OPTIONS ####################################### +################################################################################ +JVM_OPT_AVAILABLE_PROCESSORS: description: "In a multi-instance deployment, multiple Cassandra instances will independently assume that all CPU processors are available to it. This setting allows you to specify a smaller set of processors and perhaps have affinity." default: -JOIN_RING: +JVM_OPT_JOIN_RING: description: "Set to false to start Cassandra on a node but not have the node join the cluster." default: -LOAD_RING_STATE: +JVM_OPT_LOAD_RING_STATE: description: "Set to false to clear all gossip state for the node on restart. Use when you have changed node information in cassandra.yaml (such as listen_address)." default: -REPLAYLIST: +JVM_OPT_REPLAYLIST: description: "Allow restoring specific tables from an archived commit log." default: "" -RING_DELAY_MS: +JVM_OPT_RING_DELAY_MS: description: "Allows overriding of the default RING_DELAY (30000ms), which is the amount of time a node waits before joining the ring." 
default: -TRIGGERS_DIR: +JVM_OPT_TRIGGERS_DIR: description: "Set the default location for the trigger JARs. (Default: conf/triggers)" default: "" -WRITE_SURVEY: +JVM_OPT_WRITE_SURVEY: description: "For testing new compaction and compression strategies. It allows you to experiment with different strategies and benchmark write performance differences without affecting the production workload." default: -DISABLE_AUTH_CACHES_REMOTE_CONFIGURATION: +JVM_OPT_DISABLE_AUTH_CACHES_REMOTE_CONFIGURATION: description: "To disable configuration via JMX of auth caches (such as those for credentials, permissions and roles). This will mean those config options can only be set (persistently) in cassandra.yaml and will require a restart for new values to take effect." default: -FORCE_DEFAULT_INDEXING_PAGE_SIZE: +JVM_OPT_FORCE_DEFAULT_INDEXING_PAGE_SIZE: description: "To disable dynamic calculation of the page size used when indexing an entire partition (during initial index build/rebuild). If set to true, the page size will be fixed to the default of 10000 rows per page." default: -PREFERIPV4STACK: +JVM_OPT_PREFER_IPV4_STACK: description: "Prefer binding to IPv4 network intefaces (when net.ipv6.bindv6only=1). See http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6342561 (short version: comment out this entry to enable IPv6 support)." default: true -EXPIRATION_DATE_OVERFLOW_POLICY: +JVM_OPT_EXPIRATION_DATE_OVERFLOW_POLICY: description: "Defines how to handle INSERT requests with TTL exceeding the maximum supported expiration date." default: -THREADPRIORITYPOLICY: +JVM_OPT_THREAD_PRIORITY_POLICY: description: "allows lowering thread priority without being root on linux - probably not necessary on Windows but doesn't harm anything." default: 42 -PER_THREAD_STACK_SIZE: +JVM_OPT_PER_THREAD_STACK_SIZE: description: "Per-thread stack size." 
default: "256k" -STRINGTABLESIZE: +JVM_OPT_STRING_TABLE_SIZE: description: "Larger interned string table, for gossip's benefit (CASSANDRA-6410)" default: 1000003 -SURVIVORRATIO: +JVM_OPT_SURVIVOR_RATIO: description: "CMS Settings: SurvivorRatio" default: 8 -MAXTENURINGTHRESHOLD: +JVM_OPT_MAX_TENURING_THRESHOLD: description: "CMS Settings: MaxTenuringThreshold" default: 1 -CMSINITIATINGOCCUPANCYFRACTION: +JVM_OPT_CMS_INITIATING_OCCUPANCY_FRACTION: description: "CMS Settings: CMSInitiatingOccupancyFraction" default: 75 -CMSWAITDURATION: +JVM_OPT_CMS_WAIT_DURATION: description: "CMS Settings: CMSWaitDuration" default: 10000 -NUMBEROFGCLOGFILES: +JVM_OPT_NUMBER_OF_GC_LOG_FILES: description: "GC logging options: NumberOfGCLogFiles" default: 10 -GCLOGFILESIZE: +JVM_OPT_GC_LOG_FILE_SIZE: description: "GC logging options: GCLOGFILESIZE" default: "10M" -GC_LOG_DIRECTORY: +JVM_OPT_GC_LOG_DIRECTORY: description: "GC logging options: GC_LOG_DIRECTORY" default: "" -PRINTFLSSTATISTICS: +JVM_OPT_PRINT_FLS_STATISTICS: description: "GC logging options: PrintFLSStatistics" default: "" -CONCGCTHREADS: +JVM_OPT_CONC_GC_THREADS: description: "By default, ConcGCThreads is 1/4 of ParallelGCThreads. Setting both to the same value can reduce STW durations." default: "" -INITIATINGHEAPOCCUPANCYPERCENT: +JVM_OPT_INITIATING_HEAP_OCCUPANCY_PERCENT: description: "Save CPU time on large (>= 16GB) heaps by delaying region scanning until the heap is 70% full. The default in Hotspot 8u40 is 40%." default: -MAXGCPAUSEMILLIS: +JVM_OPT_MAX_GC_PAUSE_MILLIS: description: "Main G1GC tunable: lowering the pause target will lower throughput and vise versa." default: -G1RSETUPDATINGPAUSETIMEPERCENT: +JVM_OPT_G1R_SET_UPDATING_PAUSE_TIME_PERCENT: description: "Have the JVM do less remembered set work during STW, instead preferring concurrent GC. Reduces p99.9 latency." 
default: diff --git a/operator/templates/jvm-options.yaml b/operator/templates/jvm-options.yaml index 3e6a2723..7b40bd06 100644 --- a/operator/templates/jvm-options.yaml +++ b/operator/templates/jvm-options.yaml @@ -23,8 +23,8 @@ data: # In a multi-instance deployment, multiple Cassandra instances will independently assume that all # CPU processors are available to it. This setting allows you to specify a smaller set of processors # and perhaps have affinity. - {{ if .Params.AVAILABLE_PROCESSORS }} - -Dcassandra.available_processors={{ .Params.AVAILABLE_PROCESSORS }} + {{ if .Params.JVM_OPT_AVAILABLE_PROCESSORS }} + -Dcassandra.available_processors={{ .Params.JVM_OPT_AVAILABLE_PROCESSORS }} {{ end }} # The directory location of the cassandra.yaml file. @@ -34,14 +34,14 @@ data: #-Dcassandra.initial_token=token # Set to false to start Cassandra on a node but not have the node join the cluster. - {{ if .Params.JOIN_RING }} - -Dcassandra.join_ring={{ .Params.JOIN_RING }} + {{ if .Params.JVM_OPT_JOIN_RING }} + -Dcassandra.join_ring={{ .Params.JVM_OPT_JOIN_RING }} {{ end }} # Set to false to clear all gossip state for the node on restart. Use when you have changed node # information in cassandra.yaml (such as listen_address). - {{ if .Params.LOAD_RING_STATE }} - -Dcassandra.load_ring_state={{ .Params.LOAD_RING_STATE }} + {{ if .Params.JVM_OPT_LOAD_RING_STATE }} + -Dcassandra.load_ring_state={{ .Params.JVM_OPT_LOAD_RING_STATE }} {{ end }} # Enable pluggable metrics reporter. See Pluggable metrics reporting in Cassandra 2.0.2. @@ -59,14 +59,14 @@ data: #-Dcassandra.replace_address=listen_address or broadcast_address of dead node # Allow restoring specific tables from an archived commit log. 
- {{ if .Params.REPLAYLIST }} - -Dcassandra.replayList={{ .Params.REPLAYLIST }} + {{ if .Params.JVM_OPT_REPLAYLIST }} + -Dcassandra.replayList={{ .Params.JVM_OPT_REPLAYLIST }} {{ end }} # Allows overriding of the default RING_DELAY (30000ms), which is the amount of time a node waits # before joining the ring. - {{ if .Params.RING_DELAY_MS }} - -Dcassandra.ring_delay_ms={{ .Params.RING_DELAY_MS }} + {{ if .Params.JVM_OPT_RING_DELAY_MS }} + -Dcassandra.ring_delay_ms={{ .Params.JVM_OPT_RING_DELAY_MS }} {{ end }} # Set the port for the Thrift RPC service, which is used for client connections. (Default: 9160) @@ -85,28 +85,28 @@ data: #-Dcassandra.storage_port=port # Set the default location for the trigger JARs. (Default: conf/triggers) - {{ if .Params.TRIGGERS_DIR }} - -Dcassandra.triggers_dir={{ .Params.TRIGGERS_DIR }} + {{ if .Params.JVM_OPT_TRIGGERS_DIR }} + -Dcassandra.triggers_dir={{ .Params.JVM_OPT_TRIGGERS_DIR }} {{ end }} # For testing new compaction and compression strategies. It allows you to experiment with different # strategies and benchmark write performance differences without affecting the production workload. - {{ if .Params.WRITE_SURVEY }} - -Dcassandra.write_survey={{ .Params.WRITE_SURVEY }} + {{ if .Params.JVM_OPT_WRITE_SURVEY }} + -Dcassandra.write_survey={{ .Params.JVM_OPT_WRITE_SURVEY }} {{ end }} # To disable configuration via JMX of auth caches (such as those for credentials, permissions and # roles). This will mean those config options can only be set (persistently) in cassandra.yaml # and will require a restart for new values to take effect. 
- {{ if .Params.DISABLE_AUTH_CACHES_REMOTE_CONFIGURATION }} - -Dcassandra.disable_auth_caches_remote_configuration={{ .Params.DISABLE_AUTH_CACHES_REMOTE_CONFIGURATION }} + {{ if .Params.JVM_OPT_DISABLE_AUTH_CACHES_REMOTE_CONFIGURATION }} + -Dcassandra.disable_auth_caches_remote_configuration={{ .Params.JVM_OPT_DISABLE_AUTH_CACHES_REMOTE_CONFIGURATION }} {{ end }} # To disable dynamic calculation of the page size used when indexing an entire partition (during # initial index build/rebuild). If set to true, the page size will be fixed to the default of # 10000 rows per page. - {{ if .Params.FORCE_DEFAULT_INDEXING_PAGE_SIZE }} - -Dcassandra.force_default_indexing_page_size={{ .Params.FORCE_DEFAULT_INDEXING_PAGE_SIZE }} + {{ if .Params.JVM_OPT_FORCE_DEFAULT_INDEXING_PAGE_SIZE }} + -Dcassandra.force_default_indexing_page_size={{ .Params.JVM_OPT_FORCE_DEFAULT_INDEXING_PAGE_SIZE }} {{ end }} ######################## @@ -123,16 +123,16 @@ data: # allows lowering thread priority without being root on linux - probably # not necessary on Windows but doesn't harm anything. # see http://tech.stolsvik.com/2010/01/linux-java-thread-priorities-workar - -XX:ThreadPriorityPolicy={{ .Params.THREADPRIORITYPOLICY }} + -XX:ThreadPriorityPolicy={{ .Params.JVM_OPT_THREAD_PRIORITY_POLICY }} # Enable heap-dump if there's an OOM -XX:+HeapDumpOnOutOfMemoryError # Per-thread stack size. - -Xss{{ .Params.PER_THREAD_STACK_SIZE }} + -Xss{{ .Params.JVM_OPT_PER_THREAD_STACK_SIZE }} # Larger interned string table, for gossip's benefit (CASSANDRA-6410) - -XX:StringTableSize={{ .Params.STRINGTABLESIZE }} + -XX:StringTableSize={{ .Params.JVM_OPT_STRING_TABLE_SIZE }} # Make sure all memory is faulted and zeroed on startup. # This helps prevent soft faults in containers and makes @@ -154,7 +154,7 @@ data: # Prefer binding to IPv4 network intefaces (when net.ipv6.bindv6only=1). 
See # http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6342561 (short version: # comment out this entry to enable IPv6 support). - -Djava.net.preferIPv4Stack={{ .Params.PREFERIPV4STACK }} + -Djava.net.preferIPv4Stack={{ .Params.JVM_OPT_PREFER_IPV4_STACK }} ### Debug options @@ -223,8 +223,8 @@ data: # * CAP: any insert with TTL expiring after 2038-01-19T03:14:06+00:00 will expire on 2038-01-19T03:14:06+00:00 and the client will receive a warning. # * CAP_NOWARN: same as previous, except that the client warning will not be emitted. # - {{ if .Params.EXPIRATION_DATE_OVERFLOW_POLICY }} - -Dcassandra.expiration_date_overflow_policy={{ .Params.EXPIRATION_DATE_OVERFLOW_POLICY }} + {{ if .Params.JVM_OPT_EXPIRATION_DATE_OVERFLOW_POLICY }} + -Dcassandra.expiration_date_overflow_policy={{ .Params.JVM_OPT_EXPIRATION_DATE_OVERFLOW_POLICY }} {{ end }} ################# @@ -236,18 +236,18 @@ data: -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled - {{ if .Params.SURVIVORRATIO }} - -XX:SurvivorRatio={{ .Params.SURVIVORRATIO }} + {{ if .Params.JVM_OPT_SURVIVOR_RATIO }} + -XX:SurvivorRatio={{ .Params.JVM_OPT_SURVIVOR_RATIO }} {{ end }} - {{ if .Params.MAXTENURINGTHRESHOLD }} - -XX:MaxTenuringThreshold={{ .Params.MAXTENURINGTHRESHOLD }} + {{ if .Params.JVM_OPT_MAX_TENURING_THRESHOLD }} + -XX:MaxTenuringThreshold={{ .Params.JVM_OPT_MAX_TENURING_THRESHOLD }} {{ end }} - {{ if .Params.CMSINITIATINGOCCUPANCYFRACTION }} - -XX:CMSInitiatingOccupancyFraction={{ .Params.CMSINITIATINGOCCUPANCYFRACTION }} + {{ if .Params.JVM_OPT_CMS_INITIATING_OCCUPANCY_FRACTION }} + -XX:CMSInitiatingOccupancyFraction={{ .Params.JVM_OPT_CMS_INITIATING_OCCUPANCY_FRACTION }} {{ end }} -XX:+UseCMSInitiatingOccupancyOnly - {{ if .Params.CMSWAITDURATION }} - -XX:CMSWaitDuration={{ .Params.CMSWAITDURATION }} + {{ if .Params.JVM_OPT_CMS_WAIT_DURATION }} + -XX:CMSWaitDuration={{ .Params.JVM_OPT_CMS_WAIT_DURATION }} {{ end }} -XX:+CMSParallelInitialMarkEnabled -XX:+CMSEdenChunksRecordAlways @@ 
-261,23 +261,23 @@ data: # ## Have the JVM do less remembered set work during STW, instead ## preferring concurrent GC. Reduces p99.9 latency. - {{ if .Params.G1RSETUPDATINGPAUSETIMEPERCENT }} - -XX:G1RSetUpdatingPauseTimePercent={{ .Params.G1RSETUPDATINGPAUSETIMEPERCENT }} + {{ if .Params.JVM_OPT_G1R_SET_UPDATING_PAUSE_TIME_PERCENT }} + -XX:G1RSetUpdatingPauseTimePercent={{ .Params.JVM_OPT_G1R_SET_UPDATING_PAUSE_TIME_PERCENT }} {{ end }} # ## Main G1GC tunable: lowering the pause target will lower throughput and vise versa. ## 200ms is the JVM default and lowest viable setting ## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml. - {{ if .Params.MAXGCPAUSEMILLIS }} - -XX:MaxGCPauseMillis={{ .Params.MAXGCPAUSEMILLIS }} + {{ if .Params.JVM_OPT_MAX_GC_PAUSE_MILLIS }} + -XX:MaxGCPauseMillis={{ .Params.JVM_OPT_MAX_GC_PAUSE_MILLIS }} {{ end }} ## Optional G1 Settings # Save CPU time on large (>= 16GB) heaps by delaying region scanning # until the heap is 70% full. The default in Hotspot 8u40 is 40%. - {{ if .Params.INITIATINGHEAPOCCUPANCYPERCENT }} - -XX:InitiatingHeapOccupancyPercent={{ .Params.INITIATINGHEAPOCCUPANCYPERCENT }} + {{ if .Params.JVM_OPT_INITIATING_HEAP_OCCUPANCY_PERCENT }} + -XX:InitiatingHeapOccupancyPercent={{ .Params.JVM_OPT_INITIATING_HEAP_OCCUPANCY_PERCENT }} {{ end }} # For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores. @@ -286,8 +286,8 @@ data: #-XX:ParallelGCThreads=16 # By default, ConcGCThreads is 1/4 of ParallelGCThreads. # Setting both to the same value can reduce STW durations. 
- {{ if .Params.CONCGCTHREADS }} - -XX:ConcGCThreads={{ .Params.CONCGCTHREADS }} + {{ if .Params.JVM_OPT_CONC_GC_THREADS }} + -XX:ConcGCThreads={{ .Params.JVM_OPT_CONC_GC_THREADS }} {{ end }} ### GC logging options -- uncomment to enable @@ -298,18 +298,18 @@ data: -XX:+PrintTenuringDistribution -XX:+PrintGCApplicationStoppedTime -XX:+PrintPromotionFailure - {{ if .Params.PRINTFLSSTATISTICS }} - -XX:PrintFLSStatistics={{ .Params.PRINTFLSSTATISTICS }} + {{ if .Params.JVM_OPT_PRINT_FLS_STATISTICS }} + -XX:PrintFLSStatistics={{ .Params.JVM_OPT_PRINT_FLS_STATISTICS }} {{ end }} - {{ if .Params.GC_LOG_DIRECTORY }} - -Xloggc:{{ .Params.GC_LOG_DIRECTORY }} + {{ if .Params.JVM_OPT_GC_LOG_DIRECTORY }} + -Xloggc:{{ .Params.JVM_OPT_GC_LOG_DIRECTORY }} {{ end }} -XX:+UseGCLogFileRotation - {{ if .Params.SURVIVORRATIO }} - -XX:NumberOfGCLogFiles={{ .Params.NUMBEROFGCLOGFILES }} + {{ if .Params.JVM_OPT_NUMBER_OF_GC_LOG_FILES }} + -XX:NumberOfGCLogFiles={{ .Params.JVM_OPT_NUMBER_OF_GC_LOG_FILES }} {{ end }} - {{ if .Params.SURVIVORRATIO }} - -XX:GCLogFileSize={{ .Params.GCLOGFILESIZE }} + {{ if .Params.JVM_OPT_GC_LOG_FILE_SIZE }} + -XX:GCLogFileSize={{ .Params.JVM_OPT_GC_LOG_FILE_SIZE }} {{ end }} ### Allow the JVM to read CGgroup memory information. This is JDK 8/9 From 121a2843f4635871efd0bebc4de523b1ff34d54e Mon Sep 17 00:00:00 2001 From: viivek46 Date: Thu, 17 Oct 2019 23:03:46 -0300 Subject: [PATCH 10/11] made authenticator/authenticator configurable --- operator/params.yaml | 8 ++++++++ operator/templates/cassandra-yaml.yaml | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/operator/params.yaml b/operator/params.yaml index 05dba2a5..89dfdc41 100644 --- a/operator/params.yaml +++ b/operator/params.yaml @@ -91,6 +91,14 @@ BATCHLOG_REPLAY_THROTTLE_IN_KB: description: "The total maximum throttle for replaying failed logged batches in KBs per second." 
default: 1024 +AUTHENTICATOR: + description: "Authentication backend, implementing IAuthenticator; used to identify users." + default: "AllowAllAuthenticator" + +AUTHORIZER: + description: "Authorization backend, implementing IAuthorizer; used to limit access/provide permissions." + default: "AllowAllAuthorizer" + ROLE_MANAGER: description: "Part of the Authentication & Authorization backend that implements IRoleManager to maintain grants and memberships between roles, By default, the value set is Apache Cassandra's out of the box Role Manager: CassandraRoleManager" default: "CassandraRoleManager" diff --git a/operator/templates/cassandra-yaml.yaml b/operator/templates/cassandra-yaml.yaml index 2ee6071b..47d0dccf 100644 --- a/operator/templates/cassandra-yaml.yaml +++ b/operator/templates/cassandra-yaml.yaml @@ -116,7 +116,7 @@ data: # users. It keeps usernames and hashed passwords in system_auth.roles table. # Please increase system_auth keyspace replication factor if you use this authenticator. # If using PasswordAuthenticator, CassandraRoleManager must also be used (see below) - authenticator: AllowAllAuthenticator + authenticator: {{ .Params.AUTHENTICATOR }} # Authorization backend, implementing IAuthorizer; used to limit access/provide permissions # Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthorizer, @@ -125,7 +125,7 @@ data: # - AllowAllAuthorizer allows any action to any user - set it to disable authorization. # - CassandraAuthorizer stores permissions in system_auth.role_permissions table. Please # increase system_auth keyspace replication factor if you use this authorizer. - authorizer: AllowAllAuthorizer + authorizer: {{ .Params.AUTHORIZER }} # Part of the Authentication & Authorization backend, implementing IRoleManager; used # to maintain grants and memberships between roles. 
From 661be5135cef2c5e6f6e8aaa8c8fbec5a33be00d Mon Sep 17 00:00:00 2001 From: Murilo Pereira Date: Fri, 18 Oct 2019 21:01:35 +0200 Subject: [PATCH 11/11] Cassandra settings improvements. (#2) * Add units to setting names, improve names, use correct settings. * Make this look scarier. * Misc improvements. * Better name. * Use fully-scoped actual setting names. * Leave initial_token unconfigurable for now. --- operator/params.yaml | 89 ++++++------ operator/templates/cassandra-yaml.yaml | 194 +++++++++++++------------ operator/templates/jvm-options.yaml | 81 +++++++---- operator/templates/stateful-set.yaml | 14 +- 4 files changed, 206 insertions(+), 172 deletions(-) diff --git a/operator/params.yaml b/operator/params.yaml index 89dfdc41..51e3bcbd 100644 --- a/operator/params.yaml +++ b/operator/params.yaml @@ -1,24 +1,43 @@ +################################################################################ +############################### Operator settings ############################## +################################################################################ + NODE_COUNT: - description: "Number of Cassandra nodes" - displayName: "Node Count" + description: "Number of Cassandra nodes." default: 3 -NODE_CPUS: - description: "CPUs (request) for the Cassandra node pods. spec.containers[].resources.requests.cpu" - default: "2000m" +NODE_CPU_MC: + description: "CPU request (in millicores) for the Cassandra node pods." + default: 2000 -NODE_CPUS_LIMIT: - description: "CPUs (limit) for the Cassandra node pods. spec.containers[].resources.limits.cpu" - default: "2000m" +NODE_CPU_LIMIT_MC: + description: "CPU limit (in millicores) for the Cassandra node pods." + default: 2000 -NODE_MEM: - description: "Memory (request) for the Cassandra node pods. spec.containers[].resources.requests.memory" +NODE_MEM_MIB: + description: "Memory request (in MiB) for the Cassandra node pods." default: 4096 -NODE_MEM_LIMIT: - description: "Memory (limit) for the Cassandra node pods. 
spec.containers[].resources.limits.memory" +NODE_MEM_LIMIT_MIB: + description: "Memory limit (in MiB) for the Cassandra node pods." default: 4096 +NODE_DISK_SIZE_GIB: + description: "Disk size (in GiB) for the Cassandra node pods." + default: 20 + +NODE_STORAGE_CLASS: + description: "The storage class to be used in volumeClaimTemplates. By default, it is not required and the default storage class is used." + required: false + +OVERRIDE_CLUSTER_NAME: + description: "Override the name of the Cassandra cluster set by the operator. This shouldn't be explicitly set, unless you know what you're doing." + default: "" + +################################################################################ +########################### Cassandra node settings ############################ +################################################################################ + STORAGE_PORT: description: "The port for inter-node communication." default: "7000" @@ -39,28 +58,16 @@ JMX_PORT: description: "The JMX port that will be used to interface with the Cassandra application." default: "7199" -DISK_SIZE: - description: "Disk size for the nodes" - default: "20Gi" - -STORAGE_CLASS: - description: "The storage class to be used in volumeClaimTemplates. By default its not required and the default storage class is used." - required: false - -PERSISTENT_STORAGE: - description: "If false, ephemeral storage is used. Not recommended for production use." - default: "true" - -CLUSTER_NAME: - description: "The name of the cluster managed by the Service" - default: "" +NODE_MIN_HEAP_SIZE_MB: + description: "The minimum JVM heap size in MB. This has a smart default and doesn't need to be explicitly set." + default: -CASSANDRA_HEAP_SIZE_MB: - description: "The amount of JVM heap, in MB, allocated to the Cassandra process." +NODE_MAX_HEAP_SIZE_MB: + description: "The maximum JVM heap size in MB. This has a smart default and doesn't need to be explicitly set."
default: -CASSANDRA_HEAP_NEW_MB: - description: "The amount of JVM new generation heap, in MB, allocated to the Cassandra process." +NODE_NEW_GENERATION_HEAP_SIZE_MB: + description: "The JVM new generation heap size in MB." default: SEED_PROVIDER_CLASS: @@ -309,7 +316,7 @@ MEMTABLE_OFFHEAP_SPACE_IN_MB: MEMTABLE_CLEANUP_THRESHOLD: description: "The ratio used for automatic memtable flush" - default: + default: MEMTABLE_FLUSH_WRITERS: description: "The number of memtable flush writer threads" @@ -523,19 +530,19 @@ BACK_PRESSURE_ENABLED: description: "Enable for the coordinator to apply the specified back pressure strategy to each mutation that is sent to replicas." default: false -BACK_PRESSURE_STRATEGY_CLASS: +BACK_PRESSURE_STRATEGY_CLASS_NAME: description: "The back-pressure strategy applied. The default implementation, RateBasedBackPressure, takes three arguments: high ratio, factor, and flow type, and uses the ratio between incoming mutation responses and outgoing mutation requests." default: "org.apache.cassandra.net.RateBasedBackPressure" -BACK_PRESSURE_HIGH_RATIO: +BACK_PRESSURE_STRATEGY_HIGH_RATIO: description: "When outgoing mutations are below this value, they are rate limited according to the incoming rate decreased by the factor. When above this value, the rate limiting is increased by the factor." default: 0.9 -BACK_PRESSURE_FACTOR: +BACK_PRESSURE_STRATEGY_FACTOR: description: "A number between 1 and 10. Increases or decreases rate limiting." default: 5 -BACK_PRESSURE_FLOW: +BACK_PRESSURE_STRATEGY_FLOW: description: "The flow speed to apply rate limiting: FAST - rate limited to the speed of the fastest replica. SLOW - rate limit to the speed of the slowest replica." default: "FAST" @@ -543,10 +550,6 @@ ALLOCATE_TOKENS_FOR_KEYSPACE: description: "Triggers automatic allocation of num_tokens tokens for this node. 
The allocation algorithm attempts to choose tokens in a way that optimizes replicated load over the nodes in the datacenter for the replication strategy used by the specified keyspace." default: "" -INITIAL_TOKEN: - description: "initial_token allows you to specify tokens manually." - default: - HINTS_DIRECTORY: description: "Directory where Cassandra should store hints." default: "" @@ -591,10 +594,10 @@ OTC_BACKLOG_EXPIRATION_INTERVAL_MS: description: "How many milliseconds to wait between two expiration runs on the backlog (queue) of the OutboundTcpConnection." default: - ################################################################################ -############################ JVM OPTIONS ####################################### +################################ JVM Options ################################### ################################################################################ + JVM_OPT_AVAILABLE_PROCESSORS: description: "In a multi-instance deployment, multiple Cassandra instances will independently assume that all CPU processors are available to it. This setting allows you to specify a smaller set of processors and perhaps have affinity." default: @@ -643,7 +646,7 @@ JVM_OPT_THREAD_PRIORITY_POLICY: description: "allows lowering thread priority without being root on linux - probably not necessary on Windows but doesn't harm anything." default: 42 -JVM_OPT_PER_THREAD_STACK_SIZE: +JVM_OPT_THREAD_STACK_SIZE: description: "Per-thread stack size." default: "256k" diff --git a/operator/templates/cassandra-yaml.yaml b/operator/templates/cassandra-yaml.yaml index 47d0dccf..b8957556 100644 --- a/operator/templates/cassandra-yaml.yaml +++ b/operator/templates/cassandra-yaml.yaml @@ -13,9 +13,11 @@ data: # The name of the cluster. This is mainly used to prevent machines in # one logical cluster from joining another. 
- {{ if .Params.CLUSTER_NAME }} - cluster_name: '{{ .Params.CLUSTER_NAME }}' - {{ else }} + {{ if .Params.OVERRIDE_CLUSTER_NAME }} + cluster_name: '{{ .Params.OVERRIDE_CLUSTER_NAME }}' + {{ else }} + # TODO(mpereira): does it make sense to prepend the Kubernetes namespace to + # the Cassandra cluster name? cluster_name: '{{ .Name }}' {{ end }} @@ -51,9 +53,13 @@ data: # vnodes (num_tokens > 1, above) -- in which case you should provide a # comma-separated list -- it's primarily used when adding nodes to legacy clusters # that do not have vnodes enabled. - {{ if .Params.INITIAL_TOKEN }} - initial_token: {{ .Params.INITIAL_TOKEN }} - {{ end }} + # + # NOTE(mpereira): "initial_token" should be set on a per-node basis, so it + # doesn't make sense to expose it as an operator setting. Maybe we'll + # somehow support this more officially in the future. For now we'll leave it + # commented out. + # + # initial_token: ... # See http://wiki.apache.org/cassandra/HintedHandoff # May either be "true" or "false" to enable globally @@ -61,14 +67,16 @@ data: # When hinted_handoff_enabled is true, a black list of data centers that will not # perform hinted handoff + # + # TODO(mpereira): expose this setting when we add multi-datacenter support. # hinted_handoff_disabled_datacenters: - # - DC1 - # - DC2 + # - DC1 + # - DC2 # this defines the maximum amount of time a dead host will have hints # generated. After it has been dead this long, new hints for it will not be # created until it has been seen alive and gone down again. - max_hint_window_in_ms: {{ .Params.MAX_HINT_WINDOW_IN_MS }} # 3 hours + max_hint_window_in_ms: {{ .Params.MAX_HINT_WINDOW_IN_MS }} # Maximum throttle in KBs per second, per delivery thread. This will be # reduced proportionally to the number of nodes in the cluster. (If there @@ -98,10 +106,10 @@ data: # Compression to apply to the hint files. If omitted, hints files # will be written uncompressed. LZ4, Snappy, and Deflate compressors # are supported. 
- #hints_compression: + # hints_compression: # - class_name: LZ4Compressor # parameters: - # - + # - ... # Maximum throttle in KBs per second, total. This will be # reduced proportionally to the number of nodes in the cluster. @@ -337,12 +345,11 @@ data: # # org.apache.cassandra.cache.SerializingCacheProvider # This is the row cache implementation availabile - # in previous releases of Cassandra. + # in previous releases of Cassandra. {{ if .Params.ROW_CACHE_CLASS_NAME }} row_cache_class_name: {{ .Params.ROW_CACHE_CLASS_NAME }} {{ end }} - # Maximum size of the row cache in memory. # Please note that OHC cache implementation requires some additional off-heap memory to manage # the map structures and some in-flight memory during operations before/after cache entries can be @@ -399,30 +406,27 @@ data: # saved caches # If not set, the default directory is $CASSANDRA_HOME/data/saved_caches. - # saved_caches_directory: /var/lib/cassandra/saved_caches + # saved_caches_directory: /var/lib/cassandra/saved_caches {{ if .Params.SAVED_CACHES_DIRECTORY }} saved_caches_directory: {{ .Params.SAVED_CACHES_DIRECTORY }} {{ end }} # commitlog_sync may be either "periodic" or "batch." # - # When in batch mode, Cassandra won't ack writes until the commit log - # has been fsynced to disk. It will wait - # commitlog_sync_batch_window_in_ms milliseconds between fsyncs. - # This window should be kept short because the writer threads will - # be unable to do extra work while waiting. (You may need to increase - # concurrent_writes for the same reason.) + # When in batch mode, Cassandra won't ack writes until the commit log has + # been fsynced to disk. It will wait commitlog_sync_batch_window_in_ms + # milliseconds between fsyncs. This window should be kept short because the + # writer threads will be unable to do extra work while waiting. (You may + # need to increase concurrent_writes for the same reason.) 
# - # commitlog_sync: batch - {{ if .Params.COMMITLOG_SYNC_BATCH_WINDOW_IN_MS }} - commitlog_sync_batch_window_in_ms: {{ .Params.COMMITLOG_SYNC_BATCH_WINDOW_IN_MS }} - {{ end }} - # - # the other option is "periodic" where writes may be acked immediately - # and the CommitLog is simply synced every commitlog_sync_period_in_ms + # the other option is "periodic" where writes may be acked immediately and + # the CommitLog is simply synced every commitlog_sync_period_in_ms # milliseconds. commitlog_sync: {{ .Params.COMMITLOG_SYNC }} commitlog_sync_period_in_ms: {{ .Params.COMMITLOG_SYNC_PERIOD_IN_MS }} + {{ if .Params.COMMITLOG_SYNC_BATCH_WINDOW_IN_MS }} + commitlog_sync_batch_window_in_ms: {{ .Params.COMMITLOG_SYNC_BATCH_WINDOW_IN_MS }} + {{ end }} # The size of the individual commitlog file segments. A commitlog # segment may be archived, deleted, or recycled once all the data @@ -448,25 +452,24 @@ data: # commitlog_compression: # - class_name: LZ4Compressor # parameters: - # - + # - # any class that implements the SeedProvider interface and has a # constructor that takes a Map of parameters will do. seed_provider: - # Addresses of hosts that are deemed contact points. - # Cassandra nodes use this list of hosts to find each other and learn - # the topology of the ring. You must change this if you are running - # multiple nodes! - - class_name: {{ .Params.SEED_PROVIDER_CLASS }} - parameters: - # Here we follow the advice from DataStax and make the first 3 - # nodes in a DC the seed nodes. - # https://docs.datastax.com/en/dse/6.0/dse-admin/datastax_enterprise/production/seedNodesForSingleDC.html - - seeds: "{{- range $i, $node := until (int (min 3 .Params.NODE_COUNT)) -}} - {{- if $i -}}, {{- end -}} - {{ $.Name }}-node-{{ $node }}.{{ $.Name }}-svc.{{ $.Namespace }}.svc.cluster.local - {{- end -}}" - + # Addresses of hosts that are deemed contact points. + # Cassandra nodes use this list of hosts to find each other and learn + # the topology of the ring. 
You must change this if you are running + # multiple nodes! + - class_name: {{ .Params.SEED_PROVIDER_CLASS }} + parameters: + # Here we follow the advice from DataStax and make the first 3 + # nodes in a DC the seed nodes. + # https://docs.datastax.com/en/dse/6.0/dse-admin/datastax_enterprise/production/seedNodesForSingleDC.html + - seeds: "{{- range $i, $node := until (int (min 3 .Params.NODE_COUNT)) -}} + {{- if $i -}}, {{- end -}} + {{ $.Name }}-node-{{ $node }}.{{ $.Name }}-svc.{{ $.Namespace }}.svc.cluster.local + {{- end -}}" # For workloads with more data than can fit in memory, Cassandra's # bottleneck will be reads that need to fetch data from @@ -541,7 +544,6 @@ data: memtable_cleanup_threshold: {{ .Params.MEMTABLE_CLEANUP_THRESHOLD }} {{ end }} - # Specify the way Cassandra allocates and manages memtable memory. # Options are: # @@ -798,7 +800,7 @@ data: # of an o.a.c.t.TServerFactory that can create an instance of it. rpc_server_type: {{ .Params.RPC_SERVER_TYPE }} - # Uncomment rpc_min|max_thread to set request pool size limits. + # Set request pool size limits. 
# # Regardless of your choice of RPC server (see above), the number of maximum requests in the # RPC thread pool dictates how many concurrent requests are possible (but if you are using the sync @@ -811,19 +813,20 @@ data: {{ if .Params.RPC_MIN_THREADS }} rpc_min_threads: {{ .Params.RPC_MIN_THREADS }} {{ end }} + {{ if .Params.RPC_MAX_THREADS }} rpc_max_threads: {{ .Params.RPC_MAX_THREADS }} {{ end }} - # uncomment to set socket buffer sizes on rpc connections {{ if .Params.RPC_SEND_BUFF_SIZE_IN_BYTES }} rpc_send_buff_size_in_bytes: {{ .Params.RPC_SEND_BUFF_SIZE_IN_BYTES }} {{ end }} + {{ if .Params.RPC_RECV_BUFF_SIZE_IN_BYTES }} - rpc_recv_buff_size_in_bytes: {{ .Params.RPC_RECV_BUFF_SIZE_IN_BYTES }} + rpc_recv_buff_size_in_bytes: {{ .Params.RPC_RECV_BUFF_SIZE_IN_BYTES }} {{ end }} - # Uncomment to set socket buffer size for internode communication + # Set socket buffer size for internode communication # Note that when setting this, the buffer size is limited by net.core.wmem_max # and when not setting it it is defined by net.ipv4.tcp_wmem # See also: @@ -836,7 +839,7 @@ data: internode_send_buff_size_in_bytes: {{ .Params.INTERNODE_SEND_BUFF_SIZE_IN_BYTES }} {{ end }} - # Uncomment to set socket buffer size for internode communication + # Set socket buffer size for internode communication # Note that when setting this, the buffer size is limited by net.core.wmem_max # and when not setting it it is defined by net.ipv4.tcp_wmem {{ if .Params.INTERNODE_RECV_BUFF_SIZE_IN_BYTES }} @@ -930,7 +933,7 @@ data: # this setting allows users to throttle inter dc stream throughput in addition # to throttling all network stream traffic as configured with # stream_throughput_outbound_megabits_per_sec - # When unset, the default is 200 Mbps or 25 MB/s + # When unset, the default is 200 Mbps or 25 MB/s {{ if .Params.INTER_DC_STREAM_THROUGHPUT_OUTBOUND_MEGABITS_PER_SEC }} inter_dc_stream_throughput_outbound_megabits_per_sec: {{ 
.Params.INTER_DC_STREAM_THROUGHPUT_OUTBOUND_MEGABITS_PER_SEC }} {{ end }} @@ -1129,36 +1132,39 @@ data: # the keystore and truststore. For instructions on generating these files, see: # http://download.oracle.com/javase/6/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore # + # TODO(mpereira): we'll have to configure this when adding TLS support. server_encryption_options: - internode_encryption: none - keystore: conf/.keystore - keystore_password: cassandra - truststore: conf/.truststore - truststore_password: cassandra - # More advanced defaults below: - # protocol: TLS - # algorithm: SunX509 - # store_type: JKS - # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA] - # require_client_auth: false - # require_endpoint_verification: false + internode_encryption: none + keystore: conf/.keystore + keystore_password: cassandra + truststore: conf/.truststore + truststore_password: cassandra + # More advanced defaults below: + # protocol: TLS + # algorithm: SunX509 + # store_type: JKS + # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA] + # require_client_auth: false + # require_endpoint_verification: false # enable or disable client/server encryption. + # + # TODO(mpereira): we'll have to configure this when adding TLS support. client_encryption_options: - enabled: false - # If enabled and optional is set to true encrypted and unencrypted connections are handled. 
- optional: false - keystore: conf/.keystore - keystore_password: cassandra - # require_client_auth: false - # Set trustore and truststore_password if require_client_auth is true - # truststore: conf/.truststore - # truststore_password: cassandra - # More advanced defaults below: - # protocol: TLS - # algorithm: SunX509 - # store_type: JKS - # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA] + enabled: false + # If enabled and optional is set to true encrypted and unencrypted connections are handled. + optional: false + keystore: conf/.keystore + keystore_password: cassandra + # require_client_auth: false + # Set trustore and truststore_password if require_client_auth is true + # truststore: conf/.truststore + # truststore_password: cassandra + # More advanced defaults below: + # protocol: TLS + # algorithm: SunX509 + # store_type: JKS + # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA] # internode_compression controls whether traffic between nodes is # compressed. @@ -1213,7 +1219,6 @@ data: # setting. windows_timer_interval: {{ .Params.WINDOWS_TIMER_INTERVAL }} - # Enables encrypting data at-rest (on disk). Different key providers can be plugged in, but the default reads from # a JCE-style keystore. 
A single keystore can hold multiple keys, but the one referenced by # the "key_alias" is the only key that will be used for encrypt opertaions; previously used keys @@ -1227,20 +1232,19 @@ data: # Currently, only the following file types are supported for transparent data encryption, although # more are coming in future cassandra releases: commitlog, hints transparent_data_encryption_options: - enabled: false - chunk_length_kb: 64 - cipher: AES/CBC/PKCS5Padding - key_alias: testing:1 - # CBC IV length for AES needs to be 16 bytes (which is also the default size) - # iv_length: 16 - key_provider: - - class_name: org.apache.cassandra.security.JKSKeyProvider - parameters: - - keystore: conf/.keystore - keystore_password: cassandra - store_type: JCEKS - key_password: cassandra - + enabled: false + chunk_length_kb: 64 + cipher: AES/CBC/PKCS5Padding + key_alias: testing:1 + # CBC IV length for AES needs to be 16 bytes (which is also the default size) + # iv_length: 16 + key_provider: + - class_name: org.apache.cassandra.security.JKSKeyProvider + parameters: + - keystore: conf/.keystore + keystore_password: cassandra + store_type: JCEKS + key_password: cassandra ##################### # SAFETY THRESHOLDS # @@ -1299,11 +1303,11 @@ data: # New strategies can be added. Implementors need to implement org.apache.cassandra.net.BackpressureStrategy and # provide a public constructor accepting a Map. 
back_pressure_strategy: - - class_name: {{ .Params.BACK_PRESSURE_STRATEGY_CLASS }} - parameters: - - high_ratio: {{ .Params.BACK_PRESSURE_HIGH_RATIO }} - factor: {{ .Params.BACK_PRESSURE_FACTOR }} - flow: {{ .Params.BACK_PRESSURE_FLOW }} + - class_name: {{ .Params.BACK_PRESSURE_STRATEGY_CLASS_NAME }} + parameters: + - high_ratio: {{ .Params.BACK_PRESSURE_STRATEGY_HIGH_RATIO }} + factor: {{ .Params.BACK_PRESSURE_STRATEGY_FACTOR }} + flow: {{ .Params.BACK_PRESSURE_STRATEGY_FLOW }} # Coalescing Strategies # # Coalescing multiples messages turns out to significantly boost message processing throughput (think doubling or more). # On bare metal, the floor for packet processing throughput is high enough that many applications won't notice, but in diff --git a/operator/templates/jvm-options.yaml b/operator/templates/jvm-options.yaml index 7b40bd06..86b8fee5 100644 --- a/operator/templates/jvm-options.yaml +++ b/operator/templates/jvm-options.yaml @@ -28,10 +28,10 @@ data: {{ end }} # The directory location of the cassandra.yaml file. - #-Dcassandra.config=directory + # -Dcassandra.config=directory # Sets the initial partitioner token for a node the first time the node is started. - #-Dcassandra.initial_token=token + # -Dcassandra.initial_token=token # Set to false to start Cassandra on a node but not have the node join the cluster. {{ if .Params.JVM_OPT_JOIN_RING }} @@ -45,18 +45,23 @@ data: {{ end }} # Enable pluggable metrics reporter. See Pluggable metrics reporting in Cassandra 2.0.2. - #-Dcassandra.metricsReporterConfigFile=file + # -Dcassandra.metricsReporterConfigFile=file # Set the port on which the CQL native transport listens for clients. (Default: 9042) - #-Dcassandra.native_transport_port=port + # Overrides the value set in cassandra.yaml. + # -Dcassandra.native_transport_port=port - # Overrides the partitioner. (Default: org.apache.cassandra.dht.Murmur3Partitioner) - #-Dcassandra.partitioner=partitioner + # Overrides the value set in cassandra.yaml. 
+ # (Default: org.apache.cassandra.dht.Murmur3Partitioner) + # -Dcassandra.partitioner=partitioner # To replace a node that has died, restart a new node in its place specifying the address of the # dead node. The new node must not have any data in its data directory, that is, it must be in the # same state as before bootstrapping. - #-Dcassandra.replace_address=listen_address or broadcast_address of dead node + # + # TODO(mpereira): we'll need to set this when replacing pods. Should we make + # it configurable via params.yaml too? + # -Dcassandra.replace_address=listen_address or broadcast_address of dead node # Allow restoring specific tables from an archived commit log. {{ if .Params.JVM_OPT_REPLAYLIST }} @@ -70,19 +75,24 @@ data: {{ end }} # Set the port for the Thrift RPC service, which is used for client connections. (Default: 9160) - #-Dcassandra.rpc_port=port + # Overrides the value set in cassandra.yaml. + # -Dcassandra.rpc_port=port # Set the SSL port for encrypted communication. (Default: 7001) - #-Dcassandra.ssl_storage_port=port + # Overrides the value set in cassandra.yaml. + # -Dcassandra.ssl_storage_port=port # Enable or disable the native transport server. See start_native_transport in cassandra.yaml. - # cassandra.start_native_transport=true|false + # Overrides the value set in cassandra.yaml. + # -Dcassandra.start_native_transport=true|false # Enable or disable the Thrift RPC server. (Default: true) - #-Dcassandra.start_rpc=true/false + # Overrides the value set in cassandra.yaml. + # -Dcassandra.start_rpc=true/false # Set the port for inter-node communication. (Default: 7000) - #-Dcassandra.storage_port=port + # Overrides the value set in cassandra.yaml. + # -Dcassandra.storage_port=port # Set the default location for the trigger JARs. (Default: conf/triggers) {{ if .Params.JVM_OPT_TRIGGERS_DIR }} @@ -129,7 +139,7 @@ data: -XX:+HeapDumpOnOutOfMemoryError # Per-thread stack size. 
- -Xss{{ .Params.JVM_OPT_PER_THREAD_STACK_SIZE }} + -Xss{{ .Params.JVM_OPT_THREAD_STACK_SIZE }} # Larger interned string table, for gossip's benefit (CASSANDRA-6410) -XX:StringTableSize={{ .Params.JVM_OPT_STRING_TABLE_SIZE }} @@ -159,15 +169,15 @@ data: ### Debug options # uncomment to enable flight recorder - #-XX:+UnlockCommercialFeatures - #-XX:+FlightRecorder + # -XX:+UnlockCommercialFeatures + # -XX:+FlightRecorder # uncomment to have Cassandra JVM listen for remote debuggers/profilers on port 1414 - #-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=1414 + # -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=1414 # uncomment to have Cassandra JVM log internal method compilation (developers only) - #-XX:+UnlockDiagnosticVMOptions - #-XX:+LogCompilation + # -XX:+UnlockDiagnosticVMOptions + # -XX:+LogCompilation ################# # HEAP SETTINGS # @@ -188,12 +198,16 @@ data: # the same value to avoid stop-the-world GC pauses during resize, and # so that we can lock the heap in memory on startup to prevent any # of it from being swapped out. - {{ if .Params.CASSANDRA_HEAP_SIZE_MB }} - -Xms{{ .Params.CASSANDRA_HEAP_SIZE_MB }}M - -Xmx{{ .Params.CASSANDRA_HEAP_SIZE_MB }}M + {{ if .Params.NODE_MIN_HEAP_SIZE_MB }} + -Xms{{ .Params.NODE_MIN_HEAP_SIZE_MB }}M + {{ else }} + -Xms{{ max (min 8192 (div .Params.NODE_MEM_MIB 4)) (min 1024 (div .Params.NODE_MEM_MIB 2)) }}m + {{ end }} + + {{ if .Params.NODE_MAX_HEAP_SIZE_MB }} + -Xmx{{ .Params.NODE_MAX_HEAP_SIZE_MB }}M {{ else }} - -Xms{{ max (min 8192 (div .Params.NODE_MEM 4)) (min 1024 (div .Params.NODE_MEM 2)) }}m - -Xmx{{ max (min 8192 (div .Params.NODE_MEM 4)) (min 1024 (div .Params.NODE_MEM 2)) }}m + -Xmx{{ max (min 8192 (div .Params.NODE_MEM_MIB 4)) (min 1024 (div .Params.NODE_MEM_MIB 2)) }}m {{ end }} # Young generation size is automatically calculated by cassandra-env @@ -210,8 +224,10 @@ data: # The example below assumes a modern 8-core+ machine for decent # times. 
If in doubt, and if you do not particularly want to tweak, go # 100 MB per physical CPU core. - {{ if .Params.CASSANDRA_HEAP_SIZE_MB }} - -Xmn{{ .Params.CASSANDRA_HEAP_NEW_MB }}M + # + # TODO(mpereira): calculate this automatically based on the formula above. + {{ if .Params.NODE_NEW_GENERATION_HEAP_SIZE_MB }} + -Xmn{{ .Params.NODE_NEW_GENERATION_HEAP_SIZE_MB }}M {{ end }} ################################### @@ -236,19 +252,25 @@ data: -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled + {{ if .Params.JVM_OPT_SURVIVOR_RATIO }} -XX:SurvivorRatio={{ .Params.JVM_OPT_SURVIVOR_RATIO }} {{ end }} + {{ if .Params.JVM_OPT_MAX_TENURING_THRESHOLD }} -XX:MaxTenuringThreshold={{ .Params.JVM_OPT_MAX_TENURING_THRESHOLD }} {{ end }} + {{ if .Params.JVM_OPT_CMS_INITIATING_OCCUPANCY_FRACTION }} -XX:CMSInitiatingOccupancyFraction={{ .Params.JVM_OPT_CMS_INITIATING_OCCUPANCY_FRACTION }} {{ end }} + -XX:+UseCMSInitiatingOccupancyOnly + {{ if .Params.JVM_OPT_CMS_WAIT_DURATION }} -XX:CMSWaitDuration={{ .Params.JVM_OPT_CMS_WAIT_DURATION }} {{ end }} + -XX:+CMSParallelInitialMarkEnabled -XX:+CMSEdenChunksRecordAlways # some JVMs will fill up their heap when accessed via JMX, see CASSANDRA-6541 @@ -257,7 +279,7 @@ data: ### G1 Settings (experimental, comment previous section and uncomment section below to enable) ## Use the Hotspot garbage-first collector. - #-XX:+UseG1GC + # -XX:+UseG1GC # ## Have the JVM do less remembered set work during STW, instead ## preferring concurrent GC. Reduces p99.9 latency. @@ -283,7 +305,7 @@ data: # For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores. # Otherwise equal to the number of cores when 8 or less. # Machines with > 10 cores should try setting these to <= full cores. - #-XX:ParallelGCThreads=16 + # -XX:ParallelGCThreads=16 # By default, ConcGCThreads is 1/4 of ParallelGCThreads. # Setting both to the same value can reduce STW durations. 
{{ if .Params.JVM_OPT_CONC_GC_THREADS }} @@ -298,16 +320,21 @@ data: -XX:+PrintTenuringDistribution -XX:+PrintGCApplicationStoppedTime -XX:+PrintPromotionFailure + {{ if .Params.JVM_OPT_PRINT_FLS_STATISTICS }} -XX:PrintFLSStatistics={{ .Params.JVM_OPT_PRINT_FLS_STATISTICS }} {{ end }} + {{ if .Params.JVM_OPT_GC_LOG_DIRECTORY }} -Xloggc:{{ .Params.JVM_OPT_GC_LOG_DIRECTORY }} {{ end }} + -XX:+UseGCLogFileRotation + {{ if .Params.JVM_OPT_NUMBER_OF_GC_LOG_FILES }} -XX:NumberOfGCLogFiles={{ .Params.JVM_OPT_NUMBER_OF_GC_LOG_FILES }} {{ end }} + {{ if .Params.JVM_OPT_GC_LOG_FILE_SIZE }} -XX:GCLogFileSize={{ .Params.JVM_OPT_GC_LOG_FILE_SIZE }} {{ end }} diff --git a/operator/templates/stateful-set.yaml b/operator/templates/stateful-set.yaml index 978cf17d..8391c96f 100644 --- a/operator/templates/stateful-set.yaml +++ b/operator/templates/stateful-set.yaml @@ -99,11 +99,11 @@ spec: fieldPath: metadata.uid resources: requests: - memory: "{{ .Params.NODE_MEM }}Mi" - cpu: {{ .Params.NODE_CPUS }} + memory: "{{ .Params.NODE_MEM_MIB }}Mi" + cpu: "{{ .Params.NODE_CPU_MC }}m" limits: - memory: "{{ .Params.NODE_MEM }}Mi" - cpu: {{ .Params.NODE_CPUS }} + memory: "{{ .Params.NODE_MEM_LIMIT_MIB }}Mi" + cpu: "{{ .Params.NODE_CPU_LIMIT_MC }}m" # Port names can't be longer than 15 characters. ports: - containerPort: {{ .Params.STORAGE_PORT }} @@ -158,7 +158,7 @@ spec: accessModes: ["ReadWriteOnce"] resources: requests: - storage: {{ .Params.DISK_SIZE }} - {{ if .Params.STORAGE_CLASS }} - storageClassName: {{ .Params.STORAGE_CLASS }} + storage: "{{ .Params.NODE_DISK_SIZE_GIB }}Gi" + {{ if .Params.NODE_STORAGE_CLASS }} + storageClassName: {{ .Params.NODE_STORAGE_CLASS }} {{ end }}