manual/application.template.conf

####################################################################################################
# This is a template configuration file for the DataStax Bulk Loader (DSBulk).
#
# This file is written in HOCON format; see
# https://github.com/typesafehub/config/blob/master/HOCON.md
# for more information on its syntax.
#
# Please make sure you've read the DataStax Bulk Loader documentation included in this binary
# distribution:
# ../manual/README.md
#
# An exhaustive list of available settings can be found here:
# ../manual/settings.md
#
# Also, two template configuration files meant to be used together can be found here:
# ../manual/application.template.conf
# ../manual/driver.template.conf
#
# We recommend that this file be named application.conf and placed in the /conf directory; these
# are indeed the default file name and path where DSBulk looks for configuration files.
#
# To use other file names, or another folder, you can use the -f command line switch; consult the
# DataStax Bulk Loader online documentation for more information:
# https://docs.datastax.com/en/dsbulk/doc/dsbulk/dsbulkLoadConfigFile.html
####################################################################################################

####################################################################################################
# DataStax Java Driver settings.
#
# You can declare any Java Driver settings directly in this file, but for maintainability sake, we
# placed them in a separate file, which is expected to be named driver.conf and located in the same
# /conf directory.
# Use that file, for example, to define contact points, provide authentication and encryption
# settings, modify timeouts, consistency levels, page sizes, policies, and much more.
# If you decide to declare the driver settings in a different way, or in a file named differently,
# make sure to test your setup to ensure that all settings are correctly detected.
#
# You can also consult the Java Driver online documentation for more details:
# https://docs.datastax.com/en/developer/java-driver/latest/
# https://docs.datastax.com/en/developer/java-driver-dse/latest/
include classpath("driver.conf")
####################################################################################################

####################################################################################################
# DataStax Bulk Loader settings.
#
# Settings for the DataStax Bulk Loader (DSBulk) are declared below. Use this section, for
# example, to define which connector to use and how, to customize logging, monitoring, codecs, to
# specify schema settings and mappings, and much more.
#
# You can also consult the DataStax Bulk Loader online documentation for more details:
# https://docs.datastax.com/en/dsbulk/doc/dsbulk/dsbulkRef.html
####################################################################################################
dsbulk {

    ################################################################################################
    # Connector-specific settings. This section contains settings for the connector to use; it also
    # contains sub-sections, one for each available connector.
    # 
    # This setting is ignored when counting.
    ################################################################################################

    # The name of the connector to use.
    # Type: string
    # Default value: "csv"
    #connector.name = "csv"

    ################################################################################################
    # CSV Connector configuration.
    ################################################################################################

    # The URL or path of the resource(s) to read from or write to.
    # 
    # Which URL protocols are available depend on which URL stream handlers have been installed, but
    # at least the **file** protocol is guaranteed to be supported for reads and writes, and the
    # **http** and **https** protocols are guaranteed to be supported for reads.
    # 
    # The file protocol can be used with all supported file systems, local or not.
    # - When reading: the URL can point to a single file, or to an existing directory; in case of a
    # directory, the *fileNamePattern* setting can be used to filter files to read, and the
    # *recursive* setting can be used to control whether or not the connector should look for files
    # in subdirectories as well.
    # - When writing: the URL will be treated as a directory; if it doesn't exist, the loader will
    # attempt to create it; CSV files will be created inside this directory, and their names can be
    # controlled with the *fileNameFormat* setting.
    # 
    # Note that if the value specified here does not have a protocol, then it is assumed to be a
    # file protocol. Relative URLs will be resolved against the current working directory. Also, for
    # convenience, if the path begins with a tilde (`~`), that symbol will be expanded to the
    # current user's home directory.
    # 
    # In addition the value `-` indicates `stdin` when loading and `stdout` when unloading. This is
    # in line with Unix tools such as tar, which uses `-` to represent stdin/stdout when
    # reading/writing an archive.
    # 
    # Examples:
    # 
    # url = "/path/to/dir/or/file"           # without protocol
    # url = "./path/to/dir/or/file"          # without protocol, relative to working directory
    # url = "~/path/to/dir/or/file"          # without protocol, relative to the user's home
    # directory
    # url = "file:///path/to/dir/or/file"    # with file protocol
    # url = "http://acme.com/file.csv"       # with HTTP protocol
    # url = "-"                              # to read csv data from stdin (for load) or
    # url = "-"                              # write csv data to stdout (for unload)
    # 
    # For other URLs: the URL will be read or written directly; settings like *fileNamePattern*,
    # *recursive*, and *fileNameFormat* will have no effect.
    # 
    # The default value is `-` (read from `stdin` / write to `stdout`).
    # Type: string
    # Default value: "-"
    #connector.csv.url = "-"

    # The character(s) to use as field delimiter. Field delimiters containing more than one
    # character are accepted.
    # Type: string
    # Default value: ","
    #connector.csv.delimiter = ","

    # Enable or disable whether the files to read or write begin with a header line. If enabled for
    # loading, the first non-empty line in every file will assign field names for each record
    # column, in lieu of `schema.mapping`, `fieldA = col1, fieldB = col2, fieldC = col3`. If
    # disabled for loading, records will not contain fields names, only field indexes, `0 = col1, 1
    # = col2, 2 = col3`. For unloading, if this setting is enabled, each file will begin with a
    # header line, and if disabled, each file will not contain a header line.
    # 
    # Note: This option will apply to all files loaded or unloaded.
    # Type: boolean
    # Default value: true
    #connector.csv.header = true

    # The number of records to skip from each input file before the parser can begin to execute.
    # Note that if the file contains a header line, that line is not counted as a valid record. This
    # setting is ignored when writing.
    # Type: number
    # Default value: 0
    #connector.csv.skipRecords = 0

    # The maximum number of records to read from or write to each file. When reading, all records
    # past this number will be discarded. When writing, a file will contain at most this number of
    # records; if more records remain to be written, a new file will be created using the
    # *fileNameFormat* setting. Note that when writing to anything other than a directory, this
    # setting is ignored. This setting takes into account the *header* setting: if a file begins
    # with a header line, that line is not counted as a record. This feature is disabled by default
    # (indicated by its `-1` value).
    # Type: number
    # Default value: -1
    #connector.csv.maxRecords = -1

    # The character used for quoting fields when the field delimiter is part of the field value.
    # Only one character can be specified. Note that this setting applies to all files to be read or
    # written.
    # Type: string
    # Default value: "\""
    #connector.csv.quote = "\""

    # The character that represents a line comment when found in the beginning of a line of text.
    # Only one character can be specified. Note that this setting applies to all files to be read or
    # written. This feature is disabled by default (indicated by its `null` character value).
    # Type: string
    # Default value: "\u0000"
    #connector.csv.comment = "\u0000"

    # The compression that will be used for writing or reading files. Supported values are (for both
    # reading and writing): `none`, `xz`, `gzip`, `bzip2`, `zstd`, `lz4`, `lzma`, `snappy`,
    # `deflate`.  For reading only, supported values are: `brotli`, `z`, `deflate64`.
    # Type: string
    # Default value: "none"
    #connector.csv.compression = "none"

    # Sets the String representation of an empty value. When reading, if the parser does not read
    # any character from the input, and the input is within quotes, this value will be used instead.
    # When writing, if the writer has an empty string to write to the output, this value will be
    # used instead. The default value is `AUTO`, which means that, when reading, the parser will
    # emit an empty string, and when writing, the writer will write a quoted empty field to the
    # output.
    # Type: string
    # Default value: "AUTO"
    #connector.csv.emptyValue = "AUTO"

    # The file encoding to use for all read or written files.
    # Type: string
    # Default value: "UTF-8"
    #connector.csv.encoding = "UTF-8"

    # The character used for escaping quotes inside an already quoted value. Only one character can
    # be specified. Note that this setting applies to all files to be read or written.
    # Type: string
    # Default value: "\\"
    #connector.csv.escape = "\\"

    # The file name format to use when writing. This setting is ignored when reading and for
    # non-file URLs. The file name must comply with the formatting rules of `String.format()`, and
    # must contain a `%d` format specifier that will be used to increment file name counters.
    # 
    # If compression is enabled, the default value for this setting will be modified to include the
    # default suffix for the selected compression method. For example, if compression is `gzip`, the
    # default file name format will be `output-%06d.csv.gz`.
    # Type: string
    # Default value: "output-%06d.csv"
    #connector.csv.fileNameFormat = "output-%06d.csv"

    # The glob pattern to use when searching for files to read. The syntax to use is the glob
    # syntax, as described in `java.nio.file.FileSystem.getPathMatcher()`. This setting is ignored
    # when writing and for non-file URLs. Only applicable when the *url* setting points to a
    # directory on a known filesystem, ignored otherwise.
    # 
    # If compression is enabled, the default value for this setting will be modified to include the
    # default suffix for the selected compression method. For example, if compression is `gzip`, the
    # default glob pattern will be `**/*.csv.gz`.
    # Type: string
    # Default value: "**/*.csv"
    #connector.csv.fileNamePattern = "**/*.csv"

    # Defines whether or not leading whitespaces from values being read/written should be skipped.
    # This setting is honored when reading and writing. Default value is false.
    # Type: boolean
    # Default value: false
    #connector.csv.ignoreLeadingWhitespaces = false

    # Defines whether or not trailing whitespaces from quoted values should be skipped. This setting
    # is only honored when reading; it is ignored when writing. Default value is false.
    # Type: boolean
    # Default value: false
    #connector.csv.ignoreLeadingWhitespacesInQuotes = false

    # Defines whether or not trailing whitespaces from values being read/written should be skipped.
    # This setting is honored when reading and writing. Default value is false.
    # Type: boolean
    # Default value: false
    #connector.csv.ignoreTrailingWhitespaces = false

    # Defines whether or not leading whitespaces from quoted values should be skipped. This setting
    # is only honored when reading; it is ignored when writing. Default value is false.
    # Type: boolean
    # Default value: false
    #connector.csv.ignoreTrailingWhitespacesInQuotes = false

    # The maximum number of characters that a field can contain. This setting is used to size
    # internal buffers and to avoid out-of-memory problems. If set to -1, internal buffers will be
    # resized dynamically. While convenient, this can lead to memory problems. It could also hurt
    # throughput, if some large fields require constant resizing; if this is the case, set this
    # value to a fixed positive number that is big enough to contain all field values.
    # Type: number
    # Default value: 4096
    #connector.csv.maxCharsPerColumn = 4096

    # The maximum number of columns that a record can contain. This setting is used to size internal
    # buffers and to avoid out-of-memory problems.
    # Type: number
    # Default value: 512
    #connector.csv.maxColumns = 512

    # The maximum number of files that can be read or written simultaneously. This setting is
    # effective only when reading from or writing to many resources in parallel, such as a
    # collection of files in a root directory; it is ignored otherwise. The special syntax `NC` can
    # be used to specify a number of threads that is a multiple of the number of available cores,
    # e.g. if the number of cores is 8, then 0.5C = 0.5 * 8 = 4 threads.
    # 
    # The default value is the special value AUTO; with this value, the connector will decide the
    # best number of files.
    # Type: string
    # Default value: "AUTO"
    #connector.csv.maxConcurrentFiles = "AUTO"

    # The character(s) that represent a line ending. When set to the special value `auto` (default),
    # the system's line separator, as determined by `System.lineSeparator()`, will be used when
    # writing, and auto-detection of line endings will be enabled when reading. Only one or two
    # characters can be specified; beware that most typical line separator characters need to be
    # escaped, e.g. one should specify `\r\n` for the typical line ending on Windows systems
    # (carriage return followed by a new line).
    # Type: string
    # Default value: "auto"
    #connector.csv.newline = "auto"

    # Defines whether or not line separators should be replaced by a normalized line separator '\n'
    # inside quoted values. This setting is honored when reading and writing. Note: due to a bug in
    # the CSV parsing library, on Windows systems, the line ending detection mechanism may not
    # function properly when this setting is false; in case of problem, set this to true. Default
    # value is false.
    # Type: boolean
    # Default value: false
    #connector.csv.normalizeLineEndingsInQuotes = false

    # Sets the String representation of a null value. When reading, if the parser does not read any
    # character from the input, this value will be used instead. When writing, if the writer has a
    # null object to write to the output, this value will be used instead. The default value is
    # `AUTO`, which means that, when reading, the parser will emit a `null`, and when writing, the
    # writer won't write any character at all to the output.
    # Type: string
    # Default value: "AUTO"
    #connector.csv.nullValue = "AUTO"

    # Enable or disable scanning for files in the root's subdirectories. Only applicable when *url*
    # is set to a directory on a known filesystem. Used for loading only.
    # Type: boolean
    # Default value: false
    #connector.csv.recursive = false

    # The URL or path of the file that contains the list of resources to read from.
    # 
    # The file specified here should be located on the local filesystem.
    # 
    # This setting and `connector.csv.url` are mutually exclusive. If both are defined and non
    # empty, this setting takes precedence over `connector.csv.url`.
    # 
    # This setting applies only when loading. When unloading, this setting should be left empty or
    # set to null; any non-empty value will trigger a fatal error.
    # 
    # The file with URLs should follow this format:
    # 
    # ```
    # /path/to/file/file.csv
    # /path/to.dir/
    # ```
    # 
    # Every line should contain one path. You don't need to escape paths in this file.
    # 
    # All the remarks for `connector.csv.url` apply for each line in the file, and especially,
    # settings like `fileNamePattern`, `recursive`, and `fileNameFormat` all apply to each line
    # individually.
    # 
    # You can comment out a line in the URL file by making it start with a # sign:
    # 
    # ```
    # #/path/that/will/be/ignored
    # ```
    # 
    # Such a line will be ignored.
    # 
    # For your convenience, every line in the urlfile will be trimmed - that is, any leading and
    # trailing white space will be removed.
    # 
    # The file should be encoded in UTF-8, and each line should be a valid URL to load.
    # 
    # The default value is "" - which means that this property is ignored.
    # Type: string
    # Default value: ""
    #connector.csv.urlfile = ""

    ################################################################################################
    # JSON Connector configuration.
    ################################################################################################

    # The URL or path of the resource(s) to read from or write to.
    # 
    # Which URL protocols are available depend on which URL stream handlers have been installed, but
    # at least the **file** protocol is guaranteed to be supported for reads and writes, and the
    # **http** and **https** protocols are guaranteed to be supported for reads.
    # 
    # The file protocol can be used with all supported file systems, local or not.
    # - When reading: the URL can point to a single file, or to an existing directory; in case of a
    # directory, the *fileNamePattern* setting can be used to filter files to read, and the
    # *recursive* setting can be used to control whether or not the connector should look for files
    # in subdirectories as well.
    # - When writing: the URL will be treated as a directory; if it doesn't exist, the loader will
    # attempt to create it; json files will be created inside this directory, and their names can be
    # controlled with the *fileNameFormat* setting.
    # 
    # Note that if the value specified here does not have a protocol, then it is assumed to be a
    # file protocol. Relative URLs will be resolved against the current working directory. Also, for
    # convenience, if the path begins with a tilde (`~`), that symbol will be expanded to the
    # current user's home directory.
    # 
    # In addition the value `-` indicates `stdin` when loading and `stdout` when unloading. This is
    # in line with Unix tools such as tar, which uses `-` to represent stdin/stdout when
    # reading/writing an archive.
    # 
    # Examples:
    # 
    # url = "/path/to/dir/or/file"           # without protocol
    # url = "./path/to/dir/or/file"          # without protocol, relative to working directory
    # url = "~/path/to/dir/or/file"          # without protocol, relative to the user's home
    # directory
    # url = "file:///path/to/dir/or/file"    # with file protocol
    # url = "http://acme.com/file.json"      # with HTTP protocol
    # url = "-"                              # to read json data from stdin (for load) or
    # url = "-"                              # write json data to stdout (for unload)
    # 
    # For other URLs: the URL will be read or written directly; settings like *fileNamePattern*,
    # *recursive*, and *fileNameFormat* will have no effect.
    # 
    # The default value is `-` (read from `stdin` / write to `stdout`).
    # Type: string
    # Default value: "-"
    #connector.json.url = "-"

    # The number of JSON records to skip from each input file before the parser can begin to
    # execute. This setting is ignored when writing.
    # Type: number
    # Default value: 0
    #connector.json.skipRecords = 0

    # The maximum number of records to read from or write to each file. When reading, all records
    # past this number will be discarded. When writing, a file will contain at most this number of
    # records; if more records remain to be written, a new file will be created using the
    # *fileNameFormat* setting. Note that when writing to anything other than a directory, this
    # setting is ignored. This feature is disabled by default (indicated by its `-1` value).
    # Type: number
    # Default value: -1
    #connector.json.maxRecords = -1

    # The mode for loading and unloading JSON documents. Valid values are:
    # 
    # * MULTI_DOCUMENT: Each resource may contain an arbitrary number of successive JSON documents
    # to be mapped to records. For example the format of each JSON document is a single document:
    # `{doc1}`. The root directory for the JSON documents can be specified with `url` and the
    # documents can be read recursively by setting `connector.json.recursive` to true.
    # * SINGLE_DOCUMENT: Each resource contains a root array whose elements are JSON documents to be
    # mapped to records. For example, the format of the JSON document is an array with embedded JSON
    # documents: `[ {doc1}, {doc2}, {doc3} ]`.
    # Type: string
    # Default value: "MULTI_DOCUMENT"
    #connector.json.mode = "MULTI_DOCUMENT"

    # The compression that will be used for writing or reading files. Supported values are (for both
    # reading and writing): `none`, `xz`, `gzip`, `bzip2`, `zstd`, `lz4`, `lzma`, `snappy`,
    # `deflate`.  For reading only, supported values are: `brotli`, `z`, `deflate64`.
    # Type: string
    # Default value: "none"
    #connector.json.compression = "none"

    # A map of JSON deserialization features to set. Map keys should be enum constants defined in
    # `com.fasterxml.jackson.databind.DeserializationFeature`. The default value is the only way to
    # guarantee that floating point numbers will not have their precision truncated when parsed, but
    # can result in slightly slower parsing. Used for loading only.
    # 
    # Note that some Jackson features might not be supported, in particular features that operate on
    # the resulting Json tree by filtering elements or altering their contents, since such features
    # conflict with dsbulk's own filtering and formatting capabilities. Instead of trying to modify
    # the resulting tree using Jackson features, you should try to achieve the same result using the
    # settings available under the `codec` and `schema` sections.
    # Type: map<string,boolean>
    # Default value: {"USE_BIG_DECIMAL_FOR_FLOATS":true}
    #connector.json.deserializationFeatures = {"USE_BIG_DECIMAL_FOR_FLOATS":true}

    # The file encoding to use for all read or written files.
    # Type: string
    # Default value: "UTF-8"
    #connector.json.encoding = "UTF-8"

    # The file name format to use when writing. This setting is ignored when reading and for
    # non-file URLs. The file name must comply with the formatting rules of `String.format()`, and
    # must contain a `%d` format specifier that will be used to increment file name counters.
    # 
    # If compression is enabled, the default value for this setting will be modified to include the
    # default suffix for the selected compression method. For example, if compression is `gzip`, the
    # default file name format will be `output-%06d.json.gz`.
    # Type: string
    # Default value: "output-%06d.json"
    #connector.json.fileNameFormat = "output-%06d.json"

    # The glob pattern to use when searching for files to read. The syntax to use is the glob
    # syntax, as described in `java.nio.file.FileSystem.getPathMatcher()`. This setting is ignored
    # when writing and for non-file URLs. Only applicable when the *url* setting points to a
    # directory on a known filesystem, ignored otherwise.
    # 
    # If compression is enabled, the default value for this setting will be modified to include the
    # default suffix for the selected compression method. For example, if compression is `gzip`, the
    # default glob pattern will be `**/*.json.gz`.
    # Type: string
    # Default value: "**/*.json"
    #connector.json.fileNamePattern = "**/*.json"

    # JSON generator features to enable. Valid values are all the enum constants defined in
    # `com.fasterxml.jackson.core.JsonGenerator.Feature`. For example, a value of `{
    # ESCAPE_NON_ASCII : true, QUOTE_FIELD_NAMES : true }` will configure the generator to escape
    # all characters beyond 7-bit ASCII and quote field names when writing JSON output. Used for
    # unloading only.
    # 
    # Note that some Jackson features might not be supported, in particular features that operate on
    # the resulting Json tree by filtering elements or altering their contents, since such features
    # conflict with dsbulk's own filtering and formatting capabilities. Instead of trying to modify
    # the resulting tree using Jackson features, you should try to achieve the same result using the
    # settings available under the `codec` and `schema` sections.
    # Type: map<string,boolean>
    # Default value: {}
    #connector.json.generatorFeatures = {}

    # The maximum number of files that can be read or written simultaneously. This setting is
    # effective only when reading from or writing to many resources in parallel, such as a
    # collection of files in a root directory; it is ignored otherwise. The special syntax `NC` can
    # be used to specify a number of threads that is a multiple of the number of available cores,
    # e.g. if the number of cores is 8, then 0.5C = 0.5 * 8 = 4 threads.
    # 
    # The default value is the special value AUTO; with this value, the connector will decide the
    # best number of files.
    # Type: string
    # Default value: "AUTO"
    #connector.json.maxConcurrentFiles = "AUTO"

    # JSON parser features to enable. Valid values are all the enum constants defined in
    # `com.fasterxml.jackson.core.JsonParser.Feature`. For example, a value of `{ ALLOW_COMMENTS :
    # true, ALLOW_SINGLE_QUOTES : true }` will configure the parser to allow the use of comments and
    # single-quoted strings in JSON data. Used for loading only.
    # 
    # Note that some Jackson features might not be supported, in particular features that operate on
    # the resulting Json tree by filtering elements or altering their contents, since such features
    # conflict with dsbulk's own filtering and formatting capabilities. Instead of trying to modify
    # the resulting tree using Jackson features, you should try to achieve the same result using the
    # settings available under the `codec` and `schema` sections.
    # Type: map<string,boolean>
    # Default value: {}
    #connector.json.parserFeatures = {}

    # Enable or disable pretty printing. When enabled, JSON records are written with indents. Used
    # for unloading only.
    # 
    # Note: Can result in much bigger records.
    # Type: boolean
    # Default value: false
    #connector.json.prettyPrint = false

    # Enable or disable scanning for files in the root's subdirectories. Only applicable when *url*
    # is set to a directory on a known filesystem. Used for loading only.
    # Type: boolean
    # Default value: false
    #connector.json.recursive = false

    # A map of JSON serialization features to set. Map keys should be enum constants defined in
    # `com.fasterxml.jackson.databind.SerializationFeature`. Used for unloading only.
    # 
    # Note that some Jackson features might not be supported, in particular features that operate on
    # the resulting Json tree by filtering elements or altering their contents, since such features
    # conflict with dsbulk's own filtering and formatting capabilities. Instead of trying to modify
    # the resulting tree using Jackson features, you should try to achieve the same result using the
    # settings available under the `codec` and `schema` sections.
    # Type: map<string,boolean>
    # Default value: {}
    #connector.json.serializationFeatures = {}

    # The strategy to use for filtering out entries when formatting output. Valid values are enum
    # constants defined in `com.fasterxml.jackson.annotation.JsonInclude.Include` (but beware that
    # the `CUSTOM` strategy cannot be honored). Used for unloading only.
    # Type: string
    # Default value: "ALWAYS"
    #connector.json.serializationStrategy = "ALWAYS"

    # The URL or path of the file that contains the list of resources to read from.
    # 
    # The file specified here should be located on the local filesystem.
    # 
    # This setting and `connector.json.url` are mutually exclusive. If both are defined and non
    # empty, this setting takes precedence over `connector.json.url`.
    # 
    # This setting applies only when loading. When unloading, this setting should be left empty or
    # set to null; any non-empty value will trigger a fatal error.
    # 
    # The file with URLs should follow this format:
    # 
    # ```
    # /path/to/file/file.json
    # /path/to.dir/
    # ```
    # 
    # Every line should contain one path. You don't need to escape paths in this file.
    # 
    # All the remarks for `connector.csv.json` apply for each line in the file, and especially,
    # settings like `fileNamePattern`, `recursive`, and `fileNameFormat` all apply to each line
    # individually.
    # 
    # You can comment out a line in the URL file by making it start with a # sign:
    # 
    # ```
    # #/path/that/will/be/ignored
    # ```
    # 
    # Such a line will be ignored.
    # 
    # For your convenience, every line in the urlfile will be trimmed - that is, any leading and
    # trailing white space will be removed.
    # 
    # The file should be encoded in UTF-8, and each line should be a valid URL to load.
    # 
    # The default value is "" - which means that this property is ignored.
    # Type: string
    # Default value: ""
    #connector.json.urlfile = ""

    ################################################################################################
    # Schema-specific settings.
    ################################################################################################

    # Keyspace used for loading or unloading data. Keyspace names should not be quoted and are
    # case-sensitive. `MyKeyspace` will match a keyspace named `MyKeyspace` but not `mykeyspace`.
    # Either `keyspace` or `graph` is required if `query` is not specified or is not qualified with
    # a keyspace name.
    # Type: string
    # Default value: null
    #schema.keyspace = null

    # Table used for loading or unloading data. Table names should not be quoted and are
    # case-sensitive. `MyTable` will match a table named `MyTable` but not `mytable`. Either
    # `table`, `vertex` or `edge` is required if `query` is not specified.
    # Type: string
    # Default value: null
    #schema.table = null

    # The field-to-column mapping to use, that applies to both loading and unloading; ignored when
    # counting. If not specified, the loader will apply a strict one-to-one mapping between the
    # source fields and the database table. If that is not what you want, then you must supply an
    # explicit mapping. Mappings should be specified as a map of the following form:
    # 
    # - Indexed data sources: `0 = col1, 1 = col2, 2 = col3`, where `0`, `1`, `2`, are the
    # zero-based indices of fields in the source data; and `col1`, `col2`, `col3` are bound variable
    # names in the insert statement.
    # - A shortcut to map the first `n` fields is to simply specify the destination columns: `col1,
    # col2, col3`.
    # - Mapped data sources: `fieldA = col1, fieldB = col2, fieldC = col3`, where `fieldA`,
    # `fieldB`, `fieldC`, are field names in the source data; and `col1`, `col2`, `col3` are bound
    # variable names in the insert statement.
    # - A shortcut to map fields named like columns is to simply specify the destination columns:
    # `col1, col2, col3`.
    # 
    # To specify that a field should be used as the timestamp (a.k.a. write-time) or ttl (a.k.a.
    # time-to-live) of the inserted row, use the specially named fake columns `__ttl` and
    # `__timestamp`: `fieldA = __timestamp, fieldB = __ttl`. Note that Timestamp fields are parsed
    # as regular CQL timestamp columns and must comply with either `codec.timestamp`, or
    # alternatively, with `codec.unit` + `codec.epoch`. TTL fields are parsed as integers
    # representing durations in seconds, and must comply with `codec.number`.
    # 
    # To specify that a column should be populated with the result of a function call, specify the
    # function call as the input field (e.g. `now() = c4`). Note, this is only relevant for load
    # operations. Similarly, to specify that a field should be populated with the result of a
    # function call, specify the function call as the input column (e.g. `field1 = now()`). This is
    # only relevant for unload operations. Function calls can also be qualified by a keyspace name:
    # `field1 = ks1.max(c1,c2)`.
    # 
    # In addition, for mapped data sources, it is also possible to specify that the mapping be
    # partly auto-generated and partly explicitly specified. For example, if a source row has fields
    # `c1`, `c2`, `c3`, and `c5`, and the table has columns `c1`, `c2`, `c3`, `c4`, one can map all
    # like-named columns and specify that `c5` in the source maps to `c4` in the table as follows:
    # `* = *, c5 = c4`.
    # 
    # One can specify that all like-named fields be mapped, except for `c2`: `* = -c2`. To skip `c2`
    # and `c3`: `* = [-c2, -c3]`.
    # 
    # Any identifier, field or column, that is not strictly alphanumeric (i.e. not matching
    # `[a-zA-Z0-9_]+`) must be surrounded by double-quotes, just like you would do in CQL: `"Field
    # ""A""" = "Column 2"` (to escape a double-quote, simply double it). Note that, contrary to the
    # CQL grammar, unquoted identifiers will not be lower-cased: an identifier such as `MyColumn1`
    # will match a column named `"MyColumn1"` and not `mycolumn1`.
    # 
    # The exact type of mapping to use depends on the connector being used. Some connectors can only
    # produce indexed records; others can only produce mapped ones, while others are capable of
    # producing both indexed and mapped records at the same time. Refer to the connector's
    # documentation to know which kinds of mapping it supports.
    # Type: string
    # Default value: null
    #schema.mapping = null

    # Specify whether or not to accept records that contain extra fields that are not declared in
    # the mapping. For example, if a record contains three fields A, B, and C, but the mapping only
    # declares fields A and B, then if this option is true, C will be silently ignored and the
    # record will be considered valid, and if false, the record will be rejected. This setting also
    # applies to user-defined types and tuples. Only applicable for loading, ignored otherwise.
    # 
    # This setting is ignored when counting.
    # Type: boolean
    # Default value: true
    #schema.allowExtraFields = true

    # Specify whether or not to accept records that are missing fields declared in the mapping. For
    # example, if the mapping declares three fields A, B, and C, but a record contains only fields A
    # and B, then if this option is true, C will be silently assigned null and the record will be
    # considered valid, and if false, the record will be rejected. If the missing field is mapped to
    # a primary key column, the record will always be rejected, since the database will reject the
    # record. This setting also applies to user-defined types and tuples. Only applicable for
    # loading, ignored otherwise.
    # 
    # This setting is ignored when counting.
    # Type: boolean
    # Default value: false
    #schema.allowMissingFields = false

    # Edge label used for loading or unloading graph data. This option can only be used for modern
    # graphs created with the Native engine (DSE 6.8+). The edge label must correspond to an
    # existing table created with the `WITH EDGE LABEL` option; also, when `edge` is specified, then
    # `from` and `to` must be specified as well. Edge labels should not be quoted and are
    # case-sensitive. `MyEdge` will match a label named `MyEdge` but not `myedge`. Either `table`,
    # `vertex` or `edge` is required if `query` is not specified.
    # Type: string
    # Default value: null
    #schema.edge = null

    # The name of the edge's incoming vertex label, for loading or unloading graph data. This option
    # can only be used for modern graphs created with the Native engine (DSE 6.8+). This option is
    # mandatory when `edge` is specified; ignored otherwise. Vertex labels should not be quoted and
    # are case-sensitive. `MyVertex` will match a label named `MyVertex` but not `myvertex`.
    # Type: string
    # Default value: null
    #schema.from = null

    # Graph name used for loading or unloading graph data. This option can only be used for modern
    # graphs created with the Native engine (DSE 6.8+). Graph names should not be quoted and are
    # case-sensitive. `MyGraph` will match a graph named `MyGraph` but not `mygraph`. Either
    # `keyspace` or `graph` is required if `query` is not specified or is not qualified with a
    # keyspace name.
    # Type: string
    # Default value: null
    #schema.graph = null

    # Specify whether to map `null` input values to "unset" in the database, i.e., don't modify a
    # potentially pre-existing value of this field for this row. Valid for load scenarios, otherwise
    # ignore. Note that setting to false creates tombstones to represent `null`.
    # 
    # Note that this setting is applied after the *codec.nullStrings* setting, and may intercept
    # `null`s produced by that setting.
    # 
    # This setting is ignored when counting. When set to true but the protocol version in use does
    # not support unset values (i.e., all protocol versions lesser than 4), this setting will be
    # forced to false and a warning will be logged.
    # Type: boolean
    # Default value: true
    #schema.nullToUnset = true

    # Whether to preserve cell timestamps when loading and unloading. Ignored when `schema.query` is
    # provided, or when the target table is a counter table. If true, the following rules will be
    # applied to generated queries:
    # 
    # - When loading, instead of a single INSERT statement, the generated query will be a BATCH
    # query; this is required in order to preserve individual column timestamps for each row.
    # - When unloading, the generated SELECT statement will export each column along with its
    # individual timestamp.
    # 
    # For both loading and unlaoding, DSBulk will import and export timestamps using field names
    # such as `"writetime(<column>)"`, where `<column>` is the column's internal CQL name; for
    # example, if the table has a column named `"MyCol"`, its corresponding timestamp would be
    # exported as `"writetime(MyCol)"` in the generated query and in the resulting connector record.
    # If you intend to use this feature to export and import tables letting DSBulk generate the
    # appropriate queries, these names are fine and need not be changed. If, however, you would like
    # to export or import data to or from external sources that use different field names, you could
    # do so by using the function `writetime` in a schema.mapping entry; for example, the following
    # mapping would map `col1` along with its timestamp to two distinct fields, `field1` and
    # `field1_writetime`: `field1 = col1, field1_writetime = writetime(col1)`.
    # Type: boolean
    # Default value: false
    #schema.preserveTimestamp = false

    # Whether to preserve cell TTLs when loading and unloading. Ignored when `schema.query` is
    # provided, or when the target table is a counter table. If true, the following rules will be
    # applied to generated queries:
    # 
    # - When loading, instead of a single INSERT statement, the generated query will be a BATCH
    # query; this is required in order to preserve individual column TTLs for each row.
    # - When unloading, the generated SELECT statement will export each column along with its
    # individual TTL.
    # 
    # For both loading and unlaoding, DSBulk will import and export TTLs using field names such as
    # `"ttl(<column>)"`, where `<column>` is the column's internal CQL name; for example, if the
    # table has a column named `"MyCol"`, its corresponding TTL would be exported as `"ttl(MyCol)"`
    # in the generated query and in the resulting connector record. If you intend to use this
    # feature to export and import tables letting DSBulk generate the appropriate queries, these
    # names are fine and need not be changed. If, however, you would like to export or import data
    # to or from external sources that use different field names, you could do so by using the
    # function `ttl` in a schema.mapping entry; for example, the following mapping would map `col1`
    # along with its TTL to two distinct fields, `field1` and `field1_ttl`: `field1 = col1,
    # field1_ttl = ttl(col1)`.
    # Type: boolean
    # Default value: false
    #schema.preserveTtl = false

    # The query to use. If not specified, then *schema.keyspace* and *schema.table* must be
    # specified, and dsbulk will infer the appropriate statement based on the table's metadata,
    # using all available columns. If `schema.keyspace` is provided, the query need not include the
    # keyspace to qualify the table reference.
    # 
    # For loading, the statement can be any `INSERT`, `UPDATE` or `DELETE` statement. `INSERT`
    # statements are preferred for most load operations, and bound variables should correspond to
    # mapped fields; for example, `INSERT INTO table1 (c1, c2, c3) VALUES (:fieldA, :fieldB,
    # :fieldC)`. `UPDATE` statements are required if the target table is a counter table, and the
    # columns are updated with incremental operations (`SET col1 = col1 + :fieldA` where `fieldA` is
    # a field in the input data). A `DELETE` statement will remove existing data during the load
    # operation.
    # 
    # For unloading and counting, the statement can be any regular `SELECT` statement. If the
    # statement does not contain any WHERE, ORDER BY, GROUP BY, or LIMIT clause, the engine will
    # generate a token range restriction clause of the form: `WHERE token(...) > :start and
    # token(...) <= :end` and will generate range read statements, thus allowing parallelization of
    # reads while at the same time targeting coordinators that are also replicas (see
    # schema.splits). If the statement does contain WHERE, ORDER BY, GROUP BY or LIMIT clauses
    # however, the query will be executed as is; the engine will only be able to parallelize the
    # operation if the query includes a WHERE clause including the following relations: `token(...)
    # > :start AND token(...) <= :end` (the bound variables can have any name). Note that, unlike
    # LIMIT clauses, PER PARTITION LIMIT clauses can be parallelized.
    # 
    # Statements can use both named and positional bound variables. Named bound variables should be
    # preferred, unless the protocol version in use does not allow them; they usually have names
    # matching those of the columns in the destination table, but this is not a strict requirement;
    # it is, however, required that their names match those of fields specified in the mapping.
    # Positional variables can also be used, and will be named after their corresponding column in
    # the destination table.
    # 
    # When loading and unloading graph data, the query must be provided in plain CQL; Gremlin
    # queries are not supported.
    # 
    # Note: The query is parsed to discover which bound variables are present, and to map the
    # variables correctly to fields.
    # 
    # See *mapping* setting for more information.
    # Type: string
    # Default value: null
    #schema.query = null

    # The timestamp of inserted/updated cells during load; otherwise, the current time of the system
    # running the tool is used. Not applicable to unloading nor counting. Ignored when
    # `schema.query` is provided. The value must be expressed in the timestamp format specified by
    # the `codec.timestamp` setting.
    # 
    # Query timestamps for Cassandra have microsecond resolution; any sub-microsecond information
    # specified is lost. For more information, see the [CQL
    # Reference](https://docs.datastax.com/en/dse/6.0/cql/cql/cql_reference/cql_commands/cqlInsert.html#cqlInsert__timestamp-value).
    # Type: string
    # Default value: null
    #schema.queryTimestamp = null

    # The Time-To-Live (TTL) of inserted/updated cells during load (seconds); a value of -1 means
    # there is no TTL. Not applicable to unloading nor counting. Ignored when `schema.query` is
    # provided. For more information, see the [CQL
    # Reference](https://docs.datastax.com/en/dse/6.0/cql/cql/cql_reference/cql_commands/cqlInsert.html#cqlInsert__ime-value),
    # [Setting the time-to-live (TTL) for
    # value](http://docs.datastax.com/en/dse/6.0/cql/cql/cql_using/useTTL.html), and [Expiring data
    # with time-to-live](http://docs.datastax.com/en/dse/6.0/cql/cql/cql_using/useExpire.html).
    # Type: number
    # Default value: -1
    #schema.queryTtl = -1

    # The number of token range splits in which to divide the token ring. In other words, this
    # setting determines how many read requests will be generated in order to read an entire table.
    # Only used when unloading and counting; ignored otherwise. Note that the actual number of
    # splits may be slightly greater or lesser than the number specified here, depending on the
    # actual cluster topology and token ownership. Also, it is not possible to generate fewer splits
    # than the total number of primary token ranges in the cluster, so the actual number of splits
    # is always equal to or greater than that number. Set this to higher values if you experience
    # timeouts when reading from the database, specially if paging is disabled. This setting should
    # also be greater than `engine.maxConcurrentQueries`. The special syntax `NC` can be used to
    # specify a number that is a multiple of the number of available cores, e.g. if the number of
    # cores is 8, then 0.5C = 0.5 * 8 = 4 splits.
    # Type: string
    # Default value: "8C"
    #schema.splits = "8C"

    # The name of the edge's outgoing vertex label, for loading or unloading graph data. This option
    # can only be used for modern graphs created with the Native engine (DSE 6.8+). This option is
    # mandatory when `edge` is specified; ignored otherwise. Vertex labels should not be quoted and
    # are case-sensitive. `MyVertex` will match a label named `MyVertex` but not `myvertex`.
    # Type: string
    # Default value: null
    #schema.to = null

    # Vertex label used for loading or unloading graph data. This option can only be used for modern
    # graphs created with the Native engine (DSE 6.8+). The vertex label must correspond to an
    # existing table created with the `WITH VERTEX LABEL` option. Vertex labels should not be quoted
    # and are case-sensitive. `MyVertex` will match a label named `MyVertex` but not `myvertex`.
    # Either `table`, `vertex` or `edge` is required if `query` is not specified.
    # Type: string
    # Default value: null
    #schema.vertex = null

    ################################################################################################
    # Batch-specific settings.
    # 
    # These settings control how the workflow engine groups together statements before writing them.
    # 
    # Only applicable for loading.
    ################################################################################################

    # The buffer size to use for flushing batched statements. Should be set to a multiple of
    # `maxBatchStatements`, e.g. 2 or 4 times that value; higher values consume more memory and
    # usually do not incur in any noticeable performance gain. When set to a value lesser than or
    # equal to zero, the buffer size is implicitly set to 4 times `maxBatchStatments`.
    # Type: number
    # Default value: -1
    #batch.bufferSize = -1

    # **DEPRECATED**. Use `maxBatchStatements` instead.
    # Type: number
    # Default value: null
    #batch.maxBatchSize = null

    # The maximum number of statements that a batch can contain. The ideal value depends on two
    # factors:
    # - The data being loaded: the larger the data, the smaller the batches should be.
    # - The batch mode: when `PARTITION_KEY` is used, larger batches are acceptable, whereas when
    # `REPLICA_SET` is used, smaller batches usually perform better. Also, when using `REPLICA_SET`,
    # it is preferrable to keep this number below the threshold configured server-side for the
    # setting `unlogged_batch_across_partitions_warn_threshold` (the default is 10); failing to do
    # so is likely to trigger query warnings (see `log.maxQueryWarnings` for more information).
    # When set to a value lesser than or equal to zero, the maximum number of statements is
    # considered unlimited. At least one of `maxBatchStatements` or `maxSizeInBytes` must be set to
    # a positive value when batching is enabled.
    # Type: number
    # Default value: 32
    #batch.maxBatchStatements = 32

    # The maximum data size that a batch can hold. This is the number of bytes required to encode
    # all the data to be persisted, without counting the overhead generated by the native protocol
    # (headers, frames, etc.).
    # 
    # The value specified here should be lesser than or equal to the value that has been configured
    # server-side for the option `batch_size_fail_threshold_in_kb` in cassandra.yaml, but note that
    # the heuristic used to compute data sizes is not 100% accurate and sometimes underestimates the
    # actual size. See the documentation for the [cassandra.yaml configuration
    # file](https://docs.datastax.com/en/dse/6.0/dse-dev/datastax_enterprise/config/configCassandra_yaml.html#configCassandra_yaml__advProps)
    # for more information.
    # 
    # When set to a value lesser than or equal to zero, the maximum data size is considered
    # unlimited. At least one of `maxBatchStatements` or `maxSizeInBytes` must be set to a positive
    # value when batching is enabled.
    # 
    # Values for this option should either be valid long integers, or use HOCON's
    # [size-in-bytes](https://github.com/lightbend/config/blob/master/HOCON.md#size-in-bytes-format)
    # format, e.g. `1234`, `1K` or `5 kibibytes`.
    # Type: number
    # Default value: -1
    #batch.maxSizeInBytes = -1

    # The grouping mode. Valid values are:
    # - `DISABLED`: batching is disabled.
    # - `PARTITION_KEY`: groups together statements that share the same partition key. This is
    # usually the most performant mode; however it may not work at all if the dataset is unordered,
    # i.e., if partition keys appear randomly and cannot be grouped together.
    # - `REPLICA_SET`: groups together statements that share the same replica set. This mode works
    # in all cases, but may incur in some throughput and latency degradation, specially with large
    # clusters or high replication factors.
    # When tuning DSBulk for batching, the recommended approach is as follows:
    # 1. Start with `PARTITION_KEY`;
    # 2. If the average batch size is close to 1, try increasing `bufferSize`;
    # 3. If increasing `bufferSize` doesn't help, switch to `REPLICA_SET` and set
    # `maxBatchStatements` or `maxSizeInBytes` to low values to avoid timeouts or errors;
    # 4. Increase `maxBatchStatements` or `maxSizeInBytes` to get the best throughput while keeping
    # latencies acceptable.
    # The default is `PARTITION_KEY`.
    # Type: string
    # Default value: "PARTITION_KEY"
    #batch.mode = "PARTITION_KEY"

    ################################################################################################
    # Conversion-specific settings. These settings apply for both load and unload workflows.
    # 
    # When writing, these settings determine how record fields emitted by connectors are parsed.
    # 
    # When unloading, these settings determine how row cells emitted by DSE are formatted.
    # 
    # When counting, these settings are ignored.
    ################################################################################################

    # Strategy to use when converting binary data to strings. Only applicable when unloading columns
    # of CQL type `blob`, or columns of geometry types, if the value of `codec.geo` is `WKB`; and
    # only if the connector in use requires stringification. Valid values are:
    # 
    # - BASE64: Encode the binary data into a Base-64 string. This is the default strategy.
    # - HEX: Encode the binary data as CQL blob literals. CQL blob literals follow the general
    # syntax: `0[xX][0-9a-fA-F]+`, that is, `0x` followed by hexadecimal characters, for example:
    # `0xcafebabe`. This format produces lengthier strings than BASE64, but is also the only format
    # compatible with CQLSH.
    # Type: string
    # Default value: "BASE64"
    #codec.binary = "BASE64"

    # Set how true and false representations of numbers are interpreted. The representation is of
    # the form `true_value,false_value`. The mapping is reciprocal, so that numbers are mapping to
    # Boolean and vice versa. All numbers unspecified in this setting are rejected.
    # Type: list<number>
    # Default value: [1,0]
    #codec.booleanNumbers = [1,0]

    # Specify how true and false representations can be used by dsbulk. Each representation is of
    # the form `true_value:false_value`, case-insensitive. For loading, all representations are
    # honored: when a record field value exactly matches one of the specified strings, the value is
    # replaced with `true` of `false` before writing to the database. For unloading, this setting is
    # only applicable for string-based connectors, such as the CSV connector: the first
    # representation will be used to format booleans before they are written out, and all others are
    # ignored.
    # Type: list<string>
    # Default value: ["1:0","Y:N","T:F","YES:NO","TRUE:FALSE"]
    #codec.booleanStrings = ["1:0","Y:N","T:F","YES:NO","TRUE:FALSE"]

    # The temporal pattern to use for `String` to CQL `date` conversion. Valid choices:
    # 
    # - A date-time pattern such as `yyyy-MM-dd`.
    # - A pre-defined formatter such as `ISO_LOCAL_DATE`. Any public static field in
    # `java.time.format.DateTimeFormatter` can be used.
    # - The special formatter `UNITS_SINCE_EPOCH`, which is a special parser that reads and writes
    # local dates as numbers representing time units since a given epoch; the unit and the epoch to
    # use can be specified with `codec.unit` and `codec.timestamp`.
    # 
    # For more information on patterns and pre-defined formatters, see [Patterns for Formatting and
    # Parsing](https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html#patterns)
    # in Oracle Java documentation.
    # 
    # For more information about CQL date, time and timestamp literals, see [Date, time, and
    # timestamp
    # format](https://docs.datastax.com/en/dse/6.0/cql/cql/cql_reference/refDateTimeFormats.html?hl=timestamp).
    # Type: string
    # Default value: "ISO_LOCAL_DATE"
    #codec.date = "ISO_LOCAL_DATE"

    # This setting is used in the following situations:
    # 
    # - When the target column is of CQL `timestamp` type, or when loading to a `USING TIMESTAMP`
    # clause, or when unloading from a `writetime()` function call, and if `codec.timestamp` is set
    # to `UNITS_SINCE_EPOCH`, then the epoch specified here determines the relative point in time to
    # use to convert numeric data to and from temporals. For example, if the input is 123 and the
    # epoch specified here is `2000-01-01T00:00:00Z`, then the input will be interpreted as N
    # `codec.unit`s since January 1st 2000.
    # - When loading, and the target CQL type is numeric, but the input is alphanumeric and
    # represents a temporal literal, the time unit specified here will be used to convert the parsed
    # temporal into a numeric value. For example, if the input is `2018-12-10T19:32:45Z` and the
    # epoch specified here is `2000-01-01T00:00:00Z`, then the parsed timestamp will be converted to
    # N `codec.unit`s since January 1st 2000.
    # - When parsing temporal literals, if the input does not contain a date part, then the date
    # part of the instant specified here will be used instead. For example, if the input is
    # `19:32:45` and the epoch specified here is `2000-01-01T00:00:00Z`, then the input will be
    # interpreted `2000-01-01T19:32:45Z`.
    # 
    # The value must be expressed in
    # [`ISO_ZONED_DATE_TIME`](https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html#ISO_ZONED_DATE_TIME)
    # format.
    # Type: string
    # Default value: "1970-01-01T00:00:00Z"
    #codec.epoch = "1970-01-01T00:00:00Z"

    # Whether or not to use the `codec.number` pattern to format numeric output. When set to `true`,
    # the numeric pattern defined by `codec.number` will be applied. This allows for
    # nicely-formatted output, but may result in rounding (see `codec.roundingStrategy`), or
    # alteration of the original decimal's scale. When set to `false`, numbers will be stringified
    # using the `toString()` method, and will never result in rounding or scale alteration. Only
    # applicable when unloading, and only if the connector in use requires stringification, because
    # the connector, such as the CSV connector, does not handle raw numeric data; ignored otherwise.
    # Type: boolean
    # Default value: false
    #codec.formatNumbers = false

    # Strategy to use when converting geometry types to strings. Geometry types are only available
    # in DataStax Enterprise (DSE) 5.0 or higher. Only applicable when unloading columns of CQL type
    # `Point`, `LineString` or `Polygon`, and only if the connector in use requires stringification.
    # Valid values are:
    # 
    # - WKT: Encode the data in Well-known text format. This is the default strategy.
    # - WKB: Encode the data in Well-known binary format. The actual encoding will depend on the
    # value chosen for the `codec.binary` setting (HEX or BASE64).
    # - JSON: Encode the data in GeoJson format.
    # Type: string
    # Default value: "WKT"
    #codec.geo = "WKT"

    # The locale to use for locale-sensitive conversions.
    # Type: string
    # Default value: "en_US"
    #codec.locale = "en_US"

    # Comma-separated list of case-sensitive strings that should be mapped to `null`. For loading,
    # when a record field value exactly matches one of the specified strings, the value is replaced
    # with `null` before writing to the database. For unloading, this setting is only applicable for
    # string-based connectors, such as the CSV connector: the first string specified will be used to
    # change a row cell containing `null` to the specified string when written out.
    # 
    # For example, setting this to `["NULL"]` will cause a field containing the word `NULL` to be
    # mapped to `null` while loading, and a column containing `null` to be converted to the word
    # `NULL` while unloading.
    # 
    # The default value is `[]` (no strings are mapped to `null`). In the default mode, DSBulk
    # behaves as follows:
    # * When loading, if the target CQL type is textual (i.e. text, varchar or ascii), the original
    # field value is left untouched; for other types, if the value is an empty string, it is
    # converted to `null`.
    # * When unloading, `null` values are left untouched.
    # 
    # Note that, regardless of this setting, DSBulk will always convert empty strings to `null` if
    # the target CQL type is not textual when loading (i.e. not text, varchar or ascii).
    # 
    # This setting is applied before `schema.nullToUnset`, hence any `null` produced by a
    # null-string can still be left unset if required.
    # Type: list
    # Default value: []
    #codec.nullStrings = []

    # The `DecimalFormat` pattern to use for conversions between `String` and CQL numeric types.
    # 
    # See
    # [java.text.DecimalFormat](https://docs.oracle.com/javase/8/docs/api/java/text/DecimalFormat.html)
    # for details about the pattern syntax to use.
    # 
    # Most inputs are recognized: optional localized thousands separator, localized decimal
    # separator, or optional exponent. Using locale `en_US`, `1234`, `1,234`, `1234.5678`,
    # `1,234.5678` and `1,234.5678E2` are all valid. For unloading and formatting, rounding may
    # occur and cause precision loss. See `codec.formatNumbers` and `codec.roundingStrategy`.
    # Type: string
    # Default value: "#,###.##"
    #codec.number = "#,###.##"

    # This setting can mean one of three possibilities:
    # 
    # - The value is outside the range of the target CQL type. For example, trying to convert 128 to
    # a CQL `tinyint` (max value of 127) results in overflow.
    # - The value is decimal, but the target CQL type is integral. For example, trying to convert
    # 123.45 to a CQL `int` results in overflow.
    # - The value's precision is too large for the target CQL type. For example, trying to insert
    # 0.1234567890123456789 into a CQL `double` results in overflow, because there are too many
    # significant digits to fit in a 64-bit double.
    # 
    # Valid choices:
    # 
    # - `REJECT`: overflows are considered errors and the data is rejected. This is the default
    # value.
    # - `TRUNCATE`: the data is truncated to fit in the target CQL type. The truncation algorithm is
    # similar to the narrowing primitive conversion defined in The Java Language Specification,
    # Section 5.1.3, with the following exceptions:
    # - If the value is too big or too small, it is rounded up or down to the maximum or minimum
    # value allowed, rather than truncated at bit level. For example, 128 would be rounded down to
    # 127 to fit in a byte, whereas Java would have truncated the exceeding bits and converted to
    # -127 instead.
    # - If the value is decimal, but the target CQL type is integral, it is first rounded to an
    # integral using the defined rounding strategy, then narrowed to fit into the target type. This
    # can result in precision loss and should be used with caution.
    # 
    # Only applicable for loading, when parsing numeric inputs; it does not apply for unloading,
    # since formatting never results in overflow.
    # Type: string
    # Default value: "REJECT"
    #codec.overflowStrategy = "REJECT"

    # The rounding strategy to use for conversions from CQL numeric types to `String`.
    # 
    # Valid choices: any `java.math.RoundingMode` enum constant name, including: `CEILING`, `FLOOR`,
    # `UP`, `DOWN`, `HALF_UP`, `HALF_EVEN`, `HALF_DOWN`, and `UNNECESSARY`. The precision used when
    # rounding is inferred from the numeric pattern declared under `codec.number`. For example, the
    # default `codec.number` (`#,###.##`) has a rounding precision of 2, and the number 123.456
    # would be rounded to 123.46 if `roundingStrategy` was set to `UP`. The default value will
    # result in infinite precision, and ignore the `codec.number` setting.
    # 
    # Only applicable when unloading, if `codec.formatNumbers` is true and if the connector in use
    # requires stringification, because the connector, such as the CSV connector, does not handle
    # raw numeric data; ignored otherwise.
    # Type: string
    # Default value: "UNNECESSARY"
    #codec.roundingStrategy = "UNNECESSARY"

    # The temporal pattern to use for `String` to CQL `time` conversion. Valid choices:
    # 
    # - A date-time pattern, such as `HH:mm:ss`.
    # - A pre-defined formatter, such as `ISO_LOCAL_TIME`. Any public static field in
    # `java.time.format.DateTimeFormatter` can be used.
    # - The special formatter `UNITS_SINCE_EPOCH`, which is a special parser that reads and writes
    # local times as numbers representing time units since a given epoch; the unit and the epoch to
    # use can be specified with `codec.unit` and `codec.timestamp`.
    # 
    # For more information on patterns and pre-defined formatters, see [Patterns for formatting and
    # Parsing](https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html#patterns)
    # in Oracle Java documentation.
    # 
    # For more information about CQL date, time and timestamp literals, see [Date, time, and
    # timestamp
    # format](https://docs.datastax.com/en/dse/6.0/cql/cql/cql_reference/refDateTimeFormats.html?hl=timestamp).
    # Type: string
    # Default value: "ISO_LOCAL_TIME"
    #codec.time = "ISO_LOCAL_TIME"

    # The time zone to use for temporal conversions. When loading, the time zone will be used to
    # obtain a timestamp from inputs that do not convey any explicit time zone information. When
    # unloading, the time zone will be used to format all timestamps.
    # Type: string
    # Default value: "UTC"
    #codec.timeZone = "UTC"

    # The temporal pattern to use for `String` to CQL `timestamp` conversion. Valid choices:
    # 
    # - A date-time pattern such as `yyyy-MM-dd HH:mm:ss`.
    # - A pre-defined formatter such as `ISO_ZONED_DATE_TIME` or `ISO_INSTANT`. Any public static
    # field in `java.time.format.DateTimeFormatter` can be used.
    # - The special formatter `CQL_TIMESTAMP`, which is a special parser that accepts all valid CQL
    # literal formats for the `timestamp` type.
    # - The special formatter `UNITS_SINCE_EPOCH`, which is a special parser that reads and writes
    # timestamps as numbers representing time units since a given epoch; the unit and the epoch to
    # use can be specified with `codec.unit` and `codec.timestamp`.
    # 
    # For more information on patterns and pre-defined formatters, see [Patterns for Formatting and
    # Parsing](https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html#patterns)
    # in Oracle Java documentation.
    # 
    # For more information about CQL date, time and timestamp literals, see [Date, time, and
    # timestamp
    # format](https://docs.datastax.com/en/dse/6.0/cql/cql/cql_reference/refDateTimeFormats.html?hl=timestamp).
    # 
    # The default value is the special `CQL_TIMESTAMP` value. When parsing, this format recognizes
    # all CQL temporal literals; if the input is a local date or date/time, the timestamp is
    # resolved using the time zone specified under `timeZone`. When formatting, this format uses the
    # `ISO_OFFSET_DATE_TIME` pattern, which is compliant with both CQL and ISO-8601.
    # Type: string
    # Default value: "CQL_TIMESTAMP"
    #codec.timestamp = "CQL_TIMESTAMP"

    # This setting is used in the following situations:
    # 
    # - When the target column is of CQL `timestamp` type, or when loading data through a `USING
    # TIMESTAMP` clause, or when unloading data from a `writetime()` function call, and if
    # `codec.timestamp` is set to `UNITS_SINCE_EPOCH`, then the time unit specified here is used to
    # convert numeric data to and from temporals. For example, if the input is 123 and the time unit
    # specified here is SECONDS, then the input will be interpreted as 123 seconds since
    # `codec.epoch`.
    # - When loading, and the target CQL type is numeric, but the input is alphanumeric and
    # represents a temporal literal, the time unit specified here will be used to convert the parsed
    # temporal into a numeric value. For example, if the input is `2018-12-10T19:32:45Z` and the
    # time unit specified here is SECONDS, then the parsed temporal will be converted into seconds
    # since `codec.epoch`.
    # 
    # All `TimeUnit` enum constants are valid choices.
    # Type: string
    # Default value: "MILLISECONDS"
    #codec.unit = "MILLISECONDS"

    # Strategy to use when generating time-based (version 1) UUIDs from timestamps. Clock sequence
    # and node ID parts of generated UUIDs are determined on a best-effort basis and are not fully
    # compliant with RFC 4122. Valid values are:
    # 
    # - RANDOM: Generates UUIDs using a random number in lieu of the local clock sequence and node
    # ID. This strategy will ensure that the generated UUIDs are unique, even if the original
    # timestamps are not guaranteed to be unique.
    # - FIXED: Preferred strategy if original timestamps are guaranteed unique, since it is faster.
    # Generates UUIDs using a fixed local clock sequence and node ID.
    # - MIN: Generates the smallest possible type 1 UUID for a given timestamp. Warning: this
    # strategy doesn't guarantee uniquely generated UUIDs and should be used with caution.
    # - MAX: Generates the biggest possible type 1 UUID for a given timestamp. Warning: this
    # strategy doesn't guarantee uniquely generated UUIDs and should be used with caution.
    # Type: string
    # Default value: "RANDOM"
    #codec.uuidStrategy = "RANDOM"

    ################################################################################################
    # Engine-specific settings. Engine settings control how workflows are configured, and notably,
    # what is their execution ID, whether they should run in Dry-run mode, and the desired amount of
    # concurrency.
    ################################################################################################

    # Enable or disable dry-run mode, a test mode that runs the command but does not load data. Not
    # applicable for unloading nor counting.
    # Type: boolean
    # Default value: false
    #engine.dryRun = false

    # The maximum number of concurrent queries that should be carried in parallel.
    # 
    # This acts as a safeguard to prevent more queries executing in parallel than the cluster can
    # handle, or to regulate throughput when latencies get too high. Batch statements count as one
    # query.
    # 
    # When using continuous paging, also make sure to set this number to a value equal to or lesser
    # than the number of nodes in the local datacenter multiplied by the value configured
    # server-side for `continuous_paging.max_concurrent_sessions` in the cassandra.yaml
    # configuration file (60 by default); otherwise some requests might be rejected.
    # 
    # The special syntax `NC` can be used to specify a number that is a multiple of the number of
    # available cores, e.g. if the number of cores is 8, then 0.5C = 0.5 * 8 = 4 concurrent queries.
    # 
    # The default value is 'AUTO'; with this special value, DSBulk will optimize the number of
    # concurrent queries according to the number of available cores, and the operation being
    # executed. The actual value usually ranges from the number of cores to eight times that number.
    # Type: string
    # Default value: "AUTO"
    #engine.maxConcurrentQueries = "AUTO"

    # Specify whether DSBulk should use data size sampling to optimize its execution engine. Only
    # applicable for loading, ignored otherwise.
    # 
    # Data size sampling is done by reading a few records from the connector; in this case, the
    # connector will be invoked twice: once to sample the data size, then again to read the entire
    # data. This is only possible if the data source can be rewinded and read again from the
    # beginning. If your data source does not support this – for example, because it can only be
    # read once – then you should set this option to false.
    # 
    # Note that when loading from standard input, DSBulk will never perform data size sampling,
    # regardless of the value set here.
    # 
    # The default value is 'true', meaning that data size sampling is enabled.
    # Type: boolean
    # Default value: true
    #engine.dataSizeSamplingEnabled = true

    # A unique identifier to attribute to each execution. When unspecified or empty, the engine will
    # automatically generate identifiers of the following form: *workflow*_*timestamp*, where :
    # 
    # - *workflow* stands for the workflow type (`LOAD`, `UNLOAD`, etc.);
    # - *timestamp* is the current timestamp formatted as `uuuuMMdd-HHmmss-SSSSSS` (see
    # [https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html#patterns](https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html#patterns))
    # in UTC, with microsecond precision if available, and millisecond precision otherwise.
    # 
    # When this identifier is user-supplied, it is important to guarantee its uniqueness; failing to
    # do so may result in execution failures. It is also possible to provide templates here. Any
    # format compliant with the formatting rules of
    # [`String.format()`](https://docs.oracle.com/javase/8/docs/api/java/util/Formatter.html#syntax)
    # is accepted, and can contain the following parameters:
    # 
    # - `%1$s` : the workflow type (`LOAD`, `UNLOAD`, etc.);
    # - `%2$t` : the current time (with microsecond precision if available, and millisecond
    # precision otherwise);
    # - `%3$s` : the JVM process PID (this parameter might not be available on some operating
    # systems; if its value cannot be determined, a random integer will be inserted instead).
    # Type: string
    # Default value: null
    #engine.executionId = null

    ################################################################################################
    # Executor-specific settings. Executor settings control how the DataStax Java driver is used by
    # DSBulk, and notably, the desired amount of driver-level concurrency and throughput. These
    # settings are for advanced users.
    ################################################################################################

    # Enable or disable continuous paging. If the target cluster does not support continuous paging
    # or if `driver.query.consistency` is not `ONE` or `LOCAL_ONE`, traditional paging will be used
    # regardless of this setting.
    # Type: boolean
    # Default value: true
    #executor.continuousPaging.enabled = true

    # The maximum number of concurrent continuous paging queries that should be carried in parallel.
    # Set this number to a value equal to or lesser than the value configured server-side for
    # `continuous_paging.max_concurrent_sessions` in the cassandra.yaml configuration file (60 by
    # default); otherwise some requests might be rejected. Settting this option to any negative
    # value or zero will disable it.
    # 
    # **DEPRECATED**: use `engine.maxConcurrentQueries` instead.
    # Type: number
    # Default value: 60
    #executor.continuousPaging.maxConcurrentQueries = 60

    # The maximum number of pages to retrieve. Setting this value to zero retrieves all pages
    # available.
    # 
    # **DEPRECATED**. Use `datastax-java-driver.advanced.continuous-paging.max-pages` instead.
    # Type: number
    # Default value: 0
    #executor.continuousPaging.maxPages = 0

    # The maximum number of pages per second. Setting this value to zero indicates no limit.
    # 
    # **DEPRECATED**. Use `datastax-java-driver.advanced.continuous-paging.max-pages-per-second`
    # instead.
    # Type: number
    # Default value: 0
    #executor.continuousPaging.maxPagesPerSecond = 0

    # The size of the page. The unit to use is determined by the `pageUnit` setting. The ideal page
    # size depends on the size of the rows being unloaded: larger page sizes may have a positive
    # impact on throughput for small rows, and vice versa.
    # 
    # **DEPRECATED**. Use `datastax-java-driver.advanced.continuous-paging.page-size` instead.
    # Type: number
    # Default value: 5000
    #executor.continuousPaging.pageSize = 5000

    # The unit to use for the `pageSize` setting. Possible values are: `ROWS`, `BYTES`.
    # 
    # **DEPRECATED**. Use `datastax-java-driver.advanced.continuous-paging.page-size-in-bytes`
    # instead.
    # Type: string
    # Default value: "ROWS"
    #executor.continuousPaging.pageUnit = "ROWS"

    # The maximum number of bytes per second. When writing to the database, this means the maximum
    # number of bytes written per second; when reading from the database, this means the maximum
    # number of bytes read per second.
    # 
    # This acts as a safeguard to prevent overloading the cluster. Reduce this value when the
    # throughput for reads and writes cannot match the throughput of connectors, and latencies get
    # too high; this is usually a sign that the workflow engine is not well calibrated and will
    # eventually run out of memory, or some queries will timeout.
    # 
    # This setting applies a "hard" limit to the gloabl throughput, capping it at a fixed value. If
    # you need a a soft throughput limit, you should use `maxInFlight` instead.
    # 
    # Note that this setting is implemented by a semaphore and may block application threads if
    # there are too many in-flight requests.
    # 
    # Setting this option to any negative value or zero will disable it.
    # 
    # Values for this option should either be valid long integers, or use HOCON's
    # [size-in-bytes](https://github.com/lightbend/config/blob/master/HOCON.md#size-in-bytes-format)
    # format, e.g. `1234`, `1K` or `5 kibibytes`.
    # Type: number
    # Default value: -1
    #executor.maxBytesPerSecond = -1

    # The maximum number of "in-flight" queries, or maximum number of concurrent requests waiting
    # for a response from the server. When writing to the database, batch statements count as one
    # request. When reading from the database, each request for the next pages count as one request.
    # 
    # This acts as a safeguard to prevent overloading the cluster. Reduce this value when the
    # throughput for reads and writes cannot match the throughput of connectors, and latencies get
    # too high; this is usually a sign that the workflow engine is not well calibrated and will
    # eventually run out of memory, or some queries will timeout.
    # 
    # This setting applies a "soft" limit to the gloabl throughput, without capping it at a fixed
    # value. If you need a fixed maximum throughput, you should use `maxPerSecond` instead.
    # 
    # Note that this setting is implemented by a semaphore and may block application threads if
    # there are too many in-flight requests.
    # 
    # Setting this option to any negative value or zero will disable it.
    # Type: number
    # Default value: -1
    #executor.maxInFlight = -1

    # The maximum number of concurrent operations per second. When writing to the database, this
    # means the maximum number of writes per second (batch statements are counted by the number of
    # statements included); when reading from the database, this means the maximum number of rows
    # per second.
    # 
    # This acts as a safeguard to prevent overloading the cluster. Reduce this value when the
    # throughput for reads and writes cannot match the throughput of connectors, and latencies get
    # too high; this is usually a sign that the workflow engine is not well calibrated and will
    # eventually run out of memory, or some queries will timeout.
    # 
    # When connecting to DataStax Astra clusters, a rate limit is always enforced. By default, the
    # limit is 4,096 operations per second and per coordinator; therefore, if no rate limit is set,
    # and the cluster is an Astra cluster, DSBulk will automatically apply an appropriate limit and
    # log a message.
    # 
    # This setting applies a "hard" limit to the gloabl throughput, capping it at a fixed value. If
    # you need a a soft throughput limit, you should use `maxInFlight` instead.
    # 
    # Note that this setting is implemented by a semaphore and may block application threads if
    # there are too many in-flight requests.
    # 
    # Setting this option to any negative value or zero will disable it.
    # Type: number
    # Default value: -1
    #executor.maxPerSecond = -1

    ################################################################################################
    # Log and error management settings.
    ################################################################################################

    # The maximum number of errors to tolerate before aborting the entire operation. This can be
    # expressed either as an absolute number of errors - in which case, set this to an integer
    # greater than or equal to zero; or as a percentage of total rows processed so far - in which
    # case, set this to a string of the form `N%`, where `N` is a decimal number between 0 and 100
    # exclusive (e.g. "20%"). Setting this value to any negative integer disables this feature (not
    # recommended).
    # Type: number
    # Default value: 100
    #log.maxErrors = 100

    # The writable directory where all log files will be stored; if the directory specified does not
    # exist, it will be created. URLs are not acceptable (not even `file:/` URLs). Log files for a
    # specific run, or execution, will be located in a sub-directory under the specified directory.
    # Each execution generates a sub-directory identified by an "execution ID". See
    # `engine.executionId` for more information about execution IDs. Relative paths will be resolved
    # against the current working directory. Also, for convenience, if the path begins with a tilde
    # (`~`), that symbol will be expanded to the current user's home directory.
    # Type: string
    # Default value: "./logs"
    #log.directory = "./logs"

    # The desired level of verbosity. Valid values are:
    # 
    # - `quiet`: DSBulk will only log WARN and ERROR messages.
    # - `normal`: DSBulk will log a few INFO messages, as well as WARN and ERROR messages.
    # - `high`: DSBulk will also print some DEBUG messages at the beginning of the operation, such
    # as DSBulk's settings, inferred query, and the read and write concurrency.
    # - `max`: DSBulk will print numerous DEBUG messages from itself, the driver, and from some
    # important libraries (Netty and Reactor).
    # 
    # Verbosity levels `quiet`, `normal` and `high` are suitable to use in a production environment.
    # Verbosity `high` is the recommended level to diagnose problems related to configuration and/or
    # performance in production environments. Verbosity `max`, however, should only be used for
    # debugging, and preferably on small amounts of data; otherwise it could print hundreds of
    # gigabytes of text to the main log file and to the console.
    # Type: string
    # Default value: "normal"
    #log.verbosity = "normal"

    # Whether or not to use ANSI colors and other escape sequences in log messages printed to the
    # console. Valid values are:
    # 
    # - `normal`: this is the default option. DSBulk will only use ANSI when the terminal is:
    # - compatible with ANSI escape sequences; all common terminals on *nix and BSD systems,
    # including MacOS, are ANSI-compatible, and some popular terminals for Windows (Mintty, MinGW);
    # - a standard Windows DOS command prompt (ANSI sequences are translated on the fly).
    # - `force`: DSBulk will use ANSI, even if the terminal has not been detected as
    # ANSI-compatible.
    # - `disabled`: DSBulk will not use ANSI.
    # Type: string
    # Default value: "normal"
    #log.ansiMode = "normal"

    # Whether to enable checkpointing for the current operation. When set to true (the default),
    # DSBulk will track records that were processed and will produce a checkpoint file at the end of
    # the operation. The checkpoint file can then be used later on to resume the same operation, if
    # not all records were processed, or if the operation was interrupted.
    # 
    # Note that a checkpointed operation consumes more memory, and is slightly slower. If you don't
    # need checkpointing, you should disable it.
    # Type: boolean
    # Default value: true
    #log.checkpoint.enabled = true

    # The path to a checkpoint file to resume an operation from. If this option is set, and
    # depending on the replay strategy, then only unprocessed and/or failed data will be
    # re-processed.
    # 
    # When using a checkpoint file to resume an operation, make sure that both operations target the
    # same dataset:
    # 
    # - When loading, make sure that the files to load weren't renamed or moved, otherwise all files
    # would be considered new and loaded entirely. Also, if the file contents have changed, new
    # records may go unnoticed, or cause other records to be processed twice.
    # - When unloading, make sure that the read query, the token distribution across the ring, the
    # number of splits (see `schema.splits`) and the data to read are all the same across
    # operations, otherwise the unloaded data could be inconsistent.
    # Type: string
    # Default value: null
    #log.checkpoint.file = null

    # The replay strategy to use when resuming an operation from a checkpoint file. Valid values
    # are:
    # - `resume`: DSBulk will only process new records from resources that weren't consumed
    # entirely. Records that were already processed will be ignored, including rejected ones
    # (rejected records are always written to bad files). This is the safest option when loading if
    # the operation is not idempotent.
    # - `retry`: this is the default option. DSBulk will process new and rejected records from
    # resources that weren't consumed entirely. Note that this strategy may result in some rows
    # being inserted twice and thus should only be used if the operation is idempotent.
    # - `retryAll`: like `retry`, DSBulk will process new and rejected records, but unlike `retry`,
    # it will process all resources, including those marked as consumed entirely. Note that this
    # strategy may result in some rows being inserted twice and thus should only be used if the
    # operation is idempotent.
    # Type: string
    # Default value: "retry"
    #log.checkpoint.replayStrategy = "retry"

    # The maximum number of query warnings to log before muting them. Query warnings are sent by the
    # server (for example, if the number of statements in a batch is greater than the warning
    # threshold configured on the server). They are useful to diagnose suboptimal configurations but
    # tend to be too invasive, which is why DSBulk by default will only log the 50 first query
    # warnings; any subsequent warnings will be muted and won't be logged at all. Setting this value
    # to any negative integer disables this feature (not recommended).
    # Type: number
    # Default value: 50
    #log.maxQueryWarnings = 50

    # The maximum length for a result set value. Result set values longer than this value will be
    # truncated.
    # 
    # Setting this value to `-1` makes the maximum length for a result set value unlimited (not
    # recommended).
    # Type: number
    # Default value: 50
    #log.row.maxResultSetValueLength = 50

    # The maximum number of result set values to print. If the row has more result set values than
    # this limit, the exceeding values will not be printed.
    # 
    # Setting this value to `-1` makes the maximum number of result set values unlimited (not
    # recommended).
    # Type: number
    # Default value: 50
    #log.row.maxResultSetValues = 50

    # Whether to print record sources in debug files. When set to true (the default), debug files
    # will contain, for each record that failed to be processed, its original source, such as the
    # text line that the record was parsed from.
    # 
    # Furthermore, when loading, enabling this option also enables the creation of so-called "bad
    # files", that is, files containing the original lines that could not be inserted; these files
    # could then be used as the data source of a subsequent load operation that would load only the
    # failed records.
    # 
    # This feature is useful to locate failed records more easily and diagnose processing failures –
    # especially if the original data source is a remote one, such as an FTP or HTTP URL.
    # 
    # But for this feature to be possible, record sources must be kept in memory until the record is
    # fully processed. For large record sizes (over 1 megabyte per record), retaining record sources
    # in memory could put a high pressure on the JVM heap, thus exposing the operation to
    # out-of-memory errors. This phenomenon is exacerbated when batching is enabled. If you are
    # experiencing such errors, consider disabling this option.
    # 
    # Note that, regardless of the value of this option, DSBulk will always print the record's
    # *resource* – that is, the file name or the database table where it came from – and the
    # record's *position* – that is, the ordinal position of the record inside the resource, when
    # available (for example, this could be the line number in a CSV file).
    # Type: boolean
    # Default value: true
    #log.sources = true

    # The desired log level. Valid values are:
    # 
    # - ABRIDGED: Print only basic information in summarized form.
    # - NORMAL: Print basic information in summarized form, and the statement's query string, if
    # available. For batch statements, this verbosity level also prints information about the
    # batch's inner statements.
    # - EXTENDED: Print full information, including the statement's query string, if available, and
    # the statement's bound values, if available. For batch statements, this verbosity level also
    # prints all information available about the batch's inner statements.
    # Type: string
    # Default value: "EXTENDED"
    #log.stmt.level = "EXTENDED"

    # The maximum length for a bound value. Bound values longer than this value will be truncated.
    # 
    # Setting this value to `-1` makes the maximum length for a bound value unlimited (not
    # recommended).
    # Type: number
    # Default value: 50
    #log.stmt.maxBoundValueLength = 50

    # The maximum number of bound values to print. If the statement has more bound values than this
    # limit, the exceeding values will not be printed.
    # 
    # Setting this value to `-1` makes the maximum number of bound values unlimited (not
    # recommended).
    # Type: number
    # Default value: 50
    #log.stmt.maxBoundValues = 50

    # The maximum number of inner statements to print for a batch statement. Only applicable for
    # batch statements, ignored otherwise. If the batch statement has more children than this value,
    # the exceeding child statements will not be printed.
    # 
    # Setting this value to `-1` disables this feature (not recommended).
    # Type: number
    # Default value: 10
    #log.stmt.maxInnerStatements = 10

    # The maximum length for a query string. Query strings longer than this value will be truncated.
    # 
    # Setting this value to `-1` disables this feature (not recommended).
    # Type: number
    # Default value: 500
    #log.stmt.maxQueryStringLength = 500

    ################################################################################################
    # Monitoring-specific settings.
    ################################################################################################

    # The report interval. DSBulk will print useful metrics about the ongoing operation at this
    # rate; for example, if this value is set to 10 seconds, then DSBulk will print metrics every
    # ten seconds. Valid values: any value specified in [HOCON duration
    # syntax](https://github.com/lightbend/config/blob/master/HOCON.md#duration-format), but
    # durations lesser than one second will be rounded up to 1 second.
    # Type: string
    # Default value: "5 seconds"
    #monitoring.reportRate = "5 seconds"

    # Enable or disable console reporting. If enabled, DSBulk will print useful metrics about the
    # ongoing operation to standard error; the metrics will be refreshed at `reportRate`. Displayed
    # information includes: total records, failed records, throughput, latency, and if available,
    # average batch size. Note that when `log.verbosity` is set to quiet (0), DSBulk will disable
    # the console reporter regardless of the value specified here. The default is true (print
    # ongoing metrics to the console).
    # Type: boolean
    # Default value: true
    #monitoring.console = true

    # Enable or disable CSV reporting. If enabled, CSV files containing metrics will be generated in
    # the designated log directory. Driver metrics can also be exported, but they are disabled by
    # default; see `monitoring.jmx` for details for details about how to enable them.
    # Type: boolean
    # Default value: false
    #monitoring.csv = false

    # The time unit used when printing latency durations. For example, if this unit is MILLISECONDS,
    # then the latencies will be displayed in milliseconds. Valid values: all `TimeUnit` enum
    # constants.
    # Type: string
    # Default value: "MILLISECONDS"
    #monitoring.durationUnit = "MILLISECONDS"

    # The expected total number of reads. Optional, but if set, the console reporter will also print
    # the overall achievement percentage. Setting this value to `-1` disables this feature.
    # Type: number
    # Default value: -1
    #monitoring.expectedReads = -1

    # The expected total number of writes. Optional, but if set, the console reporter will also
    # print the overall achievement percentage. Setting this value to `-1` disables this feature.
    # Type: number
    # Default value: -1
    #monitoring.expectedWrites = -1

    # Enable or disable JMX reporting. Note that to enable remote JMX reporting, several properties
    # must also be set in the JVM during launch. This is accomplished via the `DSBULK_JAVA_OPTS`
    # environment variable.
    # 
    # Driver metrics can also be exposed; note however that by default, all driver metrics are
    # disabled. You can enable them with the following driver settings:
    # - `datastax-java-driver.advanced.metrics.session.enabled` should contain a list of
    # session-level metric names to enable;
    # - `datastax-java-driver.advanced.metrics.node.enabled` should contain a list of node-level
    # metric names to enable.
    # Driver metrics appear under a folder named after the session name (by default in DSBulk, the
    # session name is simply "driver").
    # Type: boolean
    # Default value: true
    #monitoring.jmx = true

    # The job name to use. DSBulk will add a label "job" with this value to each exported metric.
    # This is also the job name used when pushing metrics to a PushGateway. The default is "DSBulk".
    # See https://github.com/prometheus/pushgateway#about-the-job-and-instance-labels for more
    # information.
    # Type: string
    # Default value: "DSBulk"
    #monitoring.prometheus.job = "DSBulk"

    # A set of static labels to add to each exported metric, in both pull and push modes. Note that
    # DSBulk automatically adds the following labels:
    # 
    # - `operation_id` is set to the current operation ID (a.k.a. execution ID, see
    # `engine.executionId`);
    # - `job` is set to the job name (as defined by `monitoring.prometheus.job`, by default
    # "DSBulk");
    # - `application_name` is set to "DataStax Bulk Loader" followed by the operation ID;
    # - `application_version` is set to the DSBulk's version;
    # - `driver_version` is set to the DataStax Java driver version;
    # - `client_id` is set to DSBulk's client UUID.
    # 
    # The last four labels correspond to values that DSBulk also passes to the driver, which in turn
    # uses the same info to connect to Cassandra. This makes it possible to correlate data sent by
    # the driver to Cassandra with data sent by DSBulk to Prometheus.
    # Type: map<string,string>
    # Default value: {}
    #monitoring.prometheus.labels = {}

    # Enable or disable exposing metrics to Prometheus in the traditional pull model (scraping). If
    # enabled, all DSBulk and metrics will be accessible at an (unsecured) HTTP endpoint. Driver
    # metrics can also be exported, but they are disabled by default; see `monitoring.jmx` for
    # details about how to enable them.
    # Type: boolean
    # Default value: false
    #monitoring.prometheus.pull.enabled = false

    # The hostname that the metrics HTTP server should bind to. Leave empty to have the server bind
    # to the wildcard address (0.0.0.0).
    # Type: string
    # Default value: ""
    #monitoring.prometheus.pull.hostname = ""

    # The port that the metrics HTTP server should bind to.
    # Type: number
    # Default value: 8080
    #monitoring.prometheus.pull.port = 8080

    # Enabled or disable pushing metrics to a PushGateway. If enabled, DSBulk will push metrics to
    # this URL at the end of the operation. Note that not all metrics are exported when pushing to a
    # PushGateway; only some high-level ones are, including the total time elapsed, the number of
    # records processed and the number of rows written or read. In particular, driver metrics are
    # currently not pushed.
    # Type: boolean
    # Default value: false
    #monitoring.prometheus.push.enabled = false

    # Whether to add an instance grouping key to exported metrics. If enabled, DSBulk adds an
    # "instance" grouping key with its value set to the machine's IP address to the exported
    # metrics. This will effectively group metrics by instance, rather than by job. See
    # https://github.com/prometheus/pushgateway#about-the-job-and-instance-labels for more
    # information.
    # Type: boolean
    # Default value: false
    #monitoring.prometheus.push.groupBy.instance = false

    # A set of static extra keys to add to the grouping keys. Note that grouping keys are also added
    # as labels to each exported metric in push mode.
    # Type: map<string,string>
    # Default value: {}
    #monitoring.prometheus.push.groupBy.keys = {}

    # Whether to add the operation ID as a grouping key to exported metrics. If enabled, DSBulk adds
    # an "operation_id" grouping key with its value set to the current operation ID (see
    # `engine.executionId`) to the exported metrics. This will effectively group metrics by
    # operation, rather than by job.
    # Type: boolean
    # Default value: false
    #monitoring.prometheus.push.groupBy.operation = false

    # The password to authenticate against the push gateway, using basic HTTP auth. Leave empty to
    # use unautheticated HTTP requests.
    # Type: string
    # Default value: ""
    #monitoring.prometheus.push.password = ""

    # The base URL of a Prometheus PushGateway server, e.g. http://pushgateway.example.org:9091
    # (don't include the "/metrics" path).
    # Type: string
    # Default value: "http://localhost:9091"
    #monitoring.prometheus.push.url = "http://localhost:9091"

    # The username to authenticate against the push gateway, using basic HTTP auth. Leave empty to
    # use unautheticated HTTP requests.
    # Type: string
    # Default value: ""
    #monitoring.prometheus.push.username = ""

    # The time unit used when printing throughput rates. For example, if this unit is SECONDS, then
    # the throughput will be displayed in rows per second. Valid values: all `TimeUnit` enum
    # constants.
    # Type: string
    # Default value: "SECONDS"
    #monitoring.rateUnit = "SECONDS"

    # Whether or not to track the throughput in bytes. When enabled, DSBulk will track and display
    # the number of bytes sent or received per second. While useful to evaluate how much data is
    # actually being transferred, computing such metrics is CPU-intensive and may slow down the
    # operation. This is why it is disabled by default. Also note that the heuristic used to compute
    # data sizes is not 100% accurate and sometimes underestimates the actual size.
    # Type: boolean
    # Default value: false
    #monitoring.trackBytes = false

    ################################################################################################
    # Runner-specific settings. Runner settings control how DSBulk parses command lines and reads
    # its configuration.
    ################################################################################################

    # Whether to prompt for passwords when they are missing from configuration files. When this
    # option is true (the default value), if a login or username is present in the configuration,
    # but not its corresponding password, DSBulk will prompt for it.
    # 
    # Prompting from passwords require interactive shells; if the standard input is not connected to
    # a terminal, no passwords will be prompted, even if this option is true. You should only
    # disable this feature if DSBulk mistankenly assumes that it is running in an interactive shell.
    # Type: boolean
    # Default value: true
    #runner.promptForPasswords = true

    ################################################################################################
    # Settings applicable for reading from AWS S3 URLs.
    ################################################################################################

    # The size (count) of the S3Client cache. Since each S3 URL
    # must contain the credentials for the target bucket, we cache
    # the clients to prevent rebuilding the same client over and over.
    # The default size of 20 is totally arbitrary, as we generally
    # expect that most S3 URLs in a given batch will be using the
    # same credentials, meaning the cache will really only ever
    # contain one entry.
    # Type: number
    # Default value: 20
    #s3.clientCacheSize = 20

    ################################################################################################
    # Settings applicable for the count workflow, ignored otherwise.
    ################################################################################################

    # Which kind(s) of statistics to compute. Only applicaple for the count workflow, ignored
    # otherwise. Possible values are:
    # * `global`: count the total number of rows in the table.
    # * `ranges`: count the total number of rows per token range in the table.
    # * `hosts`: count the total number of rows per hosts in the table.
    # * `partitions`: count the total number of rows in the N biggest partitions in the table. When
    # using this mode, you can chose how many partitions to track with the `numPartitions` setting.
    # Type: list<string>
    # Default value: ["global"]
    #stats.modes = ["global"]

    # The number of distinct partitions to count rows for. Only applicaple for the count workflow
    # when `stats.modes` contains `partitions`, ignored otherwise.
    # Type: number
    # Default value: 10
    #stats.numPartitions = 10

}