diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 43aafccaf..37310b31f 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,7 @@ Unreleased ---------- * :doc:`/scripts/csvformat` adds a :code:`--skip-header` (:code:`-E`) option to not output a header row. +* :doc:`/scripts/csvstat` supports the :code:`--no-inference` (:code:`-I`), :code:`--locale` (:code:`-L`), :code:`--blanks`, :code:`--date-format` and :code:`--datetime-format` options. * :doc:`/scripts/csvstat` adds a :code:`--json` option to output results as JSON text. * :doc:`/scripts/csvstat` adds an :code:`--indent` option to indent the JSON text when :code:`--json` is set. * :doc:`/scripts/csvstat` reports a "Non-null values" statistic (or a :code:`nonnulls` column when :code:`--csv` is set). diff --git a/csvkit/utilities/csvstat.py b/csvkit/utilities/csvstat.py index 2d0a93f71..17854dff3 100644 --- a/csvkit/utilities/csvstat.py +++ b/csvkit/utilities/csvstat.py @@ -69,7 +69,6 @@ class CSVStat(CSVKitUtility): description = 'Print descriptive statistics for each column in a CSV file.' - override_flags = ['L', 'blanks', 'date-format', 'datetime-format'] def add_arguments(self): self.argparser.add_argument( @@ -144,6 +143,9 @@ def add_arguments(self): '-y', '--snifflimit', dest='sniff_limit', type=int, default=1024, help='Limit CSV dialect sniffing to the specified number of bytes. ' 'Specify "0" to disable sniffing entirely, or "-1" to sniff the entire file.') + self.argparser.add_argument( + '-I', '--no-inference', dest='no_inference', action='store_true', + help='Disable type inference when parsing the input. 
Disable reformatting of values.') def main(self): if self.args.names_only: @@ -183,6 +185,7 @@ def main(self): self.input_file, skip_lines=self.args.skip_lines, sniff_limit=sniff_limit, + column_types=self.get_column_types(), **self.reader_kwargs, ) diff --git a/docs/common_arguments.rst b/docs/common_arguments.rst index 874f1214e..6487ce046 100644 --- a/docs/common_arguments.rst +++ b/docs/common_arguments.rst @@ -31,8 +31,11 @@ csvkit's tools share a set of common command-line arguments. Not every argument Specify the locale (en_US) of any formatted numbers. -S, --skipinitialspace Ignore whitespace immediately following the delimiter. - --blanks Do not coerce empty, "na", "n/a", "none", "null", "." - strings to NULL values. + --blanks Do not convert "", "na", "n/a", "none", "null", "." to + NULL. + --null-value NULL_VALUES [NULL_VALUES ...] + Convert this value to NULL. --null-value can be + specified multiple times. --date-format DATE_FORMAT Specify a strptime date format string like "%m/%d/%Y". --datetime-format DATETIME_FORMAT diff --git a/docs/scripts/csvstat.rst b/docs/scripts/csvstat.rst index 2d4b6babd..259d2cf0c 100644 --- a/docs/scripts/csvstat.rst +++ b/docs/scripts/csvstat.rst @@ -10,12 +10,15 @@ Prints descriptive statistics for all columns in a CSV file. Will intelligently .. 
code-block:: bash usage: csvstat [-h] [-d DELIMITER] [-t] [-q QUOTECHAR] [-u {0,1,2,3}] [-b] - [-p ESCAPECHAR] [-z FIELD_SIZE_LIMIT] [-e ENCODING] [-S] [-H] - [-K SKIP_LINES] [-v] [-l] [--zero] [-V] [--csv] [--json] + [-p ESCAPECHAR] [-z FIELD_SIZE_LIMIT] [-e ENCODING] [-L LOCALE] + [-S] [--blanks] [--null-value NULL_VALUES [NULL_VALUES ...]] + [--date-format DATE_FORMAT] [--datetime-format DATETIME_FORMAT] + [-H] [-K SKIP_LINES] [-v] [-l] [--zero] [-V] [--csv] [--json] [-i INDENT] [-n] [-c COLUMNS] [--type] [--nulls] [--non-nulls] - [--unique] [--min] [--max] [--sum] [--mean] [--median] [--stdev] - [--len] [--freq] [--freq-count FREQ_COUNT] [--count] - [--decimal-format DECIMAL_FORMAT] [-G] [-y SNIFF_LIMIT] + [--unique] [--min] [--max] [--sum] [--mean] [--median] + [--stdev] [--len] [--max-precision] [--freq] + [--freq-count FREQ_COUNT] [--count] + [--decimal-format DECIMAL_FORMAT] [-G] [-y SNIFF_LIMIT] [-I] [FILE] Print descriptive statistics for each column in a CSV file. @@ -48,6 +51,7 @@ Prints descriptive statistics for all columns in a CSV file. Will intelligently --median Only output medians. --stdev Only output standard deviations. --len Only output the length of the longest values. + --max-precision Only output the most decimal places. --freq Only output lists of frequent values. --freq-count FREQ_COUNT The maximum number of frequent values to display. @@ -59,7 +63,10 @@ Prints descriptive statistics for all columns in a CSV file. Will intelligently Do not use grouping separators in decimal numbers. -y SNIFF_LIMIT, --snifflimit SNIFF_LIMIT Limit CSV dialect sniffing to the specified number of - bytes. Specify "0" to disable sniffing. + bytes. Specify "0" to disable sniffing entirely, or + "-1" to sniff the entire file. + -I, --no-inference Disable type inference when parsing the input. Disable + reformatting of values. See also: :doc:`../common_arguments`.