From a7057b042c38034430a7cbf7bfc6ab3b699bb570 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 19 Aug 2023 14:46:58 -0400 Subject: [PATCH 1/4] Support ZSTD compression in-process --- go.mod | 1 + go.sum | 2 ++ internal/pkg/cli/option_parse.go | 14 ++++++++++++-- internal/pkg/lib/file_readers.go | 33 ++++++++++++++++++++++++++++++++ 4 files changed, 48 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index b9e11f7eb3..2373dea148 100644 --- a/go.mod +++ b/go.mod @@ -34,6 +34,7 @@ require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/felixge/fgprof v0.9.3 // indirect github.com/google/pprof v0.0.0-20211214055906-6f57359322fd // indirect + github.com/klauspost/compress v1.16.7 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index e896c8f4e9..84593de572 100644 --- a/go.sum +++ b/go.sum @@ -15,6 +15,8 @@ github.com/johnkerl/lumin v1.0.0 h1:CV34cHZOJ92Y02RbQ0rd4gA0C06Qck9q8blOyaPoWpU= github.com/johnkerl/lumin v1.0.0/go.mod h1:eLf5AdQOaLvzZ2zVy4REr/DSeEwG+CZreHwNLICqv9E= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= +github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= +github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc h1:RKf14vYWi2ttpEmkA4aQ3j4u9dStX2t4M8UM6qqNsG8= github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc/go.mod h1:kopuH9ugFRkIXf3YoqHKyrJ9YfUFsckUU9S7B+XP+is= github.com/lestrrat-go/strftime v1.0.6 h1:CFGsDEt1pOpFNU+TJB0nhz9jl+K0hZSLE205AhTIGQQ= diff --git a/internal/pkg/cli/option_parse.go b/internal/pkg/cli/option_parse.go index 034f3a6902..6ff22ceebb 100644 --- 
a/internal/pkg/cli/option_parse.go +++ b/internal/pkg/cli/option_parse.go @@ -2200,7 +2200,8 @@ func CompressedDataPrintInfo() { fmt.Print(`Miller offers a few different ways to handle reading data files which have been compressed. -* Decompression done within the Miller process itself: ` + "`--bz2in`" + ` ` + "`--gzin`" + ` ` + "`--zin`" + ` +* Decompression done within the Miller process itself: ` + "`--bz2in`" + ` ` + "`--gzin`" + ` ` + "`--zin`" + "`--zstdin`" + + ` * Decompression done outside the Miller process: ` + "`--prepipe`" + ` ` + "`--prepipex`" + ` Using ` + "`--prepipe`" + ` and ` + "`--prepipex`" + ` you can specify an action to be @@ -2223,7 +2224,7 @@ compression (or other) utilities, simply pipe the output: Lastly, note that if ` + "`--prepipe`" + ` or ` + "`--prepipex`" + ` is specified, it replaces any decisions that might have been made based on the file suffix. Likewise, -` + "`--gzin`" + `/` + "`--bz2in`" + `/` + "`--zin`" + ` are ignored if ` + "`--prepipe`" + ` is also specified. +` + "`--gzin`" + `/` + "`--bz2in`" + `/` + "`--zin`" + "`--zin`" + ` are ignored if ` + "`--prepipe`" + ` is also specified. `) } @@ -2314,6 +2315,15 @@ var CompressedDataFlagSection = FlagSection{ *pargi += 1 }, }, + + { + name: "--zstdin", + help: "Uncompress zstd within the Miller process. 
Done by default if file ends in `.zstd`.", + parser: func(args []string, argc int, pargi *int, options *TOptions) { + options.ReaderOptions.FileInputEncoding = lib.FileInputEncodingZstd + *pargi += 1 + }, + }, }, } diff --git a/internal/pkg/lib/file_readers.go b/internal/pkg/lib/file_readers.go index fa42688efd..1511200406 100644 --- a/internal/pkg/lib/file_readers.go +++ b/internal/pkg/lib/file_readers.go @@ -25,6 +25,7 @@ import ( "compress/gzip" "compress/zlib" "fmt" + "github.com/klauspost/compress/zstd" "io" "net/http" "os" @@ -38,6 +39,7 @@ const ( FileInputEncodingBzip2 FileInputEncodingGzip FileInputEncodingZlib + FileInputEncodingZstd ) // OpenFileForRead: If prepipe is non-empty, popens "{prepipe} < {filename}" @@ -160,6 +162,8 @@ func openEncodedHandleForRead( return gzip.NewReader(handle) case FileInputEncodingZlib: return zlib.NewReader(handle) + case FileInputEncodingZstd: + return NewZstdReadCloser(handle) } InternalCodingErrorIf(encoding != FileInputEncodingDefault) @@ -173,6 +177,9 @@ func openEncodedHandleForRead( if strings.HasSuffix(filename, ".z") { return zlib.NewReader(handle) } + if strings.HasSuffix(filename, ".zst") { + return NewZstdReadCloser(handle) + } // Pass along os.Stdin or os.Open(filename) return handle, nil @@ -200,6 +207,32 @@ func (rc *BZip2ReadCloser) Close() error { return rc.originalHandle.Close() } +// ---------------------------------------------------------------- +// ZstdReadCloser remedies the fact that zstd.NewReader does not implement io.ReadCloser. 
+type ZstdReadCloser struct { + originalHandle io.ReadCloser + zstdHandle io.Reader +} + +func NewZstdReadCloser(handle io.ReadCloser) (*ZstdReadCloser, error) { + zstdHandle, err := zstd.NewReader(handle) + if err != nil { + return nil, err + } + return &ZstdReadCloser{ + originalHandle: handle, + zstdHandle: zstdHandle, + }, nil +} + +func (rc *ZstdReadCloser) Read(p []byte) (n int, err error) { + return rc.zstdHandle.Read(p) +} + +func (rc *ZstdReadCloser) Close() error { + return rc.originalHandle.Close() +} + // ---------------------------------------------------------------- // IsEOF handles the following problem: reading past end of files opened with From 83b8bdcf5fa5e7c4ba98f0d53fa77f01b33c361b Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 19 Aug 2023 15:04:16 -0400 Subject: [PATCH 2/4] doc mods --- docs/src/manpage.md | 8 ++++++-- docs/src/manpage.txt | 8 ++++++-- docs/src/reference-main-compressed-data.md | 12 ++++++------ docs/src/reference-main-compressed-data.md.in | 12 ++++++------ docs/src/reference-main-flag-list.md | 6 ++++-- internal/pkg/cli/option_parse.go | 10 ++++++++++ man/manpage.txt | 8 ++++++-- man/mlr.1 | 8 ++++++-- 8 files changed, 50 insertions(+), 22 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 74d7c6c9ac..aad8a4f50f 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -262,7 +262,7 @@ MILLER(1) MILLER(1) Miller offers a few different ways to handle reading data files which have been compressed. - * Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin` + * Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin``--zstdin` * Decompression done outside the Miller process: `--prepipe` `--prepipex` Using `--prepipe` and `--prepipex` you can specify an action to be @@ -285,7 +285,7 @@ MILLER(1) MILLER(1) Lastly, note that if `--prepipe` or `--prepipex` is specified, it replaces any decisions that might have been made based on the file suffix. 
Likewise, - `--gzin`/`--bz2in`/`--zin` are ignored if `--prepipe` is also specified. + `--gzin`/`--bz2in`/`--zin``--zin` are ignored if `--prepipe` is also specified. --bz2in Uncompress bzip2 within the Miller process. Done by default if file ends in `.bz2`. @@ -302,6 +302,8 @@ MILLER(1) MILLER(1) `.mlrrc`. --prepipe-zcat Same as `--prepipe zcat`, except this is allowed in `.mlrrc`. + --prepipe-zstdcat Same as `--prepipe zstdcat`, except this is allowed + in `.mlrrc`. --prepipex {decompression command} Like `--prepipe` with one exception: doesn't insert `<` between command and filename at runtime. Useful @@ -310,6 +312,8 @@ MILLER(1) MILLER(1) in `.mlrrc` to avoid unexpected code execution. --zin Uncompress zlib within the Miller process. Done by default if file ends in `.z`. + --zstdin Uncompress zstd within the Miller process. Done by + default if file ends in `.zstd`. 1mCSV/TSV-ONLY FLAGS0m These are flags which are applicable to CSV format. diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 8d79e4f608..1d59128536 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -241,7 +241,7 @@ MILLER(1) MILLER(1) Miller offers a few different ways to handle reading data files which have been compressed. - * Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin` + * Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin``--zstdin` * Decompression done outside the Miller process: `--prepipe` `--prepipex` Using `--prepipe` and `--prepipex` you can specify an action to be @@ -264,7 +264,7 @@ MILLER(1) MILLER(1) Lastly, note that if `--prepipe` or `--prepipex` is specified, it replaces any decisions that might have been made based on the file suffix. Likewise, - `--gzin`/`--bz2in`/`--zin` are ignored if `--prepipe` is also specified. + `--gzin`/`--bz2in`/`--zin``--zin` are ignored if `--prepipe` is also specified. --bz2in Uncompress bzip2 within the Miller process. Done by default if file ends in `.bz2`. 
@@ -281,6 +281,8 @@ MILLER(1) MILLER(1) `.mlrrc`. --prepipe-zcat Same as `--prepipe zcat`, except this is allowed in `.mlrrc`. + --prepipe-zstdcat Same as `--prepipe zstdcat`, except this is allowed + in `.mlrrc`. --prepipex {decompression command} Like `--prepipe` with one exception: doesn't insert `<` between command and filename at runtime. Useful @@ -289,6 +291,8 @@ MILLER(1) MILLER(1) in `.mlrrc` to avoid unexpected code execution. --zin Uncompress zlib within the Miller process. Done by default if file ends in `.z`. + --zstdin Uncompress zstd within the Miller process. Done by + default if file ends in `.zstd`. 1mCSV/TSV-ONLY FLAGS0m These are flags which are applicable to CSV format. diff --git a/docs/src/reference-main-compressed-data.md b/docs/src/reference-main-compressed-data.md index a54ed8026b..729cf5bbcc 100644 --- a/docs/src/reference-main-compressed-data.md +++ b/docs/src/reference-main-compressed-data.md @@ -16,13 +16,13 @@ Quick links: # Compressed data -As of [Miller 6](new-in-miller-6.md), Miller supports reading GZIP, BZIP2, and -ZLIB formats transparently, and in-process. And (as before Miller 6) you have a +As of [Miller 6](new-in-miller-6.md), Miller supports reading GZIP, BZIP2, ZLIB, and +ZSTD formats transparently, and in-process. And (as before Miller 6) you have a more general `--prepipe` option to support other decompression programs. ## Automatic detection on input -If your files end in `.gz`, `.bz2`, or `.z` then Miller will autodetect by file extension: +If your files end in `.gz`, `.bz2`, `.z`, or `.zst` then Miller will autodetect by file extension:
 file gz-example.csv.gz
@@ -52,7 +52,7 @@ This will decompress the input data on the fly, while leaving the disk file unmo
 
 ## Manual detection on input
 
-If the filename doesn't in in `.gz`, `.bz2`, or `.z` then you can use the flags `--gzin`, `--bz2in`, or `--zin` to let Miller know:
+If the filename doesn't end in `.gz`, `.bz2`, `.z`, or `.zst` then you can use the flags `--gzin`, `--bz2in`, `--zin`, or `--zstdin` to let Miller know:
 
 
 mlr --csv --gzin sort -f color myfile.bin # myfile.bin has gzip contents
@@ -94,7 +94,7 @@ If the command has flags, quote them: e.g. `mlr --prepipe 'zcat -cf'`.
 
 In your [.mlrrc file](customization.md), `--prepipe` and `--prepipex` are not
 allowed as they could be used for unexpected code execution. You can use
-`--prepipe-bz2`, `--prepipe-gunzip`, and `--prepipe-zcat` in `.mlrrc`, though.
+`--prepipe-bz2`, `--prepipe-gunzip`, `--prepipe-zcat`, and `--prepipe-zstdcat` in `.mlrrc`, though.
 
 Note that this feature is quite general and is not limited to decompression
 utilities. You can use it to apply per-file filters of your choice: e.g. `mlr
@@ -107,7 +107,7 @@ There is a `--prepipe` and a `--prepipex`:
 
 Lastly, note that if `--prepipe` or `--prepipex` is specified on the Miller
 command line, it replaces any autodetect decisions that might have been made
-based on the filename extension. Likewise, `--gzin`/`--bz2in`/`--zin` are ignored if
+based on the filename extension. Likewise, `--gzin`/`--bz2in`/`--zin`/`--zstdin` are ignored if
 `--prepipe` or `--prepipex` is also specified.
 
 ## Compressed output
diff --git a/docs/src/reference-main-compressed-data.md.in b/docs/src/reference-main-compressed-data.md.in
index b13e5e7327..cbca6a3c34 100644
--- a/docs/src/reference-main-compressed-data.md.in
+++ b/docs/src/reference-main-compressed-data.md.in
@@ -1,12 +1,12 @@
 # Compressed data
 
-As of [Miller 6](new-in-miller-6.md), Miller supports reading GZIP, BZIP2, and
-ZLIB formats transparently, and in-process. And (as before Miller 6) you have a
+As of [Miller 6](new-in-miller-6.md), Miller supports reading GZIP, BZIP2, ZLIB, and
+ZSTD formats transparently, and in-process. And (as before Miller 6) you have a
 more general `--prepipe` option to support other decompression programs.
 
 ## Automatic detection on input
 
-If your files end in `.gz`, `.bz2`, or `.z` then Miller will autodetect by file extension:
+If your files end in `.gz`, `.bz2`, `.z`, or `.zst` then Miller will autodetect by file extension:
 
 GENMD-CARDIFY-HIGHLIGHT-ONE
 file gz-example.csv.gz
@@ -21,7 +21,7 @@ This will decompress the input data on the fly, while leaving the disk file unmo
 
 ## Manual detection on input
 
-If the filename doesn't in in `.gz`, `.bz2`, or `.z` then you can use the flags `--gzin`, `--bz2in`, or `--zin` to let Miller know:
+If the filename doesn't end in `.gz`, `.bz2`, `.z`, or `.zst` then you can use the flags `--gzin`, `--bz2in`, `--zin`, or `--zstdin` to let Miller know:
 
 GENMD-CARDIFY-HIGHLIGHT-ONE
 mlr --csv --gzin sort -f color myfile.bin # myfile.bin has gzip contents
@@ -50,7 +50,7 @@ If the command has flags, quote them: e.g. `mlr --prepipe 'zcat -cf'`.
 
 In your [.mlrrc file](customization.md), `--prepipe` and `--prepipex` are not
 allowed as they could be used for unexpected code execution. You can use
-`--prepipe-bz2`, `--prepipe-gunzip`, and `--prepipe-zcat` in `.mlrrc`, though.
+`--prepipe-bz2`, `--prepipe-gunzip`, `--prepipe-zcat`, and `--prepipe-zstdcat` in `.mlrrc`, though.
 
 Note that this feature is quite general and is not limited to decompression
 utilities. You can use it to apply per-file filters of your choice: e.g. `mlr
@@ -63,7 +63,7 @@ There is a `--prepipe` and a `--prepipex`:
 
 Lastly, note that if `--prepipe` or `--prepipex` is specified on the Miller
 command line, it replaces any autodetect decisions that might have been made
-based on the filename extension. Likewise, `--gzin`/`--bz2in`/`--zin` are ignored if
+based on the filename extension. Likewise, `--gzin`/`--bz2in`/`--zin`/`--zstdin` are ignored if
 `--prepipe` or `--prepipex` is also specified.
 
 ## Compressed output
diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md
index b07c0847e0..8e2daf9d02 100644
--- a/docs/src/reference-main-flag-list.md
+++ b/docs/src/reference-main-flag-list.md
@@ -72,7 +72,7 @@ Notes:
 Miller offers a few different ways to handle reading data files
 	which have been compressed.
 
-* Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin`
+* Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin` `--zstdin`
 * Decompression done outside the Miller process: `--prepipe` `--prepipex`
 
 Using `--prepipe` and `--prepipex` you can specify an action to be
@@ -95,7 +95,7 @@ compression (or other) utilities, simply pipe the output:
 
 Lastly, note that if `--prepipe` or `--prepipex` is specified, it replaces any
 decisions that might have been made based on the file suffix. Likewise,
-`--gzin`/`--bz2in`/`--zin` are ignored if `--prepipe` is also specified.
+`--gzin`/`--bz2in`/`--zin`/`--zstdin` are ignored if `--prepipe` is also specified.
 
 
 **Flags:**
@@ -106,8 +106,10 @@ decisions that might have been made based on the file suffix. Likewise,
 * `--prepipe-bz2`: Same as  `--prepipe bz2`, except this is allowed in `.mlrrc`.
 * `--prepipe-gunzip`: Same as  `--prepipe gunzip`, except this is allowed in `.mlrrc`.
 * `--prepipe-zcat`: Same as  `--prepipe zcat`, except this is allowed in `.mlrrc`.
+* `--prepipe-zstdcat`: Same as  `--prepipe zstdcat`, except this is allowed in `.mlrrc`.
 * `--prepipex {decompression command}`: Like `--prepipe` with one exception: doesn't insert `<` between command and filename at runtime. Useful for some commands like `unzip -qc` which don't read standard input.  Allowed at the command line, but not in `.mlrrc` to avoid unexpected code execution.
 * `--zin`: Uncompress zlib within the Miller process. Done by default if file ends in `.z`.
+* `--zstdin`: Uncompress zstd within the Miller process. Done by default if file ends in `.zst`.
 
 ## CSV/TSV-only flags
 
diff --git a/internal/pkg/cli/option_parse.go b/internal/pkg/cli/option_parse.go
index 6ff22ceebb..0ee362f2b3 100644
--- a/internal/pkg/cli/option_parse.go
+++ b/internal/pkg/cli/option_parse.go
@@ -2279,6 +2279,16 @@ var CompressedDataFlagSection = FlagSection{
 			},
 		},
 
+		{
+			name: "--prepipe-zstdcat",
+			help: "Same as  `--prepipe zstdcat`, except this is allowed in `.mlrrc`.",
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				options.ReaderOptions.Prepipe = "zstdcat"
+				options.ReaderOptions.PrepipeIsRaw = false
+				*pargi += 1
+			},
+		},
+
 		{
 			name: "--prepipe-bz2",
 			help: "Same as  `--prepipe bz2`, except this is allowed in `.mlrrc`.",
diff --git a/man/manpage.txt b/man/manpage.txt
index 8d79e4f608..1d59128536 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -241,7 +241,7 @@ MILLER(1)                                                            MILLER(1)
        Miller offers a few different ways to handle reading data files
             which have been compressed.
 
-       * Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin`
+       * Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin` `--zstdin`
        * Decompression done outside the Miller process: `--prepipe` `--prepipex`
 
        Using `--prepipe` and `--prepipex` you can specify an action to be
@@ -264,7 +264,7 @@ MILLER(1)                                                            MILLER(1)
 
        Lastly, note that if `--prepipe` or `--prepipex` is specified, it replaces any
        decisions that might have been made based on the file suffix. Likewise,
-       `--gzin`/`--bz2in`/`--zin` are ignored if `--prepipe` is also specified.
+       `--gzin`/`--bz2in`/`--zin`/`--zstdin` are ignored if `--prepipe` is also specified.
 
        --bz2in                  Uncompress bzip2 within the Miller process. Done by
                                 default if file ends in `.bz2`.
@@ -281,6 +281,8 @@ MILLER(1)                                                            MILLER(1)
                                 `.mlrrc`.
        --prepipe-zcat           Same as `--prepipe zcat`, except this is allowed in
                                 `.mlrrc`.
+       --prepipe-zstdcat        Same as `--prepipe zstdcat`, except this is allowed
+                                in `.mlrrc`.
        --prepipex {decompression command}
                                 Like `--prepipe` with one exception: doesn't insert
                                 `<` between command and filename at runtime. Useful
@@ -289,6 +291,8 @@ MILLER(1)                                                            MILLER(1)
                                 in `.mlrrc` to avoid unexpected code execution.
        --zin                    Uncompress zlib within the Miller process. Done by
                                 default if file ends in `.z`.
+       --zstdin                 Uncompress zstd within the Miller process. Done by
+                                default if file ends in `.zst`.
 
 1mCSV/TSV-ONLY FLAGS0m
        These are flags which are applicable to CSV format.
diff --git a/man/mlr.1 b/man/mlr.1
index 1d25bb6c02..583b5dc11f 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -304,7 +304,7 @@ Notes:
 Miller offers a few different ways to handle reading data files
 	which have been compressed.
 
-* Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin`
+* Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin` `--zstdin`
 * Decompression done outside the Miller process: `--prepipe` `--prepipex`
 
 Using `--prepipe` and `--prepipex` you can specify an action to be
@@ -327,7 +327,7 @@ compression (or other) utilities, simply pipe the output:
 
 Lastly, note that if `--prepipe` or `--prepipex` is specified, it replaces any
 decisions that might have been made based on the file suffix. Likewise,
-`--gzin`/`--bz2in`/`--zin` are ignored if `--prepipe` is also specified.
+`--gzin`/`--bz2in`/`--zin`/`--zstdin` are ignored if `--prepipe` is also specified.
 
 --bz2in                  Uncompress bzip2 within the Miller process. Done by
                          default if file ends in `.bz2`.
@@ -344,6 +344,8 @@ decisions that might have been made based on the file suffix. Likewise,
                          `.mlrrc`.
 --prepipe-zcat           Same as `--prepipe zcat`, except this is allowed in
                          `.mlrrc`.
+--prepipe-zstdcat        Same as `--prepipe zstdcat`, except this is allowed
+                         in `.mlrrc`.
 --prepipex {decompression command}
                          Like `--prepipe` with one exception: doesn't insert
                          `<` between command and filename at runtime. Useful
@@ -352,6 +354,8 @@ decisions that might have been made based on the file suffix. Likewise,
                          in `.mlrrc` to avoid unexpected code execution.
 --zin                    Uncompress zlib within the Miller process. Done by
                          default if file ends in `.z`.
+--zstdin                 Uncompress zstd within the Miller process. Done by
+                         default if file ends in `.zst`.
 .fi
 .if n \{\
 .RE

From 88a41570a83ebdd54699d042830ee0525e4ec592 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sat, 19 Aug 2023 15:12:04 -0400
Subject: [PATCH 3/4] unit-test cases

---
 test/cases/io-compressed-input/0014/cmd    | 1 +
 test/cases/io-compressed-input/0014/experr | 0
 test/cases/io-compressed-input/0014/expout | 5 +++++
 test/cases/io-compressed-input/0015/cmd    | 1 +
 test/cases/io-compressed-input/0015/experr | 0
 test/cases/io-compressed-input/0015/expout | 5 +++++
 test/cases/io-compressed-input/0016/cmd    | 1 +
 test/cases/io-compressed-input/0016/experr | 0
 test/cases/io-compressed-input/0016/expout | 5 +++++
 9 files changed, 18 insertions(+)
 create mode 100644 test/cases/io-compressed-input/0014/cmd
 create mode 100644 test/cases/io-compressed-input/0014/experr
 create mode 100644 test/cases/io-compressed-input/0014/expout
 create mode 100644 test/cases/io-compressed-input/0015/cmd
 create mode 100644 test/cases/io-compressed-input/0015/experr
 create mode 100644 test/cases/io-compressed-input/0015/expout
 create mode 100644 test/cases/io-compressed-input/0016/cmd
 create mode 100644 test/cases/io-compressed-input/0016/experr
 create mode 100644 test/cases/io-compressed-input/0016/expout

diff --git a/test/cases/io-compressed-input/0014/cmd b/test/cases/io-compressed-input/0014/cmd
new file mode 100644
index 0000000000..f6141361ef
--- /dev/null
+++ b/test/cases/io-compressed-input/0014/cmd
@@ -0,0 +1 @@
+mlr count -g a test/input/medium.zst
diff --git a/test/cases/io-compressed-input/0014/experr b/test/cases/io-compressed-input/0014/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/io-compressed-input/0014/expout b/test/cases/io-compressed-input/0014/expout
new file mode 100644
index 0000000000..7dcf142127
--- /dev/null
+++ b/test/cases/io-compressed-input/0014/expout
@@ -0,0 +1,5 @@
+a=pan,count=8
+a=eks,count=10
+a=wye,count=7
+a=zee,count=8
+a=hat,count=7
diff --git a/test/cases/io-compressed-input/0015/cmd b/test/cases/io-compressed-input/0015/cmd
new file mode 100644
index 0000000000..8a6e18c1e2
--- /dev/null
+++ b/test/cases/io-compressed-input/0015/cmd
@@ -0,0 +1 @@
+mlr --zstdin count -g a < test/input/medium.zst
diff --git a/test/cases/io-compressed-input/0015/experr b/test/cases/io-compressed-input/0015/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/io-compressed-input/0015/expout b/test/cases/io-compressed-input/0015/expout
new file mode 100644
index 0000000000..7dcf142127
--- /dev/null
+++ b/test/cases/io-compressed-input/0015/expout
@@ -0,0 +1,5 @@
+a=pan,count=8
+a=eks,count=10
+a=wye,count=7
+a=zee,count=8
+a=hat,count=7
diff --git a/test/cases/io-compressed-input/0016/cmd b/test/cases/io-compressed-input/0016/cmd
new file mode 100644
index 0000000000..7d38bc22ac
--- /dev/null
+++ b/test/cases/io-compressed-input/0016/cmd
@@ -0,0 +1 @@
+mlr --zstdin count -g a test/input/medium.zst
diff --git a/test/cases/io-compressed-input/0016/experr b/test/cases/io-compressed-input/0016/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/io-compressed-input/0016/expout b/test/cases/io-compressed-input/0016/expout
new file mode 100644
index 0000000000..7dcf142127
--- /dev/null
+++ b/test/cases/io-compressed-input/0016/expout
@@ -0,0 +1,5 @@
+a=pan,count=8
+a=eks,count=10
+a=wye,count=7
+a=zee,count=8
+a=hat,count=7

From 71779dc7c9b2c83efc71b2874e5256f4878224d5 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sat, 19 Aug 2023 15:14:52 -0400
Subject: [PATCH 4/4] doc-gen artifacts

---
 docs/src/customization.md      |   2 +-
 docs/src/customization.md.in   |   2 +-
 docs/src/glossary.md           |   5 +++++
 docs/src/glossary.md.in        |   5 +++++
 docs/src/new-in-miller-6.md    |   2 +-
 docs/src/new-in-miller-6.md.in |   2 +-
 test/input/medium.zst          | Bin 0 -> 957 bytes
 7 files changed, 14 insertions(+), 4 deletions(-)
 create mode 100644 test/input/medium.zst

diff --git a/docs/src/customization.md b/docs/src/customization.md
index 5a787ad4fc..cbc69928f3 100644
--- a/docs/src/customization.md
+++ b/docs/src/customization.md
@@ -50,7 +50,7 @@ and the `--csv` part will automatically be understood. If you do want to process
 
 * You can include any command-line flags, except the "terminal" ones such as `--help`.
 
-* The `--prepipe`, `--load`, and `--mload` flags aren't allowed in `.mlrrc` as they control code execution, and could result in your scripts running things you don't expect if you receive data from someone with a `./.mlrrc` in it. You can use `--prepipe-bz2`, `--prepipe-gunzip`, and `--prepipe-zcat` in `.mlrrc`, though.
+* The `--prepipe`, `--load`, and `--mload` flags aren't allowed in `.mlrrc` as they control code execution, and could result in your scripts running things you don't expect if you receive data from someone with a `./.mlrrc` in it. You can use `--prepipe-bz2`, `--prepipe-gunzip`, `--prepipe-zcat`, and `--prepipe-zstdcat` in `.mlrrc`, though.
 
 * The formatting rule is you need to put one flag beginning with `--` per line: for example, `--csv` on one line and `--nr-progress-mod 1000` on a separate line.
 
diff --git a/docs/src/customization.md.in b/docs/src/customization.md.in
index 9a1d2894b7..00367b2f76 100644
--- a/docs/src/customization.md.in
+++ b/docs/src/customization.md.in
@@ -34,7 +34,7 @@ and the `--csv` part will automatically be understood. If you do want to process
 
 * You can include any command-line flags, except the "terminal" ones such as `--help`.
 
-* The `--prepipe`, `--load`, and `--mload` flags aren't allowed in `.mlrrc` as they control code execution, and could result in your scripts running things you don't expect if you receive data from someone with a `./.mlrrc` in it. You can use `--prepipe-bz2`, `--prepipe-gunzip`, and `--prepipe-zcat` in `.mlrrc`, though.
+* The `--prepipe`, `--load`, and `--mload` flags aren't allowed in `.mlrrc` as they control code execution, and could result in your scripts running things you don't expect if you receive data from someone with a `./.mlrrc` in it. You can use `--prepipe-bz2`, `--prepipe-gunzip`, `--prepipe-zcat`, and `--prepipe-zstdcat` in `.mlrrc`, though.
 
 * The formatting rule is you need to put one flag beginning with `--` per line: for example, `--csv` on one line and `--nr-progress-mod 1000` on a separate line.
 
diff --git a/docs/src/glossary.md b/docs/src/glossary.md
index bb731297b7..774975c41e 100644
--- a/docs/src/glossary.md
+++ b/docs/src/glossary.md
@@ -905,3 +905,8 @@ See also the [arrays page](reference-main-arrays.md), as well as the page on
 
 A [data-compression format supported by Miller](reference-main-compressed-data.md).
 Files compressed using ZLIB compression normally end in `.z`.
+
+## ZSTD / .zst
+
+A [data-compression format supported by Miller](reference-main-compressed-data.md).
+Files compressed using ZSTD compression normally end in `.zst`.
diff --git a/docs/src/glossary.md.in b/docs/src/glossary.md.in
index 7e03b7d11b..b8eb8f4177 100644
--- a/docs/src/glossary.md.in
+++ b/docs/src/glossary.md.in
@@ -889,3 +889,8 @@ See also the [arrays page](reference-main-arrays.md), as well as the page on
 
 A [data-compression format supported by Miller](reference-main-compressed-data.md).
 Files compressed using ZLIB compression normally end in `.z`.
+
+## ZSTD / .zst
+
+A [data-compression format supported by Miller](reference-main-compressed-data.md).
+Files compressed using ZSTD compression normally end in `.zst`.
diff --git a/docs/src/new-in-miller-6.md b/docs/src/new-in-miller-6.md
index 3170819c9d..32633b6f8e 100644
--- a/docs/src/new-in-miller-6.md
+++ b/docs/src/new-in-miller-6.md
@@ -143,7 +143,7 @@ the `TZ` environment variable. Please see [DSL datetime/timezone functions](refe
 
 ### In-process support for compressed input
 
-In addition to `--prepipe gunzip`, you can now use the `--gzin` flag. In fact, if your files end in `.gz` you don't even need to do that -- Miller will autodetect by file extension and automatically uncompress `mlr --csv cat foo.csv.gz`. Similarly for `.z` and `.bz2` files.  Please see the page on [Compressed data](reference-main-compressed-data.md) for more information.
+In addition to `--prepipe gunzip`, you can now use the `--gzin` flag. In fact, if your files end in `.gz` you don't even need to do that -- Miller will autodetect by file extension and automatically uncompress `mlr --csv cat foo.csv.gz`. Similarly for `.z`, `.bz2`, and `.zst` files.  Please see the page on [Compressed data](reference-main-compressed-data.md) for more information.
 
 ### Support for reading web URLs
 
diff --git a/docs/src/new-in-miller-6.md.in b/docs/src/new-in-miller-6.md.in
index 43ea44d905..c450a96224 100644
--- a/docs/src/new-in-miller-6.md.in
+++ b/docs/src/new-in-miller-6.md.in
@@ -125,7 +125,7 @@ the `TZ` environment variable. Please see [DSL datetime/timezone functions](refe
 
 ### In-process support for compressed input
 
-In addition to `--prepipe gunzip`, you can now use the `--gzin` flag. In fact, if your files end in `.gz` you don't even need to do that -- Miller will autodetect by file extension and automatically uncompress `mlr --csv cat foo.csv.gz`. Similarly for `.z` and `.bz2` files.  Please see the page on [Compressed data](reference-main-compressed-data.md) for more information.
+In addition to `--prepipe gunzip`, you can now use the `--gzin` flag. In fact, if your files end in `.gz` you don't even need to do that -- Miller will autodetect by file extension and automatically uncompress `mlr --csv cat foo.csv.gz`. Similarly for `.z`, `.bz2`, and `.zst` files.  Please see the page on [Compressed data](reference-main-compressed-data.md) for more information.
 
 ### Support for reading web URLs
 
diff --git a/test/input/medium.zst b/test/input/medium.zst
new file mode 100644
index 0000000000000000000000000000000000000000..f7b5c9a0d729b4df5c16d7bd6767108b4eab3005
GIT binary patch
literal 957
zcmV;u148^LwJ-eyScM$`N>daH7@#T30iBn9E!->i4=+nfDN{;GwEE#G7_I=70FnT2
zg;<7WuTcjK0tyC#L{!PCqsbI4v5N3db#lpQtCfj{=9@+y5~kT?3I_&#v=Q9TB&U`
zDl(5Y#K9I00vH?=ASf6JQ{%Q2+ecn1RWz2W5&F=;fJ4DRXzzL`#311Sh=KwGlY&}%
zQ6nyZC?qgg;UItk0Yia;Awk;Y5Rz45UGv={l;P`kRq}bs5*p5fWPUp_8yfq;;YQ-+
z8bxu10|NsC4x{N?GXF!?NP?;->0DmQ%ZCL$G!UJ)loqv5K?5Cu=+Wj2XhuUOqRlhigtwW5%JQJfT~@(o!Px42*BY0x~U
zm&dd-G2B-D2#cwTi}(e_ip`N32R;VB|mlMy9>n9OyXk;c}|~bc$k_|l?^ynLovO0
f=>7aYPLKWGbLsdn?+i^K;A1?UQbidh3(Fx>)!MFg

literal 0
HcmV?d00001