From a685120988dd0abcc4879c1a0be4f2c29bb434a0 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 17 Dec 2023 12:32:29 -0500 Subject: [PATCH] Rename internal regex functions --- docs/src/manpage.md | 37 +- docs/src/manpage.txt | 37 +- docs/src/reference-dsl-builtin-functions.md | 18 +- man/manpage.txt | 37 +- man/mlr.1 | 51 ++- pkg/bifs/regex.go | 6 +- pkg/dsl/cst/leaves.go | 2 +- pkg/input/record_reader.go | 4 +- pkg/input/record_reader_xtab.go | 4 +- pkg/lib/regex.go | 395 ++++++++++++-------- pkg/lib/regex_test.go | 8 +- pkg/runtime/state.go | 6 +- pkg/transformers/merge_fields.go | 2 +- pkg/transformers/rename.go | 4 +- 14 files changed, 374 insertions(+), 237 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index de7ce4b6fc..19cb2de072 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -220,18 +220,19 @@ MILLER(1) MILLER(1) is_numeric is_present is_string joink joinkv joinv json_parse json_stringify kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect - mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub - nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile - percentiles pow qnorm reduce regextract regextract_or_else rightpad round - roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime - select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita - splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime - strfntime_local strftime strftime_local string strip strlen strpntime - strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2 - sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate - typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement - urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - . 
- .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ + mapsum match matchx max maxlen md5 mean meaneb median mexp min minlen mmul + mode msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os + percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad + round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate + sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort + sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub + stddev strfntime strfntime_local strftime strftime_local string strip strlen + strpntime strpntime_local strptime strptime_local sub substr substr0 substr1 + sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper + truncate typeof unflatten unformat unformatx upntime uptime urand urand32 + urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % & + && * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | + || ~ 1mCOMMENTS-IN-DATA FLAGS0m Miller lets you put comments in your data, such as @@ -2650,6 +2651,16 @@ MILLER(1) MILLER(1) 1mmapsum0m (class=collections #args=variadic) With 0 args, returns empty map. With >= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'. + 1mmatch0m + (class=string #args=2) TODO: WRITE ME + Example: + TODO: WRITE ME + + 1mmatchx0m + (class=string #args=2) TODO: WRITE ME + Example: + TODO: WRITE ME + 1mmax0m (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values. 
@@ -3649,5 +3660,5 @@ MILLER(1) MILLER(1) - 2023-12-13 MILLER(1) + 2023-12-16 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index e7e3d35821..7f3a122af7 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -199,18 +199,19 @@ MILLER(1) MILLER(1) is_numeric is_present is_string joink joinkv joinv json_parse json_stringify kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect - mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub - nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile - percentiles pow qnorm reduce regextract regextract_or_else rightpad round - roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime - select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita - splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime - strfntime_local strftime strftime_local string strip strlen strpntime - strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2 - sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate - typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement - urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - . - .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? 
^ ^^ | || ~ + mapsum match matchx max maxlen md5 mean meaneb median mexp min minlen mmul + mode msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os + percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad + round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate + sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort + sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub + stddev strfntime strfntime_local strftime strftime_local string strip strlen + strpntime strpntime_local strptime strptime_local sub substr substr0 substr1 + sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper + truncate typeof unflatten unformat unformatx upntime uptime urand urand32 + urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % & + && * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | + || ~ 1mCOMMENTS-IN-DATA FLAGS0m Miller lets you put comments in your data, such as @@ -2629,6 +2630,16 @@ MILLER(1) MILLER(1) 1mmapsum0m (class=collections #args=variadic) With 0 args, returns empty map. With >= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'. + 1mmatch0m + (class=string #args=2) TODO: WRITE ME + Example: + TODO: WRITE ME + + 1mmatchx0m + (class=string #args=2) TODO: WRITE ME + Example: + TODO: WRITE ME + 1mmax0m (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values. @@ -3628,4 +3639,4 @@ MILLER(1) MILLER(1) - 2023-12-13 MILLER(1) + 2023-12-16 MILLER(1) diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md index 8c3b496407..d391e83419 100644 --- a/docs/src/reference-dsl-builtin-functions.md +++ b/docs/src/reference-dsl-builtin-functions.md @@ -75,7 +75,7 @@ is 2. 
Unary operators such as `!` and `~` show argument-count of 1; the ternary * [**Higher-order-functions functions**](#higher-order-functions-functions): [any](#any), [apply](#apply), [every](#every), [fold](#fold), [reduce](#reduce), [select](#select), [sort](#sort). * [**Math functions**](#math-functions): [abs](#abs), [acos](#acos), [acosh](#acosh), [asin](#asin), [asinh](#asinh), [atan](#atan), [atan2](#atan2), [atanh](#atanh), [cbrt](#cbrt), [ceil](#ceil), [cos](#cos), [cosh](#cosh), [erf](#erf), [erfc](#erfc), [exp](#exp), [expm1](#expm1), [floor](#floor), [invqnorm](#invqnorm), [log](#log), [log10](#log10), [log1p](#log1p), [logifit](#logifit), [max](#max), [min](#min), [qnorm](#qnorm), [round](#round), [roundm](#roundm), [sgn](#sgn), [sin](#sin), [sinh](#sinh), [sqrt](#sqrt), [tan](#tan), [tanh](#tanh), [urand](#urand), [urand32](#urand32), [urandelement](#urandelement), [urandint](#urandint), [urandrange](#urandrange). * [**Stats functions**](#stats-functions): [antimode](#antimode), [count](#count), [distinct_count](#distinct_count), [kurtosis](#kurtosis), [maxlen](#maxlen), [mean](#mean), [meaneb](#meaneb), [median](#median), [minlen](#minlen), [mode](#mode), [null_count](#null_count), [percentile](#percentile), [percentiles](#percentiles), [skewness](#skewness), [sort_collection](#sort_collection), [stddev](#stddev), [sum](#sum), [sum2](#sum2), [sum3](#sum3), [sum4](#sum4), [variance](#variance). 
-* [**String functions**](#string-functions): [capitalize](#capitalize), [clean_whitespace](#clean_whitespace), [collapse_whitespace](#collapse_whitespace), [contains](#contains), [format](#format), [gssub](#gssub), [gsub](#gsub), [index](#index), [latin1_to_utf8](#latin1_to_utf8), [leftpad](#leftpad), [lstrip](#lstrip), [regextract](#regextract), [regextract_or_else](#regextract_or_else), [rightpad](#rightpad), [rstrip](#rstrip), [ssub](#ssub), [strip](#strip), [strlen](#strlen), [sub](#sub), [substr](#substr), [substr0](#substr0), [substr1](#substr1), [tolower](#tolower), [toupper](#toupper), [truncate](#truncate), [unformat](#unformat), [unformatx](#unformatx), [utf8_to_latin1](#utf8_to_latin1), [\.](#dot). +* [**String functions**](#string-functions): [capitalize](#capitalize), [clean_whitespace](#clean_whitespace), [collapse_whitespace](#collapse_whitespace), [contains](#contains), [format](#format), [gssub](#gssub), [gsub](#gsub), [index](#index), [latin1_to_utf8](#latin1_to_utf8), [leftpad](#leftpad), [lstrip](#lstrip), [match](#match), [matchx](#matchx), [regextract](#regextract), [regextract_or_else](#regextract_or_else), [rightpad](#rightpad), [rstrip](#rstrip), [ssub](#ssub), [strip](#strip), [strlen](#strlen), [sub](#sub), [substr](#substr), [substr0](#substr0), [substr1](#substr1), [tolower](#tolower), [toupper](#toupper), [truncate](#truncate), [unformat](#unformat), [unformatx](#unformatx), [utf8_to_latin1](#utf8_to_latin1), [\.](#dot). * [**System functions**](#system-functions): [exec](#exec), [hostname](#hostname), [os](#os), [system](#system), [version](#version). 
* [**Time functions**](#time-functions): [dhms2fsec](#dhms2fsec), [dhms2sec](#dhms2sec), [fsec2dhms](#fsec2dhms), [fsec2hms](#fsec2hms), [gmt2localtime](#gmt2localtime), [gmt2nsec](#gmt2nsec), [gmt2sec](#gmt2sec), [hms2fsec](#hms2fsec), [hms2sec](#hms2sec), [localtime2gmt](#localtime2gmt), [localtime2nsec](#localtime2nsec), [localtime2sec](#localtime2sec), [nsec2gmt](#nsec2gmt), [nsec2gmtdate](#nsec2gmtdate), [nsec2localdate](#nsec2localdate), [nsec2localtime](#nsec2localtime), [sec2dhms](#sec2dhms), [sec2gmt](#sec2gmt), [sec2gmtdate](#sec2gmtdate), [sec2hms](#sec2hms), [sec2localdate](#sec2localdate), [sec2localtime](#sec2localtime), [strfntime](#strfntime), [strfntime_local](#strfntime_local), [strftime](#strftime), [strftime_local](#strftime_local), [strpntime](#strpntime), [strpntime_local](#strpntime_local), [strptime](#strptime), [strptime_local](#strptime_local), [sysntime](#sysntime), [systime](#systime), [systimeint](#systimeint), [upntime](#upntime), [uptime](#uptime). * [**Typing functions**](#typing-functions): [asserting_absent](#asserting_absent), [asserting_array](#asserting_array), [asserting_bool](#asserting_bool), [asserting_boolean](#asserting_boolean), [asserting_empty](#asserting_empty), [asserting_empty_map](#asserting_empty_map), [asserting_error](#asserting_error), [asserting_float](#asserting_float), [asserting_int](#asserting_int), [asserting_map](#asserting_map), [asserting_nonempty_map](#asserting_nonempty_map), [asserting_not_array](#asserting_not_array), [asserting_not_empty](#asserting_not_empty), [asserting_not_map](#asserting_not_map), [asserting_not_null](#asserting_not_null), [asserting_null](#asserting_null), [asserting_numeric](#asserting_numeric), [asserting_present](#asserting_present), [asserting_string](#asserting_string), [is_absent](#is_absent), [is_array](#is_array), [is_bool](#is_bool), [is_boolean](#is_boolean), [is_empty](#is_empty), [is_empty_map](#is_empty_map), [is_error](#is_error), [is_float](#is_float), 
[is_int](#is_int), [is_map](#is_map), [is_nan](#is_nan), [is_nonempty_map](#is_nonempty_map), [is_not_array](#is_not_array), [is_not_empty](#is_not_empty), [is_not_map](#is_not_map), [is_not_null](#is_not_null), [is_null](#is_null), [is_numeric](#is_numeric), [is_present](#is_present), [is_string](#is_string), [typeof](#typeof). @@ -1296,6 +1296,22 @@ lstrip (class=string #args=1) Strip leading whitespace from string. +### match +
+match  (class=string #args=2) TODO: WRITE ME
+Example:
+TODO: WRITE ME
+
+ + +### matchx +
+matchx  (class=string #args=2) TODO: WRITE ME
+Example:
+TODO: WRITE ME
+
+ + ### regextract
 regextract  (class=string #args=2) Extracts a substring (the first, if there are multiple matches), matching a regular expression, from the input. Does not use capture groups; see also the =~ operator which does.
diff --git a/man/manpage.txt b/man/manpage.txt
index e7e3d35821..7f3a122af7 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -199,18 +199,19 @@ MILLER(1)                                                            MILLER(1)
        is_numeric is_present is_string joink joinkv joinv json_parse json_stringify
        kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec
        localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect
-       mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub
-       nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile
-       percentiles pow qnorm reduce regextract regextract_or_else rightpad round
-       roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime
-       select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita
-       splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime
-       strfntime_local strftime strftime_local string strip strlen strpntime
-       strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2
-       sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate
-       typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement
-       urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - .
-       .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~
+       mapsum match matchx max maxlen md5 mean meaneb median mexp min minlen mmul
+       mode msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os
+       percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad
+       round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate
+       sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort
+       sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub
+       stddev strfntime strfntime_local strftime strftime_local string strip strlen
+       strpntime strpntime_local strptime strptime_local sub substr substr0 substr1
+       sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper
+       truncate typeof unflatten unformat unformatx upntime uptime urand urand32
+       urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % &
+       && * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ |
+       || ~
 
 1mCOMMENTS-IN-DATA FLAGS0m
        Miller lets you put comments in your data, such as
@@ -2629,6 +2630,16 @@ MILLER(1)                                                            MILLER(1)
    1mmapsum0m
         (class=collections #args=variadic) With 0 args, returns empty map. With >= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'.
 
+   1mmatch0m
+        (class=string #args=2) TODO: WRITE ME
+       Example:
+       TODO: WRITE ME
+
+   1mmatchx0m
+        (class=string #args=2) TODO: WRITE ME
+       Example:
+       TODO: WRITE ME
+
    1mmax0m
         (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.
 
@@ -3628,4 +3639,4 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2023-12-13                         MILLER(1)
+                                  2023-12-16                         MILLER(1)
diff --git a/man/mlr.1 b/man/mlr.1
index 4d5ee4f5c7..4f0644ed76 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2023-12-13
+.\"      Date: 2023-12-16
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2023-12-13" "\ \&" "\ \&"
+.TH "MILLER" "1" "2023-12-16" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -246,18 +246,19 @@ is_nonempty_map is_not_array is_not_empty is_not_map is_not_null is_null
 is_numeric is_present is_string joink joinkv joinv json_parse json_stringify
 kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec
 localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect
-mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub
-nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile
-percentiles pow qnorm reduce regextract regextract_or_else rightpad round
-roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime
-select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita
-splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime
-strfntime_local strftime strftime_local string strip strlen strpntime
-strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2
-sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate
-typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement
-urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - .
-\&.* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~
+mapsum match matchx max maxlen md5 mean meaneb median mexp min minlen mmul
+mode msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os
+percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad
+round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate
+sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort
+sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub
+stddev strfntime strfntime_local strftime strftime_local string strip strlen
+strpntime strpntime_local strptime strptime_local sub substr substr0 substr1
+sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper
+truncate typeof unflatten unformat unformatx upntime uptime urand urand32
+urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % &
+&& * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ |
+|| ~
 .fi
 .if n \{\
 .RE
@@ -3938,6 +3939,28 @@ localtime2sec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906"
 .fi
 .if n \{\
 .RE
+.SS "match"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=string #args=2) TODO: WRITE ME
+Example:
+TODO: WRITE ME
+.fi
+.if n \{\
+.RE
+.SS "matchx"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=string #args=2) TODO: WRITE ME
+Example:
+TODO: WRITE ME
+.fi
+.if n \{\
+.RE
 .SS "max"
 .if n \{\
 .RS 0
diff --git a/pkg/bifs/regex.go b/pkg/bifs/regex.go
index 52cab9ac5e..74c0840f6a 100644
--- a/pkg/bifs/regex.go
+++ b/pkg/bifs/regex.go
@@ -81,7 +81,7 @@ func BIF_sub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval {
 	sregex := input2.AcquireStringValue()
 	replacement := input3.AcquireStringValue()
 
-	stringOutput := lib.RegexSub(input, sregex, replacement)
+	stringOutput := lib.RegexStringSub(input, sregex, replacement)
 	return mlrval.FromString(stringOutput)
 }
 
@@ -111,7 +111,7 @@ func BIF_gsub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval {
 	sregex := input2.AcquireStringValue()
 	replacement := input3.AcquireStringValue()
 
-	stringOutput := lib.RegexGsub(input, sregex, replacement)
+	stringOutput := lib.RegexStringGsub(input, sregex, replacement)
 	return mlrval.FromString(stringOutput)
 }
 
@@ -129,7 +129,7 @@ func BIF_string_matches_regexp(input1, input2 *mlrval.Mlrval) (retval *mlrval.Ml
 		return mlrval.FromNotStringError("=~", input2), nil
 	}
 
-	boolOutput, captures := lib.RegexMatches(input1string, input2.AcquireStringValue())
+	boolOutput, captures := lib.RegexStringMatchWithCaptures(input1string, input2.AcquireStringValue())
 	return mlrval.FromBool(boolOutput), captures
 }
 
diff --git a/pkg/dsl/cst/leaves.go b/pkg/dsl/cst/leaves.go
index 08b3200a98..c0b4d88757 100644
--- a/pkg/dsl/cst/leaves.go
+++ b/pkg/dsl/cst/leaves.go
@@ -266,7 +266,7 @@ func (root *RootNode) BuildStringLiteralNode(literal string) IEvaluable {
 	// RegexLiteralNode.  See also https://github.com/johnkerl/miller/issues/297.
 	literal = lib.UnbackslashStringLiteral(literal)
 
-	hasCaptures, replacementCaptureMatrix := lib.RegexReplacementHasCaptures(literal)
+	hasCaptures, replacementCaptureMatrix := lib.ReplacementHasCaptures(literal)
 	if !hasCaptures {
 		return &StringLiteralNode{
 			literal: mlrval.FromString(literal),
diff --git a/pkg/input/record_reader.go b/pkg/input/record_reader.go
index 2802019363..096060e629 100644
--- a/pkg/input/record_reader.go
+++ b/pkg/input/record_reader.go
@@ -158,7 +158,7 @@ type tIPSRegexSplitter struct {
 }
 
 func (s *tIPSRegexSplitter) Split(input string) []string {
-	return lib.RegexSplitString(s.ipsRegex, input, 2)
+	return lib.RegexCompiledSplitString(s.ipsRegex, input, 2)
 }
 
 // IFieldSplitter splits a string into pieces, e.g. for IFS.
@@ -193,5 +193,5 @@ type tIFSRegexSplitter struct {
 }
 
 func (s *tIFSRegexSplitter) Split(input string) []string {
-	return lib.RegexSplitString(s.ifsRegex, input, -1)
+	return lib.RegexCompiledSplitString(s.ifsRegex, input, -1)
 }
diff --git a/pkg/input/record_reader_xtab.go b/pkg/input/record_reader_xtab.go
index 0cfc74b25d..74d8dac417 100644
--- a/pkg/input/record_reader_xtab.go
+++ b/pkg/input/record_reader_xtab.go
@@ -304,7 +304,7 @@ type tXTABIPSSplitter struct {
 // which we need to produce just a pair of items -- a key and a value -- delimited by one or more
 // IPS. For exaemple, with IPS being a space, in 'abc     123' we need to get key 'abc' and value
 // '123'; for 'abc    123 456' we need key 'abc' and value '123 456'.  It's super-elegant to simply
-// regex-split the line like 'kv = lib.RegexSplitString(reader.readerOptions.IPSRegex, line, 2)' --
+// regex-split the line like 'kv = lib.RegexCompiledSplitString(reader.readerOptions.IPSRegex, line, 2)' --
 // however, that's 3x slower than the current implementation. It turns out regexes are great
 // but we should use them only when we must, since they are expensive.
 func (s *tXTABIPSSplitter) Split(input string) (key, value string, err error) {
@@ -358,7 +358,7 @@ type tXTABIPSRegexSplitter struct {
 }
 
 func (s *tXTABIPSRegexSplitter) Split(input string) (key, value string, err error) {
-	kv := lib.RegexSplitString(s.ipsRegex, input, 2)
+	kv := lib.RegexCompiledSplitString(s.ipsRegex, input, 2)
 	if len(kv) == 0 {
 		return "", "", fmt.Errorf("internal coding error in XTAB reader")
 	} else if len(kv) == 1 {
diff --git a/pkg/lib/regex.go b/pkg/lib/regex.go
index 3bab040360..cabbc1510f 100644
--- a/pkg/lib/regex.go
+++ b/pkg/lib/regex.go
@@ -1,5 +1,5 @@
 // ================================================================
-// Support for regexes in Miller.
+// Support for regular expressions in Miller.
 //
 // * By and large we use the Go library.
 //
@@ -13,17 +13,24 @@
 //       $y = "\2:\1";
 //     }
 //   where the '=~' sets the captures and the "\2:\1" uses them.  (Note that
-//   https://github.com/johnkerl/miller/issues/388 has a better suggestion
-//   which would make the captures explicit as variables, rather than implicit
-//   within CST state -- regardless, the current syntax will still be supported
-//   for backward compatibility and so is here to stay.) Here we make use of Go
-//   regexp-library functions to write to, and then later interpolate from, a
-//   captures array which is stored within CST state. (See the `runtime.State`
-//   object.)
+//   https://github.com/johnkerl/miller/issues/388 has a better suggestion which would make the
+//   captures explicit as variables, rather than implicit within CST state: this is implemented by
+//   the `match` and `matchx` DSL functions.  Regardless, the `=~` syntax will still be supported
+//   for backward compatibility and so is here to stay.) Here we make use of Go regexp-library
+//   functions to write to, and then later interpolate from, a captures array which is stored within
+//   CST state. (See the `runtime.State` object.)
 //
 // * "\0" is for a full match; "\1" .. "\9" are for submatch cqptures. E.g.
 //   if $x is "foobarbaz" and the regex is "foo(.)(..)baz", then "\0" is
 //   "foobarbaz", "\1" is "b", "\2" is "ar", and "\3".."\9" are "".
+//
+// * Naming:
+//
+//   o "regexp" and "Regexp" are used for the Go library and its data structure, respectively;
+//
+//   o "regex" is used for regular-expression strings following Miller's idiosyncratic syntax and
+//     semantics as described above.
+//
 // ================================================================
 
 package lib
@@ -34,6 +41,7 @@ import (
 	"os"
 	"regexp"
 	"strings"
+	"sync"
 )
 
 // captureDetector is used to see if a string literal interpolates previous
@@ -44,20 +52,54 @@ var captureDetector = regexp.MustCompile(`\\[0-9]`)
 // "\2:\1" so they don't need to be recomputed on every record.
 var captureSplitter = regexp.MustCompile(`(\\[0-9])`)
 
-// CompileMillerRegex wraps Go regex-compile with some Miller-specific syntax
-// which predate the port of Miller from C to Go.  Miller regexes use a final
-// 'i' to indicate case-insensitivity; Go regexes use an initial "(?i)".
+// regexpCache maps regex strings to their compiled forms; it is populated by regexpCompileCached.
+var regexpCache map[string]*regexp.Regexp // nil until the first insert
+
+const cacheMaxSize = 1000 // size threshold at which regexpCompileCached stops adding entries
+
+var cacheMutex sync.Mutex // held by regexpCompileCached while it writes regexpCache
+
+// regexpCompileCached returns the compiled form of s, consulting regexpCache first so that callers
+// can pass regex strings on every record without recompiling them each time. Compile errors are
+// returned to the caller and are never cached.
+//
+// Regarding cache size: in nominal use, regex strings come from Miller DSL statements, so there are
+// only a handful and the cache stays bounded by the size of the user's DSL code. However, regex
+// strings can also come from record-field data. Rather than using an LRU cache, for simplicity we
+// stop adding entries once the cache holds cacheMaxSize of them; any extras are recompiled on each
+// call. cacheMutex is held for the whole call -- the lookup as well as the insert -- so that
+// concurrent callers never read the map while another caller is writing it.
+func regexpCompileCached(s string) (*regexp.Regexp, error) {
+	cacheMutex.Lock()
+	defer cacheMutex.Unlock()
+	if r, ok := regexpCache[s]; ok { // a lookup on the still-nil map simply misses
+		return r, nil
+	}
+	r, err := regexp.Compile(s)
+	if err != nil || len(regexpCache) >= cacheMaxSize { // bad regex, or cache full: don't store
+		return r, err
+	}
+	if regexpCache == nil {
+		regexpCache = make(map[string]*regexp.Regexp)
+	}
+	regexpCache[s] = r
+	return r, nil
+}
+
+// CompileMillerRegex wraps Go regex-compile with some Miller-specific syntax which predates the
+// port of Miller from C to Go.  Miller regexes use a final 'i' to indicate case-insensitivity; Go
+// regexes use an initial "(?i)".
 //
-// (See also mlr.bnf where we specify which things can be backslash-escaped
-// without a syntax error at parse time.)
+// (See also mlr.bnf where we specify which things can be backslash-escaped without a syntax error
+// at parse time.)
 //
-// * If the regex_string is of the form a.*b, compiles it case-sensisitively.
-// * If the regex_string is of the form "a.*b", compiles a.*b case-sensisitively.
+// * If the regex_string is of the form a.*b, compiles it case-sensitively.
+// * If the regex_string is of the form "a.*b", compiles a.*b case-sensitively.
 // * If the regex_string is of the form "a.*b"i, compiles a.*b case-insensitively.
 func CompileMillerRegex(regexString string) (*regexp.Regexp, error) {
 	n := len(regexString)
 	if n < 2 {
-		return regexp.Compile(regexString)
+		return regexpCompileCached(regexString)
 	}
 
 	// TODO: rethink this. This will strip out things people have entered, e.g. "\"...\"".
@@ -68,20 +110,20 @@ func CompileMillerRegex(regexString string) (*regexp.Regexp, error) {
 	// literals) and from verbs (like cut -r or having-fields).
 
 	if strings.HasPrefix(regexString, "\"") && strings.HasSuffix(regexString, "\"") {
-		return regexp.Compile(regexString[1 : n-1])
+		return regexpCompileCached(regexString[1 : n-1])
 	}
 	if strings.HasPrefix(regexString, "/") && strings.HasSuffix(regexString, "/") {
-		return regexp.Compile(regexString[1 : n-1])
+		return regexpCompileCached(regexString[1 : n-1])
 	}
 
 	if strings.HasPrefix(regexString, "\"") && strings.HasSuffix(regexString, "\"i") {
-		return regexp.Compile("(?i)" + regexString[1:n-2])
+		return regexpCompileCached("(?i)" + regexString[1:n-2])
 	}
 	if strings.HasPrefix(regexString, "/") && strings.HasSuffix(regexString, "/i") {
-		return regexp.Compile("(?i)" + regexString[1:n-2])
+		return regexpCompileCached("(?i)" + regexString[1:n-2])
 	}
 
-	return regexp.Compile(regexString)
+	return regexpCompileCached(regexString)
 }
 
 // CompileMillerRegexOrDie wraps CompileMillerRegex. Usually in Go we want to
@@ -110,7 +152,7 @@ func CompileMillerRegexesOrDie(regexStrings []string) []*regexp.Regexp {
 // In Go as in all languages I'm aware of with a string-split, "a,b,c" splits
 // on "," to ["a", "b", "c" and "a" splits to ["a"], both of which are fine --
 // but "" splits to [""] when I wish it were []. This function does the latter.
-func RegexSplitString(regex *regexp.Regexp, input string, n int) []string {
+func RegexCompiledSplitString(regex *regexp.Regexp, input string, n int) []string {
 	if input == "" {
 		return make([]string, 0)
 	} else {
@@ -118,31 +160,140 @@ func RegexSplitString(regex *regexp.Regexp, input string, n int) []string {
 	}
 }
 
-// MakeEmptyRegexCaptures is for initial CST state at the start of executing
-// the DSL expression for the current record.  Even if '$x =~ "(..)_(...)" set
-// "\1" and "\2" on the previous record, at start of processing for the current
-// record we need to start with a clean slate.
-func MakeEmptyRegexCaptures() []string {
-	return nil
+// RegexStringSub implements the `sub` DSL function.
+func RegexStringSub(
+	input string,
+	sregex string,
+	replacement string,
+) string {
+	regex := CompileMillerRegexOrDie(sregex)
+	_, replacementCaptureMatrix := ReplacementHasCaptures(replacement)
+	return RegexCompiledSub(input, regex, replacement, replacementCaptureMatrix)
 }
 
-// RegexReplacementHasCaptures is used by the CST builder to see if
-// string-literal is like "foo bar" or "foo \1 bar" -- in the latter case it
-// needs to retain the compiled offsets-matrix information.
-func RegexReplacementHasCaptures(
+// RegexCompiledSub is the same as RegexStringSub but with compiled regex and
+// replacement strings.
+func RegexCompiledSub(
+	input string,
+	regex *regexp.Regexp,
 	replacement string,
-) (
-	hasCaptures bool,
-	matrix [][]int,
-) {
-	if captureDetector.MatchString(replacement) {
-		return true, captureSplitter.FindAllSubmatchIndex([]byte(replacement), -1)
-	} else {
-		return false, nil
+	replacementCaptureMatrix [][]int,
+) string {
+	return regexCompiledSubOrGsub(input, regex, replacement, replacementCaptureMatrix, true)
+}
+
+// RegexStringGsub implements the `gsub` DSL function.
+func RegexStringGsub(
+	input string,
+	sregex string,
+	replacement string,
+) string {
+	regex := CompileMillerRegexOrDie(sregex)
+	_, replacementCaptureMatrix := ReplacementHasCaptures(replacement)
+	return regexCompiledSubOrGsub(input, regex, replacement, replacementCaptureMatrix, false)
+}
+
+// regexCompiledSubOrGsub is the implementation for `sub`/`gsub` with compiled regex
+// and replacement strings.
+func regexCompiledSubOrGsub(
+	input string,
+	regex *regexp.Regexp,
+	replacement string,
+	replacementCaptureMatrix [][]int,
+	breakOnFirst bool,
+) string {
+	matrix := regex.FindAllSubmatchIndex([]byte(input), -1)
+	if matrix == nil || len(matrix) == 0 {
+		return input
 	}
+
+	// Example return value from FindAllSubmatchIndex with input
+	// "...ab_cde...fg_hij..." and regex "(..)_(...)":
+	//
+	// Matrix is [][]int{
+	//   []int{3, 9, 3, 5, 6, 9},
+	//   []int{12, 18, 12, 14, 15, 18},
+	// }
+	//
+	// * 3-9 is for the entire match "ab_cde"
+	// * 3-5 is for the first capture "ab"
+	// * 6-9 is for the second capture "cde"
+	//
+	// * 12-18 is for the entire match "fg_hij"
+	// * 12-14 is for the first capture "fg"
+	// * 15-18 is for the second capture "hij"
+
+	var buffer bytes.Buffer
+	nonMatchStartIndex := 0
+
+	for _, row := range matrix {
+		buffer.WriteString(input[nonMatchStartIndex:row[0]])
+
+		// "\0" .. "\9"
+		captures := make([]string, 10)
+		di := 0
+		n := len(row)
+		for si := 0; si < n && di <= 9; si += 2 {
+			start := row[si]
+			end := row[si+1]
+			if start >= 0 && end >= 0 {
+				captures[di] = input[start:end]
+			}
+			di += 1
+		}
+
+		// If the replacement had no captures, e.g. "xyz", we would insert it
+		//
+		//   "..."     -> "..."
+		//   "ab_cde"  -> "xyz"   --- here
+		//   "..."     -> "..."
+		//   "fg_hij"  -> "xyz"   --- and here
+		//   "..."     -> "..."
+		//
+		// using buffer.WriteString(replacement). However, this function exists
+		// to handle the case when the replacement string has captures like
+		// "\2:\1", so we need to produce
+		//
+		//   "..."     -> "..."
+		//   "ab_cde"  -> "cde:ab"   --- here
+		//   "..."     -> "..."
+		//   "fg_hij"  -> "hij:fg"   --- and here
+		//   "..."     -> "..."
+		updatedReplacement := InterpolateCaptures(
+			replacement,
+			replacementCaptureMatrix,
+			captures,
+		)
+		buffer.WriteString(updatedReplacement)
+
+		nonMatchStartIndex = row[1]
+		if breakOnFirst {
+			break
+		}
+	}
+
+	buffer.WriteString(input[nonMatchStartIndex:])
+	return buffer.String()
 }
 
-// RegexMatches implements the =~ DSL operator. The captures are stored in DSL
+// RegexStringMatchSimple is for simple boolean return without any substring captures.
+func RegexStringMatchSimple(
+	input string,
+	sregex string,
+) bool {
+	regex := CompileMillerRegexOrDie(sregex)
+	return RegexCompiledMatchSimple(input, regex)
+}
+
+// RegexCompiledMatchSimple is for simple boolean return without any substring captures.
+func RegexCompiledMatchSimple(
+	input string,
+	regex *regexp.Regexp,
+) bool {
+	return regex.Match([]byte(input))
+}
+
+// RegexStringMatchWithCaptures implements the =~ DSL operator. The captures are stored in DSL
 // state and may be used by a DSL statement after the =~. For example, in
 //
 //	sub($a, "(..)_(...)", "\1:\2")
@@ -157,9 +308,9 @@ func RegexReplacementHasCaptures(
 //	}
 //
 // and the =~ callsite doesn't know if captures will be used or not. So,
-// RegexMatches always returns the captures array. It is stored within the CST
+// RegexStringMatchWithCaptures always returns the captures array. It is stored within the CST
 // state.
-func RegexMatches(
+func RegexStringMatchWithCaptures(
 	input string,
 	sregex string,
 ) (
@@ -167,14 +318,14 @@ func RegexMatches(
 	capturesOneUp []string,
 ) {
 	regex := CompileMillerRegexOrDie(sregex)
-	return RegexMatchesCompiled(input, regex)
+	return RegexCompiledMatchWithCaptures(input, regex)
 }
 
-// RegexMatchesCompiled is the implementation for the =~ operator.  Without
+// RegexCompiledMatchWithCaptures is the implementation for the =~ operator.  Without
 // Miller-style regex captures this would a simple one-line
 // regex.MatchString(input). However, we return the captures array for the
 // benefit of subsequent references to "\0".."\9".
-func RegexMatchesCompiled(
+func RegexCompiledMatchWithCaptures(
 	input string,
 	regex *regexp.Regexp,
 ) (bool, []string) {
@@ -228,17 +379,47 @@ func RegexMatchesCompiled(
 	return true, captures
 }
 
+// MakeEmptyCaptures is for initial CST state at the start of executing the DSL expression for the
+// current record.  Even if '$x =~ "(..)_(...)"' set "\1" and "\2" on the previous record, at start
+// of processing for the current record we need to start with a clean slate. This is in support of
+// CST state, which `=~` semantics requires.
+func MakeEmptyCaptures() []string {
+	return nil
+}
+
+// ReplacementHasCaptures is used by the CST builder to see if a string literal is like "foo bar" or
+// "foo \1 bar" -- in the latter case it needs to retain the compiled offsets-matrix information.
+// This is in support of CST state, which `=~` semantics requires.
+func ReplacementHasCaptures(
+	replacement string,
+) (
+	hasCaptures bool,
+	matrix [][]int,
+) {
+	if captureDetector.MatchString(replacement) {
+		return true, captureSplitter.FindAllSubmatchIndex([]byte(replacement), -1)
+	} else {
+		return false, nil
+	}
+}
+
 // InterpolateCaptures example:
-//   - Input $x is "ab_cde"
+//
+// * Input $x is "ab_cde"
+//
 //   - DSL expression
 //     if ($x =~ "(..)_(...)") {
 //     ... other lines of code ...
 //     $y = "\2:\1";
 //     }
-//   - InterpolateCaptures is used on the evaluation of "\2:\1"
-//   - replacementString is "\2:\1"
+//
+// * InterpolateCaptures is used on the evaluation of "\2:\1"
+//
+// * replacementString is "\2:\1"
+//
 //   - replacementMatrix contains precomputed/cached offsets for the "\2" and
 //     "\1" substrings within "\2:\1"
+//
 //   - captures has slot 0 being "ab_cde" (for "\0"), slot 1 being "ab" (for "\1"),
 //     slot 2 being "cde" (for "\2"), and slots 3-9 being "".
 func InterpolateCaptures(
@@ -268,119 +449,3 @@ func InterpolateCaptures(
 
 	return buffer.String()
 }
-
-// RegexSub implements the sub DSL function.
-func RegexSub(
-	input string,
-	sregex string,
-	replacement string,
-) string {
-	regex := CompileMillerRegexOrDie(sregex)
-	_, replacementCaptureMatrix := RegexReplacementHasCaptures(replacement)
-	return RegexSubCompiled(input, regex, replacement, replacementCaptureMatrix)
-}
-
-// RegexSubCompiled is the same as RegexSub but with compiled regex and
-// replacement strings.
-func RegexSubCompiled(
-	input string,
-	regex *regexp.Regexp,
-	replacement string,
-	replacementCaptureMatrix [][]int,
-) string {
-	return regexSubGsubCompiled(input, regex, replacement, replacementCaptureMatrix, true)
-}
-
-// RegexGsub implements the gsub DSL function.
-func RegexGsub(
-	input string,
-	sregex string,
-	replacement string,
-) string {
-	regex := CompileMillerRegexOrDie(sregex)
-	_, replacementCaptureMatrix := RegexReplacementHasCaptures(replacement)
-	return regexSubGsubCompiled(input, regex, replacement, replacementCaptureMatrix, false)
-}
-
-// regexSubGsubCompiled is the implementation for sub/gsub with compilex regex
-// and replacement strings.
-func regexSubGsubCompiled(
-	input string,
-	regex *regexp.Regexp,
-	replacement string,
-	replacementCaptureMatrix [][]int,
-	breakOnFirst bool,
-) string {
-	matrix := regex.FindAllSubmatchIndex([]byte(input), -1)
-	if matrix == nil || len(matrix) == 0 {
-		return input
-	}
-
-	// Example return value from FindAllSubmatchIndex with input
-	// "...ab_cde...fg_hij..." and regex "(..)_(...)":
-	//
-	// Matrix is [][]int{
-	//   []int{3, 9, 3, 5, 6, 9},
-	//   []int{12, 18, 12, 14, 15, 18},
-	// }
-	//
-	// * 3-9 is for the entire match "ab_cde"
-	// * 3-5 is for the first capture "ab"
-	// * 6-9 is for the second capture "cde"
-	//
-	// * 12-18 is for the entire match "fg_hij"
-	// * 12-14 is for the first capture "fg"
-	// * 15-18 is for the second capture "hij"
-
-	var buffer bytes.Buffer
-	nonMatchStartIndex := 0
-
-	for _, row := range matrix {
-		buffer.WriteString(input[nonMatchStartIndex:row[0]])
-
-		// "\0" .. "\9"
-		captures := make([]string, 10)
-		di := 0
-		n := len(row)
-		for si := 0; si < n && di <= 9; si += 2 {
-			start := row[si]
-			end := row[si+1]
-			if start >= 0 && end >= 0 {
-				captures[di] = input[start:end]
-			}
-			di += 1
-		}
-
-		// If the replacement had no captures, e.g. "xyz", we would insert it
-		//
-		//   "..."     -> "..."
-		//   "ab_cde"  -> "xyz"   --- here
-		//   "..."     -> "..."
-		//   "fg_hij"  -> "xyz"   --- and here
-		//   "..."     -> "..."
-		//
-		// using buffer.WriteString(replacement). However, this function exists
-		// to handle the case when the replacement string has captures like
-		// "\2:\1", so we need to produce
-		//
-		//   "..."     -> "..."
-		//   "ab_cde"  -> "cde:ab"   --- here
-		//   "..."     -> "..."
-		//   "fg_hij"  -> "hij:fg"   --- and here
-		//   "..."     -> "..."
-		updatedReplacement := InterpolateCaptures(
-			replacement,
-			replacementCaptureMatrix,
-			captures,
-		)
-		buffer.WriteString(updatedReplacement)
-
-		nonMatchStartIndex = row[1]
-		if breakOnFirst {
-			break
-		}
-	}
-
-	buffer.WriteString(input[nonMatchStartIndex:])
-	return buffer.String()
-}
diff --git a/pkg/lib/regex_test.go b/pkg/lib/regex_test.go
index 961d73f8d5..d2a8f5f705 100644
--- a/pkg/lib/regex_test.go
+++ b/pkg/lib/regex_test.go
@@ -88,7 +88,7 @@ var dataForMatches = []tDataForMatches{
 
 func TestRegexReplacementHasCaptures(t *testing.T) {
 	for i, entry := range dataForHasCaptures {
-		actualHasCaptures, actualMatrix := RegexReplacementHasCaptures(entry.replacement)
+		actualHasCaptures, actualMatrix := ReplacementHasCaptures(entry.replacement)
 		if actualHasCaptures != entry.expectedHasCaptures {
 			t.Fatalf("case %d replacement \"%s\" expected %v got %v\n",
 				i, entry.replacement, entry.expectedHasCaptures, actualHasCaptures,
@@ -104,7 +104,7 @@ func TestRegexReplacementHasCaptures(t *testing.T) {
 
 func TestRegexSub(t *testing.T) {
 	for i, entry := range dataForSub {
-		actualOutput := RegexSub(entry.input, entry.sregex, entry.replacement)
+		actualOutput := RegexStringSub(entry.input, entry.sregex, entry.replacement)
 		if actualOutput != entry.expectedOutput {
 			t.Fatalf("case %d input \"%s\" sregex \"%s\" replacement \"%s\" expected \"%s\" got \"%s\"\n",
 				i, entry.input, entry.sregex, entry.replacement, entry.expectedOutput, actualOutput,
@@ -115,7 +115,7 @@ func TestRegexSub(t *testing.T) {
 
 func TestRegexGsub(t *testing.T) {
 	for i, entry := range dataForGsub {
-		actualOutput := RegexGsub(entry.input, entry.sregex, entry.replacement)
+		actualOutput := RegexStringGsub(entry.input, entry.sregex, entry.replacement)
 		if actualOutput != entry.expectedOutput {
 			t.Fatalf("case %d input \"%s\" sregex \"%s\" replacement \"%s\" expected \"%s\" got \"%s\"\n",
 				i, entry.input, entry.sregex, entry.replacement, entry.expectedOutput, actualOutput,
@@ -126,7 +126,7 @@ func TestRegexGsub(t *testing.T) {
 
 func TestRegexMatches(t *testing.T) {
 	for i, entry := range dataForMatches {
-		actualOutput, actualCaptures := RegexMatches(entry.input, entry.sregex)
+		actualOutput, actualCaptures := RegexStringMatchWithCaptures(entry.input, entry.sregex)
 		if actualOutput != entry.expectedOutput {
 			t.Fatalf("case %d input \"%s\" sregex \"%s\" expected %v got %v\n",
 				i, entry.input, entry.sregex, entry.expectedOutput, actualOutput,
diff --git a/pkg/runtime/state.go b/pkg/runtime/state.go
index e94fd4ce5f..820f40c3dd 100644
--- a/pkg/runtime/state.go
+++ b/pkg/runtime/state.go
@@ -43,8 +43,8 @@ func NewEmptyState(options *cli.TOptions, strictMode bool) *State {
 
 		// OutputRecordsAndContexts is assigned after construction
 
-		// See lib.MakeEmptyRegexCaptures for context.
-		RegexCaptures: lib.MakeEmptyRegexCaptures(),
+		// See lib.MakeEmptyCaptures for context.
+		RegexCaptures: lib.MakeEmptyCaptures(),
 		Options:       options,
 
 		StrictMode: strictMode,
@@ -57,5 +57,5 @@ func (state *State) Update(
 ) {
 	state.Inrec = inrec
 	state.Context = context
-	state.RegexCaptures = lib.MakeEmptyRegexCaptures()
+	state.RegexCaptures = lib.MakeEmptyCaptures()
 }
diff --git a/pkg/transformers/merge_fields.go b/pkg/transformers/merge_fields.go
index de1a555c3d..7ee2d9fade 100644
--- a/pkg/transformers/merge_fields.go
+++ b/pkg/transformers/merge_fields.go
@@ -479,7 +479,7 @@ func (tr *TransformerMergeFields) transformByCollapsing(
 			matched = valueFieldNameRegex.MatchString(pe.Key)
 			if matched {
 				// TODO: comment re matrix
-				shortName = lib.RegexSubCompiled(valueFieldName, valueFieldNameRegex, "", nil)
+				shortName = lib.RegexCompiledSub(valueFieldName, valueFieldNameRegex, "", nil)
 				break
 			}
 		}
diff --git a/pkg/transformers/rename.go b/pkg/transformers/rename.go
index e5f0658b8a..7880b6ead0 100644
--- a/pkg/transformers/rename.go
+++ b/pkg/transformers/rename.go
@@ -169,7 +169,7 @@ func NewTransformerRename(
 			regexString := pe.Key
 			regex := lib.CompileMillerRegexOrDie(regexString)
 			replacement := pe.Value.(string)
-			_, replacementCaptureMatrix := lib.RegexReplacementHasCaptures(replacement)
+			_, replacementCaptureMatrix := lib.ReplacementHasCaptures(replacement)
 			regexAndReplacement := tRegexAndReplacement{
 				regex:                    regex,
 				replacement:              replacement,
@@ -241,7 +241,7 @@ func (tr *TransformerRename) transformWithRegexes(
 						inrec.Rename(oldName, newName)
 					}
 				} else {
-					newName := lib.RegexSubCompiled(oldName, regex, replacement, replacementCaptureMatrix)
+					newName := lib.RegexCompiledSub(oldName, regex, replacement, replacementCaptureMatrix)
 					if newName != oldName {
 						inrec.Rename(oldName, newName)
 					}