From a685120988dd0abcc4879c1a0be4f2c29bb434a0 Mon Sep 17 00:00:00 2001
From: John Kerl <kerl.john.r@gmail.com>
Date: Sun, 17 Dec 2023 12:32:29 -0500
Subject: [PATCH] Rename internal regex functions

---
 docs/src/manpage.md                         |  37 +-
 docs/src/manpage.txt                        |  37 +-
 docs/src/reference-dsl-builtin-functions.md |  18 +-
 man/manpage.txt                             |  37 +-
 man/mlr.1                                   |  51 ++-
 pkg/bifs/regex.go                           |   6 +-
 pkg/dsl/cst/leaves.go                       |   2 +-
 pkg/input/record_reader.go                  |   4 +-
 pkg/input/record_reader_xtab.go             |   4 +-
 pkg/lib/regex.go                            | 395 ++++++++++++--------
 pkg/lib/regex_test.go                       |   8 +-
 pkg/runtime/state.go                        |   6 +-
 pkg/transformers/merge_fields.go            |   2 +-
 pkg/transformers/rename.go                  |   4 +-
 14 files changed, 374 insertions(+), 237 deletions(-)

diff --git a/docs/src/manpage.md b/docs/src/manpage.md
index de7ce4b6fc..19cb2de072 100644
--- a/docs/src/manpage.md
+++ b/docs/src/manpage.md
@@ -220,18 +220,19 @@ MILLER(1)                                                            MILLER(1)
        is_numeric is_present is_string joink joinkv joinv json_parse json_stringify
        kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec
        localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect
-       mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub
-       nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile
-       percentiles pow qnorm reduce regextract regextract_or_else rightpad round
-       roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime
-       select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita
-       splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime
-       strfntime_local strftime strftime_local string strip strlen strpntime
-       strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2
-       sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate
-       typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement
-       urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - .
-       .* .+ .- ./ / // &lt; &lt;&lt; &lt;= &lt;=&gt; == =~ &gt; &gt;= &gt;&gt; &gt;&gt;&gt; ?: ?? ??? ^ ^^ | || ~
+       mapsum match matchx max maxlen md5 mean meaneb median mexp min minlen mmul
+       mode msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os
+       percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad
+       round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate
+       sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort
+       sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub
+       stddev strfntime strfntime_local strftime strftime_local string strip strlen
+       strpntime strpntime_local strptime strptime_local sub substr substr0 substr1
+       sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper
+       truncate typeof unflatten unformat unformatx upntime uptime urand urand32
+       urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % &
+       && * ** + - . .* .+ .- ./ / // &lt; &lt;&lt; &lt;= &lt;=&gt; == =~ &gt; &gt;= &gt;&gt; &gt;&gt;&gt; ?: ?? ??? ^ ^^ |
+       || ~
 
 1mCOMMENTS-IN-DATA FLAGS0m
        Miller lets you put comments in your data, such as
@@ -2650,6 +2651,16 @@ MILLER(1)                                                            MILLER(1)
    1mmapsum0m
         (class=collections #args=variadic) With 0 args, returns empty map. With &gt;= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'.
 
+   1mmatch0m
+        (class=string #args=2) TODO: WRITE ME
+       Example:
+       TODO: WRITE ME
+
+   1mmatchx0m
+        (class=string #args=2) TODO: WRITE ME
+       Example:
+       TODO: WRITE ME
+
    1mmax0m
         (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.
 
@@ -3649,5 +3660,5 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2023-12-13                         MILLER(1)
+                                  2023-12-16                         MILLER(1)
 </pre>
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt
index e7e3d35821..7f3a122af7 100644
--- a/docs/src/manpage.txt
+++ b/docs/src/manpage.txt
@@ -199,18 +199,19 @@ MILLER(1)                                                            MILLER(1)
        is_numeric is_present is_string joink joinkv joinv json_parse json_stringify
        kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec
        localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect
-       mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub
-       nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile
-       percentiles pow qnorm reduce regextract regextract_or_else rightpad round
-       roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime
-       select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita
-       splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime
-       strfntime_local strftime strftime_local string strip strlen strpntime
-       strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2
-       sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate
-       typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement
-       urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - .
-       .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~
+       mapsum match matchx max maxlen md5 mean meaneb median mexp min minlen mmul
+       mode msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os
+       percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad
+       round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate
+       sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort
+       sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub
+       stddev strfntime strfntime_local strftime strftime_local string strip strlen
+       strpntime strpntime_local strptime strptime_local sub substr substr0 substr1
+       sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper
+       truncate typeof unflatten unformat unformatx upntime uptime urand urand32
+       urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % &
+       && * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ |
+       || ~
 
 1mCOMMENTS-IN-DATA FLAGS0m
        Miller lets you put comments in your data, such as
@@ -2629,6 +2630,16 @@ MILLER(1)                                                            MILLER(1)
    1mmapsum0m
         (class=collections #args=variadic) With 0 args, returns empty map. With >= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'.
 
+   1mmatch0m
+        (class=string #args=2) TODO: WRITE ME
+       Example:
+       TODO: WRITE ME
+
+   1mmatchx0m
+        (class=string #args=2) TODO: WRITE ME
+       Example:
+       TODO: WRITE ME
+
    1mmax0m
         (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.
 
@@ -3628,4 +3639,4 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2023-12-13                         MILLER(1)
+                                  2023-12-16                         MILLER(1)
diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md
index 8c3b496407..d391e83419 100644
--- a/docs/src/reference-dsl-builtin-functions.md
+++ b/docs/src/reference-dsl-builtin-functions.md
@@ -75,7 +75,7 @@ is 2. Unary operators such as `!` and `~` show argument-count of 1; the ternary
 * [**Higher-order-functions functions**](#higher-order-functions-functions):  [any](#any),  [apply](#apply),  [every](#every),  [fold](#fold),  [reduce](#reduce),  [select](#select),  [sort](#sort).
 * [**Math functions**](#math-functions):  [abs](#abs),  [acos](#acos),  [acosh](#acosh),  [asin](#asin),  [asinh](#asinh),  [atan](#atan),  [atan2](#atan2),  [atanh](#atanh),  [cbrt](#cbrt),  [ceil](#ceil),  [cos](#cos),  [cosh](#cosh),  [erf](#erf),  [erfc](#erfc),  [exp](#exp),  [expm1](#expm1),  [floor](#floor),  [invqnorm](#invqnorm),  [log](#log),  [log10](#log10),  [log1p](#log1p),  [logifit](#logifit),  [max](#max),  [min](#min),  [qnorm](#qnorm),  [round](#round),  [roundm](#roundm),  [sgn](#sgn),  [sin](#sin),  [sinh](#sinh),  [sqrt](#sqrt),  [tan](#tan),  [tanh](#tanh),  [urand](#urand),  [urand32](#urand32),  [urandelement](#urandelement),  [urandint](#urandint),  [urandrange](#urandrange).
 * [**Stats functions**](#stats-functions):  [antimode](#antimode),  [count](#count),  [distinct_count](#distinct_count),  [kurtosis](#kurtosis),  [maxlen](#maxlen),  [mean](#mean),  [meaneb](#meaneb),  [median](#median),  [minlen](#minlen),  [mode](#mode),  [null_count](#null_count),  [percentile](#percentile),  [percentiles](#percentiles),  [skewness](#skewness),  [sort_collection](#sort_collection),  [stddev](#stddev),  [sum](#sum),  [sum2](#sum2),  [sum3](#sum3),  [sum4](#sum4),  [variance](#variance).
-* [**String functions**](#string-functions):  [capitalize](#capitalize),  [clean_whitespace](#clean_whitespace),  [collapse_whitespace](#collapse_whitespace),  [contains](#contains),  [format](#format),  [gssub](#gssub),  [gsub](#gsub),  [index](#index),  [latin1_to_utf8](#latin1_to_utf8),  [leftpad](#leftpad),  [lstrip](#lstrip),  [regextract](#regextract),  [regextract_or_else](#regextract_or_else),  [rightpad](#rightpad),  [rstrip](#rstrip),  [ssub](#ssub),  [strip](#strip),  [strlen](#strlen),  [sub](#sub),  [substr](#substr),  [substr0](#substr0),  [substr1](#substr1),  [tolower](#tolower),  [toupper](#toupper),  [truncate](#truncate),  [unformat](#unformat),  [unformatx](#unformatx),  [utf8_to_latin1](#utf8_to_latin1),  [\.](#dot).
+* [**String functions**](#string-functions):  [capitalize](#capitalize),  [clean_whitespace](#clean_whitespace),  [collapse_whitespace](#collapse_whitespace),  [contains](#contains),  [format](#format),  [gssub](#gssub),  [gsub](#gsub),  [index](#index),  [latin1_to_utf8](#latin1_to_utf8),  [leftpad](#leftpad),  [lstrip](#lstrip),  [match](#match),  [matchx](#matchx),  [regextract](#regextract),  [regextract_or_else](#regextract_or_else),  [rightpad](#rightpad),  [rstrip](#rstrip),  [ssub](#ssub),  [strip](#strip),  [strlen](#strlen),  [sub](#sub),  [substr](#substr),  [substr0](#substr0),  [substr1](#substr1),  [tolower](#tolower),  [toupper](#toupper),  [truncate](#truncate),  [unformat](#unformat),  [unformatx](#unformatx),  [utf8_to_latin1](#utf8_to_latin1),  [\.](#dot).
 * [**System functions**](#system-functions):  [exec](#exec),  [hostname](#hostname),  [os](#os),  [system](#system),  [version](#version).
 * [**Time functions**](#time-functions):  [dhms2fsec](#dhms2fsec),  [dhms2sec](#dhms2sec),  [fsec2dhms](#fsec2dhms),  [fsec2hms](#fsec2hms),  [gmt2localtime](#gmt2localtime),  [gmt2nsec](#gmt2nsec),  [gmt2sec](#gmt2sec),  [hms2fsec](#hms2fsec),  [hms2sec](#hms2sec),  [localtime2gmt](#localtime2gmt),  [localtime2nsec](#localtime2nsec),  [localtime2sec](#localtime2sec),  [nsec2gmt](#nsec2gmt),  [nsec2gmtdate](#nsec2gmtdate),  [nsec2localdate](#nsec2localdate),  [nsec2localtime](#nsec2localtime),  [sec2dhms](#sec2dhms),  [sec2gmt](#sec2gmt),  [sec2gmtdate](#sec2gmtdate),  [sec2hms](#sec2hms),  [sec2localdate](#sec2localdate),  [sec2localtime](#sec2localtime),  [strfntime](#strfntime),  [strfntime_local](#strfntime_local),  [strftime](#strftime),  [strftime_local](#strftime_local),  [strpntime](#strpntime),  [strpntime_local](#strpntime_local),  [strptime](#strptime),  [strptime_local](#strptime_local),  [sysntime](#sysntime),  [systime](#systime),  [systimeint](#systimeint),  [upntime](#upntime),  [uptime](#uptime).
 * [**Typing functions**](#typing-functions):  [asserting_absent](#asserting_absent),  [asserting_array](#asserting_array),  [asserting_bool](#asserting_bool),  [asserting_boolean](#asserting_boolean),  [asserting_empty](#asserting_empty),  [asserting_empty_map](#asserting_empty_map),  [asserting_error](#asserting_error),  [asserting_float](#asserting_float),  [asserting_int](#asserting_int),  [asserting_map](#asserting_map),  [asserting_nonempty_map](#asserting_nonempty_map),  [asserting_not_array](#asserting_not_array),  [asserting_not_empty](#asserting_not_empty),  [asserting_not_map](#asserting_not_map),  [asserting_not_null](#asserting_not_null),  [asserting_null](#asserting_null),  [asserting_numeric](#asserting_numeric),  [asserting_present](#asserting_present),  [asserting_string](#asserting_string),  [is_absent](#is_absent),  [is_array](#is_array),  [is_bool](#is_bool),  [is_boolean](#is_boolean),  [is_empty](#is_empty),  [is_empty_map](#is_empty_map),  [is_error](#is_error),  [is_float](#is_float),  [is_int](#is_int),  [is_map](#is_map),  [is_nan](#is_nan),  [is_nonempty_map](#is_nonempty_map),  [is_not_array](#is_not_array),  [is_not_empty](#is_not_empty),  [is_not_map](#is_not_map),  [is_not_null](#is_not_null),  [is_null](#is_null),  [is_numeric](#is_numeric),  [is_present](#is_present),  [is_string](#is_string),  [typeof](#typeof).
@@ -1296,6 +1296,22 @@ lstrip  (class=string #args=1) Strip leading whitespace from string.
 </pre>
 
 
+### match
+<pre class="pre-non-highlight-non-pair">
+match  (class=string #args=2) TODO: WRITE ME
+Example:
+TODO: WRITE ME
+</pre>
+
+
+### matchx
+<pre class="pre-non-highlight-non-pair">
+matchx  (class=string #args=2) TODO: WRITE ME
+Example:
+TODO: WRITE ME
+</pre>
+
+
 ### regextract
 <pre class="pre-non-highlight-non-pair">
 regextract  (class=string #args=2) Extracts a substring (the first, if there are multiple matches), matching a regular expression, from the input. Does not use capture groups; see also the =~ operator which does.
diff --git a/man/manpage.txt b/man/manpage.txt
index e7e3d35821..7f3a122af7 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -199,18 +199,19 @@ MILLER(1)                                                            MILLER(1)
        is_numeric is_present is_string joink joinkv joinv json_parse json_stringify
        kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec
        localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect
-       mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub
-       nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile
-       percentiles pow qnorm reduce regextract regextract_or_else rightpad round
-       roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime
-       select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita
-       splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime
-       strfntime_local strftime strftime_local string strip strlen strpntime
-       strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2
-       sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate
-       typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement
-       urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - .
-       .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~
+       mapsum match matchx max maxlen md5 mean meaneb median mexp min minlen mmul
+       mode msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os
+       percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad
+       round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate
+       sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort
+       sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub
+       stddev strfntime strfntime_local strftime strftime_local string strip strlen
+       strpntime strpntime_local strptime strptime_local sub substr substr0 substr1
+       sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper
+       truncate typeof unflatten unformat unformatx upntime uptime urand urand32
+       urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % &
+       && * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ |
+       || ~
 
 1mCOMMENTS-IN-DATA FLAGS0m
        Miller lets you put comments in your data, such as
@@ -2629,6 +2630,16 @@ MILLER(1)                                                            MILLER(1)
    1mmapsum0m
         (class=collections #args=variadic) With 0 args, returns empty map. With >= 1 arg, returns a map with key-value pairs from all arguments. Rightmost collisions win, e.g. 'mapsum({1:2,3:4},{1:5})' is '{1:5,3:4}'.
 
+   1mmatch0m
+        (class=string #args=2) TODO: WRITE ME
+       Example:
+       TODO: WRITE ME
+
+   1mmatchx0m
+        (class=string #args=2) TODO: WRITE ME
+       Example:
+       TODO: WRITE ME
+
    1mmax0m
         (class=math #args=variadic) Max of n numbers; null loses. The min and max functions also recurse into arrays and maps, so they can be used to get min/max stats on array/map values.
 
@@ -3628,4 +3639,4 @@ MILLER(1)                                                            MILLER(1)
 
 
 
-                                  2023-12-13                         MILLER(1)
+                                  2023-12-16                         MILLER(1)
diff --git a/man/mlr.1 b/man/mlr.1
index 4d5ee4f5c7..4f0644ed76 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -2,12 +2,12 @@
 .\"     Title: mlr
 .\"    Author: [see the "AUTHOR" section]
 .\" Generator: ./mkman.rb
-.\"      Date: 2023-12-13
+.\"      Date: 2023-12-16
 .\"    Manual: \ \&
 .\"    Source: \ \&
 .\"  Language: English
 .\"
-.TH "MILLER" "1" "2023-12-13" "\ \&" "\ \&"
+.TH "MILLER" "1" "2023-12-16" "\ \&" "\ \&"
 .\" -----------------------------------------------------------------
 .\" * Portability definitions
 .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -246,18 +246,19 @@ is_nonempty_map is_not_array is_not_empty is_not_map is_not_null is_null
 is_numeric is_present is_string joink joinkv joinv json_parse json_stringify
 kurtosis latin1_to_utf8 leafcount leftpad length localtime2gmt localtime2nsec
 localtime2sec log log10 log1p logifit lstrip madd mapdiff mapexcept mapselect
-mapsum max maxlen md5 mean meaneb median mexp min minlen mmul mode msub
-nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os percentile
-percentiles pow qnorm reduce regextract regextract_or_else rightpad round
-roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime
-select sgn sha1 sha256 sha512 sin sinh skewness sort sort_collection splita
-splitax splitkv splitkvx splitnv splitnvx sqrt ssub stddev strfntime
-strfntime_local strftime strftime_local string strip strlen strpntime
-strpntime_local strptime strptime_local sub substr substr0 substr1 sum sum2
-sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper truncate
-typeof unflatten unformat unformatx upntime uptime urand urand32 urandelement
-urandint urandrange utf8_to_latin1 variance version ! != !=~ % & && * ** + - .
-\&.* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~
+mapsum match matchx max maxlen md5 mean meaneb median mexp min minlen mmul
+mode msub nsec2gmt nsec2gmtdate nsec2localdate nsec2localtime null_count os
+percentile percentiles pow qnorm reduce regextract regextract_or_else rightpad
+round roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate
+sec2localtime select sgn sha1 sha256 sha512 sin sinh skewness sort
+sort_collection splita splitax splitkv splitkvx splitnv splitnvx sqrt ssub
+stddev strfntime strfntime_local strftime strftime_local string strip strlen
+strpntime strpntime_local strptime strptime_local sub substr substr0 substr1
+sum sum2 sum3 sum4 sysntime system systime systimeint tan tanh tolower toupper
+truncate typeof unflatten unformat unformatx upntime uptime urand urand32
+urandelement urandint urandrange utf8_to_latin1 variance version ! != !=~ % &
+&& * ** + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ |
+|| ~
 .fi
 .if n \{\
 .RE
@@ -3938,6 +3939,28 @@ localtime2sec("2001-02-03 04:05:06", "Asia/Istanbul") = 981165906"
 .fi
 .if n \{\
 .RE
+.SS "match"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=string #args=2) TODO: WRITE ME
+Example:
+TODO: WRITE ME
+.fi
+.if n \{\
+.RE
+.SS "matchx"
+.if n \{\
+.RS 0
+.\}
+.nf
+ (class=string #args=2) TODO: WRITE ME
+Example:
+TODO: WRITE ME
+.fi
+.if n \{\
+.RE
 .SS "max"
 .if n \{\
 .RS 0
diff --git a/pkg/bifs/regex.go b/pkg/bifs/regex.go
index 52cab9ac5e..74c0840f6a 100644
--- a/pkg/bifs/regex.go
+++ b/pkg/bifs/regex.go
@@ -81,7 +81,7 @@ func BIF_sub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval {
 	sregex := input2.AcquireStringValue()
 	replacement := input3.AcquireStringValue()
 
-	stringOutput := lib.RegexSub(input, sregex, replacement)
+	stringOutput := lib.RegexStringSub(input, sregex, replacement)
 	return mlrval.FromString(stringOutput)
 }
 
@@ -111,7 +111,7 @@ func BIF_gsub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval {
 	sregex := input2.AcquireStringValue()
 	replacement := input3.AcquireStringValue()
 
-	stringOutput := lib.RegexGsub(input, sregex, replacement)
+	stringOutput := lib.RegexStringGsub(input, sregex, replacement)
 	return mlrval.FromString(stringOutput)
 }
 
@@ -129,7 +129,7 @@ func BIF_string_matches_regexp(input1, input2 *mlrval.Mlrval) (retval *mlrval.Ml
 		return mlrval.FromNotStringError("=~", input2), nil
 	}
 
-	boolOutput, captures := lib.RegexMatches(input1string, input2.AcquireStringValue())
+	boolOutput, captures := lib.RegexStringMatchWithCaptures(input1string, input2.AcquireStringValue())
 	return mlrval.FromBool(boolOutput), captures
 }
 
diff --git a/pkg/dsl/cst/leaves.go b/pkg/dsl/cst/leaves.go
index 08b3200a98..c0b4d88757 100644
--- a/pkg/dsl/cst/leaves.go
+++ b/pkg/dsl/cst/leaves.go
@@ -266,7 +266,7 @@ func (root *RootNode) BuildStringLiteralNode(literal string) IEvaluable {
 	// RegexLiteralNode.  See also https://github.com/johnkerl/miller/issues/297.
 	literal = lib.UnbackslashStringLiteral(literal)
 
-	hasCaptures, replacementCaptureMatrix := lib.RegexReplacementHasCaptures(literal)
+	hasCaptures, replacementCaptureMatrix := lib.ReplacementHasCaptures(literal)
 	if !hasCaptures {
 		return &StringLiteralNode{
 			literal: mlrval.FromString(literal),
diff --git a/pkg/input/record_reader.go b/pkg/input/record_reader.go
index 2802019363..096060e629 100644
--- a/pkg/input/record_reader.go
+++ b/pkg/input/record_reader.go
@@ -158,7 +158,7 @@ type tIPSRegexSplitter struct {
 }
 
 func (s *tIPSRegexSplitter) Split(input string) []string {
-	return lib.RegexSplitString(s.ipsRegex, input, 2)
+	return lib.RegexCompiledSplitString(s.ipsRegex, input, 2)
 }
 
 // IFieldSplitter splits a string into pieces, e.g. for IFS.
@@ -193,5 +193,5 @@ type tIFSRegexSplitter struct {
 }
 
 func (s *tIFSRegexSplitter) Split(input string) []string {
-	return lib.RegexSplitString(s.ifsRegex, input, -1)
+	return lib.RegexCompiledSplitString(s.ifsRegex, input, -1)
 }
diff --git a/pkg/input/record_reader_xtab.go b/pkg/input/record_reader_xtab.go
index 0cfc74b25d..74d8dac417 100644
--- a/pkg/input/record_reader_xtab.go
+++ b/pkg/input/record_reader_xtab.go
@@ -304,7 +304,7 @@ type tXTABIPSSplitter struct {
 // which we need to produce just a pair of items -- a key and a value -- delimited by one or more
 // IPS. For exaemple, with IPS being a space, in 'abc     123' we need to get key 'abc' and value
 // '123'; for 'abc    123 456' we need key 'abc' and value '123 456'.  It's super-elegant to simply
-// regex-split the line like 'kv = lib.RegexSplitString(reader.readerOptions.IPSRegex, line, 2)' --
+// regex-split the line like 'kv = lib.RegexCompiledSplitString(reader.readerOptions.IPSRegex, line, 2)' --
 // however, that's 3x slower than the current implementation. It turns out regexes are great
 // but we should use them only when we must, since they are expensive.
 func (s *tXTABIPSSplitter) Split(input string) (key, value string, err error) {
@@ -358,7 +358,7 @@ type tXTABIPSRegexSplitter struct {
 }
 
 func (s *tXTABIPSRegexSplitter) Split(input string) (key, value string, err error) {
-	kv := lib.RegexSplitString(s.ipsRegex, input, 2)
+	kv := lib.RegexCompiledSplitString(s.ipsRegex, input, 2)
 	if len(kv) == 0 {
 		return "", "", fmt.Errorf("internal coding error in XTAB reader")
 	} else if len(kv) == 1 {
diff --git a/pkg/lib/regex.go b/pkg/lib/regex.go
index 3bab040360..cabbc1510f 100644
--- a/pkg/lib/regex.go
+++ b/pkg/lib/regex.go
@@ -1,5 +1,5 @@
 // ================================================================
-// Support for regexes in Miller.
+// Support for regular expressions in Miller.
 //
 // * By and large we use the Go library.
 //
@@ -13,17 +13,24 @@
 //       $y = "\2:\1";
 //     }
 //   where the '=~' sets the captures and the "\2:\1" uses them.  (Note that
-//   https://github.com/johnkerl/miller/issues/388 has a better suggestion
-//   which would make the captures explicit as variables, rather than implicit
-//   within CST state -- regardless, the current syntax will still be supported
-//   for backward compatibility and so is here to stay.) Here we make use of Go
-//   regexp-library functions to write to, and then later interpolate from, a
-//   captures array which is stored within CST state. (See the `runtime.State`
-//   object.)
+//   https://github.com/johnkerl/miller/issues/388 has a better suggestion which would make the
+//   captures explicit as variables, rather than implicit within CST state: this is implemented by
+//   the `match` and `matchx` DSL functions.  Regardless, the `=~` syntax will still be supported
+//   for backward compatibility and so is here to stay.) Here we make use of Go regexp-library
+//   functions to write to, and then later interpolate from, a captures array which is stored within
+//   CST state. (See the `runtime.State` object.)
 //
 // * "\0" is for a full match; "\1" .. "\9" are for submatch cqptures. E.g.
 //   if $x is "foobarbaz" and the regex is "foo(.)(..)baz", then "\0" is
 //   "foobarbaz", "\1" is "b", "\2" is "ar", and "\3".."\9" are "".
+//
+// * Naming:
+//
+//   o "regexp" and "Regexp" are used for the Go library and its data structure, respectively;
+//
+//   o "regex" is used for regular-expression strings following Miller's idiosyncratic syntax and
+//     semantics as described above.
+//
 // ================================================================
 
 package lib
@@ -34,6 +41,7 @@ import (
 	"os"
 	"regexp"
 	"strings"
+	"sync"
 )
 
 // captureDetector is used to see if a string literal interpolates previous
@@ -44,20 +52,54 @@ var captureDetector = regexp.MustCompile(`\\[0-9]`)
 // "\2:\1" so they don't need to be recomputed on every record.
 var captureSplitter = regexp.MustCompile(`(\\[0-9])`)
 
-// CompileMillerRegex wraps Go regex-compile with some Miller-specific syntax
-// which predate the port of Miller from C to Go.  Miller regexes use a final
-// 'i' to indicate case-insensitivity; Go regexes use an initial "(?i)".
+// See regexpCompileCached
+var regexpCache map[string]*regexp.Regexp
+
+const cacheMaxSize = 1000
+
+var cacheMutex sync.Mutex
+
+// regexpCompileCached keeps a cache of compiled regexes, so that the caller has the flexibility to
+// only pass in strings while getting the benefits of compilation avoidance. A cache hit returns the
+// previously compiled regexp; a miss compiles, and stores the result if compilation succeeded.
+//
+// Regarding cache size: in nominal use, regexp strings are within Miller DSL code statements, and
+// there will be a handful. These will all get re-used after their first application, and the cache
+// will remain bounded by the size of the user's DSL code. However, it is possible to have regex
+// strings contained within Miller record-field data. We could solve this by using an LRU cache;
+// for simplicity, we instead stop adding entries once the cache holds cacheMaxSize of them, and
+// recompile any extras each time. All cache reads and writes here happen under cacheMutex.
+func regexpCompileCached(s string) (*regexp.Regexp, error) {
+	cacheMutex.Lock()
+	defer cacheMutex.Unlock()
+	if r, ok := regexpCache[s]; ok {
+		return r, nil
+	}
+	r, err := regexp.Compile(s)
+	if err != nil || len(regexpCache) >= cacheMaxSize {
+		return r, err
+	}
+	if regexpCache == nil {
+		regexpCache = make(map[string]*regexp.Regexp)
+	}
+	regexpCache[s] = r
+	return r, nil
+}
+
+// CompileMillerRegex wraps Go regex-compile with some Miller-specific syntax which predates the
+// port of Miller from C to Go.  Miller regexes use a final 'i' to indicate case-insensitivity; Go
+// regexes use an initial "(?i)".
 //
-// (See also mlr.bnf where we specify which things can be backslash-escaped
-// without a syntax error at parse time.)
+// (See also mlr.bnf where we specify which things can be backslash-escaped without a syntax error
+// at parse time.)
 //
-// * If the regex_string is of the form a.*b, compiles it case-sensisitively.
-// * If the regex_string is of the form "a.*b", compiles a.*b case-sensisitively.
+// * If the regex_string is of the form a.*b, compiles it case-sensitively.
+// * If the regex_string is of the form "a.*b", compiles a.*b case-sensitively.
 // * If the regex_string is of the form "a.*b"i, compiles a.*b case-insensitively.
 func CompileMillerRegex(regexString string) (*regexp.Regexp, error) {
 	n := len(regexString)
 	if n < 2 {
-		return regexp.Compile(regexString)
+		return regexpCompileCached(regexString)
 	}
 
 	// TODO: rethink this. This will strip out things people have entered, e.g. "\"...\"".
@@ -68,20 +110,20 @@ func CompileMillerRegex(regexString string) (*regexp.Regexp, error) {
 	// literals) and from verbs (like cut -r or having-fields).
 
 	if strings.HasPrefix(regexString, "\"") && strings.HasSuffix(regexString, "\"") {
-		return regexp.Compile(regexString[1 : n-1])
+		return regexpCompileCached(regexString[1 : n-1])
 	}
 	if strings.HasPrefix(regexString, "/") && strings.HasSuffix(regexString, "/") {
-		return regexp.Compile(regexString[1 : n-1])
+		return regexpCompileCached(regexString[1 : n-1])
 	}
 
 	if strings.HasPrefix(regexString, "\"") && strings.HasSuffix(regexString, "\"i") {
-		return regexp.Compile("(?i)" + regexString[1:n-2])
+		return regexpCompileCached("(?i)" + regexString[1:n-2])
 	}
 	if strings.HasPrefix(regexString, "/") && strings.HasSuffix(regexString, "/i") {
-		return regexp.Compile("(?i)" + regexString[1:n-2])
+		return regexpCompileCached("(?i)" + regexString[1:n-2])
 	}
 
-	return regexp.Compile(regexString)
+	return regexpCompileCached(regexString)
 }
 
 // CompileMillerRegexOrDie wraps CompileMillerRegex. Usually in Go we want to
@@ -110,7 +152,7 @@ func CompileMillerRegexesOrDie(regexStrings []string) []*regexp.Regexp {
 // In Go as in all languages I'm aware of with a string-split, "a,b,c" splits
 // on "," to ["a", "b", "c" and "a" splits to ["a"], both of which are fine --
 // but "" splits to [""] when I wish it were []. This function does the latter.
-func RegexSplitString(regex *regexp.Regexp, input string, n int) []string {
+func RegexCompiledSplitString(regex *regexp.Regexp, input string, n int) []string {
 	if input == "" {
 		return make([]string, 0)
 	} else {
@@ -118,31 +160,140 @@ func RegexSplitString(regex *regexp.Regexp, input string, n int) []string {
 	}
 }
 
-// MakeEmptyRegexCaptures is for initial CST state at the start of executing
-// the DSL expression for the current record.  Even if '$x =~ "(..)_(...)" set
-// "\1" and "\2" on the previous record, at start of processing for the current
-// record we need to start with a clean slate.
-func MakeEmptyRegexCaptures() []string {
-	return nil
+// RegexStringSub implements the `sub` DSL function.
+func RegexStringSub(
+	input string,
+	sregex string,
+	replacement string,
+) string {
+	regex := CompileMillerRegexOrDie(sregex)
+	_, replacementCaptureMatrix := ReplacementHasCaptures(replacement)
+	return RegexCompiledSub(input, regex, replacement, replacementCaptureMatrix)
 }
 
-// RegexReplacementHasCaptures is used by the CST builder to see if
-// string-literal is like "foo bar" or "foo \1 bar" -- in the latter case it
-// needs to retain the compiled offsets-matrix information.
-func RegexReplacementHasCaptures(
+// RegexCompiledSub is the same as RegexStringSub but with compiled regex and
+// replacement strings.
+func RegexCompiledSub(
+	input string,
+	regex *regexp.Regexp,
 	replacement string,
-) (
-	hasCaptures bool,
-	matrix [][]int,
-) {
-	if captureDetector.MatchString(replacement) {
-		return true, captureSplitter.FindAllSubmatchIndex([]byte(replacement), -1)
-	} else {
-		return false, nil
+	replacementCaptureMatrix [][]int,
+) string {
+	return regexCompiledSubOrGsub(input, regex, replacement, replacementCaptureMatrix, true)
+}
+
+// RegexStringGsub implements the `gsub` DSL function.
+func RegexStringGsub(
+	input string,
+	sregex string,
+	replacement string,
+) string {
+	regex := CompileMillerRegexOrDie(sregex)
+	_, replacementCaptureMatrix := ReplacementHasCaptures(replacement)
+	return regexCompiledSubOrGsub(input, regex, replacement, replacementCaptureMatrix, false)
+}
+
+// regexCompiledSubOrGsub is the implementation for `sub`/`gsub` with compiled regex
+// and replacement strings.
+func regexCompiledSubOrGsub(
+	input string,
+	regex *regexp.Regexp,
+	replacement string,
+	replacementCaptureMatrix [][]int,
+	breakOnFirst bool,
+) string {
+	matrix := regex.FindAllSubmatchIndex([]byte(input), -1)
+	if matrix == nil || len(matrix) == 0 {
+		return input
 	}
+
+	// Example return value from FindAllSubmatchIndex with input
+	// "...ab_cde...fg_hij..." and regex "(..)_(...)":
+	//
+	// Matrix is [][]int{
+	//   []int{3, 9, 3, 5, 6, 9},
+	//   []int{12, 18, 12, 14, 15, 18},
+	// }
+	//
+	// * 3-9 is for the entire match "ab_cde"
+	// * 3-5 is for the first capture "ab"
+	// * 6-9 is for the second capture "cde"
+	//
+	// * 12-18 is for the entire match "fg_hij"
+	// * 12-14 is for the first capture "fg"
+	// * 15-18 is for the second capture "hij"
+
+	var buffer bytes.Buffer
+	nonMatchStartIndex := 0
+
+	for _, row := range matrix {
+		buffer.WriteString(input[nonMatchStartIndex:row[0]])
+
+		// "\0" .. "\9"
+		captures := make([]string, 10)
+		di := 0
+		n := len(row)
+		for si := 0; si < n && di <= 9; si += 2 {
+			start := row[si]
+			end := row[si+1]
+			if start >= 0 && end >= 0 {
+				captures[di] = input[start:end]
+			}
+			di += 1
+		}
+
+		// If the replacement had no captures, e.g. "xyz", we would insert it
+		//
+		//   "..."     -> "..."
+		//   "ab_cde"  -> "xyz"   --- here
+		//   "..."     -> "..."
+		//   "fg_hij"  -> "xyz"   --- and here
+		//   "..."     -> "..."
+		//
+		// using buffer.WriteString(replacement). However, this function exists
+		// to handle the case when the replacement string has captures like
+		// "\2:\1", so we need to produce
+		//
+		//   "..."     -> "..."
+		//   "ab_cde"  -> "cde:ab"   --- here
+		//   "..."     -> "..."
+		//   "fg_hij"  -> "hij:fg"   --- and here
+		//   "..."     -> "..."
+		updatedReplacement := InterpolateCaptures(
+			replacement,
+			replacementCaptureMatrix,
+			captures,
+		)
+		buffer.WriteString(updatedReplacement)
+
+		nonMatchStartIndex = row[1]
+		if breakOnFirst {
+			break
+		}
+	}
+
+	buffer.WriteString(input[nonMatchStartIndex:])
+	return buffer.String()
 }
 
-// RegexMatches implements the =~ DSL operator. The captures are stored in DSL
+// RegexStringMatchSimple is for simple boolean return without any substring captures.
+func RegexStringMatchSimple(
+	input string,
+	sregex string,
+) bool {
+	regex := CompileMillerRegexOrDie(sregex)
+	return RegexCompiledMatchSimple(input, regex)
+}
+
+// RegexCompiledMatchSimple is like RegexStringMatchSimple but takes an already-compiled regex.
+func RegexCompiledMatchSimple(
+	input string,
+	regex *regexp.Regexp,
+) bool {
+	return regex.Match([]byte(input))
+}
+
+// RegexStringMatchWithCaptures implements the =~ DSL operator. The captures are stored in DSL
 // state and may be used by a DSL statement after the =~. For example, in
 //
 //	sub($a, "(..)_(...)", "\1:\2")
@@ -157,9 +308,9 @@ func RegexReplacementHasCaptures(
 //	}
 //
 // and the =~ callsite doesn't know if captures will be used or not. So,
-// RegexMatches always returns the captures array. It is stored within the CST
+// RegexStringMatchWithCaptures always returns the captures array. It is stored within the CST
 // state.
-func RegexMatches(
+func RegexStringMatchWithCaptures(
 	input string,
 	sregex string,
 ) (
@@ -167,14 +318,14 @@ func RegexMatches(
 	capturesOneUp []string,
 ) {
 	regex := CompileMillerRegexOrDie(sregex)
-	return RegexMatchesCompiled(input, regex)
+	return RegexCompiledMatchWithCaptures(input, regex)
 }
 
-// RegexMatchesCompiled is the implementation for the =~ operator.  Without
+// RegexCompiledMatchWithCaptures is the implementation for the =~ operator.  Without
 // Miller-style regex captures this would a simple one-line
 // regex.MatchString(input). However, we return the captures array for the
 // benefit of subsequent references to "\0".."\9".
-func RegexMatchesCompiled(
+func RegexCompiledMatchWithCaptures(
 	input string,
 	regex *regexp.Regexp,
 ) (bool, []string) {
@@ -228,17 +379,47 @@ func RegexMatchesCompiled(
 	return true, captures
 }
 
+// MakeEmptyCaptures is for initial CST state at the start of executing the DSL expression for the
+// current record.  Even if '$x =~ "(..)_(...)" set "\1" and "\2" on the previous record, at start
+// of processing for the current record we need to start with a clean slate. This is in support of
+// CST state, which `=~` semantics requires.
+func MakeEmptyCaptures() []string {
+	return nil
+}
+
+// ReplacementHasCaptures is used by the CST builder to see if a string literal is like "foo bar" or
+// "foo \1 bar" -- in the latter case it needs to retain the compiled offsets-matrix information.
+// This is in support of CST state, which `=~` semantics requires.
+func ReplacementHasCaptures(
+	replacement string,
+) (
+	hasCaptures bool,
+	matrix [][]int,
+) {
+	if captureDetector.MatchString(replacement) {
+		return true, captureSplitter.FindAllSubmatchIndex([]byte(replacement), -1)
+	} else {
+		return false, nil
+	}
+}
+
 // InterpolateCaptures example:
-//   - Input $x is "ab_cde"
+//
+// * Input $x is "ab_cde"
+//
 //   - DSL expression
 //     if ($x =~ "(..)_(...)") {
 //     ... other lines of code ...
 //     $y = "\2:\1";
 //     }
-//   - InterpolateCaptures is used on the evaluation of "\2:\1"
-//   - replacementString is "\2:\1"
+//
+// * InterpolateCaptures is used on the evaluation of "\2:\1"
+//
+// * replacementString is "\2:\1"
+//
 //   - replacementMatrix contains precomputed/cached offsets for the "\2" and
 //     "\1" substrings within "\2:\1"
+//
 //   - captures has slot 0 being "ab_cde" (for "\0"), slot 1 being "ab" (for "\1"),
 //     slot 2 being "cde" (for "\2"), and slots 3-9 being "".
 func InterpolateCaptures(
@@ -268,119 +449,3 @@ func InterpolateCaptures(
 
 	return buffer.String()
 }
-
-// RegexSub implements the sub DSL function.
-func RegexSub(
-	input string,
-	sregex string,
-	replacement string,
-) string {
-	regex := CompileMillerRegexOrDie(sregex)
-	_, replacementCaptureMatrix := RegexReplacementHasCaptures(replacement)
-	return RegexSubCompiled(input, regex, replacement, replacementCaptureMatrix)
-}
-
-// RegexSubCompiled is the same as RegexSub but with compiled regex and
-// replacement strings.
-func RegexSubCompiled(
-	input string,
-	regex *regexp.Regexp,
-	replacement string,
-	replacementCaptureMatrix [][]int,
-) string {
-	return regexSubGsubCompiled(input, regex, replacement, replacementCaptureMatrix, true)
-}
-
-// RegexGsub implements the gsub DSL function.
-func RegexGsub(
-	input string,
-	sregex string,
-	replacement string,
-) string {
-	regex := CompileMillerRegexOrDie(sregex)
-	_, replacementCaptureMatrix := RegexReplacementHasCaptures(replacement)
-	return regexSubGsubCompiled(input, regex, replacement, replacementCaptureMatrix, false)
-}
-
-// regexSubGsubCompiled is the implementation for sub/gsub with compilex regex
-// and replacement strings.
-func regexSubGsubCompiled(
-	input string,
-	regex *regexp.Regexp,
-	replacement string,
-	replacementCaptureMatrix [][]int,
-	breakOnFirst bool,
-) string {
-	matrix := regex.FindAllSubmatchIndex([]byte(input), -1)
-	if matrix == nil || len(matrix) == 0 {
-		return input
-	}
-
-	// Example return value from FindAllSubmatchIndex with input
-	// "...ab_cde...fg_hij..." and regex "(..)_(...)":
-	//
-	// Matrix is [][]int{
-	//   []int{3, 9, 3, 5, 6, 9},
-	//   []int{12, 18, 12, 14, 15, 18},
-	// }
-	//
-	// * 3-9 is for the entire match "ab_cde"
-	// * 3-5 is for the first capture "ab"
-	// * 6-9 is for the second capture "cde"
-	//
-	// * 12-18 is for the entire match "fg_hij"
-	// * 12-14 is for the first capture "fg"
-	// * 15-18 is for the second capture "hij"
-
-	var buffer bytes.Buffer
-	nonMatchStartIndex := 0
-
-	for _, row := range matrix {
-		buffer.WriteString(input[nonMatchStartIndex:row[0]])
-
-		// "\0" .. "\9"
-		captures := make([]string, 10)
-		di := 0
-		n := len(row)
-		for si := 0; si < n && di <= 9; si += 2 {
-			start := row[si]
-			end := row[si+1]
-			if start >= 0 && end >= 0 {
-				captures[di] = input[start:end]
-			}
-			di += 1
-		}
-
-		// If the replacement had no captures, e.g. "xyz", we would insert it
-		//
-		//   "..."     -> "..."
-		//   "ab_cde"  -> "xyz"   --- here
-		//   "..."     -> "..."
-		//   "fg_hij"  -> "xyz"   --- and here
-		//   "..."     -> "..."
-		//
-		// using buffer.WriteString(replacement). However, this function exists
-		// to handle the case when the replacement string has captures like
-		// "\2:\1", so we need to produce
-		//
-		//   "..."     -> "..."
-		//   "ab_cde"  -> "cde:ab"   --- here
-		//   "..."     -> "..."
-		//   "fg_hij"  -> "hij:fg"   --- and here
-		//   "..."     -> "..."
-		updatedReplacement := InterpolateCaptures(
-			replacement,
-			replacementCaptureMatrix,
-			captures,
-		)
-		buffer.WriteString(updatedReplacement)
-
-		nonMatchStartIndex = row[1]
-		if breakOnFirst {
-			break
-		}
-	}
-
-	buffer.WriteString(input[nonMatchStartIndex:])
-	return buffer.String()
-}
diff --git a/pkg/lib/regex_test.go b/pkg/lib/regex_test.go
index 961d73f8d5..d2a8f5f705 100644
--- a/pkg/lib/regex_test.go
+++ b/pkg/lib/regex_test.go
@@ -88,7 +88,7 @@ var dataForMatches = []tDataForMatches{
 
 func TestRegexReplacementHasCaptures(t *testing.T) {
 	for i, entry := range dataForHasCaptures {
-		actualHasCaptures, actualMatrix := RegexReplacementHasCaptures(entry.replacement)
+		actualHasCaptures, actualMatrix := ReplacementHasCaptures(entry.replacement)
 		if actualHasCaptures != entry.expectedHasCaptures {
 			t.Fatalf("case %d replacement \"%s\" expected %v got %v\n",
 				i, entry.replacement, entry.expectedHasCaptures, actualHasCaptures,
@@ -104,7 +104,7 @@ func TestRegexReplacementHasCaptures(t *testing.T) {
 
 func TestRegexSub(t *testing.T) {
 	for i, entry := range dataForSub {
-		actualOutput := RegexSub(entry.input, entry.sregex, entry.replacement)
+		actualOutput := RegexStringSub(entry.input, entry.sregex, entry.replacement)
 		if actualOutput != entry.expectedOutput {
 			t.Fatalf("case %d input \"%s\" sregex \"%s\" replacement \"%s\" expected \"%s\" got \"%s\"\n",
 				i, entry.input, entry.sregex, entry.replacement, entry.expectedOutput, actualOutput,
@@ -115,7 +115,7 @@ func TestRegexSub(t *testing.T) {
 
 func TestRegexGsub(t *testing.T) {
 	for i, entry := range dataForGsub {
-		actualOutput := RegexGsub(entry.input, entry.sregex, entry.replacement)
+		actualOutput := RegexStringGsub(entry.input, entry.sregex, entry.replacement)
 		if actualOutput != entry.expectedOutput {
 			t.Fatalf("case %d input \"%s\" sregex \"%s\" replacement \"%s\" expected \"%s\" got \"%s\"\n",
 				i, entry.input, entry.sregex, entry.replacement, entry.expectedOutput, actualOutput,
@@ -126,7 +126,7 @@ func TestRegexGsub(t *testing.T) {
 
 func TestRegexMatches(t *testing.T) {
 	for i, entry := range dataForMatches {
-		actualOutput, actualCaptures := RegexMatches(entry.input, entry.sregex)
+		actualOutput, actualCaptures := RegexStringMatchWithCaptures(entry.input, entry.sregex)
 		if actualOutput != entry.expectedOutput {
 			t.Fatalf("case %d input \"%s\" sregex \"%s\" expected %v got %v\n",
 				i, entry.input, entry.sregex, entry.expectedOutput, actualOutput,
diff --git a/pkg/runtime/state.go b/pkg/runtime/state.go
index e94fd4ce5f..820f40c3dd 100644
--- a/pkg/runtime/state.go
+++ b/pkg/runtime/state.go
@@ -43,8 +43,8 @@ func NewEmptyState(options *cli.TOptions, strictMode bool) *State {
 
 		// OutputRecordsAndContexts is assigned after construction
 
-		// See lib.MakeEmptyRegexCaptures for context.
-		RegexCaptures: lib.MakeEmptyRegexCaptures(),
+		// See lib.MakeEmptyCaptures for context.
+		RegexCaptures: lib.MakeEmptyCaptures(),
 		Options:       options,
 
 		StrictMode: strictMode,
@@ -57,5 +57,5 @@ func (state *State) Update(
 ) {
 	state.Inrec = inrec
 	state.Context = context
-	state.RegexCaptures = lib.MakeEmptyRegexCaptures()
+	state.RegexCaptures = lib.MakeEmptyCaptures()
 }
diff --git a/pkg/transformers/merge_fields.go b/pkg/transformers/merge_fields.go
index de1a555c3d..7ee2d9fade 100644
--- a/pkg/transformers/merge_fields.go
+++ b/pkg/transformers/merge_fields.go
@@ -479,7 +479,7 @@ func (tr *TransformerMergeFields) transformByCollapsing(
 			matched = valueFieldNameRegex.MatchString(pe.Key)
 			if matched {
 				// TODO: comment re matrix
-				shortName = lib.RegexSubCompiled(valueFieldName, valueFieldNameRegex, "", nil)
+				shortName = lib.RegexCompiledSub(valueFieldName, valueFieldNameRegex, "", nil)
 				break
 			}
 		}
diff --git a/pkg/transformers/rename.go b/pkg/transformers/rename.go
index e5f0658b8a..7880b6ead0 100644
--- a/pkg/transformers/rename.go
+++ b/pkg/transformers/rename.go
@@ -169,7 +169,7 @@ func NewTransformerRename(
 			regexString := pe.Key
 			regex := lib.CompileMillerRegexOrDie(regexString)
 			replacement := pe.Value.(string)
-			_, replacementCaptureMatrix := lib.RegexReplacementHasCaptures(replacement)
+			_, replacementCaptureMatrix := lib.ReplacementHasCaptures(replacement)
 			regexAndReplacement := tRegexAndReplacement{
 				regex:                    regex,
 				replacement:              replacement,
@@ -241,7 +241,7 @@ func (tr *TransformerRename) transformWithRegexes(
 						inrec.Rename(oldName, newName)
 					}
 				} else {
-					newName := lib.RegexSubCompiled(oldName, regex, replacement, replacementCaptureMatrix)
+					newName := lib.RegexCompiledSub(oldName, regex, replacement, replacementCaptureMatrix)
 					if newName != oldName {
 						inrec.Rename(oldName, newName)
 					}