diff --git a/docs/content/manual/manual.yml b/docs/content/manual/manual.yml index 68cc091fef..ca08f89eaf 100644 --- a/docs/content/manual/manual.yml +++ b/docs/content/manual/manual.yml @@ -1428,6 +1428,30 @@ sections: input: '[{"foo":1, "bar":14}, {"foo":2, "bar":3}]' output: ['{"foo":2, "bar":3}'] + - title: "`uniq(stream)`" + body: | + + The `uniq` function produces a substream of the given stream + by emitting in turn the first item from each run within it. + No sorting takes place. + + examples: + - program: '[uniq(1,1,2,null,null,1)]' + input: 'null' + output: ['[1,2,null,1]'] + + - program: '[uniq(.[])]' + input: '[1,1,2,null,null,1]' + output: ['[1,2,null,1]'] + + - program: '[uniq(empty)]' + input: 'null' + output: ['[]'] + + - program: '[true, false | [uniq(1,1,2)]]' + input: 'null' + output: ['[[1,2],[1,2]]'] + - title: "`unique`, `unique_by(path_exp)`" body: | @@ -2471,27 +2495,33 @@ sections: input: '("ab,cd", "ef, gh")' output: ['"ab"', '"cd"', '"ef"', '"gh"'] - - title: "`sub(regex; tostring)`, `sub(regex; string; flags)`" + - title: "`sub(regex; tostring)`, `sub(regex; tostring; flags)`" body: | - Emit the string obtained by replacing the first match of regex in the - input string with `tostring`, after interpolation. `tostring` should - be a jq string, and may contain references to named captures. The - named captures are, in effect, presented as a JSON object (as - constructed by `capture`) to `tostring`, so a reference to a captured - variable named "x" would take the form: `"\(.x)"`. + Emit the string obtained by replacing the first match of + regex in the input string with `tostring`, after + interpolation. `tostring` should be a jq string or a stream + of such strings, each of which may contain references to + named captures. The named captures are, in effect, presented + as a JSON object (as constructed by `capture`) to + `tostring`, so a reference to a captured variable named "x" + would take the form: `"\(.x)"`. 
example: - program: 'sub("^[^a-z]*(?<x>[a-z]*).*")' input: '"123abc456"' output: '"ZabcZabc"' + - program: '[sub("(?<a>.)"; "\(.a|ascii_upcase)", "\(.a|ascii_downcase)")]' + input: '"aB"' + output: ['["AB","aB"]'] - - title: "`gsub(regex; string)`, `gsub(regex; string; flags)`" + - title: "`gsub(regex; tostring)`, `gsub(regex; tostring; flags)`" body: | `gsub` is like `sub` but all the non-overlapping occurrences of the regex are - replaced by the string, after interpolation. + replaced by `tostring`, after interpolation. If the second argument is a stream + of jq strings, then `gsub` will produce a corresponding stream of JSON strings. example: - program: 'gsub("(?<x>.)[^a]*"; "+\(.x)-")' diff --git a/src/builtin.jq b/src/builtin.jq index a102fd51a0..7af5d696e5 100644 --- a/src/builtin.jq +++ b/src/builtin.jq @@ -99,8 +99,10 @@ def scan(re): # # If input is an array, then emit a stream of successive subarrays of length n (or less), # and similarly for strings. -def _nwise(a; $n): if a|length <= $n then a else a[0:$n] , _nwise(a[$n:]; $n) end; -def _nwise($n): _nwise(.; $n); +def _nwise($n): + def n: if length <= $n then . else .[0:$n] , (.[$n:] | n) end; + n; +def _nwise(a; $n): a | _nwise($n); # # splits/1 produces a stream; split/1 is retained for backward compatibility. def splits($re; flags): . as $s @@ -114,47 +116,34 @@ def splits($re): splits($re; null); # split emits an array for backward compatibility def split($re; flags): [ splits($re; flags) ]; # -# If s contains capture variables, then create a capture object and pipe it to s -def sub($re; s): - . as $in - | [match($re)] - | if length == 0 then $in - else .[0] - | . as $r -# # create the "capture" object: - | reduce ( $r | .captures | .[] | select(.name != null) | { (.name) : .string } ) as $pair - ({}; . + $pair) - | $in[0:$r.offset] + s + $in[$r.offset+$r.length:] - end ; +# stream-oriented +def uniq(s): + foreach s as $x (null; + if . 
and $x == .[0] then .[1] = false + else [$x, true] + end; + if .[1] then .[0] else empty end); # -# If s contains capture variables, then create a capture object and pipe it to s -def sub($re; s; flags): - def subg: [explode[] | select(. != 103)] | implode; - # "fla" should be flags with all occurrences of g removed; gs should be non-nil if flags has a g - def sub1(fla; gs): - def mysub: - . as $in - | [match($re; fla)] - | if length == 0 then $in - else .[0] as $edit - | ($edit | .offset + .length) as $len - # create the "capture" object: - | reduce ( $edit | .captures | .[] | select(.name != null) | { (.name) : .string } ) as $pair - ({}; . + $pair) - | $in[0:$edit.offset] - + s - + ($in[$len:] | if length > 0 and gs then mysub else . end) - end ; - mysub ; - (flags | index("g")) as $gs - | (flags | if $gs then subg else . end) as $fla - | sub1($fla; $gs); +# If s contains capture variables, then create a capture object and pipe it to s, bearing +# in mind that s could be a stream +def sub($re; s; $flags): + . as $in + | (reduce uniq(match($re; $flags)) as $edit + ({result: [], previous: 0}; + $in[ .previous: ($edit | .offset) ] as $gap + # create the "capture" objects (one per item in s) + | [reduce ( $edit | .captures | .[] | select(.name != null) | { (.name) : .string } ) as $pair + ({}; . + $pair) | s ] as $inserts + | reduce range(0; $inserts|length) as $ix (.; .result[$ix] += $gap + $inserts[$ix]) + | .previous = ($edit | .offset + .length ) ) + | .result[] + $in[.previous:] ) + // $in; # def sub($re; s): sub($re; s; ""); -# repeated substitution of re (which may contain named captures) +# def gsub($re; s; flags): sub($re; s; flags + "g"); def gsub($re; s): sub($re; s; "g"); - +# ######################################################################## # generic iterator/generator def while(cond; update): @@ -237,7 +226,6 @@ def tostream: getpath($p) | reduce path(.[]?) 
as $q ([$p, .]; [$p+$q]); - # Assuming the input array is sorted, bsearch/1 returns # the index of the target if the target is in the input array; and otherwise # (-1 - ix), where ix is the insertion point that would leave the array sorted. diff --git a/tests/jq.test b/tests/jq.test index ca8e27059f..83b19fb4e9 100644 --- a/tests/jq.test +++ b/tests/jq.test @@ -1731,3 +1731,11 @@ false . |= try . catch . 1 1 + +[uniq(1,1,2,3,3,4)] +null +[1,2,3,4] + +[uniq(empty)] +null +[] diff --git a/tests/onig.test b/tests/onig.test index daacae9cd7..805efabaa6 100644 --- a/tests/onig.test +++ b/tests/onig.test @@ -75,6 +75,45 @@ gsub( "(.*)"; ""; "x") "" "" +gsub( ""; "a"; "g") +"" +"a" + +gsub( "^"; ""; "g") +"a" +"a" + + +# The following is a regression test and should not be construed as a requirement other than that execution should terminate: +gsub( ""; "a"; "g") +"a" +"aa" + +gsub( "$"; "a"; "g") +"a" +"aa" + +gsub( "^"; "a") +"" +"a" + +gsub("(?=u)"; "u") +"qux" +"quux" + +gsub("^.*a"; "b") +"aaa" +"b" + +gsub("^.*?a"; "b") +"aaa" +"baa" + +# The following is for regression testing and should not be construed as a requirement: +[gsub("a"; "b", "c")] +"a" +["b","c"] + [.[] | scan(", ")] ["a,b, c, d, e,f",", a,b, c, d, e,f, "] [", ",", ",", ",", ",", ",", ",", ",", "] @@ -92,7 +131,33 @@ gsub("(?<x>.)[^a]*"; "+\(.x)-") "Abcabc" "+A-+a-" +gsub("(?<x>.)(?<y>[0-9])"; "\(.x|ascii_downcase)\(.y)") +"A1 B2 CD" +"a1 b2 CD" + +gsub("\\b(?<x>.)"; "\(.x|ascii_downcase)") +"ABC DEF" +"aBC dEF" + # utf-8 sub("(?<x>.)"; "\(.x)!") "’" "’!" + +[sub("a"; "b", "c")] +"a" +["b","c"] + +[sub("(?<a>.)"; "\(.a|ascii_upcase)", "\(.a|ascii_downcase)", "c")] +"aB" +["AB","aB","cB"] + +[gsub("(?<a>.)"; "\(.a|ascii_upcase)", "\(.a|ascii_downcase)", "c")] +"aB" +["AB","ab","cc"] + +# splits and _nwise +[splits("")] +"ab" +["","a","b"] +