From 9d478894daaeb924eaf140b21ddd81f213610dd8 Mon Sep 17 00:00:00 2001
From: "Documenter.jl" <documenter@juliadocs.github.io>
Date: Tue, 3 Sep 2024 17:49:46 +0000
Subject: [PATCH] build based on ad1e8b5

---
 .../generated/UserGuide/autovec/index.html    |  10 +-
 .../generated/UserGuide/benchmark/index.html  |  80 ++++----
 .../generated/UserGuide/sep_unite/index.html  |   4 +-
 .../generated/UserGuide/slice/index.html      |   2 +-
 latest/reference/index.html                   | 193 ++++++++++--------
 latest/search/search_index.json               |   2 +-
 6 files changed, 161 insertions(+), 130 deletions(-)
diff --git a/latest/examples/generated/UserGuide/autovec/index.html b/latest/examples/generated/UserGuide/autovec/index.html
index 59d77ab..7636be0 100644
--- a/latest/examples/generated/UserGuide/autovec/index.html
+++ b/latest/examples/generated/UserGuide/autovec/index.html
@@ -805,7 +805,7 @@ <h1>Auto-vectorization</h1>
 
 <span class="n">string</span><span class="p">(</span><span class="n">TidierData</span><span class="o">.</span><span class="n">not_vectorized</span><span class="p">[])</span>
 </code></pre></div>
-<div class="highlight"><pre><span></span><code>"[:getindex, :rand, :esc, :Ref, :Set, :Cols, :collect, :(:), :∘, :lag, :lead, :ntile, :repeat, :across, :desc, :mean, :std, :var, :median, :mad, :first, :last, :minimum, :maximum, :sum, :length, :skipmissing, :quantile, :passmissing, :cumsum, :cumprod, :accumulate, :is_float, :is_integer, :is_string, :cat_rev, :cat_relevel, :cat_infreq, :cat_lump, :cat_reorder, :cat_collapse, :cat_lump_min, :cat_lump_prop, :categorical, :as_categorical, :is_categorical, :unique, :iqr]"
+<div class="highlight"><pre><span></span><code>"[:getindex, :rand, :esc, :Ref, :Set, :Cols, :collect, :(:), :∘, :lag, :lead, :ntile, :repeat, :across, :desc, :mean, :std, :var, :median, :mad, :first, :last, :minimum, :maximum, :sum, :length, :skipmissing, :quantile, :passmissing, :cumsum, :cumprod, :accumulate, :is_float, :is_integer, :is_string, :cat_rev, :cat_relevel, :cat_infreq, :cat_lump, :cat_reorder, :cat_collapse, :cat_lump_min, :cat_lump_prop, :categorical, :as_categorical, :is_categorical, :unique, :iqr, :cat_other, :cat_replace_missing, :cat_recode]"
 </code></pre></div>
 <p>This "auto-vectorization" makes working with TidierData.jl more R-like and convenient. However, if you ever define your own function and try to use it, TidierData.jl may unintentionally vectorize it for you. To prevent auto-vectorization, you can prefix your function with a <code>~</code>.</p>
 <div class="highlight"><pre><span></span><code><span class="n">df</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">DataFrame</span><span class="p">(</span><span class="n">a</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">repeat</span><span class="p">(</span><span class="sc">'a'</span><span class="o">:</span><span class="sc">'e'</span><span class="p">,</span><span class="w"> </span><span class="n">inner</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">2</span><span class="p">),</span><span class="w"> </span><span class="n">b</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">[</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">4</span><span class="p">],</span><span class="w"> </span><span class="n">c</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">11</span><span class="o">:</span><span class="mi">20</span><span class="p">)</span>
@@ -837,7 +837,7 @@ <h1>Auto-vectorization</h1>
 <p>Or you can modify the do-not-vectorize list like this:</p>
 <div class="highlight"><pre><span></span><code><span class="n">push!</span><span class="p">(</span><span class="n">TidierData</span><span class="o">.</span><span class="n">not_vectorized</span><span class="p">[],</span><span class="w"> </span><span class="ss">:new_mean</span><span class="p">)</span>
 </code></pre></div>
-<div class="highlight"><pre><span></span><code>49-element Vector{Symbol}:
+<div class="highlight"><pre><span></span><code>52-element Vector{Symbol}:
  :getindex
  :rand
  :esc
@@ -849,14 +849,14 @@ <h1>Auto-vectorization</h1>
  :∘
  :lag
  ⋮
- :cat_collapse
- :cat_lump_min
- :cat_lump_prop
  :categorical
  :as_categorical
  :is_categorical
  :unique
  :iqr
+ :cat_other
+ :cat_replace_missing
+ :cat_recode
  :new_mean
 </code></pre></div>
 <p>Now <code>new_mean()</code> should behave just like <code>mean()</code> in that it is treated as non-vectorized.</p>
diff --git a/latest/examples/generated/UserGuide/benchmark/index.html b/latest/examples/generated/UserGuide/benchmark/index.html
index 9df79ff..bfde6ac 100644
--- a/latest/examples/generated/UserGuide/benchmark/index.html
+++ b/latest/examples/generated/UserGuide/benchmark/index.html
@@ -887,14 +887,14 @@ <h2 id="filtering">filtering<a class="headerlink" href="#filtering" title="Perma
 
 <span class="nd">@benchmark</span><span class="w"> </span><span class="n">filter</span><span class="p">(</span><span class="n">row</span><span class="w"> </span><span class="o">-&gt;</span><span class="w"> </span><span class="n">row</span><span class="o">.</span><span class="n">Year</span><span class="w"> </span><span class="o">&gt;</span><span class="w"> </span><span class="mi">1939</span><span class="w"> </span><span class="o">&amp;&amp;</span><span class="w"> </span><span class="n">row</span><span class="o">.</span><span class="n">Votes</span><span class="w"> </span><span class="o">&gt;</span><span class="w"> </span><span class="mi">40</span><span class="p">,</span><span class="w"> </span><span class="n">movies</span><span class="p">)</span>
 </code></pre></div>
-<div class="highlight"><pre><span></span><code>BenchmarkTools.Trial: 532 samples with 1 evaluation.
- Range (min … max):  9.001 ms …  18.990 ms  ┊ GC (min … max): 0.00% … 5.50%
- Time  (median):     9.281 ms               ┊ GC (median):    0.00%
- Time  (mean ± σ):   9.411 ms ± 554.583 μs  ┊ GC (mean ± σ):  1.14% ± 2.52%
+<div class="highlight"><pre><span></span><code>BenchmarkTools.Trial: 493 samples with 1 evaluation.
+ Range (min … max):   9.672 ms …  19.010 ms  ┊ GC (min … max): 0.00% … 4.76%
+ Time  (median):      9.973 ms               ┊ GC (median):    0.00%
+ Time  (mean ± σ):   10.144 ms ± 714.436 μs  ┊ GC (mean ± σ):  1.19% ± 2.63%
 
-         ▃▄█▄▅▆▅▂                                              
-  ▂▁▂▂▄▃█████████▇▆▅▃▂▁▁▁▁▂▁▁▁▁▁▁▁▁▃▁▃▂▃▃▃▄▄▃▃▃▃▂▃▂▃▃▂▂▂▁▁▁▁▂ ▃
-  9 ms            Histogram: frequency by time        10.4 ms &lt;
+       ▄▆▇█▇▆▄▃▁▂▂                                              
+  ▂▃▂▅████████████▅▆▃▄▁▃▃▁▁▁▁▁▁▂▃▃▂▃▄▃▅▃▃▄▄▃▄▃▃▄▃▂▁▃▂▁▃▁▁▁▁▁▁▂ ▃
+  9.67 ms         Histogram: frequency by time         11.3 ms &lt;
 
  Memory estimate: 7.76 MiB, allocs estimate: 287668.
 </code></pre></div>
@@ -913,13 +913,13 @@ <h2 id="group_by-summarize">group_by summarize<a class="headerlink" href="#group
 <span class="nd">@benchmark</span><span class="w"> </span><span class="n">combine</span><span class="p">(</span><span class="n">groupby</span><span class="p">(</span><span class="n">movies</span><span class="p">,</span><span class="w"> </span><span class="ss">:MPAA</span><span class="p">),</span><span class="w"> </span><span class="n">nrow</span><span class="w"> </span><span class="o">=&gt;</span><span class="w"> </span><span class="ss">:n</span><span class="p">)</span>
 </code></pre></div>
 <div class="highlight"><pre><span></span><code>BenchmarkTools.Trial: 10000 samples with 1 evaluation.
- Range (min … max):  414.934 μs …  1.865 ms  ┊ GC (min … max): 0.00% … 31.44%
- Time  (median):     422.558 μs              ┊ GC (median):    0.00%
- Time  (mean ± σ):   432.911 μs ± 67.083 μs  ┊ GC (mean ± σ):  1.25% ±  5.15%
+ Range (min … max):  419.333 μs …  3.638 ms  ┊ GC (min … max): 0.00% … 16.92%
+ Time  (median):     426.235 μs              ┊ GC (median):    0.00%
+ Time  (mean ± σ):   438.110 μs ± 74.097 μs  ┊ GC (mean ± σ):  1.22% ±  5.06%
 
-  ▃██▇▆▅▄▄▄▃▂▂▁▁▁                                              ▂
-  ████████████████▇▇▇▆▆▆▆▆▅▅▄▄▄▃▄▁▁▁▁▃▃▁▁▄▇▇▃▅▃▃▃▅▄▆▆▇▆▇▆▅▄▃▄▄ █
-  415 μs        Histogram: log(frequency) by time       573 μs &lt;
+  ▆█▇▆▄▄▄▃▂▂▁▁▁                                                ▂
+  ███████████████▇▇▆▆▆▇▅▅▄▅▄▃▆▆▆▄▄▅▃▅▃▄▃▃▁▄▄▆▇▆▇▆▁▃▄▃▄▅▃▁▃▁▇▆▅ █
+  419 μs        Histogram: log(frequency) by time       619 μs &lt;
 
  Memory estimate: 474.87 KiB, allocs estimate: 270.
 </code></pre></div>
@@ -936,14 +936,14 @@ <h2 id="one-mutate">one mutate<a class="headerlink" href="#one-mutate" title="Pe
 
 <span class="nd">@benchmark</span><span class="w"> </span><span class="n">transform</span><span class="p">(</span><span class="n">movies</span><span class="p">,</span><span class="w"> </span><span class="p">[</span><span class="ss">:Votes</span><span class="p">,</span><span class="w"> </span><span class="ss">:R1</span><span class="p">]</span><span class="w"> </span><span class="o">=&gt;</span><span class="w"> </span><span class="p">((</span><span class="n">v</span><span class="p">,</span><span class="w"> </span><span class="n">r</span><span class="p">)</span><span class="w"> </span><span class="o">-&gt;</span><span class="w"> </span><span class="n">v</span><span class="w"> </span><span class="o">.*</span><span class="w"> </span><span class="n">r</span><span class="p">)</span><span class="w"> </span><span class="o">=&gt;</span><span class="w"> </span><span class="ss">:new_col</span><span class="p">)</span>
 </code></pre></div>
-<div class="highlight"><pre><span></span><code>BenchmarkTools.Trial: 6789 samples with 1 evaluation.
- Range (min … max):  541.700 μs …   9.582 ms  ┊ GC (min … max): 0.00% …  5.69%
- Time  (median):     661.425 μs               ┊ GC (median):    0.00%
- Time  (mean ± σ):   731.742 μs ± 250.650 μs  ┊ GC (mean ± σ):  7.82% ± 12.75%
+<div class="highlight"><pre><span></span><code>BenchmarkTools.Trial: 6516 samples with 1 evaluation.
+ Range (min … max):  557.359 μs …   7.220 ms  ┊ GC (min … max): 0.00% …  9.33%
+ Time  (median):     686.274 μs               ┊ GC (median):    0.00%
+ Time  (mean ± σ):   763.019 μs ± 243.408 μs  ┊ GC (mean ± σ):  9.17% ± 14.37%
 
-         ▅██▇▇▆▅▄▃▂▁                               ▁▁▂▂▂▂▃▂▂▁▁  ▂
-  ▃▁▃▁▅▅████████████▇▆▅▅▁▅▃▁▁▁▁▁▃▃▁▁▁▁▃▆▆▆▅▄▅▄▃▅▅▇█████████████ █
-  542 μs        Histogram: log(frequency) by time       1.25 ms &lt;
+    ▂▅▆▇████▇▆▅▄▄▂▂                    ▁▂▁▁▁▁▁ ▁▁ ▁     ▁ ▁▁    ▂
+  ▃▇████████████████▇█▇▇▆▅▄▅▃▃▅▅▃▅▅▅▅██████████████▇███████████ █
+  557 μs        Histogram: log(frequency) by time       1.45 ms &lt;
 
  Memory estimate: 8.42 MiB, allocs estimate: 223.
 </code></pre></div>
@@ -966,14 +966,14 @@ <h2 id="mutate-6-new-columns">mutate 6 new columns<a class="headerlink" href="#m
 
 <span class="nd">@benchmark</span><span class="w"> </span><span class="n">transform</span><span class="p">(</span><span class="n">movies</span><span class="p">,</span><span class="w"> </span><span class="p">[</span><span class="ss">:Votes</span><span class="p">,</span><span class="w"> </span><span class="ss">:R1</span><span class="p">]</span><span class="w"> </span><span class="o">=&gt;</span><span class="w"> </span><span class="p">((</span><span class="n">v</span><span class="p">,</span><span class="w"> </span><span class="n">r</span><span class="p">)</span><span class="w"> </span><span class="o">-&gt;</span><span class="w"> </span><span class="n">v</span><span class="w"> </span><span class="o">.*</span><span class="w"> </span><span class="n">r</span><span class="p">)</span><span class="w"> </span><span class="o">=&gt;</span><span class="w"> </span><span class="ss">:Votes_R1_Product</span><span class="p">,</span><span class="w"> </span><span class="p">[</span><span class="ss">:Rating</span><span class="p">,</span><span class="w"> </span><span class="ss">:Year</span><span class="p">]</span><span class="w"> </span><span class="o">=&gt;</span><span class="w"> </span><span class="p">((</span><span class="n">r</span><span class="p">,</span><span class="w"> </span><span class="n">y</span><span class="p">)</span><span class="w"> </span><span class="o">-&gt;</span><span class="w"> </span><span class="n">r</span><span class="w"> </span><span class="o">./</span><span class="w"> </span><span class="n">y</span><span class="p">)</span><span class="w"> </span><span class="o">=&gt;</span><span class="w"> </span><span class="ss">:Rating_Year_Ratio</span><span class="p">,</span><span class="w"> </span><span class="p">[</span><span class="ss">:R1</span><span class="p">,</span><span class="w"> </span><span class="ss">:R2</span><span class="p">,</span><span class="w"> </span><span class="ss">:R3</span><span class="p">,</span><span class="w"> </span><span class="ss">:R4</span><span 
class="p">,</span><span class="w"> </span><span class="ss">:R5</span><span class="p">]</span><span class="w"> </span><span class="o">=&gt;</span><span class="w"> </span><span class="p">((</span><span class="n">a</span><span class="p">,</span><span class="w"> </span><span class="n">b</span><span class="p">,</span><span class="w"> </span><span class="n">c</span><span class="p">,</span><span class="w"> </span><span class="n">d</span><span class="p">,</span><span class="w"> </span><span class="n">e</span><span class="p">)</span><span class="w"> </span><span class="o">-&gt;</span><span class="w"> </span><span class="n">a</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">b</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">c</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">d</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">e</span><span class="p">)</span><span class="w"> </span><span class="o">=&gt;</span><span class="w"> </span><span class="ss">:R1_to_R5_Sum</span><span class="p">,</span><span class="w"> </span><span class="ss">:Budget</span><span class="w"> </span><span class="o">=&gt;</span><span class="w"> </span><span class="p">(</span><span class="n">b</span><span class="w"> </span><span class="o">-&gt;</span><span class="w"> </span><span class="n">ifelse</span><span class="o">.</span><span class="p">(</span><span class="n">ismissing</span><span class="o">.</span><span class="p">(</span><span class="n">b</span><span class="p">),</span><span class="w"> </span><span class="nb">missing</span><span class="p">,</span><span class="w"> </span><span class="n">b</span><span class="w"> </span><span class="o">.&gt;</span><span class="w"> </span><span class="mi">50000</span><span class="p">))</span><span class="w"> </span><span class="o">=&gt;</span><span class="w"> </span><span 
class="ss">:High_Budget_Flag</span><span class="p">,</span><span class="w"> </span><span class="p">[</span><span class="ss">:R6</span><span class="p">,</span><span class="w"> </span><span class="ss">:R7</span><span class="p">,</span><span class="w"> </span><span class="ss">:R8</span><span class="p">]</span><span class="w"> </span><span class="o">=&gt;</span><span class="w"> </span><span class="p">((</span><span class="n">f</span><span class="p">,</span><span class="w"> </span><span class="n">g</span><span class="p">,</span><span class="w"> </span><span class="n">h</span><span class="p">)</span><span class="w"> </span><span class="o">-&gt;</span><span class="w"> </span><span class="p">(</span><span class="n">f</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">g</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="n">h</span><span class="p">)</span><span class="w"> </span><span class="o">/</span><span class="w"> </span><span class="mi">3</span><span class="p">)</span><span class="w"> </span><span class="o">=&gt;</span><span class="w"> </span><span class="ss">:R6_to_R8_Avg</span><span class="p">,</span><span class="w"> </span><span class="p">[</span><span class="ss">:Year</span><span class="p">,</span><span class="w"> </span><span class="ss">:Length</span><span class="p">]</span><span class="w"> </span><span class="o">=&gt;</span><span class="w"> </span><span class="p">((</span><span class="n">y</span><span class="p">,</span><span class="w"> </span><span class="n">l</span><span class="p">)</span><span class="w"> </span><span class="o">-&gt;</span><span class="w"> </span><span class="n">y</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="n">l</span><span class="p">)</span><span class="w"> </span><span class="o">=&gt;</span><span class="w"> </span><span class="ss">:Year_Minus_Length</span><span class="w"> </span><span class="p">)</span>
 </code></pre></div>
-<div class="highlight"><pre><span></span><code>BenchmarkTools.Trial: 3937 samples with 1 evaluation.
- Range (min … max):  1.062 ms …   9.694 ms  ┊ GC (min … max): 0.00% …  6.74%
- Time  (median):     1.174 ms               ┊ GC (median):    0.00%
- Time  (mean ± σ):   1.264 ms ± 326.052 μs  ┊ GC (mean ± σ):  6.29% ± 11.05%
+<div class="highlight"><pre><span></span><code>BenchmarkTools.Trial: 4012 samples with 1 evaluation.
+ Range (min … max):  1.022 ms …   6.252 ms  ┊ GC (min … max): 0.00% … 10.10%
+ Time  (median):     1.143 ms               ┊ GC (median):    0.00%
+ Time  (mean ± σ):   1.241 ms ± 282.196 μs  ┊ GC (mean ± σ):  7.04% ± 11.85%
 
-      ▁▅▇█▄▂                                                   
-  ▂▂▃▅██████▇▅▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▂▁▁▁▁▁▂▂▂▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▂▃▂▂▂ ▃
-  1.06 ms         Histogram: frequency by time        1.91 ms &lt;
+       ▃██▅▂                                                   
+  ▂▂▃▄▇█████▇▅▃▃▃▂▂▂▂▁▂▁▂▂▁▁▁▁▁▁▁▁▁▁▂▂▁▁▁▂▂▂▂▂▂▂▂▂▂▃▃▃▄▃▃▃▃▃▃ ▃
+  1.02 ms         Histogram: frequency by time        1.92 ms &lt;
 
  Memory estimate: 10.56 MiB, allocs estimate: 581.
 </code></pre></div>
@@ -992,14 +992,14 @@ <h2 id="groupby-then-2-mutates">groupby then 2 mutates<a class="headerlink" href
 
 <span class="nd">@benchmark</span><span class="w"> </span><span class="n">transform</span><span class="p">(</span><span class="w"> </span><span class="n">transform</span><span class="p">(</span><span class="w"> </span><span class="n">groupby</span><span class="p">(</span><span class="n">movies</span><span class="p">,</span><span class="w"> </span><span class="ss">:MPAA</span><span class="p">),</span><span class="w"> </span><span class="ss">:R1</span><span class="w"> </span><span class="o">=&gt;</span><span class="w"> </span><span class="p">(</span><span class="n">x</span><span class="w"> </span><span class="o">-&gt;</span><span class="w"> </span><span class="n">x</span><span class="o">/</span><span class="mi">2</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="mi">4</span><span class="p">)</span><span class="w"> </span><span class="o">=&gt;</span><span class="w"> </span><span class="ss">:ace</span><span class="p">,</span><span class="w"> </span><span class="n">ungroup</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">false</span><span class="p">),</span><span class="w"> </span><span class="p">[</span><span class="ss">:Votes</span><span class="p">,</span><span class="w"> </span><span class="ss">:R1</span><span class="p">]</span><span class="w"> </span><span class="o">=&gt;</span><span class="w"> </span><span class="p">((</span><span class="n">a</span><span class="p">,</span><span class="w"> </span><span class="n">b</span><span class="p">)</span><span class="w"> </span><span class="o">-&gt;</span><span class="w"> </span><span class="n">b</span><span class="w"> </span><span class="o">.^</span><span class="w"> </span><span class="n">a</span><span class="p">)</span><span class="w"> </span><span class="o">=&gt;</span><span class="w"> </span><span class="ss">:Bace</span><span class="p">,</span><span class="w"> </span><span class="n">ungroup</span><span class="w"> </span><span 
class="o">=</span><span class="w"> </span><span class="nb">false</span><span class="p">)</span>
 </code></pre></div>
-<div class="highlight"><pre><span></span><code>BenchmarkTools.Trial: 671 samples with 1 evaluation.
- Range (min … max):  6.845 ms …  13.608 ms  ┊ GC (min … max): 0.00% … 7.58%
- Time  (median):     7.277 ms               ┊ GC (median):    0.00%
- Time  (mean ± σ):   7.442 ms ± 603.643 μs  ┊ GC (mean ± σ):  3.11% ± 4.29%
+<div class="highlight"><pre><span></span><code>BenchmarkTools.Trial: 683 samples with 1 evaluation.
+ Range (min … max):  6.629 ms …  12.749 ms  ┊ GC (min … max): 0.00% … 7.02%
+ Time  (median):     7.068 ms               ┊ GC (median):    0.00%
+ Time  (mean ± σ):   7.318 ms ± 541.471 μs  ┊ GC (mean ± σ):  2.98% ± 4.19%
 
-    ▁ ▆▇▂█▅ ▁▁                                                 
-  ▂▆███████████▇▆▇▆█▅▄▅▆▄▅▄▅▃▂▄▃▆▃▆▆▆▅▅▅▄▆▄▄▃▂▃▃▁▂▄▂▂▄▃▂▂▃▁▂▁ ▃
-  6.85 ms         Histogram: frequency by time         8.5 ms &lt;
+      ▁ ▄█▅▄▂▃                                                 
+  ▂▃▄▇████████▆▄▄▃▄▂▃▃▄▃▄▅▅▆▅▆▆▅▆▇▆▄▆▅▄▃▃▃▃▂▂▂▂▃▂▃▂▂▁▂▃▂▂▁▁▂▃ ▃
+  6.63 ms         Histogram: frequency by time        8.81 ms &lt;
 
  Memory estimate: 26.17 MiB, allocs estimate: 2449.
 </code></pre></div>
@@ -1017,13 +1017,13 @@ <h2 id="select-5-columns">select 5 columns<a class="headerlink" href="#select-5-
 <span class="nd">@benchmark</span><span class="w"> </span><span class="n">select</span><span class="p">(</span><span class="n">movies</span><span class="p">,</span><span class="w"> </span><span class="ss">:R1</span><span class="p">,</span><span class="w"> </span><span class="ss">:R2</span><span class="p">,</span><span class="w"> </span><span class="ss">:R3</span><span class="p">,</span><span class="w"> </span><span class="ss">:R4</span><span class="p">,</span><span class="w"> </span><span class="ss">:R5</span><span class="p">)</span>
 </code></pre></div>
 <div class="highlight"><pre><span></span><code>BenchmarkTools.Trial: 10000 samples with 1 evaluation.
- Range (min … max):  153.436 μs …   7.502 ms  ┊ GC (min … max): 0.00% … 6.35%
- Time  (median):     220.581 μs               ┊ GC (median):    0.00%
- Time  (mean ± σ):   232.208 μs ± 100.421 μs  ┊ GC (mean ± σ):  4.46% ± 9.85%
+ Range (min … max):  173.423 μs …  4.715 ms  ┊ GC (min … max): 0.00% … 8.59%
+ Time  (median):     221.673 μs              ┊ GC (median):    0.00%
+ Time  (mean ± σ):   237.430 μs ± 95.591 μs  ┊ GC (mean ± σ):  4.87% ± 9.96%
 
-      ▁▄▆▇██▆▄▃▁                                                ▂
-  ▄▃▄▅██████████▇▆▄▄▁▁▁▁▁▁▁▁▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▅▆▆▇▇▇█▇▇▇▇▇ █
-  153 μs        Histogram: log(frequency) by time        622 μs &lt;
+      ▅█▄                                                       
+  ▂▂▄████▆▅▄▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▂▁▂▁▁▁▁▁▁▁▂▁▁▂▁▂▂▂▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂ ▃
+  173 μs          Histogram: frequency by time          688 μs &lt;
 
  Memory estimate: 2.25 MiB, allocs estimate: 200.
 </code></pre></div>
diff --git a/latest/examples/generated/UserGuide/sep_unite/index.html b/latest/examples/generated/UserGuide/sep_unite/index.html
index d64aaed..2904c4f 100644
--- a/latest/examples/generated/UserGuide/sep_unite/index.html
+++ b/latest/examples/generated/UserGuide/sep_unite/index.html
@@ -931,8 +931,8 @@ <h2 id="unite"><code>@unite</code><a class="headerlink" href="#unite" title="Per
 <span class="w">    </span><span class="nd">@unite</span><span class="p">(</span><span class="n">new_col</span><span class="p">,</span><span class="w"> </span><span class="p">(</span><span class="n">b</span><span class="p">,</span><span class="w"> </span><span class="n">c</span><span class="p">,</span><span class="w"> </span><span class="n">d</span><span class="p">),</span><span class="w"> </span><span class="s">"/"</span><span class="p">)</span>
 <span class="k">end</span>
 </code></pre></div>
-<div><div style="float: left;"><span>3×4 DataFrame</span></div><div style="clear: both;"></div></div>
-<div class="data-frame" style="overflow-x: scroll;"><table class="data-frame" style="margin-bottom: 6px;"><thead><tr class="header"><th class="rowNumber" style="font-weight: bold; text-align: right;">Row</th><th style="text-align: left;">b</th><th style="text-align: left;">c</th><th style="text-align: left;">d</th><th style="text-align: left;">new_col</th></tr><tr class="subheader headerLastRow"><th class="rowNumber" style="font-weight: bold; text-align: right;"></th><th title="String" style="text-align: left;">String</th><th title="String" style="text-align: left;">String</th><th title="Union{Missing, String}" style="text-align: left;">String?</th><th title="String" style="text-align: left;">String</th></tr></thead><tbody><tr><td class="rowNumber" style="font-weight: bold; text-align: right;">1</td><td style="text-align: left;">1</td><td style="text-align: left;">1</td><td style="font-style: italic; text-align: left;">missing</td><td style="text-align: left;">1/1</td></tr><tr><td class="rowNumber" style="font-weight: bold; text-align: right;">2</td><td style="text-align: left;">2</td><td style="text-align: left;">2</td><td style="font-style: italic; text-align: left;">missing</td><td style="text-align: left;">2/2</td></tr><tr><td class="rowNumber" style="font-weight: bold; text-align: right;">3</td><td style="text-align: left;">3</td><td style="text-align: left;">3</td><td style="text-align: left;">3</td><td style="text-align: left;">3/3/3</td></tr></tbody></table></div>
+<div><div style="float: left;"><span>3×1 DataFrame</span></div><div style="clear: both;"></div></div>
+<div class="data-frame" style="overflow-x: scroll;"><table class="data-frame" style="margin-bottom: 6px;"><thead><tr class="header"><th class="rowNumber" style="font-weight: bold; text-align: right;">Row</th><th style="text-align: left;">new_col</th></tr><tr class="subheader headerLastRow"><th class="rowNumber" style="font-weight: bold; text-align: right;"></th><th title="String" style="text-align: left;">String</th></tr></thead><tbody><tr><td class="rowNumber" style="font-weight: bold; text-align: right;">1</td><td style="text-align: left;">1/1</td></tr><tr><td class="rowNumber" style="font-weight: bold; text-align: right;">2</td><td style="text-align: left;">2/2</td></tr><tr><td class="rowNumber" style="font-weight: bold; text-align: right;">3</td><td style="text-align: left;">3/3/3</td></tr></tbody></table></div>
 
 <p><a id="@separate_rows"></a></p>
 <p><a id="@separate_rows-1"></a></p>
diff --git a/latest/examples/generated/UserGuide/slice/index.html b/latest/examples/generated/UserGuide/slice/index.html
index 458ef54..52677d7 100644
--- a/latest/examples/generated/UserGuide/slice/index.html
+++ b/latest/examples/generated/UserGuide/slice/index.html
@@ -1110,7 +1110,7 @@ <h2 id="sample-5-random-rows-in-the-data-frame">Sample 5 random rows in the data
 <span class="k">end</span>
 </code></pre></div>
 <div><div style="float: left;"><span>5×3 DataFrame</span></div><div style="clear: both;"></div></div>
-<div class="data-frame" style="overflow-x: scroll;"><table class="data-frame" style="margin-bottom: 6px;"><thead><tr class="header"><th class="rowNumber" style="font-weight: bold; text-align: right;">Row</th><th style="text-align: left;">row_num</th><th style="text-align: left;">a</th><th style="text-align: left;">b</th></tr><tr class="subheader headerLastRow"><th class="rowNumber" style="font-weight: bold; text-align: right;"></th><th title="Int64" style="text-align: left;">Int64</th><th title="String" style="text-align: left;">String</th><th title="Int64" style="text-align: left;">Int64</th></tr></thead><tbody><tr><td class="rowNumber" style="font-weight: bold; text-align: right;">1</td><td style="text-align: right;">6</td><td style="text-align: left;">c</td><td style="text-align: right;">2</td></tr><tr><td class="rowNumber" style="font-weight: bold; text-align: right;">2</td><td style="text-align: right;">1</td><td style="text-align: left;">a</td><td style="text-align: right;">1</td></tr><tr><td class="rowNumber" style="font-weight: bold; text-align: right;">3</td><td style="text-align: right;">5</td><td style="text-align: left;">c</td><td style="text-align: right;">2</td></tr><tr><td class="rowNumber" style="font-weight: bold; text-align: right;">4</td><td style="text-align: right;">8</td><td style="text-align: left;">d</td><td style="text-align: right;">3</td></tr><tr><td class="rowNumber" style="font-weight: bold; text-align: right;">5</td><td style="text-align: right;">7</td><td style="text-align: left;">d</td><td style="text-align: right;">3</td></tr></tbody></table></div>
+<div class="data-frame" style="overflow-x: scroll;"><table class="data-frame" style="margin-bottom: 6px;"><thead><tr class="header"><th class="rowNumber" style="font-weight: bold; text-align: right;">Row</th><th style="text-align: left;">row_num</th><th style="text-align: left;">a</th><th style="text-align: left;">b</th></tr><tr class="subheader headerLastRow"><th class="rowNumber" style="font-weight: bold; text-align: right;"></th><th title="Int64" style="text-align: left;">Int64</th><th title="String" style="text-align: left;">String</th><th title="Int64" style="text-align: left;">Int64</th></tr></thead><tbody><tr><td class="rowNumber" style="font-weight: bold; text-align: right;">1</td><td style="text-align: right;">6</td><td style="text-align: left;">c</td><td style="text-align: right;">2</td></tr><tr><td class="rowNumber" style="font-weight: bold; text-align: right;">2</td><td style="text-align: right;">9</td><td style="text-align: left;">e</td><td style="text-align: right;">3</td></tr><tr><td class="rowNumber" style="font-weight: bold; text-align: right;">3</td><td style="text-align: right;">7</td><td style="text-align: left;">d</td><td style="text-align: right;">3</td></tr><tr><td class="rowNumber" style="font-weight: bold; text-align: right;">4</td><td style="text-align: right;">3</td><td style="text-align: left;">b</td><td style="text-align: right;">1</td></tr><tr><td class="rowNumber" style="font-weight: bold; text-align: right;">5</td><td style="text-align: right;">8</td><td style="text-align: left;">d</td><td style="text-align: right;">3</td></tr></tbody></table></div>
 
 <p><a id="Slice-the-min"></a></p>
 <p><a id="Slice-the-min-1"></a></p>
diff --git a/latest/reference/index.html b/latest/reference/index.html
index a35f381..61880a8 100644
--- a/latest/reference/index.html
+++ b/latest/reference/index.html
@@ -944,7 +944,7 @@ <h2 id="index">Index<a class="headerlink" href="#index" title="Permanent link">
 <li><a href="./#TidierData.@right_join-Tuple{Any,%20Any,%20Any}"><code>TidierData.@right_join</code></a></li>
 <li><a href="./#TidierData.@select-Tuple{Any,%20Vararg{Any}}"><code>TidierData.@select</code></a></li>
 <li><a href="./#TidierData.@semi_join-Tuple{Any,%20Any,%20Any}"><code>TidierData.@semi_join</code></a></li>
-<li><a href="./#TidierData.@separate-NTuple{4,%20Any}"><code>TidierData.@separate</code></a></li>
+<li><a href="./#TidierData.@separate-Tuple{Any,%20Any,%20Any,%20Any,%20Vararg{Any}}"><code>TidierData.@separate</code></a></li>
 <li><a href="./#TidierData.@separate_rows-Tuple{Any,%20Vararg{Any}}"><code>TidierData.@separate_rows</code></a></li>
 <li><a href="./#TidierData.@slice-Tuple{Any,%20Vararg{Any}}"><code>TidierData.@slice</code></a></li>
 <li><a href="./#TidierData.@slice_head-Tuple{Any,%20Vararg{Any}}"><code>TidierData.@slice_head</code></a></li>
@@ -958,7 +958,7 @@ <h2 id="index">Index<a class="headerlink" href="#index" title="Permanent link">
 <li><a href="./#TidierData.@tally-Tuple{Any,%20Vararg{Any}}"><code>TidierData.@tally</code></a></li>
 <li><a href="./#TidierData.@transmute-Tuple{Any,%20Vararg{Any}}"><code>TidierData.@transmute</code></a></li>
 <li><a href="./#TidierData.@ungroup-Tuple{Any}"><code>TidierData.@ungroup</code></a></li>
-<li><a href="./#TidierData.@unite-NTuple{4,%20Any}"><code>TidierData.@unite</code></a></li>
+<li><a href="./#TidierData.@unite-Tuple{Any,%20Any,%20Any,%20Any,%20Vararg{Any}}"><code>TidierData.@unite</code></a></li>
 <li><a href="./#TidierData.@unnest_longer-Tuple{Any,%20Vararg{Any}}"><code>TidierData.@unnest_longer</code></a></li>
 <li><a href="./#TidierData.@unnest_wider-Tuple{Any,%20Vararg{Any}}"><code>TidierData.@unnest_wider</code></a></li>
 </ul>
@@ -979,7 +979,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <li><code>option</code>: "code"</li>
 <li><code>value</code>: <code>true</code> or <code>false</code></li>
 </ul>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/TidierData.jl#L59-L71" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/TidierData.jl#L59-L71" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.across-Tuple" href="#TidierData.across-Tuple">#</a>
 <strong><code>TidierData.across</code></strong> — <em>Method</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="n">across</span><span class="p">(</span><span class="n">variable</span><span class="p">[</span><span class="n">s</span><span class="p">],</span><span class="w"> </span><span class="k">function</span><span class="p">[</span><span class="n">s</span><span class="p">])</span>
@@ -1047,7 +1047,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   4 │ d         4     14          1         11          5         15</span>
 <span class="go">   5 │ e         5     15          1         11          5         15</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/pseudofunctions.jl#L1-L69" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/pseudofunctions.jl#L1-L69" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.as_float-Tuple{Any}" href="#TidierData.as_float-Tuple{Any}">#</a>
 <strong><code>TidierData.as_float</code></strong> — <em>Method</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="n">as_float</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
@@ -1068,7 +1068,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="gp">julia&gt;</span><span class="w"> </span><span class="n">as_float</span><span class="p">(</span><span class="nb">missing</span><span class="p">)</span>
 <span class="go">missing</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/type_conversions.jl#L1-L22" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/type_conversions.jl#L1-L22" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.as_integer-Tuple{Any}" href="#TidierData.as_integer-Tuple{Any}">#</a>
 <strong><code>TidierData.as_integer</code></strong> — <em>Method</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="n">as_integer</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
@@ -1095,7 +1095,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="gp">julia&gt;</span><span class="w"> </span><span class="n">as_integer</span><span class="p">(</span><span class="nb">missing</span><span class="p">)</span>
 <span class="go">missing</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/type_conversions.jl#L20-L47" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/type_conversions.jl#L20-L47" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.as_string-Tuple{Any}" href="#TidierData.as_string-Tuple{Any}">#</a>
 <strong><code>TidierData.as_string</code></strong> — <em>Method</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="n">as_string</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
@@ -1116,7 +1116,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="gp">julia&gt;</span><span class="w"> </span><span class="n">as_string</span><span class="p">(</span><span class="nb">missing</span><span class="p">)</span>
 <span class="go">missing</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/type_conversions.jl#L42-L63" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/type_conversions.jl#L42-L63" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.case_when-Tuple" href="#TidierData.case_when-Tuple">#</a>
 <strong><code>TidierData.case_when</code></strong> — <em>Method</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="n">case_when</span><span class="p">(</span><span class="n">condition</span><span class="w"> </span><span class="o">=&gt;</span><span class="w"> </span><span class="n">return_value</span><span class="p">)</span>
@@ -1192,7 +1192,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   4 │       4      3</span>
 <span class="go">   5 │       5      3</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/conditionals.jl#L28-L104" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/conditionals.jl#L28-L104" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.desc-Tuple" href="#TidierData.desc-Tuple">#</a>
 <strong><code>TidierData.desc</code></strong> — <em>Method</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="n">desc</span><span class="p">(</span><span class="n">col</span><span class="p">)</span>
@@ -1223,7 +1223,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   9 │ e        10     20</span>
 <span class="go">  10 │ e         9     19</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/pseudofunctions.jl#L15-L45" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/pseudofunctions.jl#L15-L45" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.ends_with-Tuple" href="#TidierData.ends_with-Tuple">#</a>
 <strong><code>TidierData.ends_with</code></strong> — <em>Method</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="n">ends_with</span><span class="p">(</span><span class="n">suffix</span><span class="p">)</span>
@@ -1249,7 +1249,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="w">   </span><span class="mi">4</span><span class="w"> </span><span class="n">│</span><span class="w">     </span><span class="mi">4</span><span class="w">     </span><span class="mi">24</span>
 <span class="w">   </span><span class="mi">5</span><span class="w"> </span><span class="n">│</span><span class="w">     </span><span class="mi">5</span><span class="w">     </span><span class="mi">25</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/helperfunctions.jl#L10-L35" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/helperfunctions.jl#L10-L35" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.everything-Tuple{}" href="#TidierData.everything-Tuple{}">#</a>
 <strong><code>TidierData.everything</code></strong> — <em>Method</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="n">everything</span><span class="p">()</span>
@@ -1275,7 +1275,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="w">   </span><span class="mi">4</span><span class="w"> </span><span class="n">│</span><span class="w">    </span><span class="mi">24</span><span class="w">      </span><span class="mi">4</span><span class="w">     </span><span class="mi">14</span>
 <span class="w">   </span><span class="mi">5</span><span class="w"> </span><span class="n">│</span><span class="w">    </span><span class="mi">25</span><span class="w">      </span><span class="mi">5</span><span class="w">     </span><span class="mi">15</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/helperfunctions.jl#L20-L45" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/helperfunctions.jl#L20-L45" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.if_else-Tuple{Union{Missing, Bool}, Any, Any, Any}" href="#TidierData.if_else-Tuple{Union{Missing,%20Bool},%20Any,%20Any,%20Any}">#</a>
 <strong><code>TidierData.if_else</code></strong> — <em>Method</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="n">if_else</span><span class="p">(</span><span class="n">condition</span><span class="p">,</span><span class="w"> </span><span class="n">yes</span><span class="p">,</span><span class="w"> </span><span class="n">no</span><span class="p">,</span><span class="w"> </span><span class="p">[</span><span class="n">miss</span><span class="p">])</span>
@@ -1343,7 +1343,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   4 │       4      3</span>
 <span class="go">   5 │       5      3</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/conditionals.jl#L1-L68" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/conditionals.jl#L1-L68" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.is_float-Tuple{AbstractVector}" href="#TidierData.is_float-Tuple{AbstractVector}">#</a>
 <strong><code>TidierData.is_float</code></strong> — <em>Method</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="n">is_float</span><span class="p">(</span><span class="n">column</span><span class="o">::</span><span class="kt">AbstractVector</span><span class="p">)</span>
@@ -1368,7 +1368,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="gp">julia&gt;</span><span class="w"> </span><span class="n">is_float</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">b</span><span class="p">)</span>
 <span class="go">false</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/is_type.jl#L12-L35" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/is_type.jl#L12-L35" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.is_integer-Tuple{AbstractVector}" href="#TidierData.is_integer-Tuple{AbstractVector}">#</a>
 <strong><code>TidierData.is_integer</code></strong> — <em>Method</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="n">is_integer</span><span class="p">(</span><span class="n">column</span><span class="o">::</span><span class="kt">AbstractVector</span><span class="p">)</span>
@@ -1393,7 +1393,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="gp">julia&gt;</span><span class="w"> </span><span class="n">is_integer</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">d</span><span class="p">)</span>
 <span class="go">false</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/is_type.jl#L23-L46" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/is_type.jl#L23-L46" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.is_number-Tuple{AbstractVector}" href="#TidierData.is_number-Tuple{AbstractVector}">#</a>
 <strong><code>TidierData.is_number</code></strong> — <em>Method</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="n">is_number</span><span class="p">(</span><span class="n">column</span><span class="o">::</span><span class="kt">AbstractVector</span><span class="p">)</span>
@@ -1421,7 +1421,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="gp">julia&gt;</span><span class="w"> </span><span class="n">is_number</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">d</span><span class="p">)</span>
 <span class="go">false</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/is_type.jl#L1-L27" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/is_type.jl#L1-L27" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.is_string-Tuple{AbstractVector}" href="#TidierData.is_string-Tuple{AbstractVector}">#</a>
 <strong><code>TidierData.is_string</code></strong> — <em>Method</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="n">is_string</span><span class="p">(</span><span class="n">column</span><span class="o">::</span><span class="kt">AbstractVector</span><span class="p">)</span>
@@ -1446,7 +1446,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="gp">julia&gt;</span><span class="w"> </span><span class="n">is_string</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">c</span><span class="p">)</span>
 <span class="go">false</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/is_type.jl#L34-L57" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/is_type.jl#L34-L57" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.matches-Tuple{Any, Vararg{Any}}" href="#TidierData.matches-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.matches</code></strong> — <em>Method</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="n">matches</span><span class="p">(</span><span class="n">pattern</span><span class="p">,</span><span class="w"> </span><span class="p">[</span><span class="n">flags</span><span class="p">])</span>
@@ -1500,7 +1500,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="w">   </span><span class="mi">4</span><span class="w"> </span><span class="n">│</span><span class="w">     </span><span class="mi">4</span><span class="w">     </span><span class="mi">14</span>
 <span class="w">   </span><span class="mi">5</span><span class="w"> </span><span class="n">│</span><span class="w">     </span><span class="mi">5</span><span class="w">     </span><span class="mi">15</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/helperfunctions.jl#L15-L68" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/helperfunctions.jl#L15-L68" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.missing_if-Tuple{Any, Any}" href="#TidierData.missing_if-Tuple{Any,%20Any}">#</a>
 <strong><code>TidierData.missing_if</code></strong> — <em>Method</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="n">missing_if</span><span class="p">(</span><span class="n">x</span><span class="p">,</span><span class="w"> </span><span class="n">value</span><span class="p">)</span>
@@ -1530,7 +1530,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   3 │       3  banana</span>
 <span class="go">   4 │ missing  cherry</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/missings.jl#L125-L154" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/missings.jl#L125-L154" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.n-Tuple{}" href="#TidierData.n-Tuple{}">#</a>
 <strong><code>TidierData.n</code></strong> — <em>Method</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="n">n</span><span class="p">()</span>
@@ -1566,7 +1566,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   4 │ d         2</span>
 <span class="go">   5 │ e         2</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/pseudofunctions.jl#L22-L57" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/pseudofunctions.jl#L22-L57" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.ntile-Tuple{Any, Integer}" href="#TidierData.ntile-Tuple{Any,%20Integer}">#</a>
 <strong><code>TidierData.ntile</code></strong> — <em>Method</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="n">ntile</span><span class="p">(</span><span class="n">x</span><span class="p">,</span><span class="w"> </span><span class="n">n</span><span class="o">::</span><span class="kt">Integer</span><span class="p">)</span>
@@ -1636,7 +1636,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   7 │     7        3</span>
 <span class="go">   8 │     8        3</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/ntile.jl#L10-L81" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/ntile.jl#L10-L81" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.replace_missing-Tuple{Any, Any}" href="#TidierData.replace_missing-Tuple{Any,%20Any}">#</a>
 <strong><code>TidierData.replace_missing</code></strong> — <em>Method</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="n">replace_missing</span><span class="p">(</span><span class="n">x</span><span class="p">,</span><span class="w"> </span><span class="n">replacement</span><span class="p">)</span>
@@ -1666,7 +1666,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   3 │     3     35</span>
 <span class="go">   4 │     4      8</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/missings.jl#L130-L159" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/missings.jl#L130-L159" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.row_number-Tuple{}" href="#TidierData.row_number-Tuple{}">#</a>
 <strong><code>TidierData.row_number</code></strong> — <em>Method</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="n">row_number</span><span class="p">()</span>
@@ -1728,7 +1728,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   4 │ b</span>
 <span class="go">   5 │ c</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/pseudofunctions.jl#L29-L90" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/pseudofunctions.jl#L29-L90" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.starts_with-Tuple" href="#TidierData.starts_with-Tuple">#</a>
 <strong><code>TidierData.starts_with</code></strong> — <em>Method</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="n">starts_with</span><span class="p">(</span><span class="n">prefix</span><span class="p">)</span>
@@ -1754,7 +1754,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="w">   </span><span class="mi">4</span><span class="w"> </span><span class="n">│</span><span class="w">     </span><span class="mi">4</span><span class="w">     </span><span class="mi">14</span>
 <span class="w">   </span><span class="mi">5</span><span class="w"> </span><span class="n">│</span><span class="w">     </span><span class="mi">5</span><span class="w">     </span><span class="mi">15</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/helperfunctions.jl#L5-L30" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/helperfunctions.jl#L5-L30" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.where-Tuple" href="#TidierData.where-Tuple">#</a>
 <strong><code>TidierData.where</code></strong> — <em>Method</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="k">where</span><span class="p">(</span><span class="k">function</span><span class="p">)</span>
@@ -1822,7 +1822,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   4 │ d        11.0     26.0     41.0</span>
 <span class="go">   5 │ e        14.0     29.0     44.0</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/pseudofunctions.jl#L8-L76" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/pseudofunctions.jl#L8-L76" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@anti_join-Tuple{Any, Any, Any}" href="#TidierData.@anti_join-Tuple{Any,%20Any,%20Any}">#</a>
 <strong><code>TidierData.@anti_join</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@anti_join</span><span class="p">(</span><span class="n">df1</span><span class="p">,</span><span class="w"> </span><span class="n">df2</span><span class="p">,</span><span class="w"> </span><span class="p">[</span><span class="n">by</span><span class="p">])</span>
@@ -1874,7 +1874,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">─────┼───────────────</span>
 <span class="go">   1 │ b           2</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/joins.jl#L109-L160" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/joins.jl#L109-L160" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@arrange-Tuple{Any, Vararg{Any}}" href="#TidierData.@arrange-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@arrange</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@arrange</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">exprs</span><span class="o">...</span><span class="p">)</span>
@@ -1924,7 +1924,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   9 │ e        10     20</span>
 <span class="go">  10 │ e         9     19</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/TidierData.jl#L511-L560" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/TidierData.jl#L511-L560" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@bind_cols-Tuple{Any, Vararg{Any}}" href="#TidierData.@bind_cols-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@bind_cols</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@bind_cols</span><span class="p">(</span><span class="n">dfs</span><span class="o">...</span><span class="p">)</span>
@@ -1952,7 +1952,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   2 │     2      2      5      5      8      8</span>
 <span class="go">   3 │     3      3      6      6      9      9</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/binding.jl#L23-L50" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/binding.jl#L23-L50" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@bind_rows-Tuple{Any, Vararg{Any}}" href="#TidierData.@bind_rows-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@bind_rows</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@bind_rows</span><span class="p">(</span><span class="n">dfs</span><span class="o">...</span><span class="p">,</span><span class="w"> </span><span class="n">id</span><span class="p">)</span>
@@ -2020,7 +2020,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   8 │     8  missing        8      3</span>
 <span class="go">   9 │     9  missing        9      3</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/binding.jl#L1-L71" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/binding.jl#L1-L71" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@count-Tuple{Any, Vararg{Any}}" href="#TidierData.@count-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@count</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@count</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">exprs</span><span class="o">...</span><span class="p">,</span><span class="w"> </span><span class="p">[</span><span class="n">wt</span><span class="p">],</span><span class="w"> </span><span class="p">[</span><span class="n">sort</span><span class="p">])</span>
@@ -2096,7 +2096,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   3 │ c            7</span>
 <span class="go">   4 │ a            6       </span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/compound_verbs.jl#L51-L127" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/compound_verbs.jl#L51-L127" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@distinct-Tuple{Any, Vararg{Any}}" href="#TidierData.@distinct-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@distinct</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="n">distinct</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">exprs</span><span class="o">...</span><span class="p">)</span>
@@ -2169,7 +2169,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   9 │ e         4     19</span>
 <span class="go">  10 │ e         5     20</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/TidierData.jl#L536-L611" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/TidierData.jl#L536-L611" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@drop_missing-Tuple{Any, Vararg{Any}}" href="#TidierData.@drop_missing-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@drop_missing</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@drop_missing</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="p">[</span><span class="n">cols</span><span class="o">...</span><span class="p">])</span>
@@ -2229,7 +2229,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   2 │     2  missing </span>
 <span class="go">   3 │     4        4</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/missings.jl#L1-L61" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/missings.jl#L1-L61" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@fill_missing-Tuple{Any, Vararg{Any}}" href="#TidierData.@fill_missing-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@fill_missing</code></strong> — <em>Macro</em>.</p>
 <p>@fill_missing(df, [columns...], direction)</p>
@@ -2301,7 +2301,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   2 │      6.0        3.0  missing          3.0  b</span>
 <span class="go">   3 │      6.0  missing          6.0  missing    b</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/missings.jl#L89-L161" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/missings.jl#L89-L161" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@filter-Tuple{Any, Vararg{Any}}" href="#TidierData.@filter-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@filter</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@filter</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">exprs</span><span class="o">...</span><span class="p">)</span>
@@ -2346,7 +2346,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   1 │ a         1     11</span>
 <span class="go">   2 │ c         3     13</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/TidierData.jl#L380-L424" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/TidierData.jl#L380-L424" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@full_join-Tuple{Any, Any, Any}" href="#TidierData.@full_join-Tuple{Any,%20Any,%20Any}">#</a>
 <strong><code>TidierData.@full_join</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@full_join</span><span class="p">(</span><span class="n">df1</span><span class="p">,</span><span class="w"> </span><span class="n">df2</span><span class="p">,</span><span class="w"> </span><span class="p">[</span><span class="n">by</span><span class="p">])</span>
@@ -2408,7 +2408,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   2 │ b             2  missing </span>
 <span class="go">   3 │ c       missing        4</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/joins.jl#L82-L143" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/joins.jl#L82-L143" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@glimpse" href="#TidierData.@glimpse">#</a>
 <strong><code>TidierData.@glimpse</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@glimpse</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">width</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">80</span><span class="p">)</span>
@@ -2445,7 +2445,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">.b             Int64          1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,</span>
 <span class="go">.c             String         a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a,</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/TidierData.jl#L615-L652" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/TidierData.jl#L615-L652" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@group_by-Tuple{Any, Vararg{Any}}" href="#TidierData.@group_by-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@group_by</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@group_by</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">exprs</span><span class="o">...</span><span class="p">)</span>
@@ -2515,7 +2515,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   4 │ d         4.0</span>
 <span class="go">   5 │ e         5.0</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/TidierData.jl#L439-L509" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/TidierData.jl#L439-L509" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@head" href="#TidierData.@head">#</a>
 <strong><code>TidierData.@head</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="w">   </span><span class="nd">@head</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">value</span><span class="p">)</span>
@@ -2583,7 +2583,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
    1 │ b           5
    2 │ b           6
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/TidierData.jl#L691-L757" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/TidierData.jl#L691-L757" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@inner_join-Tuple{Any, Any, Any}" href="#TidierData.@inner_join-Tuple{Any,%20Any,%20Any}">#</a>
 <strong><code>TidierData.@inner_join</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@inner_join</span><span class="p">(</span><span class="n">df1</span><span class="p">,</span><span class="w"> </span><span class="n">df2</span><span class="p">,</span><span class="w"> </span><span class="p">[</span><span class="n">by</span><span class="p">])</span>
@@ -2635,7 +2635,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">─────┼──────────────────────</span>
 <span class="go">   1 │ a           1      3</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/joins.jl#L55-L106" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/joins.jl#L55-L106" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@left_join-Tuple{Any, Any, Any}" href="#TidierData.@left_join-Tuple{Any,%20Any,%20Any}">#</a>
 <strong><code>TidierData.@left_join</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@left_join</span><span class="p">(</span><span class="n">df1</span><span class="p">,</span><span class="w"> </span><span class="n">df2</span><span class="p">,</span><span class="w"> </span><span class="p">[</span><span class="n">by</span><span class="p">])</span>
@@ -2692,7 +2692,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   1 │ a           1        3</span>
 <span class="go">   2 │ b           2  missing</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/joins.jl#L1-L57" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/joins.jl#L1-L57" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@mutate-Tuple{Any, Vararg{Any}}" href="#TidierData.@mutate-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@mutate</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@mutate</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">exprs</span><span class="o">...</span><span class="p">)</span>
@@ -2793,7 +2793,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   4 │ d         4     14          1         11</span>
 <span class="go">   5 │ e         5     15          1         11</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/TidierData.jl#L249-L351" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/TidierData.jl#L249-L351" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@nest-Tuple{Any, Vararg{Any}}" href="#TidierData.@nest-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@nest</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@nest</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">new_column</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">nesting_columns</span><span class="p">)</span>
@@ -2921,7 +2921,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">  14 │ e        14     44     29</span>
 <span class="go">  15 │ e        15     45     30</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/nests.jl#L240-L365" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/nests.jl#L240-L365" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@pivot_longer-Tuple{Any, Vararg{Any}}" href="#TidierData.@pivot_longer-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@pivot_longer</code></strong> — <em>Macro</em>.</p>
 <p>@pivot<em>longer(df, cols, [names</em>to], [values_to])</p>
@@ -2986,7 +2986,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   3 │     1  B           2</span>
 <span class="go">   4 │     2  B           4</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/pivots.jl#L40-L106" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/pivots.jl#L40-L106" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@pivot_wider-Tuple{Any, Vararg{Any}}" href="#TidierData.@pivot_wider-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@pivot_wider</code></strong> — <em>Macro</em>.</p>
 <p>@pivot<em>wider(df, names</em>from, values<em>from[, values</em>fill])</p>
@@ -3031,7 +3031,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   1 │     1      1      2</span>
 <span class="go">   2 │     2      0      4</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/pivots.jl#L1-L46" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/pivots.jl#L1-L46" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@pull-Tuple{Any, Any}" href="#TidierData.@pull-Tuple{Any,%20Any}">#</a>
 <strong><code>TidierData.@pull</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@pull</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">column</span><span class="p">)</span>
@@ -3061,7 +3061,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go"> 4</span>
 <span class="go"> 5</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/TidierData.jl#L600-L629" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/TidierData.jl#L600-L629" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@relocate-Tuple{Any, Vararg{Any}}" href="#TidierData.@relocate-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@relocate</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@relocate</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">columns</span><span class="p">,</span><span class="w"> </span><span class="n">before</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">nothing</span><span class="p">,</span><span class="w"> </span><span class="n">after</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">nothing</span><span class="p">)</span>
@@ -3123,7 +3123,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   4 │     9  D           4  B         4  D</span>
 <span class="go">   5 │    10  E           5  C         5  E</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/relocate.jl#L36-L97" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/relocate.jl#L36-L97" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@rename-Tuple{Any, Vararg{Any}}" href="#TidierData.@rename-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@rename</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@rename</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">exprs</span><span class="o">...</span><span class="p">)</span>
@@ -3150,7 +3150,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   4 │ d         4     14</span>
 <span class="go">   5 │ e         5     15</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/TidierData.jl#L190-L217" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/TidierData.jl#L190-L217" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@rename_with-Tuple{Any, Any, Vararg{Any}}" href="#TidierData.@rename_with-Tuple{Any,%20Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@rename_with</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="w"> </span><span class="nd">@rename_with</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">fn</span><span class="p">,</span><span class="w"> </span><span class="n">exprs</span><span class="o">...</span><span class="p">)</span>
@@ -3190,7 +3190,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   2 │ banana  doc2          2</span>
 <span class="go">   3 │ cherry  doc3          3</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/TidierData.jl#L639-L679" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/TidierData.jl#L639-L679" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@right_join-Tuple{Any, Any, Any}" href="#TidierData.@right_join-Tuple{Any,%20Any,%20Any}">#</a>
 <strong><code>TidierData.@right_join</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@right_join</span><span class="p">(</span><span class="n">df1</span><span class="p">,</span><span class="w"> </span><span class="n">df2</span><span class="p">,</span><span class="w"> </span><span class="p">[</span><span class="n">by</span><span class="p">])</span>
@@ -3247,7 +3247,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   1 │ a             1      3</span>
 <span class="go">   2 │ c       missing      4</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/joins.jl#L28-L84" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/joins.jl#L28-L84" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@select-Tuple{Any, Vararg{Any}}" href="#TidierData.@select-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@select</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@select</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">exprs</span><span class="o">...</span><span class="p">)</span>
@@ -3423,7 +3423,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   4 │     4     14</span>
 <span class="go">   5 │     5     15</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/TidierData.jl#L72-L249" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/TidierData.jl#L72-L249" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@semi_join-Tuple{Any, Any, Any}" href="#TidierData.@semi_join-Tuple{Any,%20Any,%20Any}">#</a>
 <strong><code>TidierData.@semi_join</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@semi_join</span><span class="p">(</span><span class="n">df1</span><span class="p">,</span><span class="w"> </span><span class="n">df2</span><span class="p">,</span><span class="w"> </span><span class="p">[</span><span class="n">by</span><span class="p">])</span>
@@ -3475,17 +3475,18 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">─────┼───────────────</span>
 <span class="go">   1 │ a           1</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/joins.jl#L136-L187" class="documenter-source">source</a><br></p>
-<p><a id="TidierData.@separate-NTuple{4, Any}" href="#TidierData.@separate-NTuple{4,%20Any}">#</a>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/joins.jl#L136-L187" class="documenter-source">source</a><br></p>
+<p><a id="TidierData.@separate-Tuple{Any, Any, Any, Any, Vararg{Any}}" href="#TidierData.@separate-Tuple{Any,%20Any,%20Any,%20Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@separate</code></strong> — <em>Macro</em>.</p>
-<p>@separate(df, From, Into, Separator)</p>
+<p>@separate(df, from, into, sep, extra = "merge")</p>
 <p>Separate a string column into mulitiple new columns based on a specified delimter </p>
 <p><strong>Arguments</strong></p>
 <ul>
 <li><code>df</code>: A DataFrame</li>
-<li><code>From</code>: Column that will be split</li>
-<li><code>Into</code>: New column names, supports [] or ()</li>
-<li><code>Separator</code>: the string or chacater on which to split</li>
+<li><code>from</code>: Column that will be split</li>
+<li><code>into</code>: New column names, supports [] or ()</li>
+<li><code>sep</code>: the string or character on which to split</li>
+<li><code>extra</code>: one of "merge", "warn", or "drop". If splitting produces more pieces than there are <code>into</code> columns, <code>extra</code> determines whether the additional pieces are merged into the final column or dropped. "warn" generates a warning message for dropped values.</li>
 </ul>
 <p><strong>Examples</strong></p>
 <div class="highlight"><pre><span></span><code><span class="gp">julia&gt;</span><span class="w"> </span><span class="n">df</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">DataFrame</span><span class="p">(</span><span class="n">a</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">[</span><span class="s">"1-1"</span><span class="p">,</span><span class="w"> </span><span class="s">"2-2"</span><span class="p">,</span><span class="w"> </span><span class="s">"3-3-3"</span><span class="p">]);</span>
@@ -3509,18 +3510,38 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   1 │ 1          1          missing    </span>
 <span class="go">   2 │ 2          2          missing    </span>
 <span class="go">   3 │ 3          3          3</span>
+
+<span class="gp">julia&gt;</span><span class="w"> </span><span class="nd">@separate</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">a</span><span class="p">,</span><span class="w"> </span><span class="p">(</span><span class="n">b</span><span class="p">,</span><span class="w"> </span><span class="n">c</span><span class="p">),</span><span class="w"> </span><span class="s">"-"</span><span class="p">)</span>
+<span class="go">3×2 DataFrame</span>
+<span class="go"> Row │ b          c      </span>
+<span class="go">     │ SubStrin…  String </span>
+<span class="go">─────┼───────────────────</span>
+<span class="go">   1 │ 1          1</span>
+<span class="go">   2 │ 2          2</span>
+<span class="go">   3 │ 3          3-3</span>
+
+<span class="gp">julia&gt;</span><span class="w"> </span><span class="nd">@chain</span><span class="w"> </span><span class="n">df</span><span class="w"> </span><span class="k">begin</span>
+<span class="w">         </span><span class="nd">@separate</span><span class="p">(</span><span class="n">a</span><span class="p">,</span><span class="w"> </span><span class="p">(</span><span class="n">b</span><span class="p">,</span><span class="w"> </span><span class="n">c</span><span class="p">),</span><span class="w"> </span><span class="s">"-"</span><span class="p">,</span><span class="w"> </span><span class="n">extra</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">"drop"</span><span class="p">)</span>
+<span class="w">       </span><span class="k">end</span>
+<span class="go">3×2 DataFrame</span>
+<span class="go"> Row │ b          c         </span>
+<span class="go">     │ SubStrin…  SubStrin… </span>
+<span class="go">─────┼──────────────────────</span>
+<span class="go">   1 │ 1          1</span>
+<span class="go">   2 │ 2          2</span>
+<span class="go">   3 │ 3          3</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/separate_unite.jl#L9-L44" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/separate_unite.jl#L9-L66" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@separate_rows-Tuple{Any, Vararg{Any}}" href="#TidierData.@separate_rows-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@separate_rows</code></strong> — <em>Macro</em>.</p>
-<div class="highlight"><pre><span></span><code><span class="n">separate_rows</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">columns</span><span class="o">...</span><span class="p">,</span><span class="w"> </span><span class="n">delimiter</span><span class="p">)</span>
+<div class="highlight"><pre><span></span><code><span class="n">separate_rows</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">columns</span><span class="o">...</span><span class="p">,</span><span class="w"> </span><span class="n">sep</span><span class="p">)</span>
 </code></pre></div>
 <p>Split the contents of specified columns in a DataFrame into multiple rows based on a given delimiter.</p>
 <p><strong>Arguments</strong></p>
 <ul>
 <li><code>df</code>: A DataFrame</li>
 <li><code>columns</code>: A column or multiple columns to be split. Can be a mix of integers and column names.</li>
-<li><code>delimiter</code>: The string or character or regular expression used to split the column values.</li>
+<li><code>sep</code>: The string or character or regular expression used to split the column values.</li>
 </ul>
 <p><strong>Examples</strong></p>
 <div class="highlight"><pre><span></span><code><span class="gp">julia&gt;</span><span class="w"> </span><span class="n">df</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">DataFrame</span><span class="p">(</span><span class="n">a</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">1</span><span class="o">:</span><span class="mi">3</span><span class="p">,</span>
@@ -3535,7 +3556,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   2 │     2  aa;bb;cc  2;3;4   8;9;10</span>
 <span class="go">   3 │     3  dd;ee     5;6     11;12</span>
 
-<span class="gp">julia&gt;</span><span class="w"> </span><span class="nd">@separate_rows</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="mi">4</span><span class="p">,</span><span class="w"> </span><span class="s">";"</span><span class="w"> </span><span class="p">)</span>
+<span class="gp">julia&gt;</span><span class="w"> </span><span class="nd">@separate_rows</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="mi">2</span><span class="p">,</span><span class="w"> </span><span class="mi">4</span><span class="p">,</span><span class="w"> </span><span class="s">";"</span><span class="p">)</span>
 <span class="go">6×4 DataFrame</span>
 <span class="go"> Row │ a      b          c       d         </span>
 <span class="go">     │ Int64  SubStrin…  String  SubStrin… </span>
@@ -3547,7 +3568,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   5 │     3  dd         5;6     11</span>
 <span class="go">   6 │     3  ee         5;6     12</span>
 
-<span class="gp">julia&gt;</span><span class="w"> </span><span class="nd">@separate_rows</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">b</span><span class="o">:</span><span class="n">d</span><span class="p">,</span><span class="w"> </span><span class="s">";"</span><span class="w"> </span><span class="p">)</span>
+<span class="gp">julia&gt;</span><span class="w"> </span><span class="nd">@separate_rows</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">b</span><span class="o">:</span><span class="n">d</span><span class="p">,</span><span class="w"> </span><span class="s">";"</span><span class="p">)</span>
 <span class="go">6×4 DataFrame</span>
 <span class="go"> Row │ a      b          c          d         </span>
 <span class="go">     │ Int64  SubStrin…  SubStrin…  SubStrin… </span>
@@ -3559,7 +3580,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   5 │     3  dd         5          11</span>
 <span class="go">   6 │     3  ee         6          12</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/separate_unite.jl#L88-L136" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/separate_unite.jl#L132-L180" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@slice-Tuple{Any, Vararg{Any}}" href="#TidierData.@slice-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@slice</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@slice</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">exprs</span><span class="o">...</span><span class="p">)</span>
@@ -3652,7 +3673,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   2 │ b         4     14</span>
 <span class="go">   3 │ c         7     17</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/slice.jl#L1-L93" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/slice.jl#L1-L93" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@slice_head-Tuple{Any, Vararg{Any}}" href="#TidierData.@slice_head-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@slice_head</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@slice_head</span><span class="p">(</span><span class="n">df</span><span class="p">;</span><span class="w"> </span><span class="n">n</span><span class="p">,</span><span class="w"> </span><span class="n">prop</span><span class="p">)</span>
@@ -3691,7 +3712,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   1 │ missing         0.3       0.2</span>
 <span class="go">   2 │       0.2       2.0       0.2</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/slice.jl#L303-L341" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/slice.jl#L303-L341" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@slice_max-Tuple{Any, Vararg{Any}}" href="#TidierData.@slice_max-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@slice_max</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@slice_max</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">column</span><span class="p">;</span><span class="w"> </span><span class="n">with_ties</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="n">n</span><span class="p">,</span><span class="w"> </span><span class="n">prop</span><span class="p">,</span><span class="w"> </span><span class="n">missing_rm</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">true</span><span class="p">)</span>
@@ -3753,7 +3774,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   2 │      6.0       7.0       6.0</span>
 <span class="go">   3 │      1.0       6.0       1.0</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/slice.jl#L74-L135" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/slice.jl#L74-L135" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@slice_min-Tuple{Any, Vararg{Any}}" href="#TidierData.@slice_min-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@slice_min</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@slice_min</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">column</span><span class="p">;</span><span class="w"> </span><span class="n">with_ties</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">true</span><span class="p">,</span><span class="w"> </span><span class="n">n</span><span class="p">,</span><span class="w"> </span><span class="n">prop</span><span class="p">,</span><span class="w"> </span><span class="n">missing_rm</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">true</span><span class="p">)</span>
@@ -3815,7 +3836,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   2 │ missing         0.3  missing   </span>
 <span class="go">   3 │       0.2       2.0        0.2</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/slice.jl#L189-L250" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/slice.jl#L189-L250" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@slice_sample-Tuple{Any, Vararg{Any}}" href="#TidierData.@slice_sample-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@slice_sample</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@slice_sample</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="p">[</span><span class="n">n</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">1</span><span class="p">,</span><span class="w"> </span><span class="n">prop</span><span class="p">,</span><span class="w"> </span><span class="n">replace</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">false</span><span class="p">])</span>
@@ -3889,7 +3910,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="w">   </span><span class="mi">4</span><span class="w"> </span><span class="n">│</span><span class="w">     </span><span class="mi">9</span><span class="w">     </span><span class="mi">19</span>
 <span class="w">   </span><span class="mi">5</span><span class="w"> </span><span class="n">│</span><span class="w">     </span><span class="mi">8</span><span class="w">     </span><span class="mi">18</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/slice.jl#L45-L118" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/slice.jl#L45-L118" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@slice_tail-Tuple{Any, Vararg{Any}}" href="#TidierData.@slice_tail-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@slice_tail</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@slice_tail</span><span class="p">(</span><span class="n">df</span><span class="p">;</span><span class="w"> </span><span class="n">n</span><span class="p">,</span><span class="w"> </span><span class="n">prop</span><span class="p">)</span>
@@ -3928,7 +3949,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   1 │      5.0       7.0       5.0</span>
 <span class="go">   2 │      6.0       7.0       6.0</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/slice.jl#L353-L391" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/slice.jl#L353-L391" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@summarise-Tuple{Any, Vararg{Any}}" href="#TidierData.@summarise-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@summarise</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@summarize</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">exprs</span><span class="o">...</span><span class="p">)</span>
@@ -3992,7 +4013,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">─────┼──────────────────────</span>
 <span class="go">   1 │         1         11</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/TidierData.jl#L373-L436" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/TidierData.jl#L373-L436" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@summarize-Tuple{Any, Vararg{Any}}" href="#TidierData.@summarize-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@summarize</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@summarize</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">exprs</span><span class="o">...</span><span class="p">)</span>
@@ -4056,7 +4077,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">─────┼──────────────────────</span>
 <span class="go">   1 │         1         11</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/TidierData.jl#L308-L371" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/TidierData.jl#L308-L371" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@summary-Tuple{Any, Vararg{Any}}" href="#TidierData.@summary-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@summary</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="w">   </span><span class="nd">@summary</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">cols</span><span class="o">...</span><span class="p">)</span>
@@ -4081,7 +4102,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="w">         </span><span class="nd">@summary</span><span class="p">(</span><span class="n">b</span><span class="o">:</span><span class="n">d</span><span class="p">)</span>
 <span class="w">       </span><span class="k">end</span><span class="p">;</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/summary.jl#L22-L46" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/summary.jl#L22-L46" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@tally-Tuple{Any, Vararg{Any}}" href="#TidierData.@tally-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@tally</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@tally</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="p">[</span><span class="n">wt</span><span class="p">],</span><span class="w"> </span><span class="p">[</span><span class="n">sort</span><span class="p">])</span>
@@ -4159,7 +4180,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   3 │ c            7</span>
 <span class="go">   4 │ a            6       </span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/compound_verbs.jl#L5-L83" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/compound_verbs.jl#L5-L83" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@transmute-Tuple{Any, Vararg{Any}}" href="#TidierData.@transmute-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@transmute</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@transmute</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">exprs</span><span class="o">...</span><span class="p">)</span>
@@ -4186,7 +4207,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   4 │    18</span>
 <span class="go">   5 │    20</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/TidierData.jl#L131-L158" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/TidierData.jl#L131-L158" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@ungroup-Tuple{Any}" href="#TidierData.@ungroup-Tuple{Any}">#</a>
 <strong><code>TidierData.@ungroup</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@ungroup</span><span class="p">(</span><span class="n">df</span><span class="p">)</span>
@@ -4230,10 +4251,10 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   4 │ d         4     14</span>
 <span class="go">   5 │ e         5     15</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/TidierData.jl#L494-L538" class="documenter-source">source</a><br></p>
-<p><a id="TidierData.@unite-NTuple{4, Any}" href="#TidierData.@unite-NTuple{4,%20Any}">#</a>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/TidierData.jl#L494-L538" class="documenter-source">source</a><br></p>
+<p><a id="TidierData.@unite-Tuple{Any, Any, Any, Any, Vararg{Any}}" href="#TidierData.@unite-Tuple{Any,%20Any,%20Any,%20Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@unite</code></strong> — <em>Macro</em>.</p>
-<div class="highlight"><pre><span></span><code><span class="w">  </span><span class="nd">@unite</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">new_cols</span><span class="p">,</span><span class="w"> </span><span class="n">from_cols</span><span class="p">,</span><span class="w"> </span><span class="n">sep</span><span class="p">)</span>
+<div class="highlight"><pre><span></span><code><span class="w">  </span><span class="nd">@unite</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">new_cols</span><span class="p">,</span><span class="w"> </span><span class="n">from_cols</span><span class="p">,</span><span class="w"> </span><span class="n">sep</span><span class="p">,</span><span class="w"> </span><span class="n">remove</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">true</span><span class="p">)</span>
 </code></pre></div>
 <p>Separate a multiple columns into one new columns using a specific delimter</p>
 <p><strong>Arguments</strong></p>
@@ -4241,12 +4262,22 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <li><code>df</code>: A DataFrame</li>
 <li><code>new_col</code>: New column that will recieve the combination</li>
 <li><code>from_cols</code>: Column names that it will combine, supports [] or ()</li>
-<li><code>sep</code>: the string or character that will seprate the values in the new column</li>
+<li><code>sep</code>: the string or character that will separate the values in the new column</li>
+<li><code>remove</code>: defaults to <code>true</code>, removes input columns from data frame</li>
 </ul>
 <p><strong>Examples</strong></p>
 <div class="highlight"><pre><span></span><code><span class="gp">julia&gt;</span><span class="w"> </span><span class="n">df</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">DataFrame</span><span class="p">(</span><span class="w"> </span><span class="n">b</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">[</span><span class="s">"1"</span><span class="p">,</span><span class="w"> </span><span class="s">"2"</span><span class="p">,</span><span class="w"> </span><span class="s">"3"</span><span class="p">],</span><span class="w"> </span><span class="n">c</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">[</span><span class="s">"1"</span><span class="p">,</span><span class="w"> </span><span class="s">"2"</span><span class="p">,</span><span class="w"> </span><span class="s">"3"</span><span class="p">],</span><span class="w"> </span><span class="n">d</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">[</span><span class="nb">missing</span><span class="p">,</span><span class="w"> </span><span class="nb">missing</span><span class="p">,</span><span class="w"> </span><span class="s">"3"</span><span class="p">]);</span>
 
 <span class="gp">julia&gt;</span><span class="w"> </span><span class="nd">@unite</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">new_col</span><span class="p">,</span><span class="w"> </span><span class="p">(</span><span class="n">b</span><span class="p">,</span><span class="w"> </span><span class="n">c</span><span class="p">,</span><span class="w"> </span><span class="n">d</span><span class="p">),</span><span class="w"> </span><span class="s">"-"</span><span class="p">)</span>
+<span class="go">3×1 DataFrame</span>
+<span class="go"> Row │ new_col </span>
+<span class="go">     │ String  </span>
+<span class="go">─────┼─────────</span>
+<span class="go">   1 │ 1-1</span>
+<span class="go">   2 │ 2-2</span>
+<span class="go">   3 │ 3-3-3</span>
+
+<span class="gp">julia&gt;</span><span class="w"> </span><span class="nd">@unite</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">new_col</span><span class="p">,</span><span class="w"> </span><span class="p">(</span><span class="n">b</span><span class="p">,</span><span class="w"> </span><span class="n">c</span><span class="p">,</span><span class="w"> </span><span class="n">d</span><span class="p">),</span><span class="w"> </span><span class="s">"-"</span><span class="p">,</span><span class="w"> </span><span class="n">remove</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">false</span><span class="p">)</span>
 <span class="go">3×4 DataFrame</span>
 <span class="go"> Row │ b       c       d        new_col </span>
 <span class="go">     │ String  String  String?  String  </span>
@@ -4255,7 +4286,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   2 │ 2       2       missing  2-2</span>
 <span class="go">   3 │ 3       3       3        3-3-3</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/separate_unite.jl#L53-L77" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/separate_unite.jl#L81-L115" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@unnest_longer-Tuple{Any, Vararg{Any}}" href="#TidierData.@unnest_longer-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@unnest_longer</code></strong> — <em>Macro</em>.</p>
 <div class="highlight"><pre><span></span><code><span class="nd">@unnest_longer</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">columns</span><span class="p">,</span><span class="w"> </span><span class="n">indices_include</span><span class="o">=</span><span class="nb">false</span><span class="p">)</span>
@@ -4287,7 +4318,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   3 │     2      3  [7, 8]</span>
 <span class="go">   4 │     2      4  [7, 8]</span>
 
-<span class="gp">julia&gt;</span><span class="w"> </span><span class="nd">@unnest_longer</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">b</span><span class="o">:</span><span class="n">c</span><span class="p">,</span><span class="w"> </span><span class="n">indices_include</span><span class="o">=</span><span class="nb">true</span><span class="p">)</span>
+<span class="gp">julia&gt;</span><span class="w"> </span><span class="nd">@unnest_longer</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">b</span><span class="o">:</span><span class="n">c</span><span class="p">,</span><span class="w"> </span><span class="n">indices_include</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nb">true</span><span class="p">)</span>
 <span class="go">4×5 DataFrame</span>
 <span class="go"> Row │ a      b      c      b_id   c_id  </span>
 <span class="go">     │ Int64  Int64  Int64  Int64  Int64 </span>
@@ -4320,10 +4351,10 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   6 │     3  5</span>
 <span class="go">   7 │     4  missing </span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/nests.jl#L139-L203" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/nests.jl#L139-L203" class="documenter-source">source</a><br></p>
 <p><a id="TidierData.@unnest_wider-Tuple{Any, Vararg{Any}}" href="#TidierData.@unnest_wider-Tuple{Any,%20Vararg{Any}}">#</a>
 <strong><code>TidierData.@unnest_wider</code></strong> — <em>Macro</em>.</p>
-<div class="highlight"><pre><span></span><code><span class="nd">@unnest_wider</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">columns</span><span class="p">,</span><span class="w"> </span><span class="n">names_sep</span><span class="o">=</span><span class="p">)</span>
+<div class="highlight"><pre><span></span><code><span class="nd">@unnest_wider</span><span class="p">(</span><span class="n">df</span><span class="p">,</span><span class="w"> </span><span class="n">columns</span><span class="p">,</span><span class="w"> </span><span class="n">names_sep</span><span class="p">)</span>
 </code></pre></div>
 <p>Unnest specified columns of arrays or dictionaries into wider format dataframe with individual columns.</p>
 <p><strong>Arguments</strong></p>
@@ -4361,7 +4392,7 @@ <h2 id="reference-exported-functions">Reference - Exported functions<a class="he
 <span class="go">   1 │     1      1      2      5      6</span>
 <span class="go">   2 │     2      3      4      7      8</span>
 </code></pre></div>
-<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/343185974497e522ac1acd4e28b640ca29aa925b/src/nests.jl#L79-L119" class="documenter-source">source</a><br></p>
+<p><a target="_blank" href="https://github.com/TidierOrg/TidierData.jl/blob/ad1e8b53338f4ae56764a484d55e75fdd5cad205/src/nests.jl#L79-L119" class="documenter-source">source</a><br></p>
 <p><a id="Reference-Internal-functions"></a></p>
 <p><a id="Reference-Internal-functions-1"></a></p>
 <h2 id="reference-internal-functions">Reference - Internal functions<a class="headerlink" href="#reference-internal-functions" title="Permanent link">¤</a></h2></div>
diff --git a/latest/search/search_index.json b/latest/search/search_index.json
index 9775ca5..36d9602 100644
--- a/latest/search/search_index.json
+++ b/latest/search/search_index.json
@@ -1 +1 @@
-{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Home","text":""},{"location":"#what-is-tidierdatajl","title":"What is TidierData.jl?","text":"<p>TidierData.jl is a 100% Julia implementation of the dplyr and tidyr R packages. Powered by the DataFrames.jl package and Julia\u2019s extensive meta-programming capabilities, TidierData.jl is an R user\u2019s love letter to data analysis in Julia.</p> <p><code>TidierData.jl</code> has two goals, which differentiate it from other data analysis meta-packages in Julia:</p> Stick as closely to tidyverse syntax as possible. <p>Whereas other meta-packages introduce Julia-centric idioms for working with DataFrames, this package\u2019s goal is to reimplement parts of tidyverse in Julia. This means that <code>TidierData.jl</code> uses tidy expressions as opposed to idiomatic Julia expressions. An example of a tidy expression is <code>a = mean(b)</code>. In Julia, <code>a</code> and <code>b</code> are variables and are thus \"eagerly\" evaluated. This means that if <code>b</code> is merely referring to a column in a data frame and not an object in the global namespace, then an error will be generated because <code>b</code> was not found. In idiomatic Julia, <code>b</code> would need to be expressed as a symbol, or <code>:b</code>. Even then, <code>a = mean(:b)</code> would generate an error because it's not possible to calculate the mean value of a symbol. To handle this using idiomatic Julia, <code>DataFrames.jl</code> introduces a mini-language that relies heavily on the creation of anonymous functions, with explicit directional pairs syntax using a <code>source =&gt; function =&gt; destination</code> syntax. While this is quite elegant, it can be verbose. <code>TidierData.jl</code> aims to reduce this complexity by exposing an R-like syntax, which is then converted into valid <code>DataFrames.jl</code> code. 
The reason that tidy expressions are considered valid by Julia in <code>TidierData.jl</code> is because they are implemented using macros. Macros \"capture\" the expressions they are given, and then they can modify those expressions before evaluating them. For consistency, all top-level <code>dplyr</code> functions are implemented as macros (whether or not a macro is truly needed), and all \"helper\" functions (used inside of those top-level functions) are implemented as functions or pseudo-functions (functions which only exist through modification of the abstract syntax tree).</p> Make broadcasting mostly invisible. <p>Broadcasting trips up many R users switching to Julia because R users are used to most functions being vectorized. <code>TidierData.jl</code> currently uses a lookup table to decide which functions not to vectorize; all other functions are automatically vectorized. Read the documentation page on \"Autovectorization\" to read about how this works, and how to override the defaults. An example of where this issue commonly causes errors is when centering a variable. To create a new column <code>a</code> that centers the column <code>b</code>, <code>TidierData.jl</code> lets you simply write <code>a = b - mean(b)</code> exactly as you would in R. This works because <code>TidierData.jl</code> knows to not vectorize <code>mean()</code> while also recognizing that <code>-</code> should be vectorized such that this expression is rewritten in <code>DataFrames.jl</code> as <code>:b =&gt; (b -&gt; b .- mean(b)) =&gt; :a</code>. For any user-defined function that you want to \"mark\" as being non-vectorized, you can prefix it with a <code>~</code>. 
For example, a function <code>new_mean()</code>, if it had the same functionality as <code>mean()</code> would normally get vectorized by <code>TidierData.jl</code> unless you write it as <code>~new_mean()</code>.</p> <p></p> <p></p>"},{"location":"#installation","title":"Installation","text":"<p>For the stable version:</p> <pre><code>] add TidierData\n</code></pre> <p>The <code>]</code> character starts the Julia package manager. Press the backspace key to return to the Julia prompt.</p> <p>or</p> <pre><code>using Pkg\nPkg.add(\"TidierData\")\n</code></pre> <p>For the newest version:</p> <pre><code>] add TidierData#main\n</code></pre> <p>or</p> <pre><code>using Pkg\nPkg.add(url=\"https://github.com/TidierOrg/TidierData.jl\")\n</code></pre> <p></p> <p></p>"},{"location":"#what-macros-and-functions-does-tidierdatajl-support","title":"What macros and functions does TidierData.jl support?","text":"<p>To support R-style programming, <code>TidierData.jl</code> is implemented using macros. This is because macros are able to \"capture\" the code before executing it, which allows the package to support R-like \"tidy expressions\" that would otherwise not be considered valid Julia code.</p> <p>TidierData.jl currently supports the following top-level macros:</p> <p>Top-level macros:</p> <ul> <li><code>@glimpse()</code> and <code>@head()</code></li> <li><code>@select()</code> and <code>@distinct()</code></li> <li><code>@rename()</code> and <code>@rename_with()</code></li> <li><code>@mutate()</code> and <code>@transmute()</code> </li> <li><code>@summarize()</code> and <code>@summarise()</code></li> <li><code>@filter()</code></li> <li><code>@slice()</code>, <code>@slice_sample()</code>, <code>@slice_min()</code>, <code>@slice_max()</code>, <code>@slice_head()</code>, and <code>@slice_tail()</code></li> <li><code>@group_by()</code> and <code>@ungroup()</code></li> <li><code>@arrange()</code></li> <li><code>@relocate()</code></li> <li><code>@pull()</code></li> 
<li><code>@count()</code> and <code>@tally()</code></li> <li><code>@left_join()</code>, <code>@right_join()</code>, <code>@inner_join()</code>, <code>@full_join()</code>, <code>@anti_join()</code>, and <code>@semi_join()</code></li> <li><code>@bind_rows()</code> and <code>@bind_cols()</code></li> <li><code>@pivot_wider()</code> and <code>@pivot_longer()</code></li> <li><code>@separate()</code>, <code>@separate_rows()</code>, and <code>@unite()</code></li> <li><code>@drop_missing()</code> and <code>@fill_missing()</code></li> <li><code>@unnest_longer()</code>, <code>@unnest_wider()</code>, and <code>@nest()</code></li> <li><code>@clean_names()</code> (as in R's <code>janitor::clean_names()</code> function)</li> <li><code>@summary()</code> (as in R's <code>summary()</code> function)</li> </ul> <p>TidierData.jl also supports the following helper functions:</p> <p>Helper functions:</p> <ul> <li><code>across()</code></li> <li><code>where()</code></li> <li><code>desc()</code></li> <li><code>if_else()</code> and <code>case_when()</code></li> <li><code>n()</code> and <code>row_number()</code></li> <li><code>ntile()</code></li> <li><code>lag()</code> and <code>lead()</code></li> <li><code>everything()</code>, <code>starts_with()</code>, <code>ends_with()</code>, <code>matches()</code>, and <code>contains()</code></li> <li><code>as_float()</code>, <code>as_integer()</code>, and <code>as_string()</code></li> <li><code>is_number()</code>, <code>is_float()</code>, <code>is_integer()</code>, and <code>is_string()</code></li> <li><code>missing_if()</code> and <code>replace_missing()</code></li> </ul> <p>See the Reference page for a detailed guide to each of the macros and functions.</p> <p></p> <p></p>"},{"location":"#example","title":"Example","text":"<p>Let's select the first five movies in our dataset whose budget exceeds the mean budget. 
Unlike in R, where we pass an <code>na.rm = TRUE</code> argument to remove missing values, in Julia we wrap the variable with a <code>skipmissing()</code> to remove the missing values before the <code>mean()</code> is calculated.</p> <pre><code>using TidierData\nusing RDatasets\n\nmovies = dataset(\"ggplot2\", \"movies\");\n\n@chain movies begin\n    @mutate(Budget = Budget / 1_000_000)\n    @filter(Budget &gt;= mean(skipmissing(Budget)))\n    @select(Title, Budget)\n    @slice(1:5)\nend\n</code></pre> <pre><code>5\u00d72 DataFrame\n Row \u2502 Title                       Budget   \n     \u2502 String                      Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 'Til There Was You              23.0\n   2 \u2502 10 Things I Hate About You      16.0\n   3 \u2502 102 Dalmatians                  85.0\n   4 \u2502 13 Going On 30                  37.0\n   5 \u2502 13th Warrior, The               85.0\n</code></pre> <p></p> <p></p>"},{"location":"#whats-new","title":"What\u2019s new","text":"<p>See NEWS.md for the latest updates.</p> <p></p> <p></p>"},{"location":"#whats-missing","title":"What's missing","text":"<p>Is there a tidyverse feature missing that you would like to see in TidierData.jl? Please file a GitHub issue. 
Because TidierData.jl primarily wraps DataFrames.jl, our decision to integrate a new feature will be guided by how well-supported it is within DataFrames.jl and how likely other users are to benefit from it.</p>"},{"location":"reference/","title":"Reference","text":""},{"location":"reference/#index","title":"Index","text":"<ul> <li><code>TidierData.TidierData_set</code></li> <li><code>TidierData.across</code></li> <li><code>TidierData.as_float</code></li> <li><code>TidierData.as_integer</code></li> <li><code>TidierData.as_string</code></li> <li><code>TidierData.case_when</code></li> <li><code>TidierData.desc</code></li> <li><code>TidierData.ends_with</code></li> <li><code>TidierData.everything</code></li> <li><code>TidierData.if_else</code></li> <li><code>TidierData.is_float</code></li> <li><code>TidierData.is_integer</code></li> <li><code>TidierData.is_number</code></li> <li><code>TidierData.is_string</code></li> <li><code>TidierData.matches</code></li> <li><code>TidierData.missing_if</code></li> <li><code>TidierData.n</code></li> <li><code>TidierData.ntile</code></li> <li><code>TidierData.replace_missing</code></li> <li><code>TidierData.row_number</code></li> <li><code>TidierData.starts_with</code></li> <li><code>TidierData.where</code></li> <li><code>TidierData.@anti_join</code></li> <li><code>TidierData.@arrange</code></li> <li><code>TidierData.@bind_cols</code></li> <li><code>TidierData.@bind_rows</code></li> <li><code>TidierData.@count</code></li> <li><code>TidierData.@distinct</code></li> <li><code>TidierData.@drop_missing</code></li> <li><code>TidierData.@fill_missing</code></li> <li><code>TidierData.@filter</code></li> <li><code>TidierData.@full_join</code></li> <li><code>TidierData.@glimpse</code></li> <li><code>TidierData.@group_by</code></li> <li><code>TidierData.@head</code></li> <li><code>TidierData.@inner_join</code></li> <li><code>TidierData.@left_join</code></li> <li><code>TidierData.@mutate</code></li> <li><code>TidierData.@nest</code></li> 
<li><code>TidierData.@pivot_longer</code></li> <li><code>TidierData.@pivot_wider</code></li> <li><code>TidierData.@pull</code></li> <li><code>TidierData.@relocate</code></li> <li><code>TidierData.@rename</code></li> <li><code>TidierData.@rename_with</code></li> <li><code>TidierData.@right_join</code></li> <li><code>TidierData.@select</code></li> <li><code>TidierData.@semi_join</code></li> <li><code>TidierData.@separate</code></li> <li><code>TidierData.@separate_rows</code></li> <li><code>TidierData.@slice</code></li> <li><code>TidierData.@slice_head</code></li> <li><code>TidierData.@slice_max</code></li> <li><code>TidierData.@slice_min</code></li> <li><code>TidierData.@slice_sample</code></li> <li><code>TidierData.@slice_tail</code></li> <li><code>TidierData.@summarise</code></li> <li><code>TidierData.@summarize</code></li> <li><code>TidierData.@summary</code></li> <li><code>TidierData.@tally</code></li> <li><code>TidierData.@transmute</code></li> <li><code>TidierData.@ungroup</code></li> <li><code>TidierData.@unite</code></li> <li><code>TidierData.@unnest_longer</code></li> <li><code>TidierData.@unnest_wider</code></li> </ul>"},{"location":"reference/#reference-exported-functions","title":"Reference - Exported functions","text":"<p># <code>TidierData.TidierData_set</code> \u2014 Method.</p> <pre><code>TidierData_set(option::AbstractString, value::Bool)\n</code></pre> <p>Set package options.</p> <p>Here are the supported options and what they do:</p> <ul> <li>\"code\": Defaults to <code>false</code>. If set to <code>true</code>, this option displays the DataFrames.jl code generated by the TidierData.jl package. 
It is useful for debugging whether errors are introduced by TidierData.jl's generated code.</li> </ul> <p>Arguments</p> <ul> <li><code>option</code>: \"code\"</li> <li><code>value</code>: <code>true</code> or <code>false</code></li> </ul> <p>source</p> <p># <code>TidierData.across</code> \u2014 Method.</p> <pre><code>across(variable[s], function[s])\n</code></pre> <p>Apply functions to multiple variables. If specifying multiple variables or functions, surround them with parentheses so that they are recognized as a tuple.</p> <p>This function should only be called inside of TidierData.jl macros.</p> <p>Arguments</p> <ul> <li><code>variable[s]</code>: An unquoted variable, or if multiple, an unquoted tuple of variables.</li> <li><code>function[s]</code>: A function, or if multiple, a tuple of functions.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);\n\njulia&gt; @chain df begin\n         @summarize(across(b, minimum))\n       end\n1\u00d71 DataFrame\n Row \u2502 b_minimum \n     \u2502 Int64     \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502         1\n\njulia&gt; @chain df begin\n         @summarize(across(where(is_number), minimum))\n       end\n1\u00d72 DataFrame\n Row \u2502 b_minimum  c_minimum \n     \u2502 Int64      Int64     \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502         1         11\n\njulia&gt; @chain df begin\n         @summarize(across((b,c), (minimum, maximum)))\n       end\n1\u00d74 DataFrame\n Row \u2502 b_minimum  c_minimum  b_maximum  c_maximum \n     \u2502 Int64      Int64      Int64      Int64     
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502         1         11          5         15\n\njulia&gt; @chain df begin\n         @mutate(across((b,c), (minimum, maximum)))\n       end\n5\u00d77 DataFrame\n Row \u2502 a     b      c      b_minimum  c_minimum  b_maximum  c_maximum \n     \u2502 Char  Int64  Int64  Int64      Int64      Int64      Int64     \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11          1         11          5         15\n   2 \u2502 b         2     12          1         11          5         15\n   3 \u2502 c         3     13          1         11          5         15\n   4 \u2502 d         4     14          1         11          5         15\n   5 \u2502 e         5     15          1         11          5         15\n\njulia&gt; @chain df begin\n         @mutate(across((b, starts_with(\"c\")), (minimum, maximum)))\n       end\n5\u00d77 DataFrame\n Row \u2502 a     b      c      b_minimum  c_minimum  b_maximum  c_maximum \n     \u2502 Char  Int64  Int64  Int64      Int64      Int64      Int64     
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11          1         11          5         15\n   2 \u2502 b         2     12          1         11          5         15\n   3 \u2502 c         3     13          1         11          5         15\n   4 \u2502 d         4     14          1         11          5         15\n   5 \u2502 e         5     15          1         11          5         15\n</code></pre> <p>source</p> <p># <code>TidierData.as_float</code> \u2014 Method.</p> <pre><code>as_float(value)\n</code></pre> <p>Convert a number or string to a Float64 data type.</p> <p>This is a useful helper for type conversions. Missing values are propagated.</p> <p>Arguments</p> <ul> <li><code>value</code>: An <code>AbstractString</code>, <code>Number</code>, or <code>missing</code> value.</li> </ul> <p>Examples</p> <pre><code>julia&gt; as_float(1)\n1.0\n\njulia&gt; as_float(\"1.5\")\n1.5\n\njulia&gt; as_float(missing)\nmissing\n</code></pre> <p>source</p> <p># <code>TidierData.as_integer</code> \u2014 Method.</p> <pre><code>as_integer(value)\n</code></pre> <p>Convert a number or string to an Int64 data type.</p> <p>This is a useful helper for type conversions. Missing values are propagated. 
Any values after the decimal point are removed.</p> <p>Arguments</p> <ul> <li><code>value</code>: An <code>AbstractString</code>, <code>Number</code>, or <code>missing</code> value.</li> </ul> <p>Examples</p> <pre><code>julia&gt; as_integer(1)\n1\n\njulia&gt; as_integer(1.5)\n1\n\njulia&gt; as_integer(\"2\")\n2\n\njulia&gt; as_integer(\"2.5\")\n2\n\njulia&gt; as_integer(missing)\nmissing\n</code></pre> <p>source</p> <p># <code>TidierData.as_string</code> \u2014 Method.</p> <pre><code>as_string(value)\n</code></pre> <p>Convert a number or string to a String data type.</p> <p>This is a useful helper for type conversions. Missing values are propagated.</p> <p>Arguments</p> <ul> <li><code>value</code>: An <code>AbstractString</code>, <code>Number</code>, or <code>missing</code> value.</li> </ul> <p>Examples</p> <pre><code>julia&gt; as_string(1)\n\"1\"\n\njulia&gt; as_string(1.5)\n\"1.5\"\n\njulia&gt; as_string(missing)\nmissing\n</code></pre> <p>source</p> <p># <code>TidierData.case_when</code> \u2014 Method.</p> <pre><code>case_when(condition =&gt; return_value)\ncase_when(condition_1 =&gt; return_value_1, condition_2 =&gt; return_value_2, ...)\n</code></pre> <p>Return the corresponding <code>return_value</code> for the first <code>condition</code> that evaluates to <code>true</code>.</p> <p>The most specific condition should be listed first and most general condition should be listed last. If none of the conditions evaluate to <code>true</code>, then a <code>missing</code> value is returned. 
</p> <p>Arguments</p> <ul> <li><code>condition</code>: A condition that evaluates to <code>true</code>, <code>false</code>, or <code>missing</code>.</li> <li><code>return_value</code>: The value to return if the condition is <code>true</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = [1, 2, missing, 4, 5]);\n\njulia&gt; @chain df begin\n         @mutate(b = case_when(a &gt; 4  =&gt;  \"hi\",\n                               a &gt; 2  =&gt;  \"medium\",\n                               a &gt; 0  =&gt;  \"low\"))\n       end\n5\u00d72 DataFrame\n Row \u2502 a        b       \n     \u2502 Int64?   String? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502       1  low\n   2 \u2502       2  low\n   3 \u2502 missing  missing \n   4 \u2502       4  medium\n   5 \u2502       5  hi\n\njulia&gt; @chain df begin\n         @mutate(b = case_when(a &gt; 4  =&gt;  \"hi\",\n                               a &gt; 2  =&gt;  \"medium\",\n                               a &gt; 0  =&gt;  \"low\",\n                               true   =&gt;  \"unknown\"))\n       end\n5\u00d72 DataFrame\n Row \u2502 a        b       \n     \u2502 Int64?   String  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502       1  low\n   2 \u2502       2  low\n   3 \u2502 missing  unknown\n   4 \u2502       4  medium\n   5 \u2502       5  hi\n\njulia&gt; @chain df begin\n         @mutate(b = case_when(a &gt;= 3  =&gt;  3,\n                               true    =&gt;  a))\n       end\n5\u00d72 DataFrame\n Row \u2502 a        b       \n     \u2502 Int64?   Int64?  
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502       1        1\n   2 \u2502       2        2\n   3 \u2502 missing  missing \n   4 \u2502       4        3\n   5 \u2502       5        3\n\njulia&gt; @chain df begin\n         @mutate(b = case_when(a &gt;= 3        =&gt;  3,\n                               ismissing(a)  =&gt;  0,\n                               true          =&gt;  a))\n       end\n5\u00d72 DataFrame\n Row \u2502 a        b     \n     \u2502 Int64?   Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502       1      1\n   2 \u2502       2      2\n   3 \u2502 missing      0\n   4 \u2502       4      3\n   5 \u2502       5      3\n</code></pre> <p>source</p> <p># <code>TidierData.desc</code> \u2014 Method.</p> <pre><code>desc(col)\n</code></pre> <p>Orders the rows of a DataFrame column in descending order when used inside of <code>@arrange()</code>. 
This function should only be called inside of `@arrange()``.</p> <p>Arguments</p> <ul> <li><code>col</code>: An unquoted column name.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = repeat('a':'e', inner = 2), b = 1:10, c = 11:20);\n\njulia&gt; @chain df begin\n         @arrange(a, desc(b))\n       end\n10\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         2     12\n   2 \u2502 a         1     11\n   3 \u2502 b         4     14\n   4 \u2502 b         3     13\n   5 \u2502 c         6     16\n   6 \u2502 c         5     15\n   7 \u2502 d         8     18\n   8 \u2502 d         7     17\n   9 \u2502 e        10     20\n  10 \u2502 e         9     19\n</code></pre> <p>source</p> <p># <code>TidierData.ends_with</code> \u2014 Method.</p> <pre><code>ends_with(suffix)\n</code></pre> <p>Select all columns ending with the <code>suffix</code>.</p> <p>Arguments</p> <ul> <li><code>suffix</code>: A string.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a_1 = 1:5, a_2 = 11:15, b_1 = 21:25);\n\njulia&gt; @chain df begin \n         @select(ends_with(\"1\"))\n       end\n5\u00d72 DataFrame\n Row \u2502 a_1    b_1   \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1     21\n   2 \u2502     2     22\n   3 \u2502     3     23\n   4 \u2502     4     24\n   5 \u2502     5     25\n</code></pre> <p>source</p> <p># <code>TidierData.everything</code> \u2014 Method.</p> <pre><code>everything()\n</code></pre> <p>Select all (remaining) columns.</p> <p>Arguments</p> <ul> <li>None</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a_1 = 1:5, a_2 = 11:15, b_1 = 21:25);\n\njulia&gt; @chain df begin \n         @select(b_1, 
everything())\n       end\n5\u00d73 DataFrame\n Row \u2502 b_1    a_1    a_2   \n     \u2502 Int64  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502    21      1     11\n   2 \u2502    22      2     12\n   3 \u2502    23      3     13\n   4 \u2502    24      4     14\n   5 \u2502    25      5     15\n</code></pre> <p>source</p> <p># <code>TidierData.if_else</code> \u2014 Method.</p> <pre><code>if_else(condition, yes, no, [miss])\n</code></pre> <p>Return the <code>yes</code> value if the <code>condition</code> is <code>true</code> and the <code>no</code> value if the <code>condition</code> is <code>false</code>. If <code>miss</code> is specified, then the provided <code>miss</code> value is returned when the <code>condition</code> contains a <code>missing</code> value. If <code>miss</code> is not specified, then the returned value is an explicit <code>missing</code> value.</p> <p>Arguments</p> <ul> <li><code>condition</code>: A condition that evaluates to <code>true</code>, <code>false</code>, or <code>missing</code>.</li> <li><code>yes</code>: Value to return if the condition is <code>true</code>.</li> <li><code>no</code>: Value to return if the condition is <code>false</code>.</li> <li><code>miss</code>: Optional. Value to return if the condition is <code>missing</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = [1, 2, missing, 4, 5]);\n\njulia&gt; @chain df begin\n         @mutate(b = if_else(a &gt;= 3, \"yes\", \"no\"))\n       end\n5\u00d72 DataFrame\n Row \u2502 a        b       \n     \u2502 Int64?   String? 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502       1  no\n   2 \u2502       2  no\n   3 \u2502 missing  missing \n   4 \u2502       4  yes\n   5 \u2502       5  yes\n\njulia&gt; @chain df begin\n         @mutate(b = if_else(a &gt;= 3, \"yes\", \"no\", \"unknown\"))\n       end\n5\u00d72 DataFrame\n Row \u2502 a        b       \n     \u2502 Int64?   String  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502       1  no\n   2 \u2502       2  no\n   3 \u2502 missing  unknown\n   4 \u2502       4  yes\n   5 \u2502       5  yes\n\njulia&gt; @chain df begin\n         @mutate(b = if_else(a &gt;= 3, 3, a))\n       end\n5\u00d72 DataFrame\n Row \u2502 a        b       \n     \u2502 Int64?   Int64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502       1        1\n   2 \u2502       2        2\n   3 \u2502 missing  missing \n   4 \u2502       4        3\n   5 \u2502       5        3\n\njulia&gt; @chain df begin\n         @mutate(b = if_else(a &gt;= 3, 3, a, 0))\n       end\n5\u00d72 DataFrame\n Row \u2502 a        b     \n     \u2502 Int64?   
Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502       1      1\n   2 \u2502       2      2\n   3 \u2502 missing      0\n   4 \u2502       4      3\n   5 \u2502       5      3\n</code></pre> <p>source</p> <p># <code>TidierData.is_float</code> \u2014 Method.</p> <pre><code>is_float(column::AbstractVector)\n</code></pre> <p>Determine if the given column contains floating-point numbers.</p> <p>Arguments</p> <ul> <li><code>column::AbstractVector</code>: The column whose data type needs to be checked.</li> </ul> <p>Returns</p> <ul> <li><code>Bool</code>: <code>true</code> if the column contains floating-point numbers, <code>false</code> otherwise.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(b = [missing, 2, 3],\n                      c = [missing, 2.2, 34],\n                      d = [missing, missing, \"A\"]);\n\njulia&gt; is_float(df.c)\ntrue\n\njulia&gt; is_float(df.b)\nfalse\n</code></pre> <p>source</p> <p># <code>TidierData.is_integer</code> \u2014 Method.</p> <pre><code>is_integer(column::AbstractVector)\n</code></pre> <p>Determine if the given column contains integers.</p> <p>Arguments</p> <ul> <li><code>column::AbstractVector</code>: The column whose data type needs to be checked.</li> </ul> <p>Returns</p> <ul> <li><code>Bool</code>: <code>true</code> if the column contains integers, <code>false</code> otherwise.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(b = [missing, 2, 3],\n                      c = [missing, 2.2, 34],\n                      d = [missing, missing, \"A\"]);\n\njulia&gt; is_integer(df.b)\ntrue\n\njulia&gt; is_integer(df.d)\nfalse\n</code></pre> <p>source</p> <p># <code>TidierData.is_number</code> \u2014 Method.</p> <pre><code>is_number(column::AbstractVector)\n</code></pre> <p>Determine if the given column contains numbers.</p> <p>Arguments</p> <ul> <li><code>column::AbstractVector</code>: The column 
whose data type needs to be checked.</li> </ul> <p>Returns</p> <ul> <li><code>Bool</code>: <code>true</code> if the column contains numbers, <code>false</code> otherwise.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(b = [missing, 2, 3],\n                      c = [missing, 2.2, 34],\n                      d = [missing, missing, \"A\"]);\n\njulia&gt; is_number(df.b)\ntrue\n\njulia&gt; is_number(df.c)\ntrue\n\njulia&gt; is_number(df.d)\nfalse\n</code></pre> <p>source</p> <p># <code>TidierData.is_string</code> \u2014 Method.</p> <pre><code>is_string(column::AbstractVector)\n</code></pre> <p>Determine if the given column contains strings.</p> <p>Arguments</p> <ul> <li><code>column::AbstractVector</code>: The column whose data type needs to be checked.</li> </ul> <p>Returns</p> <ul> <li><code>Bool</code>: <code>true</code> if the column contains strings, <code>false</code> otherwise.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(b = [missing, 2, 3],\n                      c = [missing, 2.2, 34],\n                      d = [missing, missing, \"A\"]);\n\njulia&gt; is_string(df.d)\ntrue\n\njulia&gt; is_string(df.c)\nfalse\n</code></pre> <p>source</p> <p># <code>TidierData.matches</code> \u2014 Method.</p> <pre><code>matches(pattern, [flags])\n</code></pre> <p>Select all columns matching the <code>pattern</code>.</p> <p>Arguments</p> <ul> <li><code>pattern</code>: A string.</li> <li><code>flags</code>: Optional string containing flags. \"i\" = Do case-insensitive pattern matching. \"m\" = Treat string as multiple lines. \"s\" = Treat string as a single line. \"x\" = Tells the regular expression parser to ignore most whitespace that is neither backslashed nor within a character class. 
You</li> </ul> <p>can use this to break up your regular expression into (slightly) more readable parts.</p> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a_1 = 1:5, a_2 = 11:15, b_1 = 21:25);\n\njulia&gt; @chain df begin \n         @select(matches(\"^a\"))\n       end\n5\u00d72 DataFrame\n Row \u2502 a_1    a_2   \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1     11\n   2 \u2502     2     12\n   3 \u2502     3     13\n   4 \u2502     4     14\n   5 \u2502     5     15\n\njulia&gt; @chain df begin \n         @select(matches(\"1$\"))\n       end\n5\u00d72 DataFrame\n Row \u2502 a_1    b_1   \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1     21\n   2 \u2502     2     22\n   3 \u2502     3     23\n   4 \u2502     4     24\n   5 \u2502     5     25\n\njulia&gt; @chain df begin \n         @select(matches(\"A\", \"i\"))\n       end\n5\u00d72 DataFrame\n Row \u2502 a_1    a_2   \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1     11\n   2 \u2502     2     12\n   3 \u2502     3     13\n   4 \u2502     4     14\n   5 \u2502     5     15\n</code></pre> <p>source</p> <p># <code>TidierData.missing_if</code> \u2014 Method.</p> <pre><code>missing_if(x, value)\n</code></pre> <p>Replace a specific <code>value</code> with <code>missing</code> in <code>x</code>.</p> <p>Arguments</p> <ul> <li><code>x</code>: The input value which can be of any type. If <code>x</code> is already <code>missing</code> or equals <code>value</code>, the function will return <code>missing</code>. 
Otherwise, it returns <code>x</code> unaltered.</li> <li><code>value</code>: The specific value to be checked against.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(\n              a = [1, missing, 3, 4],\n              b = [\"apple\", \"apple\", \"banana\", \"cherry\"]\n            );\n\njulia&gt; @chain df begin\n         @mutate(a = missing_if(a, 4), \n                 b = missing_if(b, \"apple\"))\n       end\n4\u00d72 DataFrame\n Row \u2502 a        b       \n     \u2502 Int64?   String? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502       1  missing \n   2 \u2502 missing  missing \n   3 \u2502       3  banana\n   4 \u2502 missing  cherry\n</code></pre> <p>source</p> <p># <code>TidierData.n</code> \u2014 Method.</p> <pre><code>n()\n</code></pre> <p>Return the number of rows in the DataFrame or in the group if used in the context of a GroupedDataFrame.</p> <p>Arguments</p> <ul> <li>None</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = repeat('a':'e', inner = 2), b = 1:10, c = 11:20);\n\njulia&gt; @chain df begin\n         @summarize(n = n())\n       end\n1\u00d71 DataFrame\n Row \u2502 n     \n     \u2502 Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502    10\n\njulia&gt; @chain df begin\n         @group_by(a)\n         @summarize(n = n())\n       end\n5\u00d72 DataFrame\n Row \u2502 a     n     \n     \u2502 Char  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         2\n   2 \u2502 b         2\n   3 \u2502 c         2\n   4 \u2502 d         2\n   5 \u2502 e         2\n</code></pre> <p>source</p> <p># <code>TidierData.ntile</code> \u2014 Method.</p> <pre><code>ntile(x, n::Integer)\n</code></pre> <p>Break the input vector into <code>n</code> equal-sized buckets.</p> 
<p><code>ntile()</code> is a rough rank that breaks the input vector into <code>n</code> buckets. If <code>length(x)</code> is not an integer multiple of <code>n</code>, the size of the buckets will differ by up to one, with larger buckets coming first.</p> <p>Unlike other ranking functions, <code>ntile()</code> ignores ties: it will create evenly sized buckets even if the same value of <code>x</code> ends up in different buckets.</p> <p>Arguments</p> <ul> <li><code>x</code>: A vector to rank. By default, the smallest values will get the smallest ranks. Missing values will be given rank <code>missing</code>.</li> <li><code>n</code>: Number of groups to bucket into.</li> </ul> <p>Examples</p> <pre><code>julia&gt; x = [5,1,3,2,2, missing]\n6-element Vector{Union{Missing, Int64}}:\n 5\n 1\n 3\n 2\n 2\n  missing\n\njulia&gt; ntile(x, 2)\n6-element Vector{Union{Missing, Int64}}:\n 2\n 1\n 2\n 1\n 1\n  missing\n\njulia&gt; ntile(x, 4)\n6-element Vector{Union{Missing, Int64}}:\n 4\n 1\n 3\n 1\n 2\n  missing\n\njulia&gt; ntile(1:8, 3)\n8-element Vector{Int64}:\n 1\n 1\n 1\n 2\n 2\n 2\n 3\n 3\n\njulia&gt; df = DataFrame(a = 1:8);\n\njulia&gt; @chain df begin\n       @mutate(buckets = ntile(a, 3))\n       end\n8\u00d72 DataFrame\n Row \u2502 a      buckets \n     \u2502 Int64  Int64   \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1        1\n   2 \u2502     2        1\n   3 \u2502     3        1\n   4 \u2502     4        2\n   5 \u2502     5        2\n   6 \u2502     6        2\n   7 \u2502     7        3\n   8 \u2502     8        3\n</code></pre> <p>source</p> <p># <code>TidierData.replace_missing</code> \u2014 Method.</p> <pre><code>replace_missing(x, replacement)\n</code></pre> <p>Replace <code>missing</code> values in <code>x</code> with a specified <code>replacement</code> value.</p> <p>Arguments</p> <ul> <li><code>x</code>: The input value which can be of any type. 
If <code>x</code> is <code>missing</code>, the function will return <code>replacement</code>. Otherwise, it returns <code>x</code> unaltered.</li> <li><code>replacement</code>: The value to replace <code>missing</code> with in <code>x</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(\n              a = [1, missing, 3, 4],\n              b = [4, 5, missing, 8]\n            );\n\njulia&gt; @chain df begin\n         @mutate(a = replace_missing(a, 100),\n                 b = replace_missing(b, 35))\n       end\n4\u00d72 DataFrame\n Row \u2502 a      b     \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1      4\n   2 \u2502   100      5\n   3 \u2502     3     35\n   4 \u2502     4      8\n</code></pre> <p>source</p> <p># <code>TidierData.row_number</code> \u2014 Method.</p> <pre><code>row_number()\n</code></pre> <p>Return each row's number in a DataFrame or in the group if used in the context of a GroupedDataFrame.</p> <p>Arguments</p> <ul> <li>None</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = repeat('a':'e', inner = 2));\n\njulia&gt; @chain df begin\n         @mutate(row_num = row_number())\n       end\n10\u00d72 DataFrame\n Row \u2502 a     row_num \n     \u2502 Char  Int64   \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1\n   2 \u2502 a           2\n   3 \u2502 b           3\n   4 \u2502 b           4\n   5 \u2502 c           5\n   6 \u2502 c           6\n   7 \u2502 d           7\n   8 \u2502 d           8\n   9 \u2502 e           9\n  10 \u2502 e          10\n\njulia&gt; @chain df begin\n         @mutate(row_num = row_number() + 1)\n       end\n10\u00d72 DataFrame\n Row \u2502 a     row_num \n     \u2502 Char  Int64   
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           2\n   2 \u2502 a           3\n   3 \u2502 b           4\n   4 \u2502 b           5\n   5 \u2502 c           6\n   6 \u2502 c           7\n   7 \u2502 d           8\n   8 \u2502 d           9\n   9 \u2502 e          10\n  10 \u2502 e          11\n\njulia&gt; @chain df begin\n         @filter(row_number() &lt;= 5)\n       end\n5\u00d71 DataFrame\n Row \u2502 a    \n     \u2502 Char \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a\n   2 \u2502 a\n   3 \u2502 b\n   4 \u2502 b\n   5 \u2502 c\n</code></pre> <p>source</p> <p># <code>TidierData.starts_with</code> \u2014 Method.</p> <pre><code>starts_with(prefix)\n</code></pre> <p>Select all columns starting with the <code>prefix</code>.</p> <p>Arguments</p> <ul> <li><code>prefix</code>: A string.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a_1 = 1:5, a_2 = 11:15, b_1 = 21:25);\n\njulia&gt; @chain df begin \n         @select(starts_with(\"a\"))\n       end\n5\u00d72 DataFrame\n Row \u2502 a_1    a_2   \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1     11\n   2 \u2502     2     12\n   3 \u2502     3     13\n   4 \u2502     4     14\n   5 \u2502     5     15\n</code></pre> <p>source</p> <p># <code>TidierData.where</code> \u2014 Method.</p> <pre><code>where(function)\n</code></pre> <p>Selects columns on which a function returns <code>true</code> for all values of the column.</p> <p>This function should only be called inside of TidierData.jl macros.</p> <p>Arguments</p> <ul> <li><code>function</code>: A predicate function (one that returns <code>true</code> or <code>false</code>).</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);\n\njulia&gt; @chain df 
begin\n         @select(where(is_number))\n       end\n5\u00d72 DataFrame\n Row \u2502 b      c     \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1     11\n   2 \u2502     2     12\n   3 \u2502     3     13\n   4 \u2502     4     14\n   5 \u2502     5     15\n\njulia&gt; @chain df begin\n         @summarize(across(where(is_number), minimum))\n       end\n1\u00d72 DataFrame\n Row \u2502 b_minimum  c_minimum \n     \u2502 Int64      Int64     \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502         1         11\n\njulia&gt; @chain df begin\n         @mutate(across(where(is_number), minimum))\n       end\n5\u00d75 DataFrame\n Row \u2502 a     b      c      b_minimum  c_minimum \n     \u2502 Char  Int64  Int64  Int64      Int64     \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11          1         11\n   2 \u2502 b         2     12          1         11\n   3 \u2502 c         3     13          1         11\n   4 \u2502 d         4     14          1         11\n   5 \u2502 e         5     15          1         11\n\njulia&gt; df = DataFrame(a = repeat('a':'e', inner = 3),\n                      b = 1:15,\n                      c = 16:30,\n                      d = 31:45);\n\njulia&gt; @chain df begin\n         @group_by(a)\n         @summarize(across(where(is_number), mean))\n       end\n5\u00d74 DataFrame\n Row \u2502 a     b_mean   c_mean   d_mean  \n     \u2502 Char  Float64  Float64  Float64 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         2.0     17.0     32.0\n   2 \u2502 b         5.0     20.0     35.0\n   3 \u2502 c         8.0     23.0     38.0\n   4 \u2502 d        11.0     26.0     41.0\n   5 \u2502 e        14.0     29.0     44.0\n</code></pre> <p>source</p> <p># <code>TidierData.@anti_join</code> \u2014 Macro.</p> <pre><code>@anti_join(df1, df2, [by])\n</code></pre> <p>Perform an anti-join on <code>df1</code> and <code>df2</code> with an optional <code>by</code>.</p> <p>Arguments</p> <ul> <li><code>df1</code>: A DataFrame.</li> <li><code>df2</code>: A DataFrame.</li> <li><code>by</code>: An optional column or tuple of columns. <code>by</code> supports interpolation of individual columns. If <code>by</code> is not supplied, then it will be inferred from shared names of columns between <code>df1</code> and <code>df2</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df1 = DataFrame(a = [\"a\", \"b\"], b = 1:2);\n\njulia&gt; df2 = DataFrame(a = [\"a\", \"c\"], c = 3:4);\n\njulia&gt; @anti_join(df1, df2)\n1\u00d72 DataFrame\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 b           2\n\njulia&gt; @anti_join(df1, df2, a)\n1\u00d72 DataFrame\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 b           2\n\njulia&gt; @anti_join(df1, df2, a = a)\n1\u00d72 DataFrame\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n 
  1 \u2502 b           2\n\njulia&gt; @anti_join(df1, df2, \"a\")\n1\u00d72 DataFrame\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 b           2\n\njulia&gt; @anti_join(df1, df2, \"a\" = \"a\")\n1\u00d72 DataFrame\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 b           2\n</code></pre> <p>source</p> <p># <code>TidierData.@arrange</code> \u2014 Macro.</p> <pre><code>@arrange(df, exprs...)\n</code></pre> <p>Order the rows of a DataFrame by the values of specified columns.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>exprs...</code>: Variables from the input DataFrame. Use <code>desc()</code> to sort in descending order. Multiple variables can be specified, separated by commas.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = repeat('a':'e', inner = 2), b = 1:10, c = 11:20);\n\njulia&gt; @chain df begin\n         @arrange(a)\n       end\n10\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n   2 \u2502 a         2     12\n   3 \u2502 b         3     13\n   4 \u2502 b         4     14\n   5 \u2502 c         5     15\n   6 \u2502 c         6     16\n   7 \u2502 d         7     17\n   8 \u2502 d         8     18\n   9 \u2502 e         9     19\n  10 \u2502 e        10     20\n\njulia&gt; @chain df begin\n         @arrange(a, desc(b))\n       end\n10\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         2     12\n   2 \u2502 a         1     11\n   3 \u2502 b         4     14\n   4 \u2502 b         3     13\n   5 \u2502 c         6     16\n   6 \u2502 c         5     15\n   7 \u2502 d         8     18\n   8 \u2502 d         7     17\n   9 \u2502 e        10     20\n  10 \u2502 e         9     19\n</code></pre> <p>source</p> <p># <code>TidierData.@bind_cols</code> \u2014 Macro.</p> <pre><code>@bind_cols(dfs...)\n</code></pre> <p>Bind many DataFrames into one by column. </p> <p>Arguments</p> <ul> <li><code>dfs...</code>: DataFrames to combine.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df1 = DataFrame(a=1:3, b=1:3);\n\njulia&gt; df2 = DataFrame(a=4:6, b=4:6);\n\njulia&gt; df3 = DataFrame(a=7:9, c=7:9);\n\njulia&gt; @chain df1 begin\n         @bind_cols(df2, df3)\n       end\n3\u00d76 DataFrame\n Row \u2502 a      b      a_1    b_1    a_2    c     \n     \u2502 Int64  Int64  Int64  Int64  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1      1      4      4      7      7\n   2 \u2502     2      2      5      5      8      8\n   3 \u2502     3      3      6      6      9      9\n</code></pre> <p>source</p> <p># <code>TidierData.@bind_rows</code> \u2014 Macro.</p> <pre><code>@bind_rows(dfs..., id)\n</code></pre> <p>Bind many DataFrames into one by row. </p> <p>Columns present in at least one of the provided DataFrames are kept. 
Columns not present in some DataFrames are filled with missing values where necessary.</p> <p>Arguments</p> <ul> <li><code>dfs...</code>: DataFrames to combine.</li> <li><code>id</code>: string DataFrame identifier. When id is supplied, a new column of numeric identifiers is created to link each row to its original DataFrame.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df1 = DataFrame(a=1:3, b=1:3);\n\njulia&gt; df2 = DataFrame(a=4:6, b=4:6);\n\njulia&gt; df3 = DataFrame(a=7:9, c=7:9);\n\njulia&gt; @chain df1 begin\n         @bind_rows(df2)\n       end\n6\u00d72 DataFrame\n Row \u2502 a      b     \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1      1\n   2 \u2502     2      2\n   3 \u2502     3      3\n   4 \u2502     4      4\n   5 \u2502     5      5\n   6 \u2502     6      6\n</code></pre> <p>When columns are not present in some DataFrames, they are filled with missing values.</p> <pre><code>julia&gt; @chain df1 begin\n         @bind_rows(df2, df3)\n       end\n9\u00d73 DataFrame\n Row \u2502 a      b        c       \n     \u2502 Int64  Int64?   Int64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1        1  missing \n   2 \u2502     2        2  missing \n   3 \u2502     3        3  missing \n   4 \u2502     4        4  missing \n   5 \u2502     5        5  missing \n   6 \u2502     6        6  missing \n   7 \u2502     7  missing        7\n   8 \u2502     8  missing        8\n   9 \u2502     9  missing        9\n\njulia&gt; @chain df1 begin\n         @bind_rows(df2, df3, id = \"id\")\n       end\n9\u00d74 DataFrame\n Row \u2502 a      b        c        id    \n     \u2502 Int64  Int64?   Int64?   
Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1        1  missing      1\n   2 \u2502     2        2  missing      1\n   3 \u2502     3        3  missing      1\n   4 \u2502     4        4  missing      2\n   5 \u2502     5        5  missing      2\n   6 \u2502     6        6  missing      2\n   7 \u2502     7  missing        7      3\n   8 \u2502     8  missing        8      3\n   9 \u2502     9  missing        9      3\n</code></pre> <p>source</p> <p># <code>TidierData.@count</code> \u2014 Macro.</p> <pre><code>@count(df, exprs..., [wt], [sort])\n</code></pre> <p>Count the unique values of one or more variables, with an optional weighting.</p> <p><code>@chain df @count(a, b)</code> is roughly equivalent to <code>@chain df @group_by(a, b) @summarize(n = n())</code>. Supply <code>wt</code> to perform weighted counts, switching the summary from <code>n = n()</code> to <code>n = sum(wt)</code>. Note that if grouping columns are provided, the result will be an ungrouped data frame, which is slightly different behavior than R's <code>tidyverse</code>.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame or GroupedDataFrame.</li> <li><code>exprs...</code>: Column names, separated by commas.</li> <li><code>wt</code>: Optional parameter. Used to calculate a sum over the provided <code>wt</code> variable instead of counting the rows.</li> <li><code>sort</code>: Defaults to <code>false</code>. 
Whether the result should be sorted from highest to lowest <code>n</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = vcat(repeat([\"a\"], inner = 3),\n                           repeat([\"b\"], inner = 3),\n                           repeat([\"c\"], inner = 1),\n                           missing),\n                      b = 1:8)\n8\u00d72 DataFrame\n Row \u2502 a        b     \n     \u2502 String?  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a            1\n   2 \u2502 a            2\n   3 \u2502 a            3\n   4 \u2502 b            4\n   5 \u2502 b            5\n   6 \u2502 b            6\n   7 \u2502 c            7\n   8 \u2502 missing      8\n\njulia&gt; @chain df @count()\n1\u00d71 DataFrame\n Row \u2502 n     \n     \u2502 Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     8\n\njulia&gt; @chain df begin\n         @count(a)\n       end\n4\u00d72 DataFrame\n Row \u2502 a        n     \n     \u2502 String?  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a            3\n   2 \u2502 b            3\n   3 \u2502 c            1\n   4 \u2502 missing      1\n\njulia&gt; @chain df begin\n         @count(a, wt = b)\n       end\n4\u00d72 DataFrame\n Row \u2502 a        n     \n     \u2502 String?  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a            6\n   2 \u2502 b           15\n   3 \u2502 c            7\n   4 \u2502 missing      8\n\njulia&gt; @chain df begin\n         @count(a, wt = b, sort = true)\n       end\n4\u00d72 DataFrame\n Row \u2502 a        n     \n     \u2502 String?  
Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 b           15\n   2 \u2502 missing      8\n   3 \u2502 c            7\n   4 \u2502 a            6       \n</code></pre> <p>source</p> <p># <code>TidierData.@distinct</code> \u2014 Macro.</p> <pre><code>@distinct(df, exprs...)\n</code></pre> <p>Return distinct rows of a DataFrame.</p> <p>If no columns or expressions are provided, then unique rows across all columns are returned. Otherwise, unique rows are determined based on the columns or expressions provided, and then all columns are returned.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>exprs...</code>: One or more unquoted variable names separated by commas. Variable names can also be used as their positions in the data, like <code>x:y</code>, to select a range of variables.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = repeat('a':'e', inner = 2), b = repeat(1:5, 2), c = 11:20);\n\njulia&gt; @chain df @distinct()\n10\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n   2 \u2502 a         2     12\n   3 \u2502 b         3     13\n   4 \u2502 b         4     14\n   5 \u2502 c         5     15\n   6 \u2502 c         1     16\n   7 \u2502 d         2     17\n   8 \u2502 d         3     18\n   9 \u2502 e         4     19\n  10 \u2502 e         5     20\n\njulia&gt; @chain df @distinct(a)\n5\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n   2 \u2502 b         
3     13\n   3 \u2502 c         5     15\n   4 \u2502 d         2     17\n   5 \u2502 e         4     19\n\njulia&gt; @chain df begin\n         @distinct(starts_with(\"a\"))\n       end\n5\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n   2 \u2502 b         3     13\n   3 \u2502 c         5     15\n   4 \u2502 d         2     17\n   5 \u2502 e         4     19\n\njulia&gt; @chain df begin\n         @distinct(a, b)\n       end\n10\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n   2 \u2502 a         2     12\n   3 \u2502 b         3     13\n   4 \u2502 b         4     14\n   5 \u2502 c         5     15\n   6 \u2502 c         1     16\n   7 \u2502 d         2     17\n   8 \u2502 d         3     18\n   9 \u2502 e         4     19\n  10 \u2502 e         5     20\n</code></pre> <p>source</p> <p># <code>TidierData.@drop_missing</code> \u2014 Macro.</p> <pre><code>@drop_missing(df, [cols...])\n</code></pre> <p>Drop all rows with missing values.</p> <p>When called without arguments, <code>@drop_missing()</code> drops all rows with missing values in any column. 
If columns are provided as an optional argument, only missing values from named columns are considered when dropping rows.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame or GroupedDataFrame.</li> <li><code>cols...</code>: An optional column, or multiple columns separated by commas or specified using selection helpers.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(\n              a = [1, 2, missing, 4],\n              b = [1, missing, 3, 4]\n            )\n4\u00d72 DataFrame\n Row \u2502 a        b       \n     \u2502 Int64?   Int64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502       1        1\n   2 \u2502       2  missing \n   3 \u2502 missing        3\n   4 \u2502       4        4\n\njulia&gt; @chain df @drop_missing()\n2\u00d72 DataFrame\n Row \u2502 a      b     \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1      1\n   2 \u2502     4      4\n\njulia&gt; @chain df @drop_missing(a)\n3\u00d72 DataFrame\n Row \u2502 a      b       \n     \u2502 Int64  Int64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1        1\n   2 \u2502     2  missing \n   3 \u2502     4        4\n\njulia&gt; @chain df @drop_missing(a, b)\n2\u00d72 DataFrame\n Row \u2502 a      b     \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1      1\n   2 \u2502     4      4\n\njulia&gt; @chain df @drop_missing(starts_with(\"a\"))\n3\u00d72 DataFrame\n Row \u2502 a      b       \n     \u2502 Int64  Int64?  
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1        1\n   2 \u2502     2  missing \n   3 \u2502     4        4\n</code></pre> <p>source</p> <p># <code>TidierData.@fill_missing</code> \u2014 Macro.</p> <pre><code>@fill_missing(df, [columns...], direction)\n</code></pre> <p>Fill missing values in a DataFrame <code>df</code> using the specified method.</p> <p>Arguments</p> <ul> <li><code>df</code>: The DataFrame or GroupedDataFrame in which you want to fill missing values.</li> <li><code>columns</code>: (Optional) The columns for which missing values need to be filled, separated by commas. If not provided, the operation is applied to all columns.</li> <li><code>direction</code>: A string containing the method to use for filling missing values. Options include: \"down\" (last observation carried forward) or \"up\" (next observation carried backward).</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(\n          dt1 = [missing, 0.2, missing, missing, 1, missing, 5, 6],\n          dt2 = [0.3, 2, missing, 3, missing, 5, 6,missing],\n          dt3 = [missing, 0.2, missing, missing, 1, missing, 5, 6],\n          dt4 = [0.3, missing, missing, 3, missing, 5, 6, missing],\n          dt5 = ['a', 'b', 'a', 'b', 'a', 'a', 'a', 'b']);\n\njulia&gt; @fill_missing(df, dt2, dt4, \"down\")\n8\u00d75 DataFrame\n Row \u2502 dt1        dt2       dt3        dt4       dt5  \n     \u2502 Float64?   Float64?  Float64?   Float64?  
Char \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 missing         0.3  missing         0.3  a\n   2 \u2502       0.2       2.0        0.2       0.3  b\n   3 \u2502 missing         2.0  missing         0.3  a\n   4 \u2502 missing         3.0  missing         3.0  b\n   5 \u2502       1.0       3.0        1.0       3.0  a\n   6 \u2502 missing         5.0  missing         5.0  a\n   7 \u2502       5.0       6.0        5.0       6.0  a\n   8 \u2502       6.0       6.0        6.0       6.0  b\n\njulia&gt; @chain df begin\n         @fill_missing(\"up\")\n       end\n8\u00d75 DataFrame\n Row \u2502 dt1       dt2        dt3       dt4        dt5  \n     \u2502 Float64?  Float64?   Float64?  Float64?   Char \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      0.2        0.3       0.2        0.3  a\n   2 \u2502      0.2        2.0       0.2        3.0  b\n   3 \u2502      1.0        3.0       1.0        3.0  a\n   4 \u2502      1.0        3.0       1.0        3.0  b\n   5 \u2502      1.0        5.0       1.0        5.0  a\n   6 \u2502      5.0        5.0       5.0        5.0  a\n   7 \u2502      5.0        6.0       5.0        6.0  a\n   8 \u2502      6.0  missing         6.0  missing    b \n\njulia&gt; @chain df begin\n         @group_by(dt5)\n         @fill_missing(dt1, \"up\")\n       end\nGroupedDataFrame with 2 groups based on key: dt5\nFirst Group (5 rows): dt5 = 'a': ASCII/Unicode U+0061 
(category Ll: Letter, lowercase)\n Row \u2502 dt1       dt2        dt3        dt4        dt5  \n     \u2502 Float64?  Float64?   Float64?   Float64?   Char \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      1.0        0.3  missing          0.3  a\n   2 \u2502      1.0  missing    missing    missing    a\n   3 \u2502      1.0  missing          1.0  missing    a\n   4 \u2502      5.0        5.0  missing          5.0  a\n   5 \u2502      5.0        6.0        5.0        6.0  a\n\u22ee\nLast Group (3 rows): dt5 = 'b': ASCII/Unicode U+0062 (category Ll: Letter, lowercase)\n Row \u2502 dt1       dt2        dt3        dt4        dt5  \n     \u2502 Float64?  Float64?   Float64?   Float64?   Char \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      0.2        2.0        0.2  missing    b\n   2 \u2502      6.0        3.0  missing          3.0  b\n   3 \u2502      6.0  missing          6.0  missing    b\n</code></pre> <p>source</p> <p># <code>TidierData.@filter</code> \u2014 Macro.</p> <pre><code>@filter(df, exprs...)\n</code></pre> <p>Subset a DataFrame and return a copy of DataFrame where specified conditions are satisfied.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>exprs...</code>: transformation(s) that produce vectors containing <code>true</code> or <code>false</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 'a':'e', b = 1:5, 
c = 11:15);\n\njulia&gt; @chain df begin\n         @filter(b &gt;= mean(b))\n       end\n3\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 c         3     13\n   2 \u2502 d         4     14\n   3 \u2502 e         5     15\n\njulia&gt; @chain df begin\n         @filter(b &gt;= 3 &amp;&amp; c &gt;= 14)\n       end\n2\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 d         4     14\n   2 \u2502 e         5     15\n\njulia&gt; @chain df begin\n         @filter(b in (1, 3))\n       end\n2\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n   2 \u2502 c         3     13\n</code></pre> <p>source</p> <p># <code>TidierData.@full_join</code> \u2014 Macro.</p> <pre><code>@full_join(df1, df2, [by])\n</code></pre> <p>Perform a full join on <code>df1</code> and <code>df2</code> with an optional <code>by</code>.</p> <p>Arguments</p> <ul> <li><code>df1</code>: A DataFrame.</li> <li><code>df2</code>: A DataFrame.</li> <li><code>by</code>: An optional column or tuple of columns. <code>by</code> supports interpolation of individual columns. 
If <code>by</code> is not supplied, then it will be inferred from shared names of columns between <code>df1</code> and <code>df2</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df1 = DataFrame(a = [\"a\", \"b\"], b = 1:2);\n\njulia&gt; df2 = DataFrame(a = [\"a\", \"c\"], c = 3:4);\n\njulia&gt; @full_join(df1, df2)\n3\u00d73 DataFrame\n Row \u2502 a       b        c       \n     \u2502 String  Int64?   Int64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a             1        3\n   2 \u2502 b             2  missing \n   3 \u2502 c       missing        4\n\njulia&gt; @full_join(df1, df2, a)\n3\u00d73 DataFrame\n Row \u2502 a       b        c       \n     \u2502 String  Int64?   Int64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a             1        3\n   2 \u2502 b             2  missing \n   3 \u2502 c       missing        4\n\njulia&gt; @full_join(df1, df2, a = a)\n3\u00d73 DataFrame\n Row \u2502 a       b        c       \n     \u2502 String  Int64?   Int64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a             1        3\n   2 \u2502 b             2  missing \n   3 \u2502 c       missing        4\n\njulia&gt; @full_join(df1, df2, \"a\")\n3\u00d73 DataFrame\n Row \u2502 a       b        c       \n     \u2502 String  Int64?   Int64?  
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a             1        3\n   2 \u2502 b             2  missing \n   3 \u2502 c       missing        4\n\njulia&gt; @full_join(df1, df2, \"a\" = \"a\")\n3\u00d73 DataFrame\n Row \u2502 a       b        c       \n     \u2502 String  Int64?   Int64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a             1        3\n   2 \u2502 b             2  missing \n   3 \u2502 c       missing        4\n</code></pre> <p>source</p> <p># <code>TidierData.@glimpse</code> \u2014 Macro.</p> <pre><code>@glimpse(df, width = 80)\n</code></pre> <p>Preview a DataFrame (or GroupedDataFrame).</p> <p>The <code>@glimpse</code> macro is used to preview a DataFrame or GroupedDataFrame. Each column is printed on a separate row, along with its data type and first few elements, with the output truncated based on the <code>width</code>.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame or GroupedDataFrame.</li> <li><code>width</code>: The width of the output, measured in the number of characters. 
Defaults to 80.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(\n               a = 1:100, \n               b = 1:100, \n               c = repeat([\"a\"], 100)\n               );\n\njulia&gt; @chain df @glimpse\nRows: 100\nColumns: 3\n.a             Int64          1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,\n.b             Int64          1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,\n.c             String         a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a,\n\njulia&gt; @chain df begin\n       @group_by(a)\n       @glimpse()\n       end\nRows: 100\nColumns: 3\nGroups: a [100]\n.a             Int64          1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,\n.b             Int64          1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,\n.c             String         a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a,\n</code></pre> <p>source</p> <p># <code>TidierData.@group_by</code> \u2014 Macro.</p> <pre><code>@group_by(df, exprs...)\n</code></pre> <p>Return a <code>GroupedDataFrame</code> where operations are performed by groups specified by unique  sets of <code>cols</code>.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>exprs...</code>: DataFrame columns to group by or tidy expressions. 
Can be a single tidy expression or multiple expressions separated by commas.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);\n\njulia&gt; @chain df begin\n         @group_by(a)\n         @summarize(b = mean(b))\n       end\n5\u00d72 DataFrame\n Row \u2502 a     b       \n     \u2502 Char  Float64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1.0\n   2 \u2502 b         2.0\n   3 \u2502 c         3.0\n   4 \u2502 d         4.0\n   5 \u2502 e         5.0  \n\njulia&gt; @chain df begin\n         @group_by(d = uppercase(a))\n         @summarize(b = mean(b))\n       end\n5\u00d72 DataFrame\n Row \u2502 d     b       \n     \u2502 Char  Float64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 A         1.0\n   2 \u2502 B         2.0\n   3 \u2502 C         3.0\n   4 \u2502 D         4.0\n   5 \u2502 E         5.0\n\njulia&gt; @chain df begin\n         @group_by(-(b, c)) # same as `a`\n         @summarize(b = mean(b))\n       end\n5\u00d72 DataFrame\n Row \u2502 a     b       \n     \u2502 Char  Float64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1.0\n   2 \u2502 b         2.0\n   3 \u2502 c         3.0\n   4 \u2502 d         4.0\n   5 \u2502 e         5.0\n\njulia&gt; @chain df begin\n         @group_by(!(b, c)) # same as `a`\n         @summarize(b = mean(b))\n       end\n5\u00d72 DataFrame\n Row \u2502 a     b       \n     \u2502 Char  Float64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1.0\n   2 \u2502 b         2.0\n   3 \u2502 c         3.0\n   4 \u2502 d         4.0\n   5 \u2502 e         5.0\n</code></pre> 
<p>source</p> <p># <code>TidierData.@head</code> \u2014 Macro.</p> <pre><code>   @head(df, value)\n</code></pre> <p>Shows the first n rows of the data frame or of each group in a grouped data frame. </p> <p>Arguments</p> <ul> <li><code>df</code>: The data frame.</li> <li><code>value</code>: number of rows to be returned. Defaults to 6 if left blank.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = vcat(repeat([\"a\"], inner = 4),\n                                  repeat([\"b\"], inner = 4)),\n                             b = 1:8)\n8\u00d72 DataFrame\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1\n   2 \u2502 a           2\n   3 \u2502 a           3\n   4 \u2502 a           4\n   5 \u2502 b           5\n   6 \u2502 b           6\n   7 \u2502 b           7\n   8 \u2502 b           8\n\njulia&gt; @head(df, 3)\n3\u00d72 DataFrame\n Row \u2502 a        b     \n     \u2502 String?  
Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a            1\n   2 \u2502 a            2\n   3 \u2502 a            3\n\njulia&gt; @head(df)\n6\u00d72 DataFrame\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1\n   2 \u2502 a           2\n   3 \u2502 a           3\n   4 \u2502 a           4\n   5 \u2502 b           5\n   6 \u2502 b           6\n\njulia&gt; @chain df begin\n         @group_by a\n         @head 2\n       end\nGroupedDataFrame with 2 groups based on key: a\nFirst Group (2 rows): a = \"a\"\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1\n   2 \u2502 a           2\n\u22ee\nLast Group (2 rows): a = \"b\"\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 b           5\n   2 \u2502 b           6\n</code></pre> <p>source</p> <p># <code>TidierData.@inner_join</code> \u2014 Macro.</p> <pre><code>@inner_join(df1, df2, [by])\n</code></pre> <p>Perform an inner join on <code>df1</code> and <code>df2</code> with an optional <code>by</code>.</p> <p>Arguments</p> <ul> <li><code>df1</code>: A DataFrame.</li> <li><code>df2</code>: A DataFrame.</li> <li><code>by</code>: An optional column or tuple of columns. <code>by</code> supports interpolation of individual columns. 
If <code>by</code> is not supplied, then it will be inferred from shared names of columns between <code>df1</code> and <code>df2</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df1 = DataFrame(a = [\"a\", \"b\"], b = 1:2);\n\njulia&gt; df2 = DataFrame(a = [\"a\", \"c\"], c = 3:4);\n\njulia&gt; @inner_join(df1, df2)\n1\u00d73 DataFrame\n Row \u2502 a       b      c     \n     \u2502 String  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1      3\n\njulia&gt; @inner_join(df1, df2, a)\n1\u00d73 DataFrame\n Row \u2502 a       b      c     \n     \u2502 String  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1      3\n\njulia&gt; @inner_join(df1, df2, a = a)\n1\u00d73 DataFrame\n Row \u2502 a       b      c     \n     \u2502 String  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1      3\n\njulia&gt; @inner_join(df1, df2, \"a\")\n1\u00d73 DataFrame\n Row \u2502 a       b      c     \n     \u2502 String  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1      3\n\njulia&gt; @inner_join(df1, df2, \"a\" = \"a\")\n1\u00d73 DataFrame\n Row \u2502 a       b      c     \n     \u2502 String  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1      3\n</code></pre> <p>source</p> 
<p># <code>TidierData.@left_join</code> \u2014 Macro.</p> <pre><code>@left_join(df1, df2, [by])\n</code></pre> <p>Perform a left join on <code>df1</code> and <code>df2</code> with an optional <code>by</code>.</p> <p>Arguments</p> <ul> <li><code>df1</code>: A DataFrame.</li> <li><code>df2</code>: A DataFrame.</li> <li><code>by</code>: An optional column or tuple of columns. <code>by</code> supports interpolation of individual columns. If <code>by</code> is not supplied, then it will be inferred from shared names of columns between <code>df1</code> and <code>df2</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df1 = DataFrame(a = [\"a\", \"b\"], b = 1:2);\n\njulia&gt; df2 = DataFrame(a = [\"a\", \"c\"], c = 3:4);\n\njulia&gt; @left_join(df1, df2)\n2\u00d73 DataFrame\n Row \u2502 a       b      c       \n     \u2502 String  Int64  Int64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1        3\n   2 \u2502 b           2  missing \n\njulia&gt; @left_join(df1, df2, a)\n2\u00d73 DataFrame\n Row \u2502 a       b      c       \n     \u2502 String  Int64  Int64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1        3\n   2 \u2502 b           2  missing\n\njulia&gt; @left_join(df1, df2, a = a)\n2\u00d73 DataFrame\n Row \u2502 a       b      c       \n     \u2502 String  Int64  Int64?  
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1        3\n   2 \u2502 b           2  missing\n\njulia&gt; @left_join(df1, df2, \"a\")\n2\u00d73 DataFrame\n Row \u2502 a       b      c       \n     \u2502 String  Int64  Int64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1        3\n   2 \u2502 b           2  missing\n\njulia&gt; @left_join(df1, df2, \"a\" = \"a\")\n2\u00d73 DataFrame\n Row \u2502 a       b      c       \n     \u2502 String  Int64  Int64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1        3\n   2 \u2502 b           2  missing\n</code></pre> <p>source</p> <p># <code>TidierData.@mutate</code> \u2014 Macro.</p> <pre><code>@mutate(df, exprs...)\n</code></pre> <p>Create new columns as functions of existing columns. 
The results have the same number of rows as <code>df</code>.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>exprs...</code>: add new columns or replace values of existed columns using        <code>new_variable = values</code> syntax.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);\n\njulia&gt; @chain df begin\n         @mutate(d = b + c,\n                 b_minus_mean_b = b - mean(b))\n       end\n5\u00d75 DataFrame\n Row \u2502 a     b      c      d      b_minus_mean_b \n     \u2502 Char  Int64  Int64  Int64  Float64        \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11     12            -2.0\n   2 \u2502 b         2     12     14            -1.0\n   3 \u2502 c         3     13     16             0.0\n   4 \u2502 d         4     14     18             1.0\n   5 \u2502 e         5     15     20             2.0\n\njulia&gt; @chain df begin\n         @mutate begin\n           d = b + c\n           b_minus_mean_b = b - mean(b)\n         end\n       end\n5\u00d75 DataFrame\n Row \u2502 a     b      c      d      b_minus_mean_b \n     \u2502 Char  Int64  Int64  Int64  Float64        \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11     12            -2.0\n   2 \u2502 b         2     12     14            -1.0\n   3 \u2502 c         3     13     16             0.0\n   4 \u2502 d         4     14     18             1.0\n   5 \u2502 e         5     
15     20             2.0\n\njulia&gt; @chain df begin\n         @mutate(d = b in (1,3))\n       end\n5\u00d74 DataFrame\n Row \u2502 a     b      c      d     \n     \u2502 Char  Int64  Int64  Bool  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11   true\n   2 \u2502 b         2     12  false\n   3 \u2502 c         3     13   true\n   4 \u2502 d         4     14  false\n   5 \u2502 e         5     15  false\n\njulia&gt; @chain df begin\n         @mutate(across((b, c), mean))\n       end\n5\u00d75 DataFrame\n Row \u2502 a     b      c      b_mean   c_mean  \n     \u2502 Char  Int64  Int64  Float64  Float64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11      3.0     13.0\n   2 \u2502 b         2     12      3.0     13.0\n   3 \u2502 c         3     13      3.0     13.0\n   4 \u2502 d         4     14      3.0     13.0\n   5 \u2502 e         5     15      3.0     13.0\n\njulia&gt; @chain df begin\n         @summarize(across(contains(\"b\"), mean))\n       end\n1\u00d71 DataFrame\n Row \u2502 b_mean  \n     \u2502 Float64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     3.0\n\njulia&gt; @chain df begin\n         @summarize(across(-contains(\"a\"), mean))\n       end\n1\u00d72 DataFrame\n Row \u2502 b_mean   c_mean  \n     \u2502 Float64  Float64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     3.0     13.0\n\njulia&gt; @chain df begin\n         
@mutate(across(where(is_number), minimum))\n       end\n5\u00d75 DataFrame\n Row \u2502 a     b      c      b_minimum  c_minimum \n     \u2502 Char  Int64  Int64  Int64      Int64     \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11          1         11\n   2 \u2502 b         2     12          1         11\n   3 \u2502 c         3     13          1         11\n   4 \u2502 d         4     14          1         11\n   5 \u2502 e         5     15          1         11\n</code></pre> <p>source</p> <p># <code>TidierData.@nest</code> \u2014 Macro.</p> <pre><code>@nest(df, new_column = nesting_columns)\n</code></pre> <p>Multiple columns are nested into one or more new columns in a DataFrame. </p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame</li> <li><code>new_column</code>: New column name</li> <li><code>nesting_columns</code>: Columns to be nested into the new_column</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = repeat('a':'e', inner = 3),\n                      b = 1:15,\n                      c_1 = 16:30,\n                      c_2 = 31:45);\n\njulia&gt; @nest(df, data = b:c_2)\n5\u00d72 DataFrame\n Row \u2502 a     data          \n     \u2502 Char  DataFrame     \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a     3\u00d73 DataFrame \n   2 \u2502 b     3\u00d73 DataFrame \n   3 \u2502 c     3\u00d73 DataFrame \n   4 \u2502 d     3\u00d73 DataFrame \n   5 \u2502 e     3\u00d73 DataFrame \n\njulia&gt; @nest(df, data_1 = b, data_2 = starts_with(\"c\"))\n5\u00d73 DataFrame\n Row \u2502 a     data_1         data_2        \n     \u2502 Char  
DataFrame      DataFrame     \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a     3\u00d71 DataFrame  3\u00d72 DataFrame \n   2 \u2502 b     3\u00d71 DataFrame  3\u00d72 DataFrame \n   3 \u2502 c     3\u00d71 DataFrame  3\u00d72 DataFrame \n   4 \u2502 d     3\u00d71 DataFrame  3\u00d72 DataFrame \n   5 \u2502 e     3\u00d71 DataFrame  3\u00d72 DataFrame \n\njulia&gt; @chain df begin\n         @nest(data = b:c_2)\n         @unnest_longer(data)\n       end\n15\u00d72 DataFrame\n Row \u2502 a     data                         \n     \u2502 Char  NamedTup\u2026                    \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a     (b = 1, c_1 = 16, c_2 = 31)\n   2 \u2502 a     (b = 2, c_1 = 17, c_2 = 32)\n   3 \u2502 a     (b = 3, c_1 = 18, c_2 = 33)\n   4 \u2502 b     (b = 4, c_1 = 19, c_2 = 34)\n   5 \u2502 b     (b = 5, c_1 = 20, c_2 = 35)\n   6 \u2502 b     (b = 6, c_1 = 21, c_2 = 36)\n   7 \u2502 c     (b = 7, c_1 = 22, c_2 = 37)\n   8 \u2502 c     (b = 8, c_1 = 23, c_2 = 38)\n   9 \u2502 c     (b = 9, c_1 = 24, c_2 = 39)\n  10 \u2502 d     (b = 10, c_1 = 25, c_2 = 40)\n  11 \u2502 d     (b = 11, c_1 = 26, c_2 = 41)\n  12 \u2502 d     (b = 12, c_1 = 27, c_2 = 42)\n  13 \u2502 e     (b = 13, c_1 = 28, c_2 = 43)\n  14 \u2502 e     (b = 14, c_1 = 29, c_2 = 44)\n  15 \u2502 e     (b = 15, c_1 = 30, c_2 = 45)\n\njulia&gt; @chain df begin\n         @nest(data = b:c_2)\n         @unnest_wider(data)\n       end\n5\u00d74 DataFrame\n Row \u2502 a     b             c_1           c_2          \n     \u2502 Char  Any           Any           
Any          \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a     [1, 2, 3]     [16, 17, 18]  [31, 32, 33]\n   2 \u2502 b     [4, 5, 6]     [19, 20, 21]  [34, 35, 36]\n   3 \u2502 c     [7, 8, 9]     [22, 23, 24]  [37, 38, 39]\n   4 \u2502 d     [10, 11, 12]  [25, 26, 27]  [40, 41, 42]\n   5 \u2502 e     [13, 14, 15]  [28, 29, 30]  [43, 44, 45]\n\njulia&gt; @chain df begin\n         @nest(data = -a)\n         @unnest_wider(data) # wider first\n         @unnest_longer(-a)  # then longer\n       end\n15\u00d74 DataFrame\n Row \u2502 a     b      c_1    c_2   \n     \u2502 Char  Int64  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     16     31\n   2 \u2502 a         2     17     32\n   3 \u2502 a         3     18     33\n   4 \u2502 b         4     19     34\n   5 \u2502 b         5     20     35\n   6 \u2502 b         6     21     36\n   7 \u2502 c         7     22     37\n   8 \u2502 c         8     23     38\n   9 \u2502 c         9     24     39\n  10 \u2502 d        10     25     40\n  11 \u2502 d        11     26     41\n  12 \u2502 d        12     27     42\n  13 \u2502 e        13     28     43\n  14 \u2502 e        14     29     44\n  15 \u2502 e        15     30     45\n\njulia&gt; @chain df begin\n         @nest(data = -a)\n         @unnest_longer(data) # longer first\n         @unnest_wider(-a)    # then wider\n       end\n15\u00d74 DataFrame\n Row \u2502 a     b      c_2    c_1   \n     \u2502 Char  Int64  Int64  Int64 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     31     16\n   2 \u2502 a         2     32     17\n   3 \u2502 a         3     33     18\n   4 \u2502 b         4     34     19\n   5 \u2502 b         5     35     20\n   6 \u2502 b         6     36     21\n   7 \u2502 c         7     37     22\n   8 \u2502 c         8     38     23\n   9 \u2502 c         9     39     24\n  10 \u2502 d        10     40     25\n  11 \u2502 d        11     41     26\n  12 \u2502 d        12     42     27\n  13 \u2502 e        13     43     28\n  14 \u2502 e        14     44     29\n  15 \u2502 e        15     45     30\n</code></pre> <p>source</p> <p># <code>TidierData.@pivot_longer</code> \u2014 Macro.</p> <p>@pivot_longer(df, cols, [names_to], [values_to])</p> <p>Reshapes the DataFrame to make it longer, increasing the number of rows and reducing the number of columns.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>cols</code>: Columns to pivot into longer format. Multiple columns can be selected but providing tuples of columns is not yet supported.</li> <li><code>names_to</code>: Optional, defaults to <code>variable</code>. The name of the newly created column whose values will contain the input DataFrame's column names.</li> <li><code>values_to</code>: Optional, defaults to <code>value</code>. 
The name of the newly created column containing the input DataFrame's cell values.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df_wide = DataFrame(id = [1, 2], A = [1, 3], B = [2, 4]);\n\njulia&gt; @pivot_longer(df_wide, A:B)\n4\u00d73 DataFrame\n Row \u2502 id     variable  value \n     \u2502 Int64  String    Int64\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1  A             1\n   2 \u2502     2  A             3\n   3 \u2502     1  B             2\n   4 \u2502     2  B             4\n\njulia&gt; @pivot_longer(df_wide, -id)\n4\u00d73 DataFrame\n Row \u2502 id     variable  value \n     \u2502 Int64  String    Int64\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1  A             1\n   2 \u2502     2  A             3\n   3 \u2502     1  B             2\n   4 \u2502     2  B             4\n\njulia&gt; @pivot_longer(df_wide, A:B, names_to = \"letter\", values_to = \"number\")\n4\u00d73 DataFrame\n Row \u2502 id     letter  number \n     \u2502 Int64  String  Int64\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1  A            1\n   2 \u2502     2  A            3\n   3 \u2502     1  B            2\n   4 \u2502     2  B            4\n\njulia&gt; @pivot_longer(df_wide, A:B, names_to = letter, values_to = number)\n4\u00d73 DataFrame\n Row \u2502 id     letter  number \n     \u2502 Int64  String  Int64\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1  A            1\n   2 \u2502    
 2  A            3\n   3 \u2502     1  B            2\n   4 \u2502     2  B            4\n\njulia&gt; @pivot_longer(df_wide, A:B, names_to = \"letter\")\n4\u00d73 DataFrame\n Row \u2502 id     letter  value \n     \u2502 Int64  String  Int64\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1  A           1\n   2 \u2502     2  A           3\n   3 \u2502     1  B           2\n   4 \u2502     2  B           4\n</code></pre> <p>source</p> <p># <code>TidierData.@pivot_wider</code> \u2014 Macro.</p> <p>@pivot_wider(df, names_from, values_from[, values_fill])</p> <p>Reshapes the DataFrame to make it wider, increasing the number of columns and reducing the number of rows.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>names_from</code>: The name of the column to get the name of the output columns from.</li> <li><code>values_from</code>: The name of the column to get the cell values from.</li> <li><code>values_fill</code>: The value to replace a missing name/value combination (default is <code>missing</code>)</li> </ul> <p>Examples</p> <pre><code>julia&gt; df_long = DataFrame(id = [1, 1, 2, 2],\n                           variable = [\"A\", \"B\", \"A\", \"B\"],\n                           value = [1, 2, 3, 4]);\n\njulia&gt; df_long_missing = DataFrame(id = [1, 1, 2],\n                           variable = [\"A\", \"B\", \"B\"],\n                           value = [1, 2, 4]);\n\njulia&gt; @pivot_wider(df_long, names_from = variable, values_from = value)\n2\u00d73 DataFrame\n Row \u2502 id     A       B      \n     \u2502 Int64  Int64?  
Int64?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1       1       2\n   2 \u2502     2       3       4\n\njulia&gt; @pivot_wider(df_long, names_from = \"variable\", values_from = \"value\")\n2\u00d73 DataFrame\n Row \u2502 id     A       B      \n     \u2502 Int64  Int64?  Int64?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1       1       2\n   2 \u2502     2       3       4\n\njulia&gt; @pivot_wider(df_long_missing, names_from = variable, values_from = value, values_fill = 0)\n2\u00d73 DataFrame\n Row \u2502 id     A      B     \n     \u2502 Int64  Int64  Int64\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1      1      2\n   2 \u2502     2      0      4\n</code></pre> <p>source</p> <p># <code>TidierData.@pull</code> \u2014 Macro.</p> <pre><code>@pull(df, column)\n</code></pre> <p>Pull (or extract) a column as a vector.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>column</code>: A single column, referred to either by its name or number.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);\n\njulia&gt; @chain df @pull(a)\n5-element Vector{Char}:\n 'a': ASCII/Unicode U+0061 (category Ll: Letter, lowercase)\n 'b': ASCII/Unicode U+0062 (category Ll: Letter, lowercase)\n 'c': ASCII/Unicode U+0063 (category Ll: Letter, lowercase)\n 'd': ASCII/Unicode U+0064 (category Ll: Letter, lowercase)\n 'e': ASCII/Unicode U+0065 (category Ll: Letter, lowercase)\n\njulia&gt; @chain df @pull(2)\n5-element Vector{Int64}:\n 1\n 2\n 3\n 4\n 5\n</code></pre> <p>source</p> <p># 
<code>TidierData.@relocate</code> \u2014 Macro.</p> <pre><code>@relocate(df, columns, before = nothing, after = nothing)\n</code></pre> <p>Rearranges the columns of a data frame. This function allows for moving specified columns to a new position within the data frame, either before or after a given target column. The <code>columns</code>, <code>before</code>, and <code>after</code> arguments all accept tidy selection functions. Only one of <code>before</code> or <code>after</code> should be specified. If neither is specified, the selected columns will be moved to the beginning of the data frame.</p> <p>Arguments</p> <ul> <li><code>df</code>: The data frame.</li> <li><code>columns</code>: Column or columns to be moved.</li> <li><code>before</code>: (Optional) Column or columns before which the specified columns will be moved. If not provided or <code>nothing</code>, this argument is ignored.</li> <li><code>after</code>: (Optional) Column or columns after which the specified columns will be moved. 
If not provided or <code>nothing</code>, this argument is ignored.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(A = 1:5, B = 6:10, C = [\"A\", \"b\", \"C\", \"D\", \"E\"], D = ['A', 'B','A', 'B','C'],\n                      E = 1:5, F = [\"A\", \"b\", \"C\", \"D\", \"E\"]);\n\njulia&gt; @relocate(df, where(is_string), before = where(is_integer))\n5\u00d76 DataFrame\n Row \u2502 C       F       A      B      E      D    \n     \u2502 String  String  Int64  Int64  Int64  Char \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 A       A           1      6      1  A\n   2 \u2502 b       b           2      7      2  B\n   3 \u2502 C       C           3      8      3  A\n   4 \u2502 D       D           4      9      4  B\n   5 \u2502 E       E           5     10      5  C\n\n\njulia&gt; @relocate(df, B, C, D, after = E)\n5\u00d76 DataFrame\n Row \u2502 A      E      B      C       D     F      \n     \u2502 Int64  Int64  Int64  String  Char  String \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1      1      6  A       A     A\n   2 \u2502     2      2      7  b       B     b\n   3 \u2502     3      3      8  C       A     C\n   4 \u2502     4      4      9  D       B     D\n   5 \u2502     5      5     10  E       C     E\n\njulia&gt; @relocate(df, B, C, D, after = starts_with(\"E\"))\n5\u00d76 DataFrame\n Row \u2502 A      E      B      C       D     F      \n     \u2502 Int64  Int64  Int64  String  Char  String 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1      1      6  A       A     A\n   2 \u2502     2      2      7  b       B     b\n   3 \u2502     3      3      8  C       A     C\n   4 \u2502     4      4      9  D       B     D\n   5 \u2502     5      5     10  E       C     E\n\njulia&gt; @relocate(df, B:C) # bring columns to the front\n5\u00d76 DataFrame\n Row \u2502 B      C       A      D     E      F      \n     \u2502 Int64  String  Int64  Char  Int64  String \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     6  A           1  A         1  A\n   2 \u2502     7  b           2  B         2  b\n   3 \u2502     8  C           3  A         3  C\n   4 \u2502     9  D           4  B         4  D\n   5 \u2502    10  E           5  C         5  E\n</code></pre> <p>source</p> <p># <code>TidierData.@rename</code> \u2014 Macro.</p> <pre><code>@rename(df, exprs...)\n</code></pre> <p>Change the names of individual column names in a DataFrame. 
Users can also use <code>@select()</code> to rename and select columns.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>exprs...</code>: Use <code>new_name = old_name</code> syntax to rename selected columns.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);\n\njulia&gt; @chain df begin\n         @rename(d = b, e = c)\n       end\n5\u00d73 DataFrame\n Row \u2502 a     d      e     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n   2 \u2502 b         2     12\n   3 \u2502 c         3     13\n   4 \u2502 d         4     14\n   5 \u2502 e         5     15\n</code></pre> <p>source</p> <p># <code>TidierData.@rename_with</code> \u2014 Macro.</p> <pre><code> @rename_with(df, fn, exprs...)\n</code></pre> <p>Renames the chosen column names using a function.</p> <p>Arguments</p> <ul> <li><code>df</code>: a DataFrame</li> <li><code>fn</code>: the function to apply to the column names (such as str_remove_all from TidierStrings)</li> <li><code>exprs</code>: One or more unquoted variable names separated by commas. Variable names</li> </ul> <p>can also be used as their positions in the data, like <code>x:y</code>, to select a range of variables. Variable names can also be chosen with <code>starts_with()</code>. 
Defaults to all columns if empty.</p> <p>Examples</p> <pre><code>julia&gt; function str_remove_all(column, pattern::String)\n         if ismissing(column)\n             return column\n         end\n         patterns = split(pattern, '|')\n         for p in patterns\n             column = replace(column, strip(p) =&gt; \"\")\n         end\n         return column\n       end;\n\njulia&gt; df = DataFrame(\n              term_a = [\"apple\", \"banana\", \"cherry\"],\n              document_a = [\"doc_1\", \"doc2\", \"doc3\"],\n              _n_ = [1, 2, 3]\n            ); \n\njulia&gt; @rename_with(df, str -&gt; str_remove_all(str, \"_a\"), !term_a)\n3\u00d73 DataFrame\n Row \u2502 term_a  document  _n_   \n     \u2502 String  String    Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 apple   doc_1         1\n   2 \u2502 banana  doc2          2\n   3 \u2502 cherry  doc3          3\n</code></pre> <p>source</p> <p># <code>TidierData.@right_join</code> \u2014 Macro.</p> <pre><code>@right_join(df1, df2, [by])\n</code></pre> <p>Perform a right join on <code>df1</code> and <code>df2</code> with an optional <code>by</code>.</p> <p>Arguments</p> <ul> <li><code>df1</code>: A DataFrame.</li> <li><code>df2</code>: A DataFrame.</li> <li><code>by</code>: An optional column or tuple of columns. <code>by</code> supports interpolation of individual columns. If <code>by</code> is not supplied, then it will be inferred from shared names of columns between <code>df1</code> and <code>df2</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df1 = DataFrame(a = [\"a\", \"b\"], b = 1:2);\n\njulia&gt; df2 = DataFrame(a = [\"a\", \"c\"], c = 3:4);\n\njulia&gt; @right_join(df1, df2)\n2\u00d73 DataFrame\n Row \u2502 a       b        c     \n     \u2502 String  Int64?   
Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a             1      3\n   2 \u2502 c       missing      4\n\njulia&gt; @right_join(df1, df2, a)\n2\u00d73 DataFrame\n Row \u2502 a       b        c     \n     \u2502 String  Int64?   Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a             1      3\n   2 \u2502 c       missing      4\n\njulia&gt; @right_join(df1, df2, a = a)\n2\u00d73 DataFrame\n Row \u2502 a       b        c     \n     \u2502 String  Int64?   Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a             1      3\n   2 \u2502 c       missing      4\n\njulia&gt; @right_join(df1, df2, \"a\")\n2\u00d73 DataFrame\n Row \u2502 a       b        c     \n     \u2502 String  Int64?   Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a             1      3\n   2 \u2502 c       missing      4\n\njulia&gt; @right_join(df1, df2, \"a\" = \"a\")\n2\u00d73 DataFrame\n Row \u2502 a       b        c     \n     \u2502 String  Int64?   
Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a             1      3\n   2 \u2502 c       missing      4\n</code></pre> <p>source</p> <p># <code>TidierData.@select</code> \u2014 Macro.</p> <pre><code>@select(df, exprs...)\n</code></pre> <p>Select variables in a DataFrame.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>exprs...</code>: One or more unquoted variable names separated by commas. Variable names         can also be used as their positions in the data, like <code>x:y</code>, to select         a range of variables.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);\n\njulia&gt; @chain df @select(a, b, c)\n5\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n   2 \u2502 b         2     12\n   3 \u2502 c         3     13\n   4 \u2502 d         4     14\n   5 \u2502 e         5     15\n\njulia&gt; @chain df @select(a:b)\n5\u00d72 DataFrame\n Row \u2502 a     b     \n     \u2502 Char  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1\n   2 \u2502 b         2\n   3 \u2502 c         3\n   4 \u2502 d         4\n   5 \u2502 e         5\n\njulia&gt; @chain df @select(1:2)\n5\u00d72 DataFrame\n Row \u2502 a     b     \n     \u2502 Char  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1\n   2 \u2502 b         2\n   3 \u2502 c         3\n   4 \u2502 d         4\n   5 \u2502 e         5\n\njulia&gt; @chain df 
@select(-(a:b))\n5\u00d71 DataFrame\n Row \u2502 c     \n     \u2502 Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502    11\n   2 \u2502    12\n   3 \u2502    13\n   4 \u2502    14\n   5 \u2502    15\n\njulia&gt; @chain df @select(!(a:b))\n5\u00d71 DataFrame\n Row \u2502 c     \n     \u2502 Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502    11\n   2 \u2502    12\n   3 \u2502    13\n   4 \u2502    14\n   5 \u2502    15\n\njulia&gt; @chain df @select(-(a, b))\n5\u00d71 DataFrame\n Row \u2502 c     \n     \u2502 Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502    11\n   2 \u2502    12\n   3 \u2502    13\n   4 \u2502    14\n   5 \u2502    15\n\njulia&gt; @chain df @select(!(a, b))\n5\u00d71 DataFrame\n Row \u2502 c     \n     \u2502 Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502    11\n   2 \u2502    12\n   3 \u2502    13\n   4 \u2502    14\n   5 \u2502    15\n\njulia&gt; @chain df begin\n         @select(contains(\"b\"), starts_with(\"c\"))\n       end\n5\u00d72 DataFrame\n Row \u2502 b      c     \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1     11\n   2 \u2502     2     12\n   3 \u2502     3     13\n   4 \u2502     4     14\n   5 \u2502     5     15\n\njulia&gt; @chain df @select(-(1:2))\n5\u00d71 DataFrame\n Row \u2502 c     \n     \u2502 Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502    11\n   2 \u2502    12\n   3 \u2502    13\n   4 \u2502    14\n   5 \u2502    15\n\njulia&gt; @chain df @select(!(1:2))\n5\u00d71 DataFrame\n Row \u2502 c     \n     \u2502 Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502    11\n   2 \u2502    12\n   3 
\u2502    13\n   4 \u2502    14\n   5 \u2502    15\n\njulia&gt; @chain df @select(-c)\n5\u00d72 DataFrame\n Row \u2502 a     b     \n     \u2502 Char  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1\n   2 \u2502 b         2\n   3 \u2502 c         3\n   4 \u2502 d         4\n   5 \u2502 e         5\n\njulia&gt; @chain df begin\n         @select(-contains(\"a\"))\n       end\n5\u00d72 DataFrame\n Row \u2502 b      c     \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1     11\n   2 \u2502     2     12\n   3 \u2502     3     13\n   4 \u2502     4     14\n   5 \u2502     5     15\n\njulia&gt; @chain df begin\n         @select(!contains(\"a\"))\n       end\n5\u00d72 DataFrame\n Row \u2502 b      c     \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1     11\n   2 \u2502     2     12\n   3 \u2502     3     13\n   4 \u2502     4     14\n   5 \u2502     5     15\n\njulia&gt; @chain df begin\n         @select(where(is_number))\n       end\n5\u00d72 DataFrame\n Row \u2502 b      c     \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1     11\n   2 \u2502     2     12\n   3 \u2502     3     13\n   4 \u2502     4     14\n   5 \u2502     5     15\n</code></pre> <p>source</p> <p># <code>TidierData.@semi_join</code> \u2014 Macro.</p> <pre><code>@semi_join(df1, df2, [by])\n</code></pre> <p>Perform an semi-join on <code>df1</code> and <code>df2</code> with an optional <code>by</code>.</p> <p>Arguments</p> <ul> <li><code>df1</code>: A DataFrame.</li> <li><code>df2</code>: A DataFrame.</li> <li><code>by</code>: An optional column or 
tuple of columns. <code>by</code> supports interpolation of individual columns. If <code>by</code> is not supplied, then it will be inferred from shared names of columns between <code>df1</code> and <code>df2</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df1 = DataFrame(a = [\"a\", \"b\"], b = 1:2);\n\njulia&gt; df2 = DataFrame(a = [\"a\", \"c\"], c = 3:4);\n\njulia&gt; @semi_join(df1, df2)\n1\u00d72 DataFrame\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1\n\njulia&gt; @semi_join(df1, df2, a)\n1\u00d72 DataFrame\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1\n\njulia&gt; @semi_join(df1, df2, a = a)\n1\u00d72 DataFrame\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1\n\njulia&gt; @semi_join(df1, df2, \"a\")\n1\u00d72 DataFrame\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1\n\njulia&gt; @semi_join(df1, df2, \"a\" = \"a\")\n1\u00d72 DataFrame\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1\n</code></pre> <p>source</p> <p># <code>TidierData.@separate</code> \u2014 Macro.</p> <p>@separate(df, From, Into, Separator)</p> <p>Separate a string column into multiple new columns based on a specified delimiter.</p> <p>Arguments</p> <ul> <li><code>df</code>: A 
DataFrame</li> <li><code>From</code>: Column that will be split</li> <li><code>Into</code>: New column names, supports [] or ()</li> <li><code>Separator</code>: the string or character on which to split</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = [\"1-1\", \"2-2\", \"3-3-3\"]);\n\njulia&gt; @separate(df, a, [b, c, d], \"-\")\n3\u00d73 DataFrame\n Row \u2502 b          c          d          \n     \u2502 SubStrin\u2026  SubStrin\u2026  SubStrin\u2026? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 1          1          missing    \n   2 \u2502 2          2          missing    \n   3 \u2502 3          3          3\n\njulia&gt; @chain df begin\n         @separate(a, (b, c, d), \"-\")\n       end\n3\u00d73 DataFrame\n Row \u2502 b          c          d          \n     \u2502 SubStrin\u2026  SubStrin\u2026  SubStrin\u2026? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 1          1          missing    \n   2 \u2502 2          2          missing    \n   3 \u2502 3          3          3\n</code></pre> <p>source</p> <p># <code>TidierData.@separate_rows</code> \u2014 Macro.</p> <pre><code>@separate_rows(df, columns..., delimiter)\n</code></pre> <p>Split the contents of specified columns in a DataFrame into multiple rows based on a given delimiter.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame</li> <li><code>columns</code>: A column or multiple columns to be split. 
Can be a mix of integers and column names.</li> <li><code>delimiter</code>: The string or character or regular expression used to split the column values.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 1:3,\n                      b = [\"a\", \"aa;bb;cc\", \"dd;ee\"],\n                      c = [\"1\", \"2;3;4\", \"5;6\"],\n                      d = [\"7\", \"8;9;10\", \"11;12\"])\n3\u00d74 DataFrame\n Row \u2502 a      b         c       d      \n     \u2502 Int64  String    String  String \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1  a         1       7\n   2 \u2502     2  aa;bb;cc  2;3;4   8;9;10\n   3 \u2502     3  dd;ee     5;6     11;12\n\njulia&gt; @separate_rows(df, 2, 4, \";\" )\n6\u00d74 DataFrame\n Row \u2502 a      b          c       d         \n     \u2502 Int64  SubStrin\u2026  String  SubStrin\u2026 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1  a          1       7\n   2 \u2502     2  aa         2;3;4   8\n   3 \u2502     2  bb         2;3;4   9\n   4 \u2502     2  cc         2;3;4   10\n   5 \u2502     3  dd         5;6     11\n   6 \u2502     3  ee         5;6     12\n\njulia&gt; @separate_rows(df, b:d, \";\" )\n6\u00d74 DataFrame\n Row \u2502 a      b          c          d         \n     \u2502 Int64  SubStrin\u2026  SubStrin\u2026  SubStrin\u2026 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1  a          1          7\n   2 \u2502     2  aa         2          8\n   3 \u2502     2  bb         3          9\n   4 \u2502     2  cc         4          10\n   5 \u2502     3  dd         5          11\n   6 \u2502     3  ee         6          12\n</code></pre> <p>source</p> <p># <code>TidierData.@slice</code> \u2014 Macro.</p> <pre><code>@slice(df, exprs...)\n</code></pre> <p>Select, remove or duplicate rows by indexing their integer positions.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>exprs...</code>: integer row values. Use positive values to keep the rows, or negative values to drop. Values provided must be either all positive or all negative, and they must be within the range of DataFrames' row numbers.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = repeat('a':'c', inner = 3), b = 1:9, c = 11:19);\n\njulia&gt; @chain df @slice(1:5)\n5\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n   2 \u2502 a         2     12\n   3 \u2502 a         3     13\n   4 \u2502 b         4     14\n   5 \u2502 b         5     15\n\njulia&gt; @chain df @slice(-(1:2))\n7\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         3     13\n   2 \u2502 b         4     14\n   3 \u2502 b         5     15\n   4 \u2502 b   
      6     16\n   5 \u2502 c         7     17\n   6 \u2502 c         8     18\n   7 \u2502 c         9     19\n\njulia&gt; @chain df begin\n         @group_by(a)\n         @slice(1)\n         @ungroup\n       end\n3\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n   2 \u2502 b         4     14\n   3 \u2502 c         7     17\n\njulia&gt; @chain df begin\n         @group_by(a)\n         @slice(n())\n         @ungroup\n       end\n3\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         3     13\n   2 \u2502 b         6     16\n   3 \u2502 c         9     19\n\njulia&gt; @chain df begin\n         @group_by(a)\n         @slice(-n())\n         @ungroup\n       end\n6\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n   2 \u2502 a         2     12\n   3 \u2502 b         4     14\n   4 \u2502 b         5     15\n   5 \u2502 c         7     17\n   6 \u2502 c         8     18\n\njulia&gt; @chain df begin\n         @group_by(a)\n         @slice(-(2:n()))\n         @ungroup\n       end\n3\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n   2 \u2502 b         4     14\n   3 \u2502 c         7     
17\n</code></pre> <p>source</p> <p># <code>TidierData.@slice_head</code> \u2014 Macro.</p> <pre><code>@slice_head(df; n, prop)\n</code></pre> <p>Retrieve rows from the beginning of a DataFrame or GroupedDataFrame.</p> <p>Arguments</p> <ul> <li><code>df</code>: The source data frame or grouped data frame from which to slice rows.</li> <li><code>prop</code>: The proportion of rows to slice.</li> <li><code>n</code>: An optional integer argument to specify the number of rows at the beginning of the dataframe to retrieve. Defaults to 1.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(\n           a = [missing, 0.2, missing, missing, 1, missing, 5, 6],\n           b = [0.3, 2, missing, 0.3, 6, 5, 7, 7],\n           c = [0.2, 0.2, 0.2, missing, 1, missing, 5, 6]);\n\njulia&gt; @chain df begin\n         @slice_head(n = 3)\n       end \n3\u00d73 DataFrame\n Row \u2502 a          b          c        \n     \u2502 Float64?   Float64?   Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 missing          0.3       0.2\n   2 \u2502       0.2        2.0       0.2\n   3 \u2502 missing    missing         0.2\n\njulia&gt; @chain df begin\n         @slice_head(prop = 0.25)\n       end \n2\u00d73 DataFrame\n Row \u2502 a          b         c        \n     \u2502 Float64?   Float64?  Float64? 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 missing         0.3       0.2\n   2 \u2502       0.2       2.0       0.2\n</code></pre> <p>source</p> <p># <code>TidierData.@slice_max</code> \u2014 Macro.</p> <pre><code>@slice_max(df, column; with_ties = true, n, prop, missing_rm = true)\n</code></pre> <p>Retrieve rows with the maximum value(s) from the specified column of a DataFrame or GroupedDataFrame.</p> <p>Arguments</p> <ul> <li><code>df</code>: The source data frame or grouped data frame from which to slice rows.</li> <li><code>column</code>: The column for which to slice the maximum values.</li> <li><code>with_ties</code>: Whether or not all ties will be shown, defaults to true. When false it will only show the first row.</li> <li><code>prop</code>: The proportion of rows to slice.</li> <li><code>n</code>: An optional integer argument to specify the number of maximum rows to retrieve. If with_ties = true, and the ties &gt; n, n will be overridden.</li> <li><code>missing_rm</code>: Defaults to true, skips the missing values when determining the proportion of the dataframe to slice.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(\n           a = [missing, 0.2, missing, missing, 1, missing, 5, 6],\n           b = [0.3, 2, missing, 3, 6, 5, 7, 7],\n           c = [0.2, 0.2, 0.2, missing, 1, missing, 5, 6]);\n\njulia&gt; @chain df begin\n         @slice_max(b)\n       end \n2\u00d73 DataFrame\n Row \u2502 a         b         c        \n     \u2502 Float64?  Float64?  Float64? 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      5.0       7.0       5.0\n   2 \u2502      6.0       7.0       6.0\n\njulia&gt; @chain df begin\n         @slice_max(b, with_ties = false)\n       end \n1\u00d73 DataFrame\n Row \u2502 a         b         c        \n     \u2502 Float64?  Float64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      5.0       7.0       5.0\n\njulia&gt; @chain df begin\n         @slice_max(b, n = 3)\n       end \n3\u00d73 DataFrame\n Row \u2502 a         b         c        \n     \u2502 Float64?  Float64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      5.0       7.0       5.0\n   2 \u2502      6.0       7.0       6.0\n   3 \u2502      1.0       6.0       1.0\n\njulia&gt; @chain df begin\n         @slice_max(b, prop = 0.5, missing_rm = true)\n       end\n3\u00d73 DataFrame\n Row \u2502 a         b         c        \n     \u2502 Float64?  Float64?  Float64? 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      5.0       7.0       5.0\n   2 \u2502      6.0       7.0       6.0\n   3 \u2502      1.0       6.0       1.0\n</code></pre> <p>source</p> <p># <code>TidierData.@slice_min</code> \u2014 Macro.</p> <pre><code>@slice_min(df, column; with_ties = true, n, prop, missing_rm = true)\n</code></pre> <p>Retrieve rows with the minimum value(s) from the specified column of a DataFrame or GroupedDataFrame.</p> <p>Arguments</p> <ul> <li><code>df</code>: The source data frame or grouped data frame from which to slice rows.</li> <li><code>column</code>: The column for which to slice the minimum values.</li> <li><code>with_ties</code>: Whether or not all ties will be shown, defaults to true and shows all ties. When false it will only show the first row.</li> <li><code>prop</code>: The proportion of rows to slice.</li> <li><code>n</code>: An optional integer argument to specify the number of minimum rows to retrieve. If with_ties = true, and the ties &gt; n, n will be overridden.</li> <li><code>missing_rm</code>: Defaults to true, skips the missing values when determining the proportion of the dataframe to slice.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(\n           a = [missing, 0.2, missing, missing, 1, missing, 5, 6],\n           b = [0.3, 2, missing, 0.3, 6, 5, 7, 7],\n           c = [0.2, 0.2, 0.2, missing, 1, missing, 5, 6]);\n\njulia&gt; @chain df begin\n         @slice_min(b)\n       end \n2\u00d73 DataFrame\n Row \u2502 a         b         c         \n     \u2502 Float64?  Float64?  Float64?  
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502  missing       0.3        0.2\n   2 \u2502  missing       0.3  missing\n\njulia&gt; @chain df begin\n         @slice_min(b, with_ties = false)\n       end \n1\u00d73 DataFrame\n Row \u2502 a         b         c        \n     \u2502 Float64?  Float64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502  missing       0.3       0.2\n\njulia&gt; @chain df begin\n         @slice_min(b, n = 3)\n       end\n3\u00d73 DataFrame\n Row \u2502 a          b         c         \n     \u2502 Float64?   Float64?  Float64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 missing         0.3        0.2\n   2 \u2502 missing         0.3  missing   \n   3 \u2502       0.2       2.0        0.2  \n\njulia&gt; @chain df begin\n         @slice_min(b, prop = 0.5, missing_rm = true)\n       end\n3\u00d73 DataFrame\n Row \u2502 a          b         c         \n     \u2502 Float64?   Float64?  Float64?  
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 missing         0.3        0.2\n   2 \u2502 missing         0.3  missing   \n   3 \u2502       0.2       2.0        0.2\n</code></pre> <p>source</p> <p># <code>TidierData.@slice_sample</code> \u2014 Macro.</p> <pre><code>@slice_sample(df, [n = 1, prop, replace = false])\n</code></pre> <p>Randomly sample rows from a DataFrame <code>df</code> or from each group in a GroupedDataFrame. The default is to return 1 row. Either the number of rows (<code>n</code>) or the proportion of rows (<code>prop</code>) should be provided as a keyword argument.</p> <p>Arguments</p> <ul> <li><code>df</code>: The source data frame or grouped data frame from which to sample rows.</li> <li><code>n</code>: The number of rows to sample. Defaults to <code>1</code>.</li> <li><code>prop</code>: The proportion of rows to sample.</li> <li><code>replace</code>: Whether to sample with replacement. 
Defaults to <code>false</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 1:10, b = 11:20);\n\njulia&gt; using StableRNGs, Random\n\njulia&gt; rng = StableRNG(1);\n\njulia&gt; Random.seed!(rng, 1);\n\njulia&gt; @chain df begin \n         @slice_sample(n = 5)\n       end\n5\u00d72 DataFrame\n Row \u2502 a      b     \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     6     16\n   2 \u2502     1     11\n   3 \u2502     5     15\n   4 \u2502     4     14\n   5 \u2502     8     18\n\njulia&gt; @chain df begin \n         @slice_sample(n = 5, replace = true)\n       end\n5\u00d72 DataFrame\n Row \u2502 a      b     \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     7     17\n   2 \u2502     2     12\n   3 \u2502     1     11\n   4 \u2502     4     14\n   5 \u2502     2     12\n\njulia&gt; @chain df begin \n         @slice_sample(prop = 0.5)\n       end\n5\u00d72 DataFrame\n Row \u2502 a      b     \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     6     16\n   2 \u2502     7     17\n   3 \u2502     5     15\n   4 \u2502     9     19\n   5 \u2502     2     12\n\njulia&gt; @chain df begin \n         @slice_sample(prop = 0.5, replace = true)\n       end\n5\u00d72 DataFrame\n Row \u2502 a      b     \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502    10     20\n   2 \u2502     4     14\n   3 \u2502     9     19\n   4 \u2502     9     19\n   5 \u2502     8     18\n</code></pre> <p>source</p> <p># <code>TidierData.@slice_tail</code> \u2014 Macro.</p> <pre><code>@slice_tail(df; n, prop)\n</code></pre> 
<p>Retrieve rows from the end of a DataFrame or GroupedDataFrame.</p> <p>Arguments</p> <ul> <li><code>df</code>: The source data frame or grouped data frame from which to slice rows.</li> <li><code>prop</code>: The proportion of rows to slice.</li> <li><code>n</code>: An optional integer argument to specify the number of rows at the beginning of the dataframe to retrieve. Defaults to 1.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(\n           a = [missing, 0.2, missing, missing, 1, missing, 5, 6],\n           b = [0.3, 2, missing, 0.3, 6, 5, 7, 7],\n           c = [0.2, 0.2, 0.2, missing, 1, missing, 5, 6]);\n\njulia&gt; @chain df begin\n         @slice_tail(n = 3)\n       end \n3\u00d73 DataFrame\n Row \u2502 a          b         c         \n     \u2502 Float64?   Float64?  Float64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 missing         5.0  missing   \n   2 \u2502       5.0       7.0        5.0\n   3 \u2502       6.0       7.0        6.0\n\njulia&gt; @chain df begin\n         @slice_tail(prop = 0.25)\n       end \n2\u00d73 DataFrame\n Row \u2502 a         b         c        \n     \u2502 Float64?  Float64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      5.0       7.0       5.0\n   2 \u2502      6.0       7.0       6.0\n</code></pre> <p>source</p> <p># <code>TidierData.@summarise</code> \u2014 Macro.</p> <pre><code>@summarize(df, exprs...)\n@summarise(df, exprs...)\n</code></pre> <p>Create a new DataFrame with one row that aggregating all observations from the input DataFrame or GroupedDataFrame. 
</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>exprs...</code>: a <code>new_variable = function(old_variable)</code> pair. <code>function()</code> should be an aggregate function that returns a single value.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);\n\njulia&gt; @chain df begin\n         @summarize(mean_b = mean(b),\n                    median_b = median(b))\n       end\n1\u00d72 DataFrame\n Row \u2502 mean_b   median_b \n     \u2502 Float64  Float64  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     3.0       3.0\n\njulia&gt; @chain df begin\n         @summarize begin\n           mean_b = mean(b)\n           median_b = median(b)\n         end\n       end\n1\u00d72 DataFrame\n Row \u2502 mean_b   median_b \n     \u2502 Float64  Float64  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     3.0       3.0 \n\njulia&gt; @chain df begin\n         @summarise(mean_b = mean(b), median_b = median(b))\n       end\n1\u00d72 DataFrame\n Row \u2502 mean_b   median_b \n     \u2502 Float64  Float64  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     3.0       3.0\n\njulia&gt; @chain df begin\n         @summarize(across((b,c), (minimum, maximum)))\n       end\n1\u00d74 DataFrame\n Row \u2502 b_minimum  c_minimum  b_maximum  c_maximum \n     \u2502 Int64      Int64      Int64      Int64     
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502         1         11          5         15\n\njulia&gt; @chain df begin\n         @summarize(across(where(is_number), minimum))\n       end\n1\u00d72 DataFrame\n Row \u2502 b_minimum  c_minimum \n     \u2502 Int64      Int64     \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502         1         11\n</code></pre> <p>source</p> <p># <code>TidierData.@summarize</code> \u2014 Macro.</p> <pre><code>@summarize(df, exprs...)\n@summarise(df, exprs...)\n</code></pre> <p>Create a new DataFrame with one row that aggregating all observations from the input DataFrame or GroupedDataFrame. </p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>exprs...</code>: a <code>new_variable = function(old_variable)</code> pair. 
<code>function()</code> should be an aggregate function that returns a single value.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);\n\njulia&gt; @chain df begin\n         @summarize(mean_b = mean(b),\n                    median_b = median(b))\n       end\n1\u00d72 DataFrame\n Row \u2502 mean_b   median_b \n     \u2502 Float64  Float64  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     3.0       3.0\n\njulia&gt; @chain df begin\n         @summarize begin\n           mean_b = mean(b)\n           median_b = median(b)\n         end\n       end\n1\u00d72 DataFrame\n Row \u2502 mean_b   median_b \n     \u2502 Float64  Float64  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     3.0       3.0 \n\njulia&gt; @chain df begin\n         @summarise(mean_b = mean(b), median_b = median(b))\n       end\n1\u00d72 DataFrame\n Row \u2502 mean_b   median_b \n     \u2502 Float64  Float64  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     3.0       3.0\n\njulia&gt; @chain df begin\n         @summarize(across((b,c), (minimum, maximum)))\n       end\n1\u00d74 DataFrame\n Row \u2502 b_minimum  c_minimum  b_maximum  c_maximum \n     \u2502 Int64      Int64      Int64      Int64     \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502         1         11          5         15\n\njulia&gt; @chain df begin\n         
@summarize(across(where(is_number), minimum))\n       end\n1\u00d72 DataFrame\n Row \u2502 b_minimum  c_minimum \n     \u2502 Int64      Int64     \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502         1         11\n</code></pre> <p>source</p> <p># <code>TidierData.@summary</code> \u2014 Macro.</p> <pre><code>   @summary(df, cols...)\n</code></pre> <p>For numerical columns, returns a dataframe with the Q1,Q3, min, max, mean, median, number of missing values</p> <p>Arguments</p> <ul> <li>'df': A DataFrame</li> <li><code>cols</code>: columns on which summary will be performed. This is an optional arguement, without which summary will be performed on all numerical columns</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = [1, 2, 3, 4, 5],\n                      b = [missing, 7, 8, 9, 10],\n                      c = [11, missing, 13, 14, missing],\n                      d = [16, 17, 18, 19, 20]);\n\njulia&gt; @summary(df);\n\njulia&gt; @summary(df, (b:d));\n\njulia&gt; @chain df begin\n         @summary(b:d)\n       end;\n</code></pre> <p>source</p> <p># <code>TidierData.@tally</code> \u2014 Macro.</p> <pre><code>@tally(df, [wt], [sort])\n</code></pre> <p>Tally the unique values of one or more variables, with an optional weighting.</p> <p><code>@tally()</code> is a low-level helper macro for <code>@count()</code> that assumes that any grouping has already been performed. <code>@chain @tally()</code> is roughly equivalent to <code>@chain df @summarize(n = n())</code>. Supply <code>wt</code> to perform weighted counts, switching the summary from <code>n = n()</code> to <code>n = sum(wt)</code>.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame or GroupedDataFrame.</li> <li><code>wt</code>: Optional parameter. 
Used to calculate a sum over the provided <code>wt</code> variable instead of counting the rows.</li> <li><code>sort</code>: Defaults to <code>false</code>. Whether the result should be sorted from highest to lowest <code>n</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = vcat(repeat([\"a\"], inner = 3),\n                           repeat([\"b\"], inner = 3),\n                           repeat([\"c\"], inner = 1),\n                           missing),\n                      b = 1:8)\n8\u00d72 DataFrame\n Row \u2502 a        b     \n     \u2502 String?  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a            1\n   2 \u2502 a            2\n   3 \u2502 a            3\n   4 \u2502 b            4\n   5 \u2502 b            5\n   6 \u2502 b            6\n   7 \u2502 c            7\n   8 \u2502 missing      8\n\njulia&gt; @chain df @tally()\n1\u00d71 DataFrame\n Row \u2502 n     \n     \u2502 Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     8\n\njulia&gt; @chain df begin\n         @group_by(a)\n         @tally()\n       end\n4\u00d72 DataFrame\n Row \u2502 a        n     \n     \u2502 String?  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a            3\n   2 \u2502 b            3\n   3 \u2502 c            1\n   4 \u2502 missing      1\n\njulia&gt; @chain df begin\n         @group_by(a)\n         @tally(wt = b)\n       end\n4\u00d72 DataFrame\n Row \u2502 a        n     \n     \u2502 String?  
Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a            6\n   2 \u2502 b           15\n   3 \u2502 c            7\n   4 \u2502 missing      8\n\njulia&gt; @chain df begin\n         @group_by(a)\n         @tally(wt = b, sort = true)\n       end\n4\u00d72 DataFrame\n Row \u2502 a        n     \n     \u2502 String?  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 b           15\n   2 \u2502 missing      8\n   3 \u2502 c            7\n   4 \u2502 a            6       \n</code></pre> <p>source</p> <p># <code>TidierData.@transmute</code> \u2014 Macro.</p> <pre><code>@transmute(df, exprs...)\n</code></pre> <p>Create a new DataFrame with only computed columns.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>exprs...</code>: add new columns or replace values of existed columns using        <code>new_variable = values</code> syntax.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);\n\njulia&gt; @chain df begin\n         @transmute(d = b + c)\n       end\n5\u00d71 DataFrame\n Row \u2502 d     \n     \u2502 Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502    12\n   2 \u2502    14\n   3 \u2502    16\n   4 \u2502    18\n   5 \u2502    20\n</code></pre> <p>source</p> <p># <code>TidierData.@ungroup</code> \u2014 Macro.</p> <pre><code>@ungroup(df)\n</code></pre> <p>Return a <code>DataFrame</code> with all groups removed.</p> <p>If this is applied to a <code>GroupedDataFrame</code>, then it removes the grouping. 
If this is applied to a <code>DataFrame</code> (without any groups), then it returns the <code>DataFrame</code> unchanged.</p> <p>Arguments</p> <ul> <li><code>df</code>: A <code>GroupedDataFrame</code> or `DataFrame``.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);\n\njulia&gt; @chain df begin\n         @group_by(a)\n       end\nGroupedDataFrame with 5 groups based on key: a\nFirst Group (1 row): a = 'a': ASCII/Unicode U+0061 (category Ll: Letter, lowercase)\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n\u22ee\nLast Group (1 row): a = 'e': ASCII/Unicode U+0065 (category Ll: Letter, lowercase)\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 e         5     15\n\njulia&gt; @chain df begin\n         @group_by(a)\n         @ungroup\n       end\n5\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n   2 \u2502 b         2     12\n   3 \u2502 c         3     13\n   4 \u2502 d         4     14\n   5 \u2502 e         5     15\n</code></pre> <p>source</p> <p># <code>TidierData.@unite</code> \u2014 Macro.</p> <pre><code>  @unite(df, new_cols, from_cols, sep)\n</code></pre> <p>Separate a multiple columns into one new columns using a specific delimter</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame</li> <li><code>new_col</code>: New column that will recieve the combination</li> <li><code>from_cols</code>: 
Column names that it will combine, supports [] or ()</li> <li><code>sep</code>: the string or character that will seprate the values in the new column</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame( b = [\"1\", \"2\", \"3\"], c = [\"1\", \"2\", \"3\"], d = [missing, missing, \"3\"]);\n\njulia&gt; @unite(df, new_col, (b, c, d), \"-\")\n3\u00d74 DataFrame\n Row \u2502 b       c       d        new_col \n     \u2502 String  String  String?  String  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 1       1       missing  1-1\n   2 \u2502 2       2       missing  2-2\n   3 \u2502 3       3       3        3-3-3\n</code></pre> <p>source</p> <p># <code>TidierData.@unnest_longer</code> \u2014 Macro.</p> <pre><code>@unnest_longer(df, columns, indices_include=false)\n</code></pre> <p>Unnest arrays in columns from a DataFrame to create a longer DataFrame with one row for each entry of the array.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>columns</code>: Columns to unnest. Can be a column symbols or a range of columns if they align for number of values.</li> <li><code>indices_include</code>: Optional. When set to <code>true</code>, adds an index column for each unnested column, which logs the position of each array entry.</li> <li><code>keep_empty</code>: Optional. 
When set to <code>true</code>, rows with empty arrays are kept, not skipped, and unnested as missing.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a=[1, 2], b=[[1, 2], [3, 4]], c=[[5, 6], [7, 8]])\n2\u00d73 DataFrame\n Row \u2502 a      b       c      \n     \u2502 Int64  Array\u2026  Array\u2026 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1  [1, 2]  [5, 6]\n   2 \u2502     2  [3, 4]  [7, 8]\n\njulia&gt; @unnest_longer(df, 2)\n4\u00d73 DataFrame\n Row \u2502 a      b      c      \n     \u2502 Int64  Int64  Array\u2026 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1      1  [5, 6]\n   2 \u2502     1      2  [5, 6]\n   3 \u2502     2      3  [7, 8]\n   4 \u2502     2      4  [7, 8]\n\njulia&gt; @unnest_longer(df, b:c, indices_include=true)\n4\u00d75 DataFrame\n Row \u2502 a      b      c      b_id   c_id  \n     \u2502 Int64  Int64  Int64  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1      1      5      1      1\n   2 \u2502     1      2      6      2      2\n   3 \u2502     2      3      7      1      1\n   4 \u2502     2      4      8      2      2\n\njulia&gt; df2 = DataFrame(x = 1:4, y = [[], [1, 2, 3], [4, 5], Int[]])\n4\u00d72 DataFrame\n Row \u2502 x      y            \n     \u2502 Int64  Array\u2026       \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1  Any[]\n   2 \u2502     2  Any[1, 2, 
3]\n   3 \u2502     3  Any[4, 5]\n   4 \u2502     4  Any[]\n\njulia&gt; @unnest_longer(df2, y, keep_empty = true)\n7\u00d72 DataFrame\n Row \u2502 x      y       \n     \u2502 Int64  Any     \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1  missing \n   2 \u2502     2  1\n   3 \u2502     2  2\n   4 \u2502     2  3\n   5 \u2502     3  4\n   6 \u2502     3  5\n   7 \u2502     4  missing \n</code></pre> <p>source</p> <p># <code>TidierData.@unnest_wider</code> \u2014 Macro.</p> <pre><code>@unnest_wider(df, columns, names_sep=)\n</code></pre> <p>Unnest specified columns of arrays or dictionaries into wider format dataframe with individual columns.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>columns</code>: Columns to be unnested. These columns should contain arrays, dictionaries, dataframes, or tuples. Dictionarys headings will be converted to column names.</li> <li><code>names_sep</code>: An optional string to specify the separator for creating new column names. 
If not provided, defaults to no separator.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(name = [\"Zaki\", \"Farida\"], attributes = [\n               Dict(\"age\" =&gt; 25, \"city\" =&gt; \"New York\"),\n               Dict(\"age\" =&gt; 30, \"city\" =&gt; \"Los Angeles\")]);\n\njulia&gt; @unnest_wider(df, attributes)\n2\u00d73 DataFrame\n Row \u2502 name    city         age   \n     \u2502 String  String       Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 Zaki    New York        25\n   2 \u2502 Farida  Los Angeles     30\n\njulia&gt; df2 = DataFrame(a=[1, 2], b=[[1, 2], [3, 4]], c=[[5, 6], [7, 8]])\n2\u00d73 DataFrame\n Row \u2502 a      b       c      \n     \u2502 Int64  Array\u2026  Array\u2026 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1  [1, 2]  [5, 6]\n   2 \u2502     2  [3, 4]  [7, 8]\n\njulia&gt; @unnest_wider(df2, b:c, names_sep = \"_\")\n2\u00d75 DataFrame\n Row \u2502 a      b_1    b_2    c_1    c_2   \n     \u2502 Int64  Int64  Int64  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1      1      2      5      6\n   2 \u2502     2      3      4      7      8\n</code></pre> <p>source</p> <p></p> <p></p>"},{"location":"reference/#reference-internal-functions","title":"Reference - Internal 
functions","text":""},{"location":"examples/generated/Contributors/Howto/","title":"Contribute","text":""},{"location":"examples/generated/Contributors/Howto/#contribute-to-documentation","title":"Contribute to Documentation","text":"<p>Contributing with examples can be done by first creating a new file example here</p> <p>Info</p> <ul> <li><code>your_new_file.jl</code> at <code>docs/examples/UserGuide/</code></li> </ul> <p>Once this is done you need to add a new entry here at the bottom and the appropriate level.</p> <p>Info</p> <p>Your new entry should look like:</p> <ul> <li><code>\"Your title example\" : \"examples/generated/UserGuide/your_new_file.md\"</code></li> </ul> <p></p> <p></p>"},{"location":"examples/generated/Contributors/Howto/#build-docs-locally","title":"Build docs locally","text":"<p>If you want to take a look at the docs locally before doing a PR follow the next steps:</p> <p>build docs locally</p> <p>Install the following dependencies in your system via pip, i.e.</p> <ul> <li><code>pip install mkdocs pygments python-markdown-math</code></li> <li><code>pip install mkdocs-material pymdown-extensions mkdocstrings</code></li> <li><code>pip install mknotebooks pytkdocs_tweaks mkdocs_include_exclude_files jinja2 mkdocs-video</code></li> </ul> <p>Then simply go to your <code>docs</code> env and activate it, i.e.</p> <p><code>docs&gt; julia</code></p> <p><code>julia&gt; ]</code></p> <p><code>(docs) pkg&gt; activate .</code></p> <p>Next, run the scripts:</p> <p>Info</p> <p>Generate files and build docs by running:</p> <ul> <li><code>genfiles.jl</code></li> <li><code>make.jl</code></li> </ul> <p>Now go to your <code>terminal</code> in the same path <code>docs&gt;</code> and run:</p> <p><code>mkdocs serve</code></p> <p>This should output <code>http://127.0.0.1:8000</code>, copy/paste this into your browser and you are all set.</p> <p>This page was generated using 
Literate.jl.</p>"},{"location":"examples/generated/UserGuide/across/","title":"across","text":"<p><code>across()</code> is a helper function that is typically used inside <code>@mutate()</code> or <code>@summarize</code> to operate on multiple columns and/or multiple functions. Notice that <code>across()</code> accepts two arguments, a set of variables and a set of functions. If providing multiple variables or functions, these should be provided as a tuple \u2013 in other words, wrapped in parentheses and separated by commas. If you want to skip missing values, you can \"fuse\" the summary function (such as <code>mean()</code>) with the <code>skipmissing()</code> function by using the fuction fusion operator, which you can type out in Julia by typing <code>\\circ</code> and then pressing <code>[Tab]</code> such that it reads <code>mean\u2218skipmissing</code>.</p> <pre><code>using TidierData\nusing RDatasets\n\nmovies = dataset(\"ggplot2\", \"movies\");\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/across/#one-variable-one-function","title":"One variable, one function","text":"<pre><code>@chain movies begin\n  @mutate(Budget = Budget / 1_000_000)\n  @summarize(across(Budget, mean\u2218skipmissing))\nend\n</code></pre> 1\u00d71 DataFrame RowBudget_mean_skipmissingFloat64113.4125"},{"location":"examples/generated/UserGuide/across/#one-variable-one-anonymous-function","title":"One variable, one anonymous function","text":"<pre><code>@chain movies begin\n  @mutate(Budget = Budget / 1_000_000)\n  @summarize(across(Budget, (x -&gt; mean(skipmissing(x)))))\nend\n</code></pre> 1\u00d71 DataFrame RowBudget_functionFloat64113.4125 <p>Note: compound functions are not correctly supported inside of anonymous functions. As of right now, the above function works, but <code>(x -&gt; mean\u2218skipmissing(x))</code> does not work. 
This is a known bug and will be fixed in a future update.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/across/#multiple-variables-multiple-functions","title":"Multiple variables, multiple functions","text":"<pre><code>@chain movies begin\n    @mutate(Budget = Budget / 1_000_000)\n    @summarize(across((Rating, Budget), (mean\u2218skipmissing, median\u2218skipmissing)))\nend\n</code></pre> 1\u00d74 DataFrame RowRating_mean_skipmissingBudget_mean_skipmissingRating_median_skipmissingBudget_median_skipmissingFloat64Float64Float64Float6415.9328513.41256.13.0"},{"location":"examples/generated/UserGuide/across/#multiple-selection-helpers-multiple-functions","title":"Multiple selection helpers, multiple functions","text":"<pre><code>@chain movies begin\n  @mutate(Budget = Budget / 1_000_000)\n  @summarize(across((starts_with(\"Bud\"), ends_with(\"ting\")), (mean\u2218skipmissing, median\u2218skipmissing)))\nend\n</code></pre> 1\u00d74 DataFrame RowBudget_mean_skipmissingRating_mean_skipmissingBudget_median_skipmissingRating_median_skipmissingFloat64Float64Float64Float64113.41255.932853.06.1 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/arrange/","title":"@arrange","text":"<p>Arranging is the way to sort a data frame. <code>@arrange()</code> can take multiple arguments. Arguments refer to columns that are sorted in ascending order by default. If you want to sort in descending order, make sure to wrap the column name in <code>desc()</code> as shown below.</p> <p><code>DataFrames.jl</code> does not currently support the <code>sort()</code> function on grouped data frames. 
In order to make this work in <code>TidierData.jl</code>, if you apply <code>@arrange()</code> to a GroupedDataFrame, <code>@arrange()</code> will temporarily ungroup the data, perform the <code>sort()</code>, and then re-group by the original grouping variables.</p> <pre><code>using TidierData\nusing RDatasets\n\nmovies = dataset(\"ggplot2\", \"movies\");\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/arrange/#sort-both-variables-in-ascending-order","title":"Sort both variables in ascending order","text":"<pre><code>@chain movies begin\n  @arrange(Year, Rating)\n  @select(1:5)\n  @slice(1:5)\nend\n</code></pre> 5\u00d75 DataFrame RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float641Blacksmith Scene18931missing7.02Hadj Cheriff18941missing4.13Glenroy Bros., No. 218941missing4.24Leonard-Cushing Fight18941missing4.45Sioux Ghost Dance18941missing4.4"},{"location":"examples/generated/UserGuide/arrange/#sort-in-a-mix-of-ascending-and-descending-order","title":"Sort in a mix of ascending and descending order","text":"<p>To sort in descending order, make sure to wrap the variable inside of <code>desc()</code>.</p> <pre><code>@chain movies begin\n  @arrange(Year, desc(Rating))\n  @select(1:5)\n  @slice(1:5)\nend\n</code></pre> 5\u00d75 DataFrame RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float641Blacksmith Scene18931missing7.02Luis Martinetti, Contortionist18941missing6.13Caicedo (with Pole)18941missing5.84Glenroy Brothers (Comic Boxing)18941missing5.45Buffalo Dance18941missing5.0 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/autovec/","title":"Auto-vectorization","text":"<p>TidierData.jl uses a lookup table to decide which functions not to vectorize. For example, <code>mean()</code> is listed as a function that should never be vectorized. Also, any function used inside of <code>across()</code> is also not automatically vectorized. 
Any function that is not included in this list and is used in a context other than <code>across()</code> is automatically vectorized.</p> <p>Which functions are not vectorized? The set of non-vectorized functions is contained in the array <code>TidierData.not_vectorized[]</code>. Let's take a look at this array. We will wrap it in a <code>string()</code> to make the output easier to read.</p> <pre><code>using TidierData\n\nstring(TidierData.not_vectorized[])\n</code></pre> <pre><code>\"[:getindex, :rand, :esc, :Ref, :Set, :Cols, :collect, :(:), :\u2218, :lag, :lead, :ntile, :repeat, :across, :desc, :mean, :std, :var, :median, :mad, :first, :last, :minimum, :maximum, :sum, :length, :skipmissing, :quantile, :passmissing, :cumsum, :cumprod, :accumulate, :is_float, :is_integer, :is_string, :cat_rev, :cat_relevel, :cat_infreq, :cat_lump, :cat_reorder, :cat_collapse, :cat_lump_min, :cat_lump_prop, :categorical, :as_categorical, :is_categorical, :unique, :iqr]\"\n</code></pre> <p>This \"auto-vectorization\" makes working with TidierData.jl more R-like and convenient. However, if you ever define your own function and try to use it, TidierData.jl may unintentionally vectorize it for you. To prevent auto-vectorization, you can prefix your function with a <code>~</code>.</p> <pre><code>df = DataFrame(a = repeat('a':'e', inner = 2), b = [1,1,1,2,2,2,3,3,3,4], c = 11:20)\n</code></pre> 10\u00d73 DataFrame RowabcCharInt64Int641a1112a1123b1134b2145c2156c2167d3178d3189e31910e420 <p>For example, let's define a function <code>new_mean()</code> that calculates a mean.</p> <pre><code>new_mean(exprs...) = mean(exprs...)\n</code></pre> <pre><code>new_mean (generic function with 1 method)\n</code></pre> <p>If we try to use <code>new_mean()</code> inside of <code>@mutate()</code>, it will give us the wrong result. 
This is because <code>new_mean()</code> is vectorized, which results in the mean being calculated element-wise, which is almost never what we actually want.</p> <pre><code>@chain df begin\n    @mutate(d = c - new_mean(c))\nend\n</code></pre> 10\u00d74 DataFrame RowabcdCharInt64Int64Float641a1110.02a1120.03b1130.04b2140.05c2150.06c2160.07d3170.08d3180.09e3190.010e4200.0 <p>To prevent <code>new_mean()</code> from being vectorized, we need to prefix it with a <code>~</code> like this:</p> <pre><code>@chain df begin\n    @mutate(d = c - ~new_mean(c))\nend\n</code></pre> 10\u00d74 DataFrame RowabcdCharInt64Int64Float641a111-4.52a112-3.53b113-2.54b214-1.55c215-0.56c2160.57d3171.58d3182.59e3193.510e4204.5 <p>Or you can modify the do-not-vectorize list like this:</p> <pre><code>push!(TidierData.not_vectorized[], :new_mean)\n</code></pre> <pre><code>49-element Vector{Symbol}:\n :getindex\n :rand\n :esc\n :Ref\n :Set\n :Cols\n :collect\n :(:)\n :\u2218\n :lag\n \u22ee\n :cat_collapse\n :cat_lump_min\n :cat_lump_prop\n :categorical\n :as_categorical\n :is_categorical\n :unique\n :iqr\n :new_mean\n</code></pre> <p>Now <code>new_mean()</code> should behave just like <code>mean()</code> in that it is treated as non-vectorized.</p> <pre><code>@chain df begin\n    @mutate(d = c - new_mean(c))\nend\n</code></pre> 10\u00d74 DataFrame RowabcdCharInt64Int64Float641a111-4.52a112-3.53b113-2.54b214-1.55c215-0.56c2160.57d3171.58d3182.59e3193.510e4204.5 <p>This gives us the correct answer. 
Notice that adding a <code>~</code> is not needed with <code>mean()</code> because <code>mean()</code> is already included on our look-up table of functions not requiring vectorization.</p> <pre><code>@chain df begin\n    @mutate(d = c - mean(c))\nend\n</code></pre> 10\u00d74 DataFrame RowabcdCharInt64Int64Float641a111-4.52a112-3.53b113-2.54b214-1.55c215-0.56c2160.57d3171.58d3182.59e3193.510e4204.5 <p>If you're not sure if a function is vectorized and want to prevent it from being vectorized, you can always prefix it with a ~ to prevent vectorization. Even though <code>mean()</code> is not vectorized anyway, prefixing it with a ~ will not cause any harm.</p> <pre><code>@chain df begin\n    @mutate(d = c - ~mean(c))\nend\n</code></pre> 10\u00d74 DataFrame RowabcdCharInt64Int64Float641a111-4.52a112-3.53b113-2.54b214-1.55c215-0.56c2160.57d3171.58d3182.59e3193.510e4204.5 <p>If for some crazy reason, you did want to vectorize <code>mean()</code>, you are always allowed to vectorize it, and TidierData.jl won't un-vectorize it.</p> <pre><code>@chain df begin\n    @mutate(d = c - mean.(c))\nend\n</code></pre> 10\u00d74 DataFrame RowabcdCharInt64Int64Float641a1110.02a1120.03b1130.04b2140.05c2150.06c2160.07d3170.08d3180.09e3190.010e4200.0 <p>Note: <code>~</code> also works with operators, so if you want to not vectorize an operator, you can prefix it with <code>~</code>, for example, <code>a ~* b</code> will perform a matrix multiplication rather than element-wise multiplication.</p> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/benchmark/","title":"Benchmark","text":"<p>The goal of this benchmarking is to guage how Tidier.jl performs in comparison to DataFrames.jl. 
Ultimately, from this benchmarking, we can check that Tidier.jl is comparable in speed to DataFrames.jl.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/benchmark/#why-function-wrap","title":"Why function wrap?","text":"<p>Wrapping code in a function allows it to compile just once, which more closely reflects the reality of production workflows. For a more robust explanation, please see @kdpsingh comment here: https://github.com/TidierOrg/TidierData.jl/issues/24#issuecomment-1682718061</p> <pre><code>using TidierData\nusing RDatasets\nusing BenchmarkTools\n\nmovies = dataset(\"ggplot2\", \"movies\");\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/benchmark/#filtering","title":"filtering","text":"<pre><code>function filtering_tidier()\n@chain movies begin\n    @filter(Year &gt; 1939 &amp;&amp; Votes &gt; 40)\nend\nend\n\n@benchmark filtering_tidier()\n\n@benchmark filter(row -&gt; row.Year &gt; 1939 &amp;&amp; row.Votes &gt; 40, movies)\n</code></pre> <pre><code>BenchmarkTools.Trial: 532 samples with 1 evaluation.\n Range (min \u2026 max):  9.001 ms \u2026  18.990 ms  \u250a GC (min \u2026 max): 0.00% \u2026 5.50%\n Time  (median):     9.281 ms               \u250a GC (median):    0.00%\n Time  (mean \u00b1 \u03c3):   9.411 ms \u00b1 554.583 \u03bcs  \u250a GC (mean \u00b1 \u03c3):  1.14% \u00b1 2.52%\n\n         \u2583\u2584\u2588\u2584\u2585\u2586\u2585\u2582                                              \n  \u2582\u2581\u2582\u2582\u2584\u2583\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2587\u2586\u2585\u2583\u2582\u2581\u2581\u2581\u2581\u2582\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2583\u2581\u2583\u2582\u2583\u2583\u2583\u2584\u2584\u2583\u2583\u2583\u2583\u2582\u2583\u2582\u2583\u2583\u2582\u2582\u2582\u2581\u2581\u2581\u2581\u2582 \u2583\n  9 ms            Histogram: frequency by time        10.4 ms &lt;\n\n Memory estimate: 7.76 MiB, allocs estimate: 
287668.\n</code></pre>"},{"location":"examples/generated/UserGuide/benchmark/#group_by-summarize","title":"group_by summarize","text":"<pre><code>function groupbysummarize_tidier()\n@chain movies begin\n    @group_by(MPAA)\n    @summarise(n=n())\nend\nend\n\n@benchmark groupbysummarize_tidier()\n\n@benchmark combine(groupby(movies, :MPAA), nrow =&gt; :n)\n</code></pre> <pre><code>BenchmarkTools.Trial: 10000 samples with 1 evaluation.\n Range (min \u2026 max):  414.934 \u03bcs \u2026  1.865 ms  \u250a GC (min \u2026 max): 0.00% \u2026 31.44%\n Time  (median):     422.558 \u03bcs              \u250a GC (median):    0.00%\n Time  (mean \u00b1 \u03c3):   432.911 \u03bcs \u00b1 67.083 \u03bcs  \u250a GC (mean \u00b1 \u03c3):  1.25% \u00b1  5.15%\n\n  \u2583\u2588\u2588\u2587\u2586\u2585\u2584\u2584\u2584\u2583\u2582\u2582\u2581\u2581\u2581                                              \u2582\n  \u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2587\u2587\u2587\u2586\u2586\u2586\u2586\u2586\u2585\u2585\u2584\u2584\u2584\u2583\u2584\u2581\u2581\u2581\u2581\u2583\u2583\u2581\u2581\u2584\u2587\u2587\u2583\u2585\u2583\u2583\u2583\u2585\u2584\u2586\u2586\u2587\u2586\u2587\u2586\u2585\u2584\u2583\u2584\u2584 \u2588\n  415 \u03bcs        Histogram: log(frequency) by time       573 \u03bcs &lt;\n\n Memory estimate: 474.87 KiB, allocs estimate: 270.\n</code></pre>"},{"location":"examples/generated/UserGuide/benchmark/#one-mutate","title":"one mutate","text":"<pre><code>function mutate_1_tidier()\n@chain movies begin\n    @mutate(new_col = Votes * R1)\nend\nend\n\n@benchmark mutate_1_tidier()\n\n@benchmark transform(movies, [:Votes, :R1] =&gt; ((v, r) -&gt; v .* r) =&gt; :new_col)\n</code></pre> <pre><code>BenchmarkTools.Trial: 6789 samples with 1 evaluation.\n Range (min \u2026 max):  541.700 \u03bcs \u2026   9.582 ms  \u250a GC (min \u2026 max): 0.00% \u2026  5.69%\n Time  (median):     661.425 \u03bcs               \u250a GC 
(median):    0.00%\n Time  (mean \u00b1 \u03c3):   731.742 \u03bcs \u00b1 250.650 \u03bcs  \u250a GC (mean \u00b1 \u03c3):  7.82% \u00b1 12.75%\n\n         \u2585\u2588\u2588\u2587\u2587\u2586\u2585\u2584\u2583\u2582\u2581                               \u2581\u2581\u2582\u2582\u2582\u2582\u2583\u2582\u2582\u2581\u2581  \u2582\n  \u2583\u2581\u2583\u2581\u2585\u2585\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2587\u2586\u2585\u2585\u2581\u2585\u2583\u2581\u2581\u2581\u2581\u2581\u2583\u2583\u2581\u2581\u2581\u2581\u2583\u2586\u2586\u2586\u2585\u2584\u2585\u2584\u2583\u2585\u2585\u2587\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588 \u2588\n  542 \u03bcs        Histogram: log(frequency) by time       1.25 ms &lt;\n\n Memory estimate: 8.42 MiB, allocs estimate: 223.\n</code></pre>"},{"location":"examples/generated/UserGuide/benchmark/#mutate-6-new-columns","title":"mutate 6 new columns","text":"<pre><code>function mutate6_tidier()\n    @chain movies begin\n        @mutate(\n        Votes_R1_Product = Votes .* R1,\n        Rating_Year_Ratio = Rating ./ Year,\n        R1_to_R5_Sum = R1 + R2 + R3 + R4 + R5,\n        High_Budget_Flag = if_else(ismissing(Budget), \"NA\", Budget .&gt; 50000),\n        R6_to_R8_Avg = (R6 + R7 + R8) / 3,\n        year_Minus_Length = Year - Length)\n    end\nend\n\n@benchmark mutate6_tidier()\n\n@benchmark transform(movies, [:Votes, :R1] =&gt; ((v, r) -&gt; v .* r) =&gt; :Votes_R1_Product, [:Rating, :Year] =&gt; ((r, y) -&gt; r ./ y) =&gt; :Rating_Year_Ratio, [:R1, :R2, :R3, :R4, :R5] =&gt; ((a, b, c, d, e) -&gt; a + b + c + d + e) =&gt; :R1_to_R5_Sum, :Budget =&gt; (b -&gt; ifelse.(ismissing.(b), missing, b .&gt; 50000)) =&gt; :High_Budget_Flag, [:R6, :R7, :R8] =&gt; ((f, g, h) -&gt; (f + g + h) / 3) =&gt; :R6_to_R8_Avg, [:Year, :Length] =&gt; ((y, l) -&gt; y - l) =&gt; :Year_Minus_Length )\n</code></pre> <pre><code>BenchmarkTools.Trial: 3937 samples with 1 evaluation.\n Range (min 
\u2026 max):  1.062 ms \u2026   9.694 ms  \u250a GC (min \u2026 max): 0.00% \u2026  6.74%\n Time  (median):     1.174 ms               \u250a GC (median):    0.00%\n Time  (mean \u00b1 \u03c3):   1.264 ms \u00b1 326.052 \u03bcs  \u250a GC (mean \u00b1 \u03c3):  6.29% \u00b1 11.05%\n\n      \u2581\u2585\u2587\u2588\u2584\u2582                                                   \n  \u2582\u2582\u2583\u2585\u2588\u2588\u2588\u2588\u2588\u2588\u2587\u2585\u2584\u2583\u2583\u2582\u2582\u2582\u2582\u2582\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2582\u2581\u2581\u2581\u2581\u2581\u2582\u2582\u2582\u2581\u2582\u2582\u2582\u2582\u2582\u2582\u2582\u2583\u2583\u2583\u2583\u2583\u2583\u2583\u2583\u2583\u2582\u2583\u2582\u2582\u2582 \u2583\n  1.06 ms         Histogram: frequency by time        1.91 ms &lt;\n\n Memory estimate: 10.56 MiB, allocs estimate: 581.\n</code></pre>"},{"location":"examples/generated/UserGuide/benchmark/#groupby-then-2-mutates","title":"groupby then 2 mutates","text":"<pre><code>function groupby1_2mutate_tidier()\n@chain movies begin\n    @group_by(MPAA)\n    @mutate(ace = R1 -&gt; R1/2 * 4)\n    @mutate(Bace = Votes^R1)\nend\nend\n\n@benchmark groupby1_2mutate_tidier()\n\n@benchmark transform( transform( groupby(movies, :MPAA), :R1 =&gt; (x -&gt; x/2 * 4) =&gt; :ace, ungroup = false), [:Votes, :R1] =&gt; ((a, b) -&gt; b .^ a) =&gt; :Bace, ungroup = false)\n</code></pre> <pre><code>BenchmarkTools.Trial: 671 samples with 1 evaluation.\n Range (min \u2026 max):  6.845 ms \u2026  13.608 ms  \u250a GC (min \u2026 max): 0.00% \u2026 7.58%\n Time  (median):     7.277 ms               \u250a GC (median):    0.00%\n Time  (mean \u00b1 \u03c3):   7.442 ms \u00b1 603.643 \u03bcs  \u250a GC (mean \u00b1 \u03c3):  3.11% \u00b1 4.29%\n\n    \u2581 \u2586\u2587\u2582\u2588\u2585 \u2581\u2581                                                 \n  
\u2582\u2586\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2587\u2586\u2587\u2586\u2588\u2585\u2584\u2585\u2586\u2584\u2585\u2584\u2585\u2583\u2582\u2584\u2583\u2586\u2583\u2586\u2586\u2586\u2585\u2585\u2585\u2584\u2586\u2584\u2584\u2583\u2582\u2583\u2583\u2581\u2582\u2584\u2582\u2582\u2584\u2583\u2582\u2582\u2583\u2581\u2582\u2581 \u2583\n  6.85 ms         Histogram: frequency by time         8.5 ms &lt;\n\n Memory estimate: 26.17 MiB, allocs estimate: 2449.\n</code></pre>"},{"location":"examples/generated/UserGuide/benchmark/#select-5-columns","title":"select 5 columns","text":"<pre><code>function select5_tidier()\n    @chain movies begin\n        @select(R1:R5)\n    end\nend\n\n@benchmark select5_tidier()\n\n@benchmark select(movies, :R1, :R2, :R3, :R4, :R5)\n</code></pre> <pre><code>BenchmarkTools.Trial: 10000 samples with 1 evaluation.\n Range (min \u2026 max):  153.436 \u03bcs \u2026   7.502 ms  \u250a GC (min \u2026 max): 0.00% \u2026 6.35%\n Time  (median):     220.581 \u03bcs               \u250a GC (median):    0.00%\n Time  (mean \u00b1 \u03c3):   232.208 \u03bcs \u00b1 100.421 \u03bcs  \u250a GC (mean \u00b1 \u03c3):  4.46% \u00b1 9.85%\n\n      \u2581\u2584\u2586\u2587\u2588\u2588\u2586\u2584\u2583\u2581                                                \u2582\n  \u2584\u2583\u2584\u2585\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2587\u2586\u2584\u2584\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2583\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2583\u2585\u2586\u2586\u2587\u2587\u2587\u2588\u2587\u2587\u2587\u2587\u2587 \u2588\n  153 \u03bcs        Histogram: log(frequency) by time        622 \u03bcs &lt;\n\n Memory estimate: 2.25 MiB, allocs estimate: 200.\n</code></pre> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/binding/","title":"Binding","text":"<p>Whereas joins are useful for 
combining data frames based on matching keys, another way to combine data frames is to bind them together, which can be done either by rows or by columns. <code>TidierData.jl</code> implements these actions using <code>@bind_rows()</code> and <code>@bind_cols()</code>, respectively.</p> <p>Let's generate three data frames to combine.</p> <pre><code>using TidierData\n\ndf1 = DataFrame(a=1:3, b=1:3);\n\ndf2 = DataFrame(a=4:6, b=4:6);\n\ndf3 = DataFrame(a=7:9, c=7:9);\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/binding/#bind_rows","title":"<code>@bind_rows()</code>","text":"<pre><code>@bind_rows(df1, df2)\n</code></pre> 6\u00d72 DataFrame RowabInt64Int64111222333444555666 <p><code>@bind_rows()</code> keeps columns that are present in at least one of the provided data frames. Any missing columns will be filled with <code>missing</code> values.</p> <pre><code>@bind_rows(df1, df3)\n</code></pre> 6\u00d73 DataFrame RowabcInt64Int64?Int64?111missing222missing333missing47missing758missing869missing9 <p>There is an optional <code>id</code> argument to add an identifier for combined data frames. 
Note that both <code>@bind_rows</code> and <code>@bind_cols</code> accept multiple (i.e., more than 2) data frames, as in the example below.</p> <pre><code>@bind_rows(df1, df2, df3, id = \"id\")\n</code></pre> 9\u00d74 DataFrame RowabcidInt64Int64?Int64?Int64111missing1222missing1333missing1444missing2555missing2666missing277missing7388missing8399missing93 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/binding/#bind_cols","title":"<code>@bind_cols()</code>","text":"<p><code>@bind_cols</code> works similarly to R's <code>tidyverse</code> although the <code>.name_repair</code> argument is not supported.</p> <pre><code>@bind_cols(df1, df2)\n</code></pre> 3\u00d74 DataFrame Rowaba_1b_1Int64Int64Int64Int64111442225533366 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/column_names/","title":"Column names","text":"<p>When referring to column names, TidierData.jl is a bit unusual for a Julia package in that it does not use symbols. This is because TidierData.jl uses tidy expressions, which in R lingo equates to a style of programming referred to as \"non-standard evaluation.\" If you are creating a new column <code>a</code> containing a value that is the mean of column <code>b</code>, you would simply write <code>a = mean(b)</code>.</p> <p>However, there may be times when you wish to create or refer to a column containing a space in it. Let's start by creating some column names containing a space in their name.</p> <pre><code>using TidierData\n\ndf = DataFrame(var\"my name\" = [\"Ada\", \"Twist\"],\n               var\"my age\" = [40, 50])\n</code></pre> 2\u00d72 DataFrame Rowmy namemy ageStringInt641Ada402Twist50 <p>To create a column name containing a space, we used the <code>var\"column name\"</code> notation. 
Because <code>DataFrame()</code> is a regular Julia function, this is the standard way to refer to a variable containing a space, which is why we need to use this here.</p> <p>This notation also works inside of TidierData.jl.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/column_names/#varcolumn-name-notation","title":"<code>var\"column name\"</code> notation","text":"<p>If we want to figure out the age for the people in our dataset a decade from today, we could use this same <code>var\"column name\"</code> notation inside of <code>@mutate</code>.</p> <pre><code>@chain df begin\n  @mutate(var\"age in 10 years\" = var\"my age\" + 10)\nend\n</code></pre> 2\u00d73 DataFrame Rowmy namemy ageage in 10 yearsStringInt64Int641Ada40502Twist5060 <p>However, typing out the <code>var\"column name\"</code> can become cumbersome. TidierData.jl also supports another shorthand notation to refer to column names containing spaces or other special characters: backticks.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/column_names/#backtick-notation","title":"Backtick notation","text":"<p>This same code could be written more concisely like this:</p> <pre><code>@chain df begin\n  @mutate(`age in 10 years` = `my age` + 10)\nend\n</code></pre> 2\u00d73 DataFrame Rowmy namemy ageage in 10 yearsStringInt64Int641Ada40502Twist5060 <p>Backticks are an R convention. While they are not specific to tidyverse, they are a convenient way to refer to column names that otherwise would not parse correctly as a single entity. Backticks are supported in all TidierData.jl functions where column names may be referenced.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/column_names/#cleaning-up-column-names","title":"Cleaning up column names","text":"<p>Another option is to clean up the column names so that you do not have spaces to begin with. In R, this is usually accomplished using the <code>janitor</code> package. 
In Julia, the Cleaner.jl package provides this functionality, which we have wrapped inside of TidierData.jl.</p> <pre><code>@chain df begin\n  @clean_names\nend\n</code></pre> 2\u00d72 DataFrame Rowmy_namemy_ageStringInt641Ada402Twist50 <p>Although the default value for the <code>case</code> argument is \"snake_case\", you can also set this to \"camelCase\".</p> <pre><code>@chain df begin\n  @clean_names(case = \"camelCase\")\nend\n</code></pre> 2\u00d72 DataFrame RowmyNamemyAgeStringInt641Ada402Twist50 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/conditionals/","title":"Conditionals","text":"<p>Conditional functions are a useful tool to update or create new columns conditional on the values of a column of data. When continuous variables are converted to categories, this is sometimes referred to as \"recoding\" a column.</p> <p>TidierData.jl provides two functions to recode data: <code>if_else()</code> and <code>case_when()</code>.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/conditionals/#if_else","title":"<code>if_else()</code>","text":"<p>Why do we need another <code>if_else()</code> function if base Julia already comes with an <code>ifelse()</code> function. Similar to R, the base Julia implementation of <code>if_else()</code> does not include a way to designate what value to return if the enclosed vector contains a missing value. Additionally, the base Julia implementation of <code>ifelse()</code> produces an error if presented with a <code>missing</code> value in the condition. The TidierData.jl <code>if_else()</code> can handle missing values and includes an optional 4th argument that is used to designate what to return in the event of a `missing`` value for the condition. 
Let's take a look at some examples.</p> <pre><code>using TidierData\n\ndf = DataFrame(a = [1, 2, missing, 4, 5])\n</code></pre> 5\u00d71 DataFrame RowaInt64?11223missing4455 <p>Here, we have created a <code>DataFrame</code> containing a single column <code>a</code> with 5 values, for which the 3rd value is missing.</p> <p>Now, let's create a new column <code>b</code> that contains a \"yes\" if <code>a</code> is greater than or equal to 3, and a \"no\" otherwise. Notice that when we do this, the <code>missing</code> values remains as <code>missing</code>.</p> <pre><code>@chain df begin\n  @mutate(b = if_else(a &gt;= 3, \"yes\", \"no\"))\nend\n</code></pre> 5\u00d72 DataFrame RowabInt64?String?11no22no3missingmissing44yes55yes <p>What if we wanted to fill in the missing value with \"unknown\"? All we need to do is provide an optional 4th argument containing the value to return in the event of a missing condition. When we run this version, <code>missing</code> values in <code>a</code> are converted to \"unknown\" in <code>b</code>.</p> <pre><code>@chain df begin\n  @mutate(b = if_else(a &gt;= 3, \"yes\", \"no\", \"unknown\"))\nend\n</code></pre> 5\u00d72 DataFrame RowabInt64?String11no22no3missingunknown44yes55yes <p>Although both of these examples showed how to return a single value (like \"yes\" and \"no\"), you can also return a vector of values, which is useful for updating only a subset of the values of a column. For example, if we wanted to create a column <code>b</code> that contains a 3 when <code>a</code> is greater than or equal to 3 but otherwise remains unchanged, we could provide a 3 for the <code>yes</code> condition and a vector (column) <code>a</code> in the <code>no</code> condition. 
If we do not provide the optional 4th argument, <code>missing</code> values remain <code>missing</code>.</p> <pre><code>@chain df begin\n  @mutate(b = if_else(a &gt;= 3, 3, a))\nend\n</code></pre> 5\u00d72 DataFrame RowabInt64?Int64?1112223missingmissing443553 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/conditionals/#case_when","title":"<code>case_when()</code>","text":"<p>Although <code>if_else()</code> is convenient when evaluating a single condition, it can be cumbersome when evaluating multiple conditions because subsequent conditions need to be nested within the <code>no</code> condition for the preceding argument. For situations where multiple conditions need to be evaluated, <code>case_when()</code> is more convenient.</p> <p>Let's first consider a similar example from above and recreate it using <code>case_when()</code>. The following code creates a column <code>b</code> that assigns a value of 3 if <code>a &gt;= 3</code> and otherwise leaves the value unchanged.</p> <pre><code>@chain df begin\n  @mutate(b = case_when(a &gt;= 3  =&gt;  3,\n                        true    =&gt;  a))\nend\n</code></pre> 5\u00d72 DataFrame RowabInt64?Int64?1112223missingmissing443553 <p>What is going on here? <code>case_when()</code> uses a <code>condition =&gt; return_value</code> syntax, which are encoded as pairs in Julia. You can provide a single pair, or multiple pairs separated by commas. Because the pairs operator (<code>=&gt;</code>) might be confused with a greater than or equal to sign (<code>&gt;=</code>), we have padded two spaces on either side of the <code>=&gt;</code> to make sure that the pair remains visually distinct. We do not use a <code>~</code> operator in <code>case_when()</code> (as is used in R) because the <code>~</code> operator is used to denote de-vectorized functions in TidierData.jl.</p> <p>There are 2 other things to note above. 
First, the <code>true</code> condition evaluates to <code>true</code> for all remaining values of <code>a</code>. The only reason that the <code>b</code> contains a <code>missing</code> value here is that the <code>true</code> condition was met, leading to the value of <code>a</code> (in this case, <code>missing</code>) to be assigned to <code>b</code>. Second, we were able to return a single value (3) in the first condition, and a vector (column) of data (<code>a</code>) in the second condition.</p> <p>What if we wanted to fill in the missing values with something else? In this case, we would need to create an explicit condition that checks for missing values and assigns a return value to that condition.</p> <pre><code>@chain df begin\n  @mutate(b = case_when(a &gt;= 3        =&gt;  3,\n                        ismissing(a)  =&gt;  0,\n                        true          =&gt;  a))\nend\n</code></pre> 5\u00d72 DataFrame RowabInt64?Int641112223missing0443553 <p>Do our conditions have to be mutually exclusive? No. The return value for the first matching condition is assigned to <code>b</code> because the conditions are evaluated sequentially from first to last.</p> <pre><code>@chain df begin\n  @mutate(b = case_when(a &gt; 4  =&gt;  \"hi\",\n                        a &gt; 2  =&gt;  \"medium\",\n                        a &gt; 0  =&gt;  \"low\"))\nend\n</code></pre> 5\u00d72 DataFrame RowabInt64?String?11low22low3missingmissing44medium55hi <p>Again, if we want to fill in remaining values (which in this case are the <code>missing</code> ones), we can map the final condition <code>true</code> to the value of \"unknown\". 
Because the ordering of the conditions matters, the <code>true</code> condition should always be listed last if it is included.</p> <pre><code>@chain df begin\n  @mutate(b = case_when(a &gt; 4  =&gt;  \"hi\",\n                        a &gt; 2  =&gt;  \"medium\",\n                        a &gt; 0  =&gt;  \"low\",\n                        true   =&gt;  \"unknown\"))\nend\n</code></pre> 5\u00d72 DataFrame RowabInt64?String11low22low3missingunknown44medium55hi <p></p> <p></p>"},{"location":"examples/generated/UserGuide/conditionals/#do-these-functions-work-outside-of-tidierdatajl","title":"Do these functions work outside of TidierData.jl?","text":"<p>Yes, both <code>if_else()</code> and <code>case_when()</code> work outside of TidierData.jl. However, you'll need to remember that if working with vectors, both the functions and conditions will need to be vectorized, and in the case of <code>case_when()</code>, the <code>=&gt;</code> will need to be written as <code>.=&gt;</code>. The reason this is not needed when using these functions inside of TidierData.jl is because they are auto-vectorized.</p> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/dataset_movies/","title":"Movies dataset","text":"<p>To get started, we will load the <code>movies</code> dataset from the <code>RDatasets.jl</code> package.</p> <pre><code>using TidierData\nusing RDatasets\n\nmovies = dataset(\"ggplot2\", \"movies\");\n</code></pre> <p>To work with this dataset, we will use the <code>@chain</code> macro. This macro initiates a pipe, and every function or macro provided to it between the <code>begin</code> and <code>end</code> blocks modifies the dataframe mentioned at the beginning of the pipe. You don't have to necessarily spread a chain over multiple lines of code, but when working with data frames it's often easiest to do so. 
Before going further, take a look at the Chain.jl GitHub page to see all the cool things that are possible with this, including mid-chain side effects using <code>@aside</code> and mid-chain assignment of variables.</p> <p>Let's take a look at the first 5 rows of the <code>movies</code> dataset using <code>@slice()</code>.</p> <pre><code>@chain movies begin\n    @slice(1:5)\nend\n</code></pre> 5\u00d724 DataFrame RowTitleYearLengthBudgetRatingVotesR1R2R3R4R5R6R7R8R9R10MPAAActionAnimationComedyDramaDocumentaryRomanceShortStringInt32Int32Int32?Float64Int32Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Cat\u2026Int32Int32Int32Int32Int32Int32Int321$1971121missing6.43484.54.54.54.514.524.524.514.54.54.500110002$1000 a Touchdown193971missing6.0200.014.54.524.514.514.514.54.54.514.500100003$21 a Day Once a Month19417missing8.250.00.00.00.00.024.50.044.524.524.501000014$40,000199670missing8.2614.50.00.00.00.00.00.00.034.545.500100005$50,000 Climax Show, The197571missing3.41724.54.50.014.514.54.50.00.00.024.50000000 <p>Let's use <code>@glimpse()</code> to preview the dataset.</p> <pre><code>@glimpse(movies)\n</code></pre> <pre><code>Rows: 58788\nColumns: 24\n.Title         String         $, $1000 a Touchdown, $21 a Day Once a Month, $40,\n.Year          Int32          1971, 1939, 1941, 1996, 1975, 2000, 2002, 2002, 19\n.Length        Int32          121, 71, 7, 70, 71, 91, 93, 25, 97, 61, 99, 96, 10\n.Budget        Union{Missing, Int32}missing, missing, missing, missing, missing,\n.Rating        Float64        6.4, 6.0, 8.2, 8.2, 3.4, 4.3, 5.3, 6.7, 6.6, 6.0,\n.Votes         Int32          348, 20, 5, 6, 17, 45, 200, 24, 18, 51, 23, 53, 44\n.R1            Float64        4.5, 0.0, 0.0, 14.5, 24.5, 4.5, 4.5, 4.5, 4.5, 4.5\n.R2            Float64        4.5, 14.5, 0.0, 0.0, 4.5, 4.5, 0.0, 4.5, 4.5, 0.0,\n.R3            Float64        4.5, 4.5, 0.0, 0.0, 0.0, 4.5, 4.5, 4.5, 4.5, 4.5,\n.R4            Float64        4.5, 24.5, 0.0, 0.0, 14.5, 14.5, 4.5, 4.5, 
0.0, 4.\n.R5            Float64        14.5, 14.5, 0.0, 0.0, 14.5, 14.5, 24.5, 4.5, 0.0,\n.R6            Float64        24.5, 14.5, 24.5, 0.0, 4.5, 14.5, 24.5, 14.5, 0.0,\n.R7            Float64        24.5, 14.5, 0.0, 0.0, 0.0, 4.5, 14.5, 14.5, 34.5,\n.R8            Float64        14.5, 4.5, 44.5, 0.0, 0.0, 4.5, 4.5, 14.5, 14.5, 4\n.R9            Float64        4.5, 4.5, 24.5, 34.5, 0.0, 14.5, 4.5, 4.5, 4.5, 4.\n.R10           Float64        4.5, 14.5, 24.5, 45.5, 24.5, 14.5, 14.5, 14.5, 24.\n.MPAA          CategoricalArrays.CategoricalValue{String, UInt8}, , , , , , R, ,\n.Action        Int32          0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,\n.Animation     Int32          0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n.Comedy        Int32          1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0,\n.Drama         Int32          1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1,\n.Documentary   Int32          0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n.Romance       Int32          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n.Short         Int32          0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,\n</code></pre> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/distinct/","title":"@distinct","text":"<p>The <code>@distinct()</code> macro in <code>TidierData.jl</code> is useful to select distinct rows. Like it's R counterpart, it can be used with or without arguments. When arguments are provided, it behaves slightly differently than the R version. 
Whereas the R function only returns the provided columns, the TidierData.jl version returns all columns, where the first match is returned for the non-selected columns.</p> <pre><code>using TidierData\n\ndf = DataFrame(a = 1:10, b = repeat('a':'e', inner = 2))\n</code></pre> 10\u00d72 DataFrame RowabInt64Char11a22a33b44b55c66c77d88d99e1010e <p></p> <p></p>"},{"location":"examples/generated/UserGuide/distinct/#select-distinct-values-overall","title":"Select distinct values overall","text":"<p>Since there are no duplicate rows, this will return all rows.</p> <pre><code>@chain df begin\n    @distinct()\nend\n</code></pre> 10\u00d72 DataFrame RowabInt64Char11a22a33b44b55c66c77d88d99e1010e <p></p> <p></p>"},{"location":"examples/generated/UserGuide/distinct/#select-distinct-values-based-on-column-b","title":"Select distinct values based on column <code>b</code>","text":"<p>Notice that the first matching row for column <code>a</code> is returned for every distinct value of column <code>b</code>. This is slightly different behavior than R's tidyverse, which would have returned only column <code>b</code>.</p> <pre><code>@chain df begin\n  @distinct(b)\nend\n</code></pre> 5\u00d72 DataFrame RowabInt64Char11a23b35c47d59e <p>In TidierData.jl, <code>@distinct()</code> works with grouped data frames. If grouped, <code>@distinct()</code> will ignore the grouping when determining distinct values but will return the data frame in grouped form based on the original groupings.</p> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/fill_missing/","title":"Fill missing","text":"<p>The @fill_missing macro is a reimplementation of fill(). 
To mirror the syntax in R, the methods availble are \"up\" (fill from bottom up) and \"down\" fill from top down.</p> <pre><code>using TidierData\n\ndf = DataFrame(\n    a = [missing, 2, 3, missing, 5],\n    b = [missing, 1, missing, 4, 5],\n    c = ['a', 'b', missing, 'd', 'e'],\n    group = ['A', 'A', 'B', 'B', 'A']\n);\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/fill_missing/#fill-all-columns","title":"Fill all columns","text":"<p>Fill missing values for the whole DataFrame using the \"down\" method (top to bottom)</p> <pre><code>@chain df begin\n    @fill_missing(\"down\")\nend\n\n@fill_missing(df, \"down\")\n</code></pre> 5\u00d74 DataFrame RowabcgroupInt64?Int64?Char?Char1missingmissingaA221bA331bB434dB555eA <p></p> <p></p>"},{"location":"examples/generated/UserGuide/fill_missing/#fill-specifc-columns","title":"Fill specifc columns","text":"<p>This fills missing values in columns <code>a</code> and <code>c</code> going from bottom to top.</p> <pre><code>@chain df begin\n    @fill_missing(a, c, \"up\")\nend\n</code></pre> 5\u00d74 DataFrame RowabcgroupInt64?Int64?Char?Char12missingaA221bA33missingdB454dB555eA <p></p> <p></p>"},{"location":"examples/generated/UserGuide/fill_missing/#fill-with-grouped-dataframes","title":"Fill with Grouped DataFrames","text":"<p>When grouping by the <code>group</code> column, this fills missing values in columns <code>a</code> within each group going from top to bottom within that group</p> <pre><code>@chain df begin\n    @group_by(group)\n    @fill_missing(a, \"down\")\nend\n</code></pre> <p>GroupedDataFrame with 2 groups based on key: group</p> First Group (3 rows): group = 'A': ASCII/Unicode U+0041 (category Lu: Letter, uppercase) RowabcgroupInt64?Int64?Char?Char1missingmissingaA221bA355eA <p>&amp;vellip;</p> Last Group (2 rows): group = 'B': ASCII/Unicode U+0042 (category Lu: Letter, uppercase) RowabcgroupInt64?Int64?Char?Char13missingmissingB234dB <p></p> 
<p></p>"},{"location":"examples/generated/UserGuide/fill_missing/#replace_missing","title":"<code>replace_missing()</code>","text":"<p>The <code>replace_missing</code> function facilitates the replacement of <code>missing</code> values with a specified replacement.</p> <pre><code>@chain df begin\n    @mutate(b = replace_missing(b, 2))\nend\n</code></pre> 5\u00d74 DataFrame RowabcgroupInt64?Int64Char?Char1missing2aA221bA332missingB4missing4dB555eA <p></p> <p></p>"},{"location":"examples/generated/UserGuide/fill_missing/#missing_if","title":"<code>missing_if()</code>","text":"<p>The <code>missing_if</code> function is used to introduce <code>missing</code> values under specific conditions.</p> <pre><code>@chain df begin\n    @mutate(b = missing_if(b, 5))\nend\n</code></pre> 5\u00d74 DataFrame RowabcgroupInt64?Int64?Char?Char1missingmissingaA221bA33missingmissingB4missing4dB55missingeA <p>Both <code>missing_if</code> and <code>replace_missing</code> are not type-specific.</p> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/filter/","title":"@filter","text":"<p>Filtering is a mechanism to indicate which rows you want to keep in a dataset based on criteria. This is also referred to as subsetting. Filtering rows is normally a bit tricky in <code>DataFrames.jl</code> because comparison operators like <code>&gt;=</code> actually need to be vectorized as <code>.&gt;=</code>, which can catch new Julia users by surprise. <code>@filter()</code> mimics R's <code>tidyverse</code> behavior by auto-vectorizing the code and then only selecting those rows that evaluate to <code>true</code>. 
Similar to <code>dplyr</code>, rows that evaluate to <code>missing</code> are skipped.</p> <pre><code>using TidierData\nusing RDatasets\n\nmovies = dataset(\"ggplot2\", \"movies\");\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/filter/#lets-take-a-look-at-the-movies-whose-budget-was-more-than-average-we-will-select-only-the-first-5-rows-for-the-sake-of-brevity","title":"Let\u2019s take a look at the movies whose budget was more than average. We will select only the first 5 rows for the sake of brevity.","text":"<pre><code>@chain movies begin\n  @mutate(Budget = Budget / 1_000_000)\n  @filter(Budget &gt;= mean(skipmissing(Budget)))\n  @select(Title, Budget)\n  @slice(1:5)\nend\n</code></pre> 5\u00d72 DataFrame RowTitleBudgetStringFloat64?1'Til There Was You23.0210 Things I Hate About You16.03102 Dalmatians85.0413 Going On 3037.0513th Warrior, The85.0"},{"location":"examples/generated/UserGuide/filter/#lets-search-for-movies-that-have-at-least-200-votes-and-a-rating-of-greater-than-or-equal-to-8-there-are-3-ways-you-can-specify-an-and-condition-inside-of-tidierdatajl","title":"Let's search for movies that have at least 200 votes and a rating of greater than or equal to 8. There are 3 ways you can specify an \"and\" condition inside of <code>TidierData.jl</code>.","text":""},{"location":"examples/generated/UserGuide/filter/#the-first-option-is-to-use-the-short-circuiting-operator-as-shown-below-this-is-the-preferred-approach-because-the-second-expression-is-only-evaluated-per-element-if-the-first-one-is-true","title":"The first option is to use the short-circuiting <code>&amp;&amp;</code> operator as shown below. 
This is the preferred approach because the second expression is only evaluated (per element) if the first one is true.","text":"<pre><code>@chain movies begin\n  @filter(Votes &gt;= 200 &amp;&amp; Rating &gt;= 8)\n  @select(Title, Votes, Rating)\n  @slice(1:5)\nend\n</code></pre> 5\u00d73 DataFrame RowTitleVotesRatingStringInt32Float64112 Angry Men292788.7212 stulev2528.932001: A Space Odyssey649828.3421 Grams218578.0539 Steps, The79318.0"},{"location":"examples/generated/UserGuide/filter/#the-second-option-is-to-use-the-bitwise-operator-note-that-there-is-a-key-difference-in-syntax-between-and-because-the-operator-takes-a-higher-operator-precedence-than-you-have-to-wrap-the-comparison-expressions-inside-of-parentheses-to-ensure-that-the-overall-expression-is-evaluated-correctly","title":"The second option is to use the bitwise <code>&amp;</code> operator. Note that there is a key difference in syntax between <code>&amp;</code> and <code>&amp;&amp;</code>. Because the <code>&amp;</code> operator takes a higher operator precedence than <code>&gt;=</code>, you have to wrap the comparison expressions inside of parentheses to ensure that the overall expression is evaluated correctly.","text":"<pre><code>@chain movies begin\n  @filter((Votes &gt;= 200) &amp; (Rating &gt;= 8))\n  @select(Title, Votes, Rating)\n  @slice(1:5)\nend\n</code></pre> 5\u00d73 DataFrame RowTitleVotesRatingStringInt32Float64112 Angry Men292788.7212 stulev2528.932001: A Space Odyssey649828.3421 Grams218578.0539 Steps, The79318.0"},{"location":"examples/generated/UserGuide/filter/#the-third-option-for-and-conditions-only-is-to-separate-the-expressions-with-commas-this-is-similar-to-the-behavior-of-filter-in-tidyverse","title":"The third option for \"and\" conditions only is to separate the expressions with commas. 
This is similar to the behavior of <code>filter()</code> in <code>tidyverse</code>.","text":"<pre><code>@chain movies begin\n  @filter(Votes &gt;= 200, Rating &gt;= 8)\n  @select(Title, Votes, Rating)\n  @slice(1:5)\nend\n</code></pre> 5\u00d73 DataFrame RowTitleVotesRatingStringInt32Float64112 Angry Men292788.7212 stulev2528.932001: A Space Odyssey649828.3421 Grams218578.0539 Steps, The79318.0"},{"location":"examples/generated/UserGuide/filter/#now-lets-see-how-to-use-filter-with-in-heres-an-example-with-a-tuple","title":"Now let's see how to use <code>@filter()</code> with <code>in</code>. Here's an example with a tuple.","text":"<pre><code>@chain movies begin\n  @filter(Title in (\"101 Dalmatians\",\n                    \"102 Dalmatians\"))\n  @select(1:5)\nend\n</code></pre> 2\u00d75 DataFrame RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float641101 Dalmatians1996103missing5.52102 Dalmatians2000100850000004.7"},{"location":"examples/generated/UserGuide/filter/#we-can-also-use-filter-with-in-using-a-vector-denoted-by-a","title":"We can also use <code>@filter()</code> with <code>in</code> using a vector, denoted by a <code>[]</code>.","text":"<pre><code>@chain movies begin\n  @filter(Title in [\"101 Dalmatians\",\n                    \"102 Dalmatians\"])\n  @select(1:5)\nend\n</code></pre> 2\u00d75 DataFrame RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float641101 Dalmatians1996103missing5.52102 Dalmatians2000100850000004.7"},{"location":"examples/generated/UserGuide/filter/#finally-we-can-combine-filter-with-row_number-to-retrieve-the-first-5-rows-which-can-be-used-to-mimic-the-functionality-provided-by-slice","title":"Finally, we can combine <code>@filter</code> with <code>row_number()</code> to retrieve the first 5 rows, which can be used to mimic the functionality provided by <code>@slice</code>.","text":"<pre><code>@chain movies begin\n  @filter(row_number() &lt;= 5)\n  @select(1:5)\nend\n</code></pre> 5\u00d75 DataFrame 
RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float641$1971121missing6.42$1000 a Touchdown193971missing6.03$21 a Day Once a Month19417missing8.24$40,000199670missing8.25$50,000 Climax Show, The197571missing3.4 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/group_by/","title":"@group_by","text":"<p>Grouping and ungrouping behavior is one of the nicest parts of using R's tidyverse. Once a data frame is grouped, all verbs applied to that data frame respect the grouping, including but not limited to <code>@mutate()</code>, <code>@summarize()</code>, <code>@slice()</code> and <code>@filter</code>, which allows for really powerful abstractions. For example, with <code>@group_by()</code> followed by <code>@filter()</code>, you can limit the rows of a dataset to the maximum or minimum values for each group.</p> <p>Exactly as in R's <code>tidyverse</code>, once a data frame is grouped, it remains grouped until either <code>@summarize()</code> is called (which \"peels off\" one layer of grouping) or <code>@ungroup()</code> is called, which removes all layers of grouping. Also as in R's <code>tidyverse</code>, <code>@group_by()</code> sorts the groups in ascending order. Unlike in R, there is never any question about whether a data frame is currently grouped because GroupedDataFrames print out in a very different form than DataFrames, making them easy to tell apart.</p> <p>When using <code>@chain</code>, note that you can write either <code>@ungroup</code> or <code>@ungroup()</code>. 
Both are considered valid.</p> <pre><code>using TidierData\nusing RDatasets\n\nmovies = dataset(\"ggplot2\", \"movies\");\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/group_by/#combining-group_by-with-mutate","title":"Combining <code>@group_by()</code> with <code>@mutate()</code>","text":"<pre><code>@chain movies begin\n    @group_by(Year)\n    @mutate(Mean_Yearly_Rating = mean(skipmissing(Rating)))\n    @select(Year, Rating, Mean_Yearly_Rating)\n    @ungroup\n    @slice(1:5)\nend\n</code></pre> 5\u00d73 DataFrame RowYearRatingMean_Yearly_RatingInt32Float64Float64119716.45.66517219396.06.35041319418.26.34107419968.25.74712519753.45.62908"},{"location":"examples/generated/UserGuide/group_by/#combining-group_by-with-summarize","title":"Combining @group_by() with @summarize()","text":"<pre><code>@chain movies begin\n    @group_by(Year)\n    @summarize(Mean_Yearly_Rating = mean(skipmissing(Rating)),\n        Median_Yearly_Rating = median(skipmissing(Rating)))\n    @slice(1:5)\nend\n</code></pre> 5\u00d73 DataFrame RowYearMean_Yearly_RatingMedian_Yearly_RatingInt32Float64Float64119715.665175.8219396.350416.4319416.341076.4419965.747125.9519755.629085.7"},{"location":"examples/generated/UserGuide/group_by/#grouping-by-multiple-columns","title":"Grouping by multiple columns","text":"<pre><code>@chain movies begin\n  @group_by(Year, Comedy)\n  @summarize(Mean_Yearly_Rating = mean(skipmissing(Rating)),\n      Median_Yearly_Rating = median(skipmissing(Rating)))\n  @ungroup # Need to ungroup to peel off grouping by Year\n  @arrange(desc(Year), Comedy)\n  @slice(1:5)\nend\n</code></pre> 5\u00d74 DataFrame RowYearComedyMean_Yearly_RatingMedian_Yearly_RatingInt32Int32Float64Float641200506.627886.752200516.300816.13200406.765216.94200416.428986.65200306.404096.6"},{"location":"examples/generated/UserGuide/group_by/#combining-group_by-with-filter","title":"Combining @group_by() with @filter()","text":"<pre><code>@chain movies begin\n    
@group_by(Year)\n    @filter(Rating == minimum(Rating))\n    @ungroup\n    @select(Year, Rating)\n    @arrange(desc(Year))\n    @slice(1:10)\nend\n</code></pre> 10\u00d72 DataFrame RowYearRatingInt32Float64120051.8220041.0320041.0420041.0520041.0620041.0720041.0820041.0920031.01020031.0 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/interpolation/","title":"Interpolation","text":"<p>The <code>!!</code> (\"bang bang\") operator can be used to interpolate values of variables from the parent environment into your code. This operator is borrowed from the R <code>rlang</code> package. At some point, we may switch to using native Julia interpolation, but for a variety of reasons that introduce some complexity with native interpolation, we plan to continue to support <code>!!</code> interpolation.</p> <p>To interpolate multiple variables, the <code>rlang</code> R package uses the <code>!!!</code> \"triple bang\" operator. However, in <code>TidierData.jl</code>, the <code>!!</code> \"bang bang\" operator can be used to interpolate either single or multiple values as shown in the examples below.</p> <p>Note: You can only interpolate values from variables in the parent environment. If you would like to interpolate column names, you have two options: you can either use <code>across()</code> or you can use <code>@aside</code> with <code>@pull()</code> to create variables in the parent environment containing the values of those columns which can then be accessed using interpolation.</p> <p><code>myvar = :b</code> and <code>myvar = Cols(:a, :b)</code> both refer to <em>columns</em> with those names. On the other hand, <code>myvar = \"b\"</code>, <code>myvar = (\"a\", \"b\")</code> and <code>myvar = [\"a\", \"b\"]</code> will interpolate the <em>values</em>. 
If you intend to interpolate column names, the preferred way is to use <code>Cols()</code> as in the examples below.</p> <pre><code>using TidierData\n\ndf = DataFrame(a = string.(repeat('a':'e', inner = 2)),\n               b = [1,1,1,2,2,2,3,3,3,4],\n               c = 11:20)\n</code></pre> 10\u00d73 DataFrame RowabcStringInt64Int641a1112a1123b1134b2145c2156c2167d3178d3189e31910e420 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/interpolation/#select-the-column-because-myvar-contains-a-symbol","title":"Select the column (because <code>myvar</code> contains a symbol)","text":"<pre><code>myvar = :b\n\n@chain df begin\n  @select(!!myvar)\nend\n</code></pre> 10\u00d71 DataFrame RowbInt64112131425262738393104"},{"location":"examples/generated/UserGuide/interpolation/#select-multiple-variables","title":"Select multiple variables","text":"<p>You can also use a vector as in <code>[:a, :b]</code>, but <code>Cols()</code> is preferred because it lets you mix and match numbers.</p> <pre><code>myvars = Cols(:a, :b)\n\n@chain df begin\n  @select(!!myvars)\nend\n</code></pre> 10\u00d72 DataFrame RowabStringInt641a12a13b14b25c26c27d38d39e310e4 <p>This is the same as this...</p> <pre><code>myvars = Cols(:a, 2)\n\n@chain df begin\n  @select(!!myvars)\nend\n</code></pre> 10\u00d72 DataFrame RowabStringInt641a12a13b14b25c26c27d38d39e310e4 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/interpolation/#filter-rows-containing-the-value-of-myvar_string","title":"Filter rows containing the value of <code>myvar_string</code>","text":"<pre><code>myvar_string = \"b\"\n\n@chain df begin\n  @filter(a == !!myvar_string)\nend\n</code></pre> 2\u00d73 DataFrame RowabcStringInt64Int641b1132b214"},{"location":"examples/generated/UserGuide/interpolation/#filtering-rows-works-similarly-using-in","title":"Filtering rows works similarly using <code>in</code>.","text":"<p>Note that for <code>in</code> to work here, we have to wrap it in <code>[]</code> because otherwise, the string 
will be converted into a collection of characters, which are a different data type.</p> <pre><code>myvar_string = \"b\"\n\n@chain df begin\n  @filter(a in [!!myvar_string])\nend\n</code></pre> 2\u00d73 DataFrame RowabcStringInt64Int641b1132b214 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/interpolation/#you-can-also-use-this-for-a-vector-or-tuple-of-strings","title":"You can also use this for a vector (or tuple) of strings.","text":"<pre><code>myvars_string = [\"a\", \"b\"]\n\n@chain df begin\n  @filter(a in !!myvars_string)\nend\n</code></pre> 4\u00d73 DataFrame RowabcStringInt64Int641a1112a1123b1134b214"},{"location":"examples/generated/UserGuide/interpolation/#mutate-one-variable","title":"Mutate one variable","text":"<p>Remember: You cannot interpolate column names into <code>@mutate()</code> expressions. However, you can create a temporary variable containing the values of the column in question or you can use <code>@mutate()</code> with <code>across()</code>.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/interpolation/#option-1-create-a-temporary-variable-containing-the-values-of-the-column","title":"Option 1: Create a temporary variable containing the values of the column.","text":"<pre><code>myvar = :b\n\n@chain df begin\n  @aside(myvar_values = @pull(_, !!myvar))\n  @mutate(d = !!myvar_values + 1)\nend\n</code></pre> 10\u00d74 DataFrame RowabcdStringInt64Int64Int641a11122a11223b11324b21435c21536c21637d31748d31849e319410e4205"},{"location":"examples/generated/UserGuide/interpolation/#option-2-use-mutate-with-across","title":"Option 2: Use <code>@mutate()</code> with <code>across()</code>","text":"<p>Note: when using <code>across()</code>, anonymous functions are not vectorized. 
This is intentional to allow users to specify their function exactly as desired.</p> <pre><code>@chain df begin\n  @mutate(across(!!myvar, x -&gt; x .+ 1))\n  @rename(d = b_function)\nend\n</code></pre> 10\u00d74 DataFrame RowabcdStringInt64Int64Int641a11122a11223b11324b21435c21536c21637d31748d31849e319410e4205 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/interpolation/#summarize-across-one-variable","title":"Summarize across one variable","text":"<pre><code>myvar = :b\n\n@chain df begin\n  @summarize(across(!!myvar, mean))\nend\n</code></pre> 1\u00d71 DataFrame Rowb_meanFloat6412.2"},{"location":"examples/generated/UserGuide/interpolation/#summarize-across-multiple-variables","title":"Summarize across multiple variables","text":"<pre><code>myvars = Cols(:b, :c)\n\n@chain df begin\n  @summarize(across(!!myvars, (mean, minimum, maximum)))\nend\n</code></pre> 1\u00d76 DataFrame Rowb_meanc_meanb_minimumc_minimumb_maximumc_maximumFloat64Float64Int64Int64Int64Int6412.215.5111420"},{"location":"examples/generated/UserGuide/interpolation/#group-by-one-interpolated-variable","title":"Group by one interpolated variable","text":"<pre><code>myvar = :a\n\n@chain df begin\n  @group_by(!!myvar)\n  @summarize(c = mean(c))\nend\n</code></pre> 5\u00d72 DataFrame RowacStringFloat641a11.52b13.53c15.54d17.55e19.5"},{"location":"examples/generated/UserGuide/interpolation/#group-by-multiple-interpolated-variables","title":"Group by multiple interpolated variables","text":"<p>Once again, you can mix and match column selectors within <code>Cols()</code></p> <pre><code>myvars = Cols(:a, 2)\n\n@chain df begin\n  @group_by(!!myvars)\n  @summarize(c = mean(c))\nend\n</code></pre> <p>GroupedDataFrame with 5 groups based on key: a</p> First Group (1 row): a = \"a\" RowabcStringInt64Float641a111.5 <p>&amp;vellip;</p> Last Group (2 rows): a = \"e\" RowabcStringInt64Float641e319.02e420.0 <p>Notice that <code>df</code> remains grouped by <code>a</code> because the 
<code>@summarize()</code> peeled off one layer of grouping.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/interpolation/#global-constants","title":"Global constants","text":"<p>You can also use <code>!!</code> interpolation to access global variables like <code>pi</code>.</p> <pre><code>df = DataFrame(radius = 1:5)\n\n@chain df begin\n  @mutate(area = !!pi * radius^2)\nend\n</code></pre> 5\u00d72 DataFrame RowradiusareaInt64Float64113.141592212.56643328.27434450.26555578.5398 <p>As of v0.14.0, global constants defined within the Base or Core modules (like <code>missing</code>, <code>pi</code>, and <code>Real</code>) can be directly referenced without any <code>!!</code>.</p> <pre><code>@chain df begin\n  @mutate(area = pi * radius^2)\nend\n</code></pre> 5\u00d72 DataFrame RowradiusareaInt64Float64113.141592212.56643328.27434450.26555578.5398 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/interpolation/#alternative-interpolation-syntax","title":"Alternative interpolation syntax","text":"<p>Since we know that <code>pi</code> is defined in the <code>Main</code> module, we can also access it using <code>Main.pi</code>.</p> <pre><code>@chain df begin\n  @mutate(area = Main.pi * radius^2)\nend\n</code></pre> 5\u00d72 DataFrame RowradiusareaInt64Float64113.141592212.56643328.27434450.26555578.5398 <p>The key lesson with interpolation is that any bare unquoted variable is assumed to refer to a column name in the DataFrame. 
If you are referring to any variable outside of the DataFrame, you need to either use <code>!!variable</code> or <code>[Module_name_here].variable</code> syntax to refer to this variable.</p> <p>Note: You can use <code>!!</code> interpolation anywhere, including inside of functions and loops.</p> <pre><code>df = DataFrame(a = string.(repeat('a':'e', inner = 2)),\n               b = [1,1,1,2,2,2,3,3,3,4],\n               c = 11:20)\n\nfor col in [:b, :c]\n  @chain df begin\n    @summarize(across(!!col, mean))\n    println\n  end\nend\n</code></pre> <pre><code>1\u00d71 DataFrame\n Row \u2502 b_mean\n     \u2502 Float64\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     2.2\n1\u00d71 DataFrame\n Row \u2502 c_mean\n     \u2502 Float64\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502    15.5\n</code></pre> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/joins/","title":"Joins","text":"<p>One really nice thing about the R <code>tidyverse</code> implementation of joins is that they support natural joins. If you don't specify which columns to join on, these column names are inferred from the overlapping columns. While you can override this behavior by specifying which columns to join on, it's convenient that this is not strictly required. We have adopted a similar approach to joins in <code>TidierData.jl</code>.</p> <p>Here, we will only show examples of natural joins. For additional ways to join, take a look at the examples in the Reference.</p> <pre><code>using TidierData\n</code></pre> <p>Let's generate two data frames to join on. 
Here's the first one.</p> <pre><code>df1 = DataFrame(a = [\"a\", \"b\"], b = 1:2);\n</code></pre> <p>And here's the second one.</p> <pre><code>df2 = DataFrame(a = [\"a\", \"c\"], c = 3:4);\n</code></pre> <p>All the joins work similarly to R's <code>tidyverse</code> although the new <code>join_by</code> syntax for non-equijoins is not (yet) supported.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/joins/#left-join","title":"Left join","text":"<pre><code>@left_join(df1, df2)\n</code></pre> 2\u00d73 DataFrame RowabcStringInt64Int64?1a132b2missing"},{"location":"examples/generated/UserGuide/joins/#right-join","title":"Right join","text":"<pre><code>@right_join(df1, df2)\n</code></pre> 2\u00d73 DataFrame RowabcStringInt64?Int641a132cmissing4"},{"location":"examples/generated/UserGuide/joins/#inner-join","title":"Inner join","text":"<pre><code>@inner_join(df1, df2)\n</code></pre> 1\u00d73 DataFrame RowabcStringInt64Int641a13"},{"location":"examples/generated/UserGuide/joins/#full-join","title":"Full join","text":"<pre><code>@full_join(df1, df2)\n</code></pre> 3\u00d73 DataFrame RowabcStringInt64?Int64?1a132b2missing3cmissing4 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/mutate_transmute/","title":"@mutate","text":"<p>The primary purpose of <code>@mutate()</code> is to either create a new column or to update an existing column without changing the number of rows in the dataset. If you only plan to select the mutated columns, then you can use <code>@transmute()</code> instead of <code>@mutate()</code>. However, in <code>TidierData.jl</code>, <code>@select()</code> can also be used to create and select new columns (unlike R's <code>tidyverse</code>), which means that <code>@transmute()</code> is a redundant function in that it has the same functionality as <code>@select()</code>. 
<code>@transmute</code> is included in <code>TidierData.jl</code> for convenience but is not strictly required.</p> <pre><code>using TidierData\nusing RDatasets\n\nmovies = dataset(\"ggplot2\", \"movies\");\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/mutate_transmute/#using-mutate-to-add-a-new-column","title":"Using <code>@mutate()</code> to add a new column","text":"<p>Let's create a new column that contains the budget for each movie expressed in millions of dollars, and then select a handful of columns and rows for the sake of brevity. Notice that the underscores in <code>1_000_000</code> are strictly optional and included only for the sake of readability. Underscores within numbers are ignored by Julia, such that <code>1_000_000</code> is read by Julia exactly the same as <code>1000000</code>.</p> <pre><code>@chain movies begin\n  @filter(!ismissing(Budget))\n  @mutate(Budget_Millions = Budget/1_000_000)\n  @select(Title, Budget, Budget_Millions)\n  @slice(1:5)\nend\n</code></pre> 5\u00d73 DataFrame RowTitleBudgetBudget_MillionsStringInt32?Float641'G' Men4500000.452'Manos' the Hands of Fate190000.0193'Til There Was You2300000023.04.com for Murder50000005.0510 Things I Hate About You1600000016.0 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/mutate_transmute/#using-mutate-to-update-an-existing-column","title":"Using <code>@mutate()</code> to update an existing column","text":"<p>Here we will repeat the same exercise, except that we will overwrite the existing <code>Budget</code> column.</p> <pre><code>@chain movies begin\n    @filter(!ismissing(Budget))\n    @mutate(Budget = Budget/1_000_000)\n    @select(Title, Budget)\n    @slice(1:5)\nend\n</code></pre> 5\u00d72 DataFrame RowTitleBudgetStringFloat641'G' Men0.452'Manos' the Hands of Fate0.0193'Til There Was You23.04.com for Murder5.0510 Things I Hate About You16.0 <p></p> 
<p></p>"},{"location":"examples/generated/UserGuide/mutate_transmute/#using-mutate-with-in","title":"Using <code>@mutate()</code> with <code>in</code>","text":"<p>Here's an example of using <code>@mutate</code> with <code>in</code>.</p> <pre><code>@chain movies begin\n  @filter(!ismissing(Budget))\n  @mutate(Nineties = Year in 1990:1999)\n  @select(Title, Year, Nineties)\n  @slice(1:5)\nend\n</code></pre> 5\u00d73 DataFrame RowTitleYearNinetiesStringInt32Bool1'G' Men1935false2'Manos' the Hands of Fate1966false3'Til There Was You1997true4.com for Murder2002false510 Things I Hate About You1999true <p></p> <p></p>"},{"location":"examples/generated/UserGuide/mutate_transmute/#using-mutate-with-n-and-row_number","title":"Using <code>@mutate</code> with <code>n()</code> and <code>row_number()</code>","text":"<p>Here's an example of using <code>@mutate</code> with both <code>n()</code> and <code>row_number()</code>. Within the context of <code>@mutate()</code>, <code>n()</code> and <code>row_number()</code> are created as temporary columns, which means that they can be used inside of expressions.</p> <pre><code>@chain movies begin\n  @mutate(Row_Num = row_number(),\n          Total_Rows = n())\n  @filter(!ismissing(Budget))\n  @select(Title, Year, Row_Num, Total_Rows)\n  @slice(1:5)\nend\n</code></pre> 5\u00d74 DataFrame RowTitleYearRow_NumTotal_RowsStringInt32Int64Int641'G' Men193522587882'Manos' the Hands of Fate196635587883'Til There Was You199748587884.com for Murder20029158788510 Things I Hate About You199911258788 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/mutate_transmute/#using-transmute-to-update-and-select-columns","title":"Using <code>@transmute</code> to update and select columns.","text":"<p>If we knew we wanted to select only the <code>Title</code> and <code>Budget</code> columns, we could have also used <code>@transmute()</code>, which (again) is just an alias for <code>@select()</code>.</p> <pre><code>@chain movies begin\n    
@filter(!ismissing(Budget))\n    @transmute(Title = Title, Budget = Budget/1_000_000)\n    @slice(1:5)\nend\n</code></pre> 5\u00d72 DataFrame RowTitleBudgetStringFloat641'G' Men0.452'Manos' the Hands of Fate0.0193'Til There Was You23.04.com for Murder5.0510 Things I Hate About You16.0 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/nesting/","title":"Nesting","text":""},{"location":"examples/generated/UserGuide/nesting/#nest","title":"<code>@nest</code>","text":"<p>Nest columns into a dataframe nested into a new column</p> <pre><code>using TidierData\n\ndf4 = DataFrame(x = [\"a\", \"b\", \"a\", \"b\", \"C\", \"a\"], y = 1:6, yz = 13:18, a = 7:12, ab = 12:-1:7)\n\nnested_df = @nest(df4, n2 = starts_with(\"a\"), n3 = y:yz)\n</code></pre> 3\u00d73 DataFrame Rowxn3n2StringDataFrameDataFrame1a3\u00d72 DataFrame3\u00d72 DataFrame2b2\u00d72 DataFrame2\u00d72 DataFrame3C1\u00d72 DataFrame1\u00d72 DataFrame <p>To return to the original dataframe, you can unnest wider and then longer.</p> <pre><code>@chain nested_df begin\n    @unnest_wider(n3:n2)\n    @unnest_longer(y:ab)\nend\n</code></pre> 6\u00d75 DataFrame RowxyyzaabStringInt64Int64Int64Int641a1137122a3159103a6181274b2148115b4161096C517118 <p>Or you can unnest longer and then wider.</p> <pre><code>@chain nested_df begin\n  @unnest_longer(n3:n2)\n  @unnest_wider(n3:n2)\nend\n</code></pre> 6\u00d75 DataFrame RowxyzyaabStringInt64Int64Int64Int641a1317122a1539103a1861274b1428115b1641096C175118 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/nesting/#unnest_longer","title":"<code>@unnest_longer</code>","text":"<p><code>@unnest_longer</code> adds one row per entry of an array or dataframe, lengthening dataframe by flattening the column or columns.</p> <pre><code>df = DataFrame(x = 1:4, y = [[], [1, 2, 3], [4, 5], Int[]]);\n\n@chain df begin\n    @unnest_longer(y)\nend\n</code></pre> 5\u00d72 DataFrame RowxyInt64Any121222323434535 <p>If there are rows with empty arrays, 
<code>keep_empty</code> will prevent these rows from being dropped. <code>indices_include</code> will add a new column for each flattened column that logs the position of each entry in the array.</p> <pre><code>@chain df begin\n    @unnest_longer(y, keep_empty = true, indices_include = true)\nend\n</code></pre> 7\u00d73 DataFrame Rowxyy_idInt64AnyInt6411missing12211322242335341635274missing1 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/nesting/#unnest_wider","title":"<code>@unnest_wider</code>","text":"<p><code>@unnest_wider</code> will widen a column or column(s) of Dicts, Arrays, Tuples or Dataframes into multiple columns.</p> <pre><code>df2 = DataFrame(\n           name = [\"Zaki\", \"Farida\"],\n           attributes = [\n               Dict(\"age\" =&gt; 25, \"city\" =&gt; \"New York\"),\n               Dict(\"age\" =&gt; 30, \"city\" =&gt; \"Los Angeles\")]);\n\n@chain df2 begin\n    @unnest_wider(attributes)\nend\n</code></pre> 2\u00d73 DataFrame RownamecityageStringStringInt641ZakiNew York252FaridaLos Angeles30 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/nesting/#unnesting-nested-dataframes-with-different-lengths-which-contains-arrays","title":"Unnesting nested Dataframes with different lengths which contains arrays","text":"<pre><code>df3 = DataFrame(\n    x = 1:3,\n    y = Any[\n        DataFrame(),\n        DataFrame(a = [\"A\"], b = [14]),\n        DataFrame(a = [\"A\", \"B\", \"C\"], b = [13, 12, 11], c = [4, 4, 4])\n    ]\n)\n</code></pre> 3\u00d72 DataFrame RowxyInt64Any110\u00d70 DataFrame221\u00d72 DataFrame333\u00d73 DataFrame <p><code>df3</code> contains dataframes with different widths that also contain arrays. 
Chaining together <code>@unnest_wider</code> and <code>@unnest_longer</code> will unnest the columns to tuples first and then they will be fully unnested after.</p> <pre><code>@chain df3 begin\n    @unnest_wider(y)\n    @unnest_longer(a:c, keep_empty = true)\nend\n</code></pre> 5\u00d74 DataFrame RowxabcInt64AnyInt64?Int64?11missingmissingmissing22A14missing33A13443B12453C114 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/piping/","title":"Piping","text":"<p>The easiest way to use TidierData.jl for complex data transformation operations is to connect them together using pipes. Julia comes with the built-in <code>|&gt;</code> pipe operator, but TidierData.jl also includes and re-exports the <code>@chain</code> macro from the Chain.jl package. On this page, we will show you how to use both approaches.</p> <p>First, let's load a dataset.</p> <pre><code>using TidierData\nusing RDatasets\n\nmovies = dataset(\"ggplot2\", \"movies\");\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/piping/#julias-built-in-pipe","title":"Julia's built-in <code>|&gt;</code> pipe","text":"<p>If we wanted to figure out the number of rows in the <code>movies</code> data frame, one way to do this is to apply the <code>nrow()</code> function to movies. The most straightforward way is to write it like this:</p> <pre><code>nrow(movies)\n</code></pre> <pre><code>58788\n</code></pre> <p>Another perfectly valid way to write this expression is by piping <code>movies</code> into <code>nrow</code> using the <code>|&gt;</code> pipe operator.</p> <pre><code>movies |&gt; nrow\n</code></pre> <pre><code>58788\n</code></pre> <p>Why might we want to do this? Well, whereas the first expression would naturally be read as \"Calculate the number of rows of movies,\" the second expression reads as \"Start with movies, then calculate the number of rows.\" For a simple expression, these are easy enough to reason about. 
However, as we start to pipe more and more functions in a single expression, the piped version becomes much easier to reason about.</p> <p>One quick note about Julia's built-in pipe: writing <code>movies |&gt; nrow()</code> would not be considered valid. This is because Julia's built-in pipe always expects a function and not a function call. Writing <code>nrow</code> by itself is naming the function, whereas writing <code>nrow()</code> is calling the function. This quickly becomes an issue once we want to supply arguments to the function we are calling.</p> <p>Consider another approach to calculating the number of rows:</p> <pre><code>size(movies, 1)\n</code></pre> <pre><code>58788\n</code></pre> <p>In this case, the <code>size()</code> function returns a tuple of <code>(rows, columns)</code>, and if you supply an optional second argument specifying the index of the tuple, it returns only that dimension. In this case, we called <code>size()</code> with a second argument of <code>1</code>, indicating that we only wanted the function to return the number of rows.</p> <p>How would we write this using Julia's built-in pipe?</p> <pre><code>movies |&gt;\n  x -&gt; size(x, 1)\n</code></pre> <pre><code>58788\n</code></pre> <p>You might have wanted to write <code>movies |&gt; size(1)</code>, but because <code>size(1)</code> would represent a function call, we have to wrap the function call within an anonymous function, which is easily accomplished using the <code>x -&gt; func(x, arg1, arg2)</code> syntax, where <code>func()</code> refers to any function and <code>arg1</code> and <code>arg2</code> refer to any additional arguments that are needed.</p> <p>Another way we could have accomplished this is to calculate <code>size</code>, which returns a tuple of <code>(rows, columns)</code>, and then to use an anonymous function to grab the first value. Since we are calculating <code>size</code> without any arguments, we can simply write <code>size</code> within the pipe. 
However, to grab the first value using the <code>x[1]</code> syntax, we have to define an anonymous function. Putting it all together, we get this approach to piping:</p> <pre><code>movies |&gt;\n  size |&gt;\n  x -&gt; x[1]\n</code></pre> <pre><code>58788\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/piping/#using-the-chain-macro","title":"Using the <code>@chain</code> macro","text":"<p>The <code>@chain</code> macro comes from the Chain.jl package and is included and re-exported by TidierData.jl. Let's do this same series of exercises using <code>@chain</code>.</p> <p>Let's calculate the number of rows using <code>@chain</code>.</p> <pre><code>@chain movies nrow\n</code></pre> <pre><code>58788\n</code></pre> <p>One of the reasons we prefer the use of <code>@chain</code> in TidierData.jl is that it is so concise. There is no need for any operator. Another interesting thing is that <code>@chain</code> doesn't care whether you use a function name or a function call. Both approaches work. As a result, writing <code>nrow()</code> instead of <code>nrow</code> is equally valid using <code>@chain</code>.</p> <pre><code>@chain movies nrow()\n</code></pre> <pre><code>58788\n</code></pre> <p>There are two options for writing out multi-row chains. The preferred approach is as follows, where the starting item is listed, followed by a <code>begin-end</code> block.</p> <pre><code>@chain movies begin\n  nrow\nend\n</code></pre> <pre><code>58788\n</code></pre> <p><code>@chain</code> also comes with a built-in placeholder, which is <code>_</code>. To calculate the <code>size</code> and extract the first value, we can use this approach:</p> <pre><code>@chain movies begin\n  size\n  _[1]\nend\n</code></pre> <pre><code>58788\n</code></pre> <p>You don't have to list the data frame before the <code>begin-end</code> block. 
This is equally valid:</p> <pre><code>@chain begin\n  movies\n  size\n  _[1]\nend\n</code></pre> <pre><code>58788\n</code></pre> <p>The only time this approach is preferred is when instead of simply naming the data frame, you are using a function to read in the data frame from a file or database. Because this function call may include the path of the file, which could be quite long, it's easier to write this on its own line within the <code>begin-end</code> block.</p> <p>While the documentation for TidierData.jl follows the convention of placing piped functions on separate lines of code using <code>begin-end</code> blocks, this is purely convention for ease of readability. You could rewrite the code above without the <code>begin-end</code> block as follows:</p> <pre><code>@chain movies size _[1]\n</code></pre> <pre><code>58788\n</code></pre> <p>For simple transformations, this approach is both concise and readable.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/piping/#using-chain-with-tidierdatajl","title":"Using <code>@chain</code> with TidierData.jl","text":"<p>Returning to our convention of multi-line pipes, let's grab the first five movies that were released since 2000 and had a rating of at least 9 out of 10. Here is one way that we could write this:</p> <pre><code>@chain movies begin\n    @filter(Year &gt;= 2000 &amp;&amp; Rating &gt;= 9)\n    @slice(1:5)\n    @select(1:5)\nend\n</code></pre> 5\u00d75 DataFrame RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float64113 Lakes2004135missing9.022wks, 1yr2002104missing9.43500 Years Later2005106missing9.349020051440009.15Able's House Is Green, The20031368009.4 <p>Note: we generally prefer using <code>&amp;&amp;</code> in Julia because it is a \"short-cut\" operator. 
If the first condition evaluates to <code>false</code>, then the second condition is not even evaluated, which makes it faster (because it takes a short-cut).</p> <p>In the case of <code>@filter</code>, multiple conditions can be written out as separate expressions.</p> <pre><code>@chain movies begin\n  @filter(Year &gt;= 2000, Rating &gt;= 9)\n  @slice(1:5)\n  @select(1:5)\nend\n</code></pre> 5\u00d75 DataFrame RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float64113 Lakes2004135missing9.022wks, 1yr2002104missing9.43500 Years Later2005106missing9.349020051440009.15Able's House Is Green, The20031368009.4 <p>Another way to write this expression is to take advantage of the fact that Julia macros can be called without parentheses. In this case, we will add back the <code>&amp;&amp;</code> for the sake of readability.</p> <pre><code>@chain movies begin\n  @filter Year &gt;= 2000 &amp;&amp; Rating &gt;= 9\n  @slice 1:5\n  @select 1:5\nend\n</code></pre> 5\u00d75 DataFrame RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float64113 Lakes2004135missing9.022wks, 1yr2002104missing9.43500 Years Later2005106missing9.349020051440009.15Able's House Is Green, The20031368009.4 <p>Lastly, TidierData.jl also supports multi-line expressions within each of the macros that accept multiple expressions. So you could also write this as follows:</p> <pre><code>@chain movies begin\n  @filter begin\n    Year &gt;= 2000\n    Rating &gt;= 9\n  end\n  @slice 1:5\n  @select 1:5\nend\n</code></pre> 5\u00d75 DataFrame RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float64113 Lakes2004135missing9.022wks, 1yr2002104missing9.43500 Years Later2005106missing9.349020051440009.15Able's House Is Green, The20031368009.4 <p>What's nice about this approach is that if you want to remove some criteria, you can easily comment out the relevant parts. 
For example, if you're willing to consider older movies, just comment out the <code>Year &gt;= 2000</code>.</p> <pre><code>@chain movies begin\n  @filter begin\n    # Year &gt;= 2000\n    Rating &gt;= 9\n  end\n  @slice 1:5\n  @select 1:5\nend\n</code></pre> 5\u00d75 DataFrame RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float641+1 -119877missing9.42100 Years at the Movies19949missing9.2313 Lakes2004135missing9.042wks, 1yr2002104missing9.45500 Years Later2005106missing9.3 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/piping/#which-approach-to-use","title":"Which approach to use?","text":"<p>The purpose of this page was to show you that both Julia's native pipes and the <code>@chain</code> macro are perfectly valid and capable. We prefer the use of <code>@chain</code> because it is a bit more flexible and concise, with a syntax that makes it easy to comment out individual operations. We have adopted a similar <code>begin-end</code> block functionality within TidierData.jl itself, so that you can spread arguments out over multiple lines if you prefer. In the end, the choice is up to you!</p> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/pivots/","title":"Pivoting","text":"<p>Pivoting a dataset is needed when information sitting inside of cell values needs to be converted into column names (to make the dataset wider) or vice versa (to make the dataset longer). Either action can be referred to as \"reshaping\" a dataset, and various frameworks refer to the actions as unstacking/stacking or spreading/gathering. 
In R's tidyverse, these actions are referred to as pivoting, where the two accompanying actions are <code>@pivot_wider()</code> and <code>@pivot_longer()</code>.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/pivots/#pivot_wider","title":"<code>@pivot_wider()</code>","text":"<p>Pivoting a dataset to make it wider is needed when information sitting inside of cell values needs to be converted into column names. The wider format is sometimes required for the purposes of calculating correlations or running statistical tests.</p> <p>Let's start with a \"long\" DataFrame and make it wide. Why would we want to make it wide? Well, if we wanted to calculate a correlation between <code>A</code> and <code>B</code> for rows with corresponding <code>id</code> numbers, we may need to first make sure that <code>A</code> and <code>B</code> are represented in adjacent columns.</p> <pre><code>using TidierData\n\ndf_long = DataFrame(id = [1, 1, 2, 2],\n                    variable = [\"A\", \"B\", \"A\", \"B\"],\n                    value = [1, 2, 3, 4])\n</code></pre> 4\u00d73 DataFrame RowidvariablevalueInt64StringInt6411A121B232A342B4 <p>To make this dataset wider, we can do the following:</p> <pre><code>@pivot_wider(df_long, names_from = variable, values_from = value)\n</code></pre> 2\u00d73 DataFrame RowidABInt64Int64?Int64?11122234 <p>In <code>@pivot_wider()</code>, both the <code>names_from</code> and <code>values_from</code> arguments are required. 
<code>@pivot_wider()</code> also supports string values for the <code>names_from</code> and <code>values_from</code> arguments.</p> <pre><code>@pivot_wider(df_long, names_from = \"variable\", values_from = \"value\")\n</code></pre> 2\u00d73 DataFrame RowidABInt64Int64?Int64?11122234 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/pivots/#pivot_longer","title":"<code>@pivot_longer()</code>","text":"<p>For calculating summary statistics (e.g., mean) by groups, or for plotting purposes, DataFrames often need to be converted to their longer form. For this, we can use <code>@pivot_longer</code>. First, let's start with a \"wide\" DataFrame.</p> <pre><code>df_wide = DataFrame(id = [1, 2], A = [1, 3], B = [2, 4])\n</code></pre> 2\u00d73 DataFrame RowidABInt64Int64Int6411122234 <p>Now, let's transform this wide dataset into the longer form. Unlike <code>@pivot_wider()</code>, where providing the <code>names_from</code> and <code>values_from</code> arguments is required, the only item that's required in <code>@pivot_longer()</code> is a set of columns to pivot. The <code>names_to</code> and <code>values_to</code> arguments are optional, and if not provided, they will default to \"variable\" and \"value\", respectively.</p> <p>We can recreate the original long dataset by doing the following. Multiple columns must be provided using selection syntax or a selection helper. 
Tuples containing multiple columns are not yet supported.</p> <pre><code>@pivot_longer(df_wide, A:B)\n</code></pre> 4\u00d73 DataFrame RowidvariablevalueInt64StringInt6411A122A331B242B4 <p>Here is another way of providing the same result using a different type of selection syntax.</p> <pre><code>@pivot_longer(df_wide, -id)\n</code></pre> 4\u00d73 DataFrame RowidvariablevalueInt64StringInt6411A122A331B242B4 <p>The selected columns can also be included as an array</p> <pre><code>@pivot_longer(df_wide, [id, B])\n</code></pre> 4\u00d73 DataFrame RowAvariablevalueInt64StringInt6411id123id231B243B4 <p>or excluded</p> <pre><code>@pivot_longer(df_wide, -[id, B])\n</code></pre> 2\u00d74 DataFrame RowidBvariablevalueInt64Int64StringInt64112A1224A3 <p>If all columns should be included, they can be specified by either <code>everything()</code>, <code>:</code>, or by leaving the argument blank</p> <pre><code>@pivot_longer(df_wide, everything())\n</code></pre> 6\u00d72 DataFrame RowvariablevalueStringInt641id12id23A14A35B26B4 <p>In this example, we set the <code>names_to</code> and <code>values_to</code> arguments. Either argument can be left out and will revert to the default value. 
The <code>names_to</code> and <code>values_to</code> arguments can be provided as strings or as bare unquoted variable names.</p> <p>Here is an example with <code>names_to</code> and <code>values_to</code> containing strings:</p> <pre><code>@pivot_longer(df_wide, A:B, names_to = \"letter\", values_to = \"number\")\n</code></pre> 4\u00d73 DataFrame RowidletternumberInt64StringInt6411A122A331B242B4 <p>And here is an example with <code>names_to</code> and <code>values_to</code> containing bare unquoted variables:</p> <pre><code>@pivot_longer(df_wide, A:B, names_to = letter, values_to = number)\n</code></pre> 4\u00d73 DataFrame RowidletternumberInt64StringInt6411A122A331B242B4 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/rename/","title":"@rename","text":"<p>Renaming columns follows the same syntax as in R's <code>tidyverse</code>, where the \"tidy expression\" is <code>new_name = old_name</code>. While the main function to rename columns is <code>@rename()</code>, you can also use <code>@select()</code> if you additionally plan to select only the renamed columns.</p> <pre><code>using TidierData\nusing RDatasets\n\nmovies = dataset(\"ggplot2\", \"movies\");\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/rename/#rename-using-rename","title":"Rename using <code>@rename()</code>","text":"<p>If you only want to rename the columns without selecting them, then this is where <code>@rename()</code> comes in handy. 
For the sake of brevity, we are selecting the first 5 columns and rows after performing the <code>@rename()</code>.</p> <pre><code>@chain movies begin\n    @rename(title = Title, Minutes = Length)\n    @select(1:5)\n    @slice(1:5)\nend\n</code></pre> 5\u00d75 DataFrame RowtitleYearMinutesBudgetRatingStringInt32Int32Int32?Float641$1971121missing6.42$1000 a Touchdown193971missing6.03$21 a Day Once a Month19417missing8.24$40,000199670missing8.25$50,000 Climax Show, The197571missing3.4 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/rename/#rename-using-select","title":"Rename using <code>@select()</code>","text":"<p>If you plan to only select those columns that you would like to rename, then you can use <code>@select()</code> to both rename and select the columns of interest.</p> <pre><code>@chain movies begin\n  @select(title = Title, Minutes = Length)\n  @slice(1:5)\nend\n</code></pre> 5\u00d72 DataFrame RowtitleMinutesStringInt321$1212$1000 a Touchdown713$21 a Day Once a Month74$40,000705$50,000 Climax Show, The71 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/select/","title":"@select","text":"<p>The <code>@select()</code> macro in <code>TidierData.jl</code> supports many of the nuances of the R <code>tidyverse</code> implementation, including indexing columns individually by name or number, indexing by ranges of columns using the <code>:</code> operator between column names or numbers, and negative selection using negated column names or numbers. 
Selection helpers such as <code>starts_with()</code>, <code>ends_with()</code>, <code>matches()</code>, and <code>contains()</code> are also supported.</p> <pre><code>using TidierData\nusing RDatasets\n\nmovies = dataset(\"ggplot2\", \"movies\");\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/select/#select-the-first-5-columns-individually-by-name","title":"Select the first 5 columns individually by name","text":"<pre><code>@chain movies begin\n    @select(Title, Year, Length, Budget, Rating)\n    @slice(1:5)\nend\n</code></pre> 5\u00d75 DataFrame RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float641$1971121missing6.42$1000 a Touchdown193971missing6.03$21 a Day Once a Month19417missing8.24$40,000199670missing8.25$50,000 Climax Show, The197571missing3.4"},{"location":"examples/generated/UserGuide/select/#select-the-first-5-columns-individually-by-number","title":"Select the first 5 columns individually by number","text":"<pre><code>@chain movies begin\n    @select(1, 2, 3, 4, 5)\n    @slice(1:5)\nend\n</code></pre> 5\u00d75 DataFrame RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float641$1971121missing6.42$1000 a Touchdown193971missing6.03$21 a Day Once a Month19417missing8.24$40,000199670missing8.25$50,000 Climax Show, The197571missing3.4"},{"location":"examples/generated/UserGuide/select/#select-the-first-5-columns-by-name-using-a-range","title":"Select the first 5 columns by name (using a range)","text":"<pre><code>@chain movies begin\n    @select(Title:Rating)\n    @slice(1:5)\nend\n</code></pre> 5\u00d75 DataFrame RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float641$1971121missing6.42$1000 a Touchdown193971missing6.03$21 a Day Once a Month19417missing8.24$40,000199670missing8.25$50,000 Climax Show, The197571missing3.4"},{"location":"examples/generated/UserGuide/select/#select-the-first-5-columns-by-number-using-a-range","title":"Select the first 5 columns by number (using a range)","text":"<pre><code>@chain 
movies begin\n    @select(1:5)\n    @slice(1:5)\nend\n</code></pre> 5\u00d75 DataFrame RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float641$1971121missing6.42$1000 a Touchdown193971missing6.03$21 a Day Once a Month19417missing8.24$40,000199670missing8.25$50,000 Climax Show, The197571missing3.4"},{"location":"examples/generated/UserGuide/select/#select-all-but-the-first-5-columns-by-name","title":"Select all but the first 5 columns by name","text":"<p>Here we will limit the results to the first 5 remaining columns and the first 5 rows for the sake of brevity.</p> <pre><code>@chain movies begin\n    @select(-(Title:Rating))\n    @select(1:5)\n    @slice(1:5)\nend\n</code></pre> 5\u00d75 DataFrame RowVotesR1R2R3R4Int32Float64Float64Float64Float6413484.54.54.54.52200.014.54.524.5350.00.00.00.04614.50.00.00.051724.54.50.014.5 <p>We can also use <code>!</code> for inverted selection instead of <code>-</code>.</p> <pre><code>@chain movies begin\n  @select(!(Title:Rating))\n  @select(1:5)\n  @slice(1:5)\nend\n</code></pre> 5\u00d75 DataFrame RowVotesR1R2R3R4Int32Float64Float64Float64Float6413484.54.54.54.52200.014.54.524.5350.00.00.00.04614.50.00.00.051724.54.50.014.5 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/select/#select-all-but-the-first-5-columns-by-number","title":"Select all but the first 5 columns by number","text":"<p>We will again limit the results to the first 5 remaining columns and the first 5 rows for the sake of brevity.</p> <pre><code>@chain movies begin\n    @select(-(1:5))\n    @select(1:5)\n    @slice(1:5)\nend\n</code></pre> 5\u00d75 DataFrame RowVotesR1R2R3R4Int32Float64Float64Float64Float6413484.54.54.54.52200.014.54.524.5350.00.00.00.04614.50.00.00.051724.54.50.014.5 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/select/#mix-and-match-selection","title":"Mix and match selection","text":"<p>Just like in R's <code>tidyverse</code>, you can separate multiple selections with commas and mix and match different ways 
of selecting columns.</p> <pre><code>@chain movies begin\n    @select(1, Budget:Rating)\n    @slice(1:5)\nend\n</code></pre> 5\u00d73 DataFrame RowTitleBudgetRatingStringInt32?Float641$missing6.42$1000 a Touchdownmissing6.03$21 a Day Once a Monthmissing8.24$40,000missing8.25$50,000 Climax Show, Themissing3.4 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/sep_unite/","title":"Separating","text":"<p>Following the tidyverse syntax, the <code>@separate()</code> macro in <code>TidierData.jl</code> separates a single column into multiple columns. This is particularly useful for splitting a column containing delimited values into individual columns.</p> <pre><code>using TidierData\n\ndf = DataFrame(a = [\"1-1\", \"2-2\", \"3-3-3\"]);\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/sep_unite/#separate","title":"<code>@separate</code>","text":"<p>Separate the \"a\" column into \"b\", \"c\", and \"d\" columns based on the dash delimiter</p> <pre><code>@chain df begin\n    @separate(a, (b, c, d), \"-\")\nend\n</code></pre> 3\u00d73 DataFrame RowbcdSubStrin\u2026SubStrin\u2026SubStrin\u2026?111missing222missing3333 <p>The <code>into</code> columns can also be designated as follows:</p> <pre><code>new_names = [\"x$(i)\" for i in 1:3]; # or new_names = [\"b\", \"c\", \"d\"], or new_names = [:b, :c, :d]\n\n@separate(df, a, !!new_names, \"-\")\n</code></pre> 3\u00d73 DataFrame Rowx1x2x3SubStrin\u2026SubStrin\u2026SubStrin\u2026?111missing222missing3333 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/sep_unite/#unite","title":"<code>@unite</code>","text":"<p>The <code>@unite</code> macro brings together multiple columns into one, separating the values by a user-specified delimiter. Here, the <code>@unite</code> macro combines the \"b\", \"c\", and \"d\" columns into a single new \"new_col\" column using the \"/\" delimiter</p> <pre><code>df = DataFrame(\n       b = [\"1\", \"2\", \"3\"],\n 
      c = [\"1\", \"2\", \"3\"],\n       d = [missing, missing, \"3\"]);\n\n@chain df begin\n    @unite(new_col, (b, c, d), \"/\")\nend\n</code></pre> 3\u00d74 DataFrame Rowbcdnew_colStringStringString?String111missing1/1222missing2/233333/3/3 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/sep_unite/#separate_rows","title":"<code>@separate_rows</code>","text":"<p>Separate rows into multiple rows based on a chosen delimiter.</p> <pre><code>df = DataFrame(\n       a = 1:3,\n       b = [\"a\", \"aa;bb;cc\", \"dd;ee\"],\n       c = [\"1\", \"2;3;4\", \"5;6\"],\n       d = [\"7\", \"8;9;10\", \"11;12\"],\n       e = [\"11\", \"22;33;44\", \"55;66\"]);\n\n@separate_rows(df, b:e, \";\")\n</code></pre> 6\u00d75 DataFrame RowabcdeInt64SubStrin\u2026SubStrin\u2026SubStrin\u2026SubStrin\u202611a171122aa282232bb393342cc4104453dd5115563ee61266 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/slice/","title":"@slice","text":"<p>Slicing rows is similar to filtering rows, except that slicing is performed based on row numbers rather than filter criteria. In <code>TidierData.jl</code>, slicing works similarly to R's <code>tidyverse</code> in that both positive (which rows to keep) and negative (which rows to remove) slicing is supported. For <code>@slice()</code>, any valid <code>UnitRange</code> of integers is considered valid; this is not the case for <code>@select()</code> or <code>across()</code>.</p> <p>Remember: Just like every other <code>TidierData.jl</code> top-level macro, <code>@slice()</code> respects group. 
This means that in a grouped data frame, <code>@slice(1:2)</code> will select the first 2 rows from each group.</p> <pre><code>using TidierData\n\ndf = DataFrame(row_num = 1:10,\n               a = string.(repeat('a':'e', inner = 2)),\n               b = [1,1,1,2,2,2,3,3,3,4])\n</code></pre> 10\u00d73 DataFrame Rowrow_numabInt64StringInt6411a122a133b144b255c266c277d388d399e31010e4 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/slice/#slicing-using-a-range-of-numbers","title":"Slicing using a range of numbers","text":"<p>This is an easy way of retrieving 5 consecutive rows.</p> <pre><code>@chain df begin\n    @slice(1:5)\nend\n</code></pre> 5\u00d73 DataFrame Rowrow_numabInt64StringInt6411a122a133b144b255c2 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/slice/#slicing-using-a-more-complex-unitrange-of-numbers","title":"Slicing using a more complex UnitRange of numbers","text":"<p>How would we obtain every other from 1 to 7 (counting up by 2)? Note that <code>range()</code> is similar to <code>seq()</code> in R.</p> <pre><code>@chain df begin\n  @slice(range(start = 1, step = 2, stop = 7))\nend\n</code></pre> 4\u00d73 DataFrame Rowrow_numabInt64StringInt6411a123b135c247d3 <p>This same code can also be written using Julia's shorthand syntax for unit ranges.</p> <pre><code>@chain df begin\n  @slice(1:2:7)\nend\n</code></pre> 4\u00d73 DataFrame Rowrow_numabInt64StringInt6411a123b135c247d3 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/slice/#separate-multiple-row-selections-with-commas","title":"Separate multiple row selections with commas","text":"<p>If you have multiple different row selections, you can separate them with commas.</p> <pre><code>@chain df begin\n    @slice(1:5, 10)\nend\n</code></pre> 6\u00d73 DataFrame Rowrow_numabInt64StringInt6411a122a133b144b255c2610e4 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/slice/#use-n-as-short-hand-to-indicate-the-number-of-rows","title":"Use <code>n()</code> as 
short-hand to indicate the number of rows","text":"<p>Select the last 2 rows.</p> <pre><code>@chain df begin\n  @slice(n()-1, n())\nend\n</code></pre> 2\u00d73 DataFrame Rowrow_numabInt64StringInt6419e3210e4 <p>You can even use <code>n()</code> inside of UnitRanges, just like in R. Notice that the order of operations is slightly different in Julia as compared to R, so you don't have to wrap the <code>n()-1</code> expression inside of parentheses.</p> <pre><code>@chain df begin\n  @slice(n()-1:n())\nend\n</code></pre> 2\u00d73 DataFrame Rowrow_numabInt64StringInt6419e3210e4 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/slice/#inverted-selection-using-negative-numbers","title":"Inverted selection using negative numbers","text":"<p>This line selects all rows except the first 5 rows.</p> <pre><code>@chain df begin\n    @slice(-(1:5))\nend\n</code></pre> 5\u00d73 DataFrame Rowrow_numabInt64StringInt6416c227d338d349e3510e4 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/slice/#sample-5-random-rows-in-the-data-frame","title":"Sample 5 random rows in the data frame","text":"<pre><code>@chain df begin\n  @slice_sample(n = 5)\nend\n</code></pre> 5\u00d73 DataFrame Rowrow_numabInt64StringInt6416c221a135c248d357d3"},{"location":"examples/generated/UserGuide/slice/#slice-the-min","title":"Slice the min","text":"<p>This line selects all rows with the minimum value of the desired column</p> <pre><code>@chain df begin\n  @slice_min(b)\nend\n</code></pre> 3\u00d73 DataFrame Rowrow_numabInt64StringInt6411a122a133b1 <p>This line will only show the first row.</p> <pre><code>@chain df begin\n  @slice_min(b, with_ties = false)\nend\n</code></pre> 1\u00d73 DataFrame Rowrow_numabInt64StringInt6411a1 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/slice/#slice-the-max","title":"Slice the max","text":"<p>The optional prop argument will slice a proportion of the full dataframe.</p> <pre><code>@chain df begin\n  @slice_max(b, prop = 
0.5)\nend\n</code></pre> 5\u00d73 DataFrame Rowrow_numabInt64StringInt64110e427d338d349e354b2 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/slice/#slice-the-tail","title":"Slice the tail","text":"<pre><code>@chain df begin\n  @slice_tail(prop = 0.5)\nend\n</code></pre> 5\u00d73 DataFrame Rowrow_numabInt64StringInt6416c227d338d349e3510e4"},{"location":"examples/generated/UserGuide/slice/#slice-the-head","title":"Slice the head","text":"<pre><code>@chain df begin\n  @slice_head(n = 3)\nend\n</code></pre> 3\u00d73 DataFrame Rowrow_numabInt64StringInt6411a122a133b1 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/summarize/","title":"@summarize","text":"<p>Summarizing a dataset involves aggregating multiple rows down to (usually) a single row of data. This can be performed across the entire dataset, or if the dataset is grouped, then for each row in the dataset. This is implemented similarly to R's tidyverse using <code>@summarize()</code>. 
Out of admiration for Hadley Wickham, and to be consistent with the R <code>tidyverse</code>, both <code>@summarize()</code> and <code>@summarise()</code> are supported.</p> <p>Note that summarization is different from other verbs in the <code>TidierData.jl</code> in 2 respects:</p> <ol> <li>No auto-vectorization is performed when using <code>@summarize()</code></li> <li>One layer of grouping is removed after each <code>@summarize()</code> function.</li> </ol> <p>If you require further changes to grouping beyond the defaults, you can either <code>@ungroup()</code> or call <code>@group_by()</code> to regroup by a different set of variables.</p> <pre><code>using TidierData\nusing RDatasets\n\nmovies = dataset(\"ggplot2\", \"movies\");\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/summarize/#using-summarize-with-n-to-count-the-number-of-movies-in-the-dataset","title":"Using <code>@summarize()</code> with <code>n()</code> to count the number of movies in the dataset.","text":"<p>Within the context of <code>@summarize()</code> only, <code>n()</code> is converted to DataFrames.jl's <code>nrow()</code> function.</p> <pre><code>@chain movies begin\n    @summarize(n = n())\nend\n</code></pre> 1\u00d71 DataFrame RownInt64158788 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/summarize/#using-summarize-to-calculate-average-budget-of-movies-in-the-dataset","title":"Using <code>@summarize()</code> to calculate average budget of movies in the dataset.","text":"<p>The median budget in this dataset is 3 million, and the mean budget is 13 million! 
Making movies must be way more lucrative than making Julia packages.</p> <pre><code>@chain movies begin\n  @mutate(Budget = Budget / 1_000_000)\n  @summarize(median_budget = median(skipmissing(Budget)),\n             mean_budget = mean(skipmissing(Budget)))\nend\n</code></pre> 1\u00d72 DataFrame Rowmedian_budgetmean_budgetFloat64Float6413.013.4125 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/summarize/#combining-group_by-with-summarise","title":"Combining <code>@group_by()</code> with <code>@summarise()</code>","text":"<p>How many movies came out in each of the last 5 years?</p> <pre><code>@chain movies begin\n  @group_by(Year)\n  @summarise(n = n())\n  @arrange(desc(Year))\n  @slice(1:5)\nend\n</code></pre> 5\u00d72 DataFrame RowYearnInt32Int6412005349220041945320032158420022168520012121 <p>Notice that there was no need to explicitly <code>@ungroup()</code> the dataset after summarizing here. The <code>@summarise()</code> function removed one layer of grouping. Since this dataset was only grouped by one variable (<code>Year</code>), it was no longer grouped after the <code>@summarise</code> was performed.</p> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/summary/","title":"@summary","text":"<p>The <code>@summary()</code> macro in <code>TidierData.jl</code> provides a concise way to compute summary statistics on data. 
Similar to its R counterpart, it will provide the mean, median, Q1, Q3, minimum, maximum, and number of missing values in a numerical column or columns.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/summary/#summary-for-the-whole-dataframe","title":"Summary for the whole dataframe","text":"<pre><code>using TidierData\n\ndf = DataFrame( A = [1, 2, 3, 4, 5], B = [missing, 7, 8, 9, 10], C = [11, missing, 13, 14, missing], D = [16, 17, 18, 19, 20]);\n\n@chain df begin\n    @summary()\nend\n\n@summary(df)\n</code></pre> 4\u00d79 DataFrame RowColumnMinQ1MedianMeanQ3MaxCountMissing_CountStringInt64Float64Float64Float64Float64Int64Int64Int641A12.03.03.04.05502B77.758.58.59.2510413C1112.013.012.666713.514324D1617.018.018.019.02050"},{"location":"examples/generated/UserGuide/summary/#you-can-specify-columns-for-which-you-want-to-compute-the-summary-this-is-useful-if-the-dataframe-has-a-large-number-of-columns-and-youre-interested-in-only-a-subset-of-them","title":"You can specify columns for which you want to compute the summary. This is useful if the DataFrame has a large number of columns and you're interested in only a subset of them.","text":"<pre><code>@chain df begin\n    @summary(B)\nend\n\n@summary(df, B)\n</code></pre> 1\u00d79 DataFrame RowColumnMinQ1MedianMeanQ3MaxCountMissing_CountStringInt64Float64Float64Float64Float64Int64Int64Int641B77.758.58.59.251041"},{"location":"examples/generated/UserGuide/summary/#or-for-a-range-of-columns","title":"or for a range of columns","text":"<pre><code>@chain df begin\n    @select(B:D)\n    @summary() # you can also write this @summary(2:4)\nend\n</code></pre> 3\u00d79 DataFrame RowColumnMinQ1MedianMeanQ3MaxCountMissing_CountStringInt64Float64Float64Float64Float64Int64Int64Int641B77.758.58.59.2510412C1112.013.012.666713.514323D1617.018.018.019.02050 <p>This page was generated using Literate.jl.</p>"}]}
\ No newline at end of file
+{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Home","text":""},{"location":"#what-is-tidierdatajl","title":"What is TidierData.jl?","text":"<p>TidierData.jl is a 100% Julia implementation of the dplyr and tidyr R packages. Powered by the DataFrames.jl package and Julia\u2019s extensive meta-programming capabilities, TidierData.jl is an R user\u2019s love letter to data analysis in Julia.</p> <p><code>TidierData.jl</code> has two goals, which differentiate it from other data analysis meta-packages in Julia:</p> Stick as closely to tidyverse syntax as possible. <p>Whereas other meta-packages introduce Julia-centric idioms for working with DataFrames, this package\u2019s goal is to reimplement parts of tidyverse in Julia. This means that <code>TidierData.jl</code> uses tidy expressions as opposed to idiomatic Julia expressions. An example of a tidy expression is <code>a = mean(b)</code>. In Julia, <code>a</code> and <code>b</code> are variables and are thus \"eagerly\" evaluated. This means that if <code>b</code> is merely referring to a column in a data frame and not an object in the global namespace, then an error will be generated because <code>b</code> was not found. In idiomatic Julia, <code>b</code> would need to be expressed as a symbol, or <code>:b</code>. Even then, <code>a = mean(:b)</code> would generate an error because it's not possible to calculate the mean value of a symbol. To handle this using idiomatic Julia, <code>DataFrames.jl</code> introduces a mini-language that relies heavily on the creation of anonymous functions, with explicit directional pairs syntax using a <code>source =&gt; function =&gt; destination</code> syntax. While this is quite elegant, it can be verbose. <code>TidierData.jl</code> aims to reduce this complexity by exposing an R-like syntax, which is then converted into valid <code>DataFrames.jl</code> code. 
The reason that tidy expressions are considered valid by Julia in <code>TidierData.jl</code> is because they are implemented using macros. Macros \"capture\" the expressions they are given, and then they can modify those expressions before evaluating them. For consistency, all top-level <code>dplyr</code> functions are implemented as macros (whether or not a macro is truly needed), and all \"helper\" functions (used inside of those top-level functions) are implemented as functions or pseudo-functions (functions which only exist through modification of the abstract syntax tree).</p> Make broadcasting mostly invisible. <p>Broadcasting trips up many R users switching to Julia because R users are used to most functions being vectorized. <code>TidierData.jl</code> currently uses a lookup table to decide which functions not to vectorize; all other functions are automatically vectorized. Read the documentation page on \"Autovectorization\" to read about how this works, and how to override the defaults. An example of where this issue commonly causes errors is when centering a variable. To create a new column <code>a</code> that centers the column <code>b</code>, <code>TidierData.jl</code> lets you simply write <code>a = b - mean(b)</code> exactly as you would in R. This works because <code>TidierData.jl</code> knows to not vectorize <code>mean()</code> while also recognizing that <code>-</code> should be vectorized such that this expression is rewritten in <code>DataFrames.jl</code> as <code>:b =&gt; (b -&gt; b .- mean(b)) =&gt; :a</code>. For any user-defined function that you want to \"mark\" as being non-vectorized, you can prefix it with a <code>~</code>. 
For example, a function <code>new_mean()</code>, if it had the same functionality as <code>mean()</code> would normally get vectorized by <code>TidierData.jl</code> unless you write it as <code>~new_mean()</code>.</p> <p></p> <p></p>"},{"location":"#installation","title":"Installation","text":"<p>For the stable version:</p> <pre><code>] add TidierData\n</code></pre> <p>The <code>]</code> character starts the Julia package manager. Press the backspace key to return to the Julia prompt.</p> <p>or</p> <pre><code>using Pkg\nPkg.add(\"TidierData\")\n</code></pre> <p>For the newest version:</p> <pre><code>] add TidierData#main\n</code></pre> <p>or</p> <pre><code>using Pkg\nPkg.add(url=\"https://github.com/TidierOrg/TidierData.jl\")\n</code></pre> <p></p> <p></p>"},{"location":"#what-macros-and-functions-does-tidierdatajl-support","title":"What macros and functions does TidierData.jl support?","text":"<p>To support R-style programming, <code>TidierData.jl</code> is implemented using macros. This is because macros are able to \"capture\" the code before executing it, which allows the package to support R-like \"tidy expressions\" that would otherwise not be considered valid Julia code.</p> <p>TidierData.jl currently supports the following top-level macros:</p> <p>Top-level macros:</p> <ul> <li><code>@glimpse()</code> and <code>@head()</code></li> <li><code>@select()</code> and <code>@distinct()</code></li> <li><code>@rename()</code> and <code>@rename_with()</code></li> <li><code>@mutate()</code> and <code>@transmute()</code> </li> <li><code>@summarize()</code> and <code>@summarise()</code></li> <li><code>@filter()</code></li> <li><code>@slice()</code>, <code>@slice_sample()</code>, <code>@slice_min()</code>, <code>@slice_max()</code>, <code>@slice_head()</code>, and <code>@slice_tail()</code></li> <li><code>@group_by()</code> and <code>@ungroup()</code></li> <li><code>@arrange()</code></li> <li><code>@relocate()</code></li> <li><code>@pull()</code></li> 
<li><code>@count()</code> and <code>@tally()</code></li> <li><code>@left_join()</code>, <code>@right_join()</code>, <code>@inner_join()</code>, <code>@full_join()</code>, <code>@anti_join()</code>, and <code>@semi_join()</code></li> <li><code>@bind_rows()</code> and <code>@bind_cols()</code></li> <li><code>@pivot_wider()</code> and <code>@pivot_longer()</code></li> <li><code>@separate()</code>, <code>@separate_rows()</code>, and <code>@unite()</code></li> <li><code>@drop_missing()</code> and <code>@fill_missing()</code></li> <li><code>@unnest_longer()</code>, <code>@unnest_wider()</code>, and <code>@nest()</code></li> <li><code>@clean_names()</code> (as in R's <code>janitor::clean_names()</code> function)</li> <li><code>@summary()</code> (as in R's <code>summary()</code> function)</li> </ul> <p>TidierData.jl also supports the following helper functions:</p> <p>Helper functions:</p> <ul> <li><code>across()</code></li> <li><code>where()</code></li> <li><code>desc()</code></li> <li><code>if_else()</code> and <code>case_when()</code></li> <li><code>n()</code> and <code>row_number()</code></li> <li><code>ntile()</code></li> <li><code>lag()</code> and <code>lead()</code></li> <li><code>everything()</code>, <code>starts_with()</code>, <code>ends_with()</code>, <code>matches()</code>, and <code>contains()</code></li> <li><code>as_float()</code>, <code>as_integer()</code>, and <code>as_string()</code></li> <li><code>is_number()</code>, <code>is_float()</code>, <code>is_integer()</code>, and <code>is_string()</code></li> <li><code>missing_if()</code> and <code>replace_missing()</code></li> </ul> <p>See the Reference page for a detailed guide to each of the macros and functions.</p> <p></p> <p></p>"},{"location":"#example","title":"Example","text":"<p>Let's select the first five movies in our dataset whose budget exceeds the mean budget. 
Unlike in R, where we pass an <code>na.rm = TRUE</code> argument to remove missing values, in Julia we wrap the variable with a <code>skipmissing()</code> to remove the missing values before the <code>mean()</code> is calculated.</p> <pre><code>using TidierData\nusing RDatasets\n\nmovies = dataset(\"ggplot2\", \"movies\");\n\n@chain movies begin\n    @mutate(Budget = Budget / 1_000_000)\n    @filter(Budget &gt;= mean(skipmissing(Budget)))\n    @select(Title, Budget)\n    @slice(1:5)\nend\n</code></pre> <pre><code>5\u00d72 DataFrame\n Row \u2502 Title                       Budget   \n     \u2502 String                      Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 'Til There Was You              23.0\n   2 \u2502 10 Things I Hate About You      16.0\n   3 \u2502 102 Dalmatians                  85.0\n   4 \u2502 13 Going On 30                  37.0\n   5 \u2502 13th Warrior, The               85.0\n</code></pre> <p></p> <p></p>"},{"location":"#whats-new","title":"What\u2019s new","text":"<p>See NEWS.md for the latest updates.</p> <p></p> <p></p>"},{"location":"#whats-missing","title":"What's missing","text":"<p>Is there a tidyverse feature missing that you would like to see in TidierData.jl? Please file a GitHub issue. 
Because TidierData.jl primarily wraps DataFrames.jl, our decision to integrate a new feature will be guided by how well-supported it is within DataFrames.jl and how likely other users are to benefit from it.</p>"},{"location":"reference/","title":"Reference","text":""},{"location":"reference/#index","title":"Index","text":"<ul> <li><code>TidierData.TidierData_set</code></li> <li><code>TidierData.across</code></li> <li><code>TidierData.as_float</code></li> <li><code>TidierData.as_integer</code></li> <li><code>TidierData.as_string</code></li> <li><code>TidierData.case_when</code></li> <li><code>TidierData.desc</code></li> <li><code>TidierData.ends_with</code></li> <li><code>TidierData.everything</code></li> <li><code>TidierData.if_else</code></li> <li><code>TidierData.is_float</code></li> <li><code>TidierData.is_integer</code></li> <li><code>TidierData.is_number</code></li> <li><code>TidierData.is_string</code></li> <li><code>TidierData.matches</code></li> <li><code>TidierData.missing_if</code></li> <li><code>TidierData.n</code></li> <li><code>TidierData.ntile</code></li> <li><code>TidierData.replace_missing</code></li> <li><code>TidierData.row_number</code></li> <li><code>TidierData.starts_with</code></li> <li><code>TidierData.where</code></li> <li><code>TidierData.@anti_join</code></li> <li><code>TidierData.@arrange</code></li> <li><code>TidierData.@bind_cols</code></li> <li><code>TidierData.@bind_rows</code></li> <li><code>TidierData.@count</code></li> <li><code>TidierData.@distinct</code></li> <li><code>TidierData.@drop_missing</code></li> <li><code>TidierData.@fill_missing</code></li> <li><code>TidierData.@filter</code></li> <li><code>TidierData.@full_join</code></li> <li><code>TidierData.@glimpse</code></li> <li><code>TidierData.@group_by</code></li> <li><code>TidierData.@head</code></li> <li><code>TidierData.@inner_join</code></li> <li><code>TidierData.@left_join</code></li> <li><code>TidierData.@mutate</code></li> <li><code>TidierData.@nest</code></li> 
<li><code>TidierData.@pivot_longer</code></li> <li><code>TidierData.@pivot_wider</code></li> <li><code>TidierData.@pull</code></li> <li><code>TidierData.@relocate</code></li> <li><code>TidierData.@rename</code></li> <li><code>TidierData.@rename_with</code></li> <li><code>TidierData.@right_join</code></li> <li><code>TidierData.@select</code></li> <li><code>TidierData.@semi_join</code></li> <li><code>TidierData.@separate</code></li> <li><code>TidierData.@separate_rows</code></li> <li><code>TidierData.@slice</code></li> <li><code>TidierData.@slice_head</code></li> <li><code>TidierData.@slice_max</code></li> <li><code>TidierData.@slice_min</code></li> <li><code>TidierData.@slice_sample</code></li> <li><code>TidierData.@slice_tail</code></li> <li><code>TidierData.@summarise</code></li> <li><code>TidierData.@summarize</code></li> <li><code>TidierData.@summary</code></li> <li><code>TidierData.@tally</code></li> <li><code>TidierData.@transmute</code></li> <li><code>TidierData.@ungroup</code></li> <li><code>TidierData.@unite</code></li> <li><code>TidierData.@unnest_longer</code></li> <li><code>TidierData.@unnest_wider</code></li> </ul>"},{"location":"reference/#reference-exported-functions","title":"Reference - Exported functions","text":"<p># <code>TidierData.TidierData_set</code> \u2014 Method.</p> <pre><code>TidierData_set(option::AbstractString, value::Bool)\n</code></pre> <p>Set package options.</p> <p>Here are the supported options and what they do:</p> <ul> <li>\"code\": Defaults to <code>false</code>. If set to <code>true</code>, this option displays the DataFrames.jl code generated by the TidierData.jl package. 
It is useful for debugging whether errors are introduced by TidierData.jl's generated code.</li> </ul> <p>Arguments</p> <ul> <li><code>option</code>: \"code\"</li> <li><code>value</code>: <code>true</code> or <code>false</code></li> </ul> <p>source</p> <p># <code>TidierData.across</code> \u2014 Method.</p> <pre><code>across(variable[s], function[s])\n</code></pre> <p>Apply functions to multiple variables. If specifying multiple variables or functions, surround them with parentheses so that they are recognized as a tuple.</p> <p>This function should only be called inside of TidierData.jl macros.</p> <p>Arguments</p> <ul> <li><code>variable[s]</code>: An unquoted variable, or if multiple, an unquoted tuple of variables.</li> <li><code>function[s]</code>: A function, or if multiple, a tuple of functions.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);\n\njulia&gt; @chain df begin\n         @summarize(across(b, minimum))\n       end\n1\u00d71 DataFrame\n Row \u2502 b_minimum \n     \u2502 Int64     \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502         1\n\njulia&gt; @chain df begin\n         @summarize(across(where(is_number), minimum))\n       end\n1\u00d72 DataFrame\n Row \u2502 b_minimum  c_minimum \n     \u2502 Int64      Int64     \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502         1         11\n\njulia&gt; @chain df begin\n         @summarize(across((b,c), (minimum, maximum)))\n       end\n1\u00d74 DataFrame\n Row \u2502 b_minimum  c_minimum  b_maximum  c_maximum \n     \u2502 Int64      Int64      Int64      Int64     
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502         1         11          5         15\n\njulia&gt; @chain df begin\n         @mutate(across((b,c), (minimum, maximum)))\n       end\n5\u00d77 DataFrame\n Row \u2502 a     b      c      b_minimum  c_minimum  b_maximum  c_maximum \n     \u2502 Char  Int64  Int64  Int64      Int64      Int64      Int64     \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11          1         11          5         15\n   2 \u2502 b         2     12          1         11          5         15\n   3 \u2502 c         3     13          1         11          5         15\n   4 \u2502 d         4     14          1         11          5         15\n   5 \u2502 e         5     15          1         11          5         15\n\njulia&gt; @chain df begin\n         @mutate(across((b, starts_with(\"c\")), (minimum, maximum)))\n       end\n5\u00d77 DataFrame\n Row \u2502 a     b      c      b_minimum  c_minimum  b_maximum  c_maximum \n     \u2502 Char  Int64  Int64  Int64      Int64      Int64      Int64     
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11          1         11          5         15\n   2 \u2502 b         2     12          1         11          5         15\n   3 \u2502 c         3     13          1         11          5         15\n   4 \u2502 d         4     14          1         11          5         15\n   5 \u2502 e         5     15          1         11          5         15\n</code></pre> <p>source</p> <p># <code>TidierData.as_float</code> \u2014 Method.</p> <pre><code>as_float(value)\n</code></pre> <p>Convert a number or string to a Float64 data type.</p> <p>This is a useful helper for type conversions. Missing values are propagated.</p> <p>Arguments</p> <ul> <li><code>value</code>: An <code>AbstractString</code>, <code>Number</code>, or <code>missing</code> value.</li> </ul> <p>Examples</p> <pre><code>julia&gt; as_float(1)\n1.0\n\njulia&gt; as_float(\"1.5\")\n1.5\n\njulia&gt; as_float(missing)\nmissing\n</code></pre> <p>source</p> <p># <code>TidierData.as_integer</code> \u2014 Method.</p> <pre><code>as_integer(value)\n</code></pre> <p>Convert a number or string to an Int64 data type.</p> <p>This is a useful helper for type conversions. Missing values are propagated. 
Any values after the decimal point are removed.</p> <p>Arguments</p> <ul> <li><code>value</code>: An <code>AbstractString</code>, <code>Number</code>, or <code>missing</code> value.</li> </ul> <p>Examples</p> <pre><code>julia&gt; as_integer(1)\n1\n\njulia&gt; as_integer(1.5)\n1\n\njulia&gt; as_integer(\"2\")\n2\n\njulia&gt; as_integer(\"2.5\")\n2\n\njulia&gt; as_integer(missing)\nmissing\n</code></pre> <p>source</p> <p># <code>TidierData.as_string</code> \u2014 Method.</p> <pre><code>as_string(value)\n</code></pre> <p>Convert a number or string to a String data type.</p> <p>This is a useful helper for type conversions. Missing values are propagated.</p> <p>Arguments</p> <ul> <li><code>value</code>: An <code>AbstractString</code>, <code>Number</code>, or <code>missing</code> value.</li> </ul> <p>Examples</p> <pre><code>julia&gt; as_string(1)\n\"1\"\n\njulia&gt; as_string(1.5)\n\"1.5\"\n\njulia&gt; as_string(missing)\nmissing\n</code></pre> <p>source</p> <p># <code>TidierData.case_when</code> \u2014 Method.</p> <pre><code>case_when(condition =&gt; return_value)\ncase_when(condition_1 =&gt; return_value_1, condition_2 =&gt; return_value_2, ...)\n</code></pre> <p>Return the corresponding <code>return_value</code> for the first <code>condition</code> that evaluates to <code>true</code>.</p> <p>The most specific condition should be listed first and most general condition should be listed last. If none of the conditions evaluate to <code>true</code>, then a <code>missing</code> value is returned. 
</p> <p>Arguments</p> <ul> <li><code>condition</code>: A condition that evaluates to <code>true</code>, <code>false</code>, or <code>missing</code>.</li> <li><code>return_value</code>: The value to return if the condition is <code>true</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = [1, 2, missing, 4, 5]);\n\njulia&gt; @chain df begin\n         @mutate(b = case_when(a &gt; 4  =&gt;  \"hi\",\n                               a &gt; 2  =&gt;  \"medium\",\n                               a &gt; 0  =&gt;  \"low\"))\n       end\n5\u00d72 DataFrame\n Row \u2502 a        b       \n     \u2502 Int64?   String? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502       1  low\n   2 \u2502       2  low\n   3 \u2502 missing  missing \n   4 \u2502       4  medium\n   5 \u2502       5  hi\n\njulia&gt; @chain df begin\n         @mutate(b = case_when(a &gt; 4  =&gt;  \"hi\",\n                               a &gt; 2  =&gt;  \"medium\",\n                               a &gt; 0  =&gt;  \"low\",\n                               true   =&gt;  \"unknown\"))\n       end\n5\u00d72 DataFrame\n Row \u2502 a        b       \n     \u2502 Int64?   String  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502       1  low\n   2 \u2502       2  low\n   3 \u2502 missing  unknown\n   4 \u2502       4  medium\n   5 \u2502       5  hi\n\njulia&gt; @chain df begin\n         @mutate(b = case_when(a &gt;= 3  =&gt;  3,\n                               true    =&gt;  a))\n       end\n5\u00d72 DataFrame\n Row \u2502 a        b       \n     \u2502 Int64?   Int64?  
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502       1        1\n   2 \u2502       2        2\n   3 \u2502 missing  missing \n   4 \u2502       4        3\n   5 \u2502       5        3\n\njulia&gt; @chain df begin\n         @mutate(b = case_when(a &gt;= 3        =&gt;  3,\n                               ismissing(a)  =&gt;  0,\n                               true          =&gt;  a))\n       end\n5\u00d72 DataFrame\n Row \u2502 a        b     \n     \u2502 Int64?   Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502       1      1\n   2 \u2502       2      2\n   3 \u2502 missing      0\n   4 \u2502       4      3\n   5 \u2502       5      3\n</code></pre> <p>source</p> <p># <code>TidierData.desc</code> \u2014 Method.</p> <pre><code>desc(col)\n</code></pre> <p>Orders the rows of a DataFrame column in descending order when used inside of <code>@arrange()</code>. 
This function should only be called inside of `@arrange()``.</p> <p>Arguments</p> <ul> <li><code>col</code>: An unquoted column name.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = repeat('a':'e', inner = 2), b = 1:10, c = 11:20);\n\njulia&gt; @chain df begin\n         @arrange(a, desc(b))\n       end\n10\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         2     12\n   2 \u2502 a         1     11\n   3 \u2502 b         4     14\n   4 \u2502 b         3     13\n   5 \u2502 c         6     16\n   6 \u2502 c         5     15\n   7 \u2502 d         8     18\n   8 \u2502 d         7     17\n   9 \u2502 e        10     20\n  10 \u2502 e         9     19\n</code></pre> <p>source</p> <p># <code>TidierData.ends_with</code> \u2014 Method.</p> <pre><code>ends_with(suffix)\n</code></pre> <p>Select all columns ending with the <code>suffix</code>.</p> <p>Arguments</p> <ul> <li><code>suffix</code>: A string.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a_1 = 1:5, a_2 = 11:15, b_1 = 21:25);\n\njulia&gt; @chain df begin \n         @select(ends_with(\"1\"))\n       end\n5\u00d72 DataFrame\n Row \u2502 a_1    b_1   \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1     21\n   2 \u2502     2     22\n   3 \u2502     3     23\n   4 \u2502     4     24\n   5 \u2502     5     25\n</code></pre> <p>source</p> <p># <code>TidierData.everything</code> \u2014 Method.</p> <pre><code>everything()\n</code></pre> <p>Select all (remaining) columns.</p> <p>Arguments</p> <ul> <li>None</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a_1 = 1:5, a_2 = 11:15, b_1 = 21:25);\n\njulia&gt; @chain df begin \n         @select(b_1, 
everything())\n       end\n5\u00d73 DataFrame\n Row \u2502 b_1    a_1    a_2   \n     \u2502 Int64  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502    21      1     11\n   2 \u2502    22      2     12\n   3 \u2502    23      3     13\n   4 \u2502    24      4     14\n   5 \u2502    25      5     15\n</code></pre> <p>source</p> <p># <code>TidierData.if_else</code> \u2014 Method.</p> <pre><code>if_else(condition, yes, no, [miss])\n</code></pre> <p>Return the <code>yes</code> value if the <code>condition</code> is <code>true</code> and the <code>no</code> value if the <code>condition</code> is <code>false</code>. If <code>miss</code> is specified, then the provided <code>miss</code> value is returned when the <code>condition</code> contains a <code>missing</code> value. If <code>miss</code> is not specified, then the returned value is an explicit <code>missing</code> value.</p> <p>Arguments</p> <ul> <li><code>condition</code>: A condition that evaluates to <code>true</code>, <code>false</code>, or <code>missing</code>.</li> <li><code>yes</code>: Value to return if the condition is <code>true</code>.</li> <li><code>no</code>: Value to return if the condition is <code>false</code>.</li> <li><code>miss</code>: Optional. Value to return if the condition is <code>missing</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = [1, 2, missing, 4, 5]);\n\njulia&gt; @chain df begin\n         @mutate(b = if_else(a &gt;= 3, \"yes\", \"no\"))\n       end\n5\u00d72 DataFrame\n Row \u2502 a        b       \n     \u2502 Int64?   String? 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502       1  no\n   2 \u2502       2  no\n   3 \u2502 missing  missing \n   4 \u2502       4  yes\n   5 \u2502       5  yes\n\njulia&gt; @chain df begin\n         @mutate(b = if_else(a &gt;= 3, \"yes\", \"no\", \"unknown\"))\n       end\n5\u00d72 DataFrame\n Row \u2502 a        b       \n     \u2502 Int64?   String  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502       1  no\n   2 \u2502       2  no\n   3 \u2502 missing  unknown\n   4 \u2502       4  yes\n   5 \u2502       5  yes\n\njulia&gt; @chain df begin\n         @mutate(b = if_else(a &gt;= 3, 3, a))\n       end\n5\u00d72 DataFrame\n Row \u2502 a        b       \n     \u2502 Int64?   Int64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502       1        1\n   2 \u2502       2        2\n   3 \u2502 missing  missing \n   4 \u2502       4        3\n   5 \u2502       5        3\n\njulia&gt; @chain df begin\n         @mutate(b = if_else(a &gt;= 3, 3, a, 0))\n       end\n5\u00d72 DataFrame\n Row \u2502 a        b     \n     \u2502 Int64?   
Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502       1      1\n   2 \u2502       2      2\n   3 \u2502 missing      0\n   4 \u2502       4      3\n   5 \u2502       5      3\n</code></pre> <p>source</p> <p># <code>TidierData.is_float</code> \u2014 Method.</p> <pre><code>is_float(column::AbstractVector)\n</code></pre> <p>Determine if the given column contains floating-point numbers.</p> <p>Arguments</p> <ul> <li><code>column::AbstractVector</code>: The column whose data type needs to be checked.</li> </ul> <p>Returns</p> <ul> <li><code>Bool</code>: <code>true</code> if the column contains floating-point numbers, <code>false</code> otherwise.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(b = [missing, 2, 3],\n                      c = [missing, 2.2, 34],\n                      d = [missing, missing, \"A\"]);\n\njulia&gt; is_float(df.c)\ntrue\n\njulia&gt; is_float(df.b)\nfalse\n</code></pre> <p>source</p> <p># <code>TidierData.is_integer</code> \u2014 Method.</p> <pre><code>is_integer(column::AbstractVector)\n</code></pre> <p>Determine if the given column contains integers.</p> <p>Arguments</p> <ul> <li><code>column::AbstractVector</code>: The column whose data type needs to be checked.</li> </ul> <p>Returns</p> <ul> <li><code>Bool</code>: <code>true</code> if the column contains integers, <code>false</code> otherwise.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(b = [missing, 2, 3],\n                      c = [missing, 2.2, 34],\n                      d = [missing, missing, \"A\"]);\n\njulia&gt; is_integer(df.b)\ntrue\n\njulia&gt; is_integer(df.d)\nfalse\n</code></pre> <p>source</p> <p># <code>TidierData.is_number</code> \u2014 Method.</p> <pre><code>is_number(column::AbstractVector)\n</code></pre> <p>Determine if the given column contains numbers.</p> <p>Arguments</p> <ul> <li><code>column::AbstractVector</code>: The column 
whose data type needs to be checked.</li> </ul> <p>Returns</p> <ul> <li><code>Bool</code>: <code>true</code> if the column contains numbers, <code>false</code> otherwise.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(b = [missing, 2, 3],\n                      c = [missing, 2.2, 34],\n                      d = [missing, missing, \"A\"]);\n\njulia&gt; is_number(df.b)\ntrue\n\njulia&gt; is_number(df.c)\ntrue\n\njulia&gt; is_number(df.d)\nfalse\n</code></pre> <p>source</p> <p># <code>TidierData.is_string</code> \u2014 Method.</p> <pre><code>is_string(column::AbstractVector)\n</code></pre> <p>Determine if the given column contains strings.</p> <p>Arguments</p> <ul> <li><code>column::AbstractVector</code>: The column whose data type needs to be checked.</li> </ul> <p>Returns</p> <ul> <li><code>Bool</code>: <code>true</code> if the column contains strings, <code>false</code> otherwise.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(b = [missing, 2, 3],\n                      c = [missing, 2.2, 34],\n                      d = [missing, missing, \"A\"]);\n\njulia&gt; is_string(df.d)\ntrue\n\njulia&gt; is_string(df.c)\nfalse\n</code></pre> <p>source</p> <p># <code>TidierData.matches</code> \u2014 Method.</p> <pre><code>matches(pattern, [flags])\n</code></pre> <p>Select all columns matching the <code>pattern</code>.</p> <p>Arguments</p> <ul> <li><code>pattern</code>: A string.</li> <li><code>flags</code>: Optional string containing flags. \"i\" = Do case-insensitive pattern matching. \"m\" = Treat string as multiple lines. \"s\" = Treat string as a single line. \"x\" = Tells the regular expression parser to ignore most whitespace that is neither backslashed nor within a character class. 
You</li> </ul> <p>can use this to break up your regular expression into (slightly) more readable parts.</p> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a_1 = 1:5, a_2 = 11:15, b_1 = 21:25);\n\njulia&gt; @chain df begin \n         @select(matches(\"^a\"))\n       end\n5\u00d72 DataFrame\n Row \u2502 a_1    a_2   \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1     11\n   2 \u2502     2     12\n   3 \u2502     3     13\n   4 \u2502     4     14\n   5 \u2502     5     15\n\njulia&gt; @chain df begin \n         @select(matches(\"1$\"))\n       end\n5\u00d72 DataFrame\n Row \u2502 a_1    b_1   \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1     21\n   2 \u2502     2     22\n   3 \u2502     3     23\n   4 \u2502     4     24\n   5 \u2502     5     25\n\njulia&gt; @chain df begin \n         @select(matches(\"A\", \"i\"))\n       end\n5\u00d72 DataFrame\n Row \u2502 a_1    a_2   \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1     11\n   2 \u2502     2     12\n   3 \u2502     3     13\n   4 \u2502     4     14\n   5 \u2502     5     15\n</code></pre> <p>source</p> <p># <code>TidierData.missing_if</code> \u2014 Method.</p> <pre><code>missing_if(x, value)\n</code></pre> <p>Replace a specific <code>value</code> with <code>missing</code> in <code>x</code>.</p> <p>Arguments</p> <ul> <li><code>x</code>: The input value which can be of any type. If <code>x</code> is already <code>missing</code> or equals <code>value</code>, the function will return <code>missing</code>. 
Otherwise, it returns <code>x</code> unaltered.</li> <li><code>value</code>: The specific value to be checked against.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(\n              a = [1, missing, 3, 4],\n              b = [\"apple\", \"apple\", \"banana\", \"cherry\"]\n            );\n\njulia&gt; @chain df begin\n         @mutate(a = missing_if(a, 4), \n                 b = missing_if(b, \"apple\"))\n       end\n4\u00d72 DataFrame\n Row \u2502 a        b       \n     \u2502 Int64?   String? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502       1  missing \n   2 \u2502 missing  missing \n   3 \u2502       3  banana\n   4 \u2502 missing  cherry\n</code></pre> <p>source</p> <p># <code>TidierData.n</code> \u2014 Method.</p> <pre><code>n()\n</code></pre> <p>Return the number of rows in the DataFrame or in the group if used in the context of a GroupedDataFrame.</p> <p>Arguments</p> <ul> <li>None</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = repeat('a':'e', inner = 2), b = 1:10, c = 11:20);\n\njulia&gt; @chain df begin\n         @summarize(n = n())\n       end\n1\u00d71 DataFrame\n Row \u2502 n     \n     \u2502 Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502    10\n\njulia&gt; @chain df begin\n         @group_by(a)\n         @summarize(n = n())\n       end\n5\u00d72 DataFrame\n Row \u2502 a     n     \n     \u2502 Char  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         2\n   2 \u2502 b         2\n   3 \u2502 c         2\n   4 \u2502 d         2\n   5 \u2502 e         2\n</code></pre> <p>source</p> <p># <code>TidierData.ntile</code> \u2014 Method.</p> <pre><code>ntile(x, n::Integer)\n</code></pre> <p>Break the input vector into <code>n</code> equal-sized buckets.</p> 
<p><code>ntile()</code> is a rough rank that breaks the input vector into <code>n</code> buckets. If <code>length(x)</code> is not an integer multiple of <code>n</code>, the size of the buckets will differ by up to one, with larger buckets coming first.</p> <p>Unlike other ranking functions, <code>ntile()</code> ignores ties: it will create evenly sized buckets even if the same value of <code>x</code> ends up in different buckets.</p> <p>Arguments</p> <ul> <li><code>x</code>: A vector to rank. By default, the smallest values will get the smallest ranks. Missing values will be given rank <code>missing</code>.</li> <li><code>n</code>: Number of groups to bucket into.</li> </ul> <p>Examples</p> <pre><code>julia&gt; x = [5,1,3,2,2, missing]\n6-element Vector{Union{Missing, Int64}}:\n 5\n 1\n 3\n 2\n 2\n  missing\n\njulia&gt; ntile(x, 2)\n6-element Vector{Union{Missing, Int64}}:\n 2\n 1\n 2\n 1\n 1\n  missing\n\njulia&gt; ntile(x, 4)\n6-element Vector{Union{Missing, Int64}}:\n 4\n 1\n 3\n 1\n 2\n  missing\n\njulia&gt; ntile(1:8, 3)\n8-element Vector{Int64}:\n 1\n 1\n 1\n 2\n 2\n 2\n 3\n 3\n\njulia&gt; df = DataFrame(a = 1:8);\n\njulia&gt; @chain df begin\n       @mutate(buckets = ntile(a, 3))\n       end\n8\u00d72 DataFrame\n Row \u2502 a      buckets \n     \u2502 Int64  Int64   \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1        1\n   2 \u2502     2        1\n   3 \u2502     3        1\n   4 \u2502     4        2\n   5 \u2502     5        2\n   6 \u2502     6        2\n   7 \u2502     7        3\n   8 \u2502     8        3\n</code></pre> <p>source</p> <p># <code>TidierData.replace_missing</code> \u2014 Method.</p> <pre><code>replace_missing(x, replacement)\n</code></pre> <p>Replace <code>missing</code> values in <code>x</code> with a specified <code>replacement</code> value.</p> <p>Arguments</p> <ul> <li><code>x</code>: The input value which can be of any type. 
If <code>x</code> is <code>missing</code>, the function will return <code>replacement</code>. Otherwise, it returns <code>x</code> unaltered.</li> <li><code>replacement</code>: The value to replace <code>missing</code> with in <code>x</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(\n              a = [1, missing, 3, 4],\n              b = [4, 5, missing, 8]\n            );\n\njulia&gt; @chain df begin\n         @mutate(a = replace_missing(a, 100),\n                 b = replace_missing(b, 35))\n       end\n4\u00d72 DataFrame\n Row \u2502 a      b     \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1      4\n   2 \u2502   100      5\n   3 \u2502     3     35\n   4 \u2502     4      8\n</code></pre> <p>source</p> <p># <code>TidierData.row_number</code> \u2014 Method.</p> <pre><code>row_number()\n</code></pre> <p>Return each row's number in a DataFrame or in the group if used in the context of a GroupedDataFrame.</p> <p>Arguments</p> <ul> <li>None</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = repeat('a':'e', inner = 2));\n\njulia&gt; @chain df begin\n         @mutate(row_num = row_number())\n       end\n10\u00d72 DataFrame\n Row \u2502 a     row_num \n     \u2502 Char  Int64   \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1\n   2 \u2502 a           2\n   3 \u2502 b           3\n   4 \u2502 b           4\n   5 \u2502 c           5\n   6 \u2502 c           6\n   7 \u2502 d           7\n   8 \u2502 d           8\n   9 \u2502 e           9\n  10 \u2502 e          10\n\njulia&gt; @chain df begin\n         @mutate(row_num = row_number() + 1)\n       end\n10\u00d72 DataFrame\n Row \u2502 a     row_num \n     \u2502 Char  Int64   
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           2\n   2 \u2502 a           3\n   3 \u2502 b           4\n   4 \u2502 b           5\n   5 \u2502 c           6\n   6 \u2502 c           7\n   7 \u2502 d           8\n   8 \u2502 d           9\n   9 \u2502 e          10\n  10 \u2502 e          11\n\njulia&gt; @chain df begin\n         @filter(row_number() &lt;= 5)\n       end\n5\u00d71 DataFrame\n Row \u2502 a    \n     \u2502 Char \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a\n   2 \u2502 a\n   3 \u2502 b\n   4 \u2502 b\n   5 \u2502 c\n</code></pre> <p>source</p> <p># <code>TidierData.starts_with</code> \u2014 Method.</p> <pre><code>starts_with(prefix)\n</code></pre> <p>Select all columns starting with the <code>prefix</code>.</p> <p>Arguments</p> <ul> <li><code>prefix</code>: A string.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a_1 = 1:5, a_2 = 11:15, b_1 = 21:25);\n\njulia&gt; @chain df begin \n         @select(starts_with(\"a\"))\n       end\n5\u00d72 DataFrame\n Row \u2502 a_1    a_2   \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1     11\n   2 \u2502     2     12\n   3 \u2502     3     13\n   4 \u2502     4     14\n   5 \u2502     5     15\n</code></pre> <p>source</p> <p># <code>TidierData.where</code> \u2014 Method.</p> <pre><code>where(function)\n</code></pre> <p>Selects columns on which a function returns <code>true</code> for all values of the column.</p> <p>This function should only be called inside of TidierData.jl macros.</p> <p>Arguments</p> <ul> <li><code>function</code>: A predicate function (one that returns <code>true</code> or <code>false</code>).</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);\n\njulia&gt; @chain df 
begin\n         @select(where(is_number))\n       end\n5\u00d72 DataFrame\n Row \u2502 b      c     \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1     11\n   2 \u2502     2     12\n   3 \u2502     3     13\n   4 \u2502     4     14\n   5 \u2502     5     15\n\njulia&gt; @chain df begin\n         @summarize(across(where(is_number), minimum))\n       end\n1\u00d72 DataFrame\n Row \u2502 b_minimum  c_minimum \n     \u2502 Int64      Int64     \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502         1         11\n\njulia&gt; @chain df begin\n         @mutate(across(where(is_number), minimum))\n       end\n5\u00d75 DataFrame\n Row \u2502 a     b      c      b_minimum  c_minimum \n     \u2502 Char  Int64  Int64  Int64      Int64     \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11          1         11\n   2 \u2502 b         2     12          1         11\n   3 \u2502 c         3     13          1         11\n   4 \u2502 d         4     14          1         11\n   5 \u2502 e         5     15          1         11\n\njulia&gt; df = DataFrame(a = repeat('a':'e', inner = 3),\n                      b = 1:15,\n                      c = 16:30,\n                      d = 31:45);\n\njulia&gt; @chain df begin\n         @group_by(a)\n         @summarize(across(where(is_number), mean))\n       end\n5\u00d74 DataFrame\n Row \u2502 a     b_mean   c_mean   d_mean  \n     \u2502 Char  Float64  Float64  Float64 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         2.0     17.0     32.0\n   2 \u2502 b         5.0     20.0     35.0\n   3 \u2502 c         8.0     23.0     38.0\n   4 \u2502 d        11.0     26.0     41.0\n   5 \u2502 e        14.0     29.0     44.0\n</code></pre> <p>source</p> <p># <code>TidierData.@anti_join</code> \u2014 Macro.</p> <pre><code>@anti_join(df1, df2, [by])\n</code></pre> <p>Perform an anti-join on <code>df1</code> and <code>df2</code> with an optional <code>by</code>.</p> <p>Arguments</p> <ul> <li><code>df1</code>: A DataFrame.</li> <li><code>df2</code>: A DataFrame.</li> <li><code>by</code>: An optional column or tuple of columns. <code>by</code> supports interpolation of individual columns. If <code>by</code> is not supplied, then it will be inferred from shared names of columns between <code>df1</code> and <code>df2</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df1 = DataFrame(a = [\"a\", \"b\"], b = 1:2);\n\njulia&gt; df2 = DataFrame(a = [\"a\", \"c\"], c = 3:4);\n\njulia&gt; @anti_join(df1, df2)\n1\u00d72 DataFrame\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 b           2\n\njulia&gt; @anti_join(df1, df2, a)\n1\u00d72 DataFrame\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 b           2\n\njulia&gt; @anti_join(df1, df2, a = a)\n1\u00d72 DataFrame\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n 
  1 \u2502 b           2\n\njulia&gt; @anti_join(df1, df2, \"a\")\n1\u00d72 DataFrame\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 b           2\n\njulia&gt; @anti_join(df1, df2, \"a\" = \"a\")\n1\u00d72 DataFrame\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 b           2\n</code></pre> <p>source</p> <p># <code>TidierData.@arrange</code> \u2014 Macro.</p> <pre><code>@arrange(df, exprs...)\n</code></pre> <p>Order the rows of a DataFrame by the values of specified columns.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>exprs...</code>: Variables from the input DataFrame. Use <code>desc()</code> to sort in descending order. Multiple variables can be specified, separated by commas.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = repeat('a':'e', inner = 2), b = 1:10, c = 11:20);\n\njulia&gt; @chain df begin\n         @arrange(a)\n       end\n10\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n   2 \u2502 a         2     12\n   3 \u2502 b         3     13\n   4 \u2502 b         4     14\n   5 \u2502 c         5     15\n   6 \u2502 c         6     16\n   7 \u2502 d         7     17\n   8 \u2502 d         8     18\n   9 \u2502 e         9     19\n  10 \u2502 e        10     20\n\njulia&gt; @chain df begin\n         @arrange(a, desc(b))\n       end\n10\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         2     12\n   2 \u2502 a         1     11\n   3 \u2502 b         4     14\n   4 \u2502 b         3     13\n   5 \u2502 c         6     16\n   6 \u2502 c         5     15\n   7 \u2502 d         8     18\n   8 \u2502 d         7     17\n   9 \u2502 e        10     20\n  10 \u2502 e         9     19\n</code></pre> <p>source</p> <p># <code>TidierData.@bind_cols</code> \u2014 Macro.</p> <pre><code>@bind_cols(dfs...)\n</code></pre> <p>Bind many DataFrames into one by column. </p> <p>Arguments</p> <ul> <li><code>dfs...</code>: DataFrames to combine.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df1 = DataFrame(a=1:3, b=1:3);\n\njulia&gt; df2 = DataFrame(a=4:6, b=4:6);\n\njulia&gt; df3 = DataFrame(a=7:9, c=7:9);\n\njulia&gt; @chain df1 begin\n         @bind_cols(df2, df3)\n       end\n3\u00d76 DataFrame\n Row \u2502 a      b      a_1    b_1    a_2    c     \n     \u2502 Int64  Int64  Int64  Int64  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1      1      4      4      7      7\n   2 \u2502     2      2      5      5      8      8\n   3 \u2502     3      3      6      6      9      9\n</code></pre> <p>source</p> <p># <code>TidierData.@bind_rows</code> \u2014 Macro.</p> <pre><code>@bind_rows(dfs..., id)\n</code></pre> <p>Bind many DataFrames into one by row. </p> <p>Columns present in at least one of the provided DataFrames are kept. 
Columns not present in some DataFrames are filled with missing values where necessary.</p> <p>Arguments</p> <ul> <li><code>dfs...</code>: DataFrames to combine.</li> <li><code>id</code>: string DataFrame identifier. When id is supplied, a new column of numeric identifiers is created to link each row to its original DataFrame.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df1 = DataFrame(a=1:3, b=1:3);\n\njulia&gt; df2 = DataFrame(a=4:6, b=4:6);\n\njulia&gt; df3 = DataFrame(a=7:9, c=7:9);\n\njulia&gt; @chain df1 begin\n         @bind_rows(df2)\n       end\n6\u00d72 DataFrame\n Row \u2502 a      b     \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1      1\n   2 \u2502     2      2\n   3 \u2502     3      3\n   4 \u2502     4      4\n   5 \u2502     5      5\n   6 \u2502     6      6\n</code></pre> <p>When columns are not present in some DataFrames, they are filled with missing values.</p> <pre><code>julia&gt; @chain df1 begin\n         @bind_rows(df2, df3)\n       end\n9\u00d73 DataFrame\n Row \u2502 a      b        c       \n     \u2502 Int64  Int64?   Int64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1        1  missing \n   2 \u2502     2        2  missing \n   3 \u2502     3        3  missing \n   4 \u2502     4        4  missing \n   5 \u2502     5        5  missing \n   6 \u2502     6        6  missing \n   7 \u2502     7  missing        7\n   8 \u2502     8  missing        8\n   9 \u2502     9  missing        9\n\njulia&gt; @chain df1 begin\n         @bind_rows(df2, df3, id = \"id\")\n       end\n9\u00d74 DataFrame\n Row \u2502 a      b        c        id    \n     \u2502 Int64  Int64?   Int64?   
Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1        1  missing      1\n   2 \u2502     2        2  missing      1\n   3 \u2502     3        3  missing      1\n   4 \u2502     4        4  missing      2\n   5 \u2502     5        5  missing      2\n   6 \u2502     6        6  missing      2\n   7 \u2502     7  missing        7      3\n   8 \u2502     8  missing        8      3\n   9 \u2502     9  missing        9      3\n</code></pre> <p>source</p> <p># <code>TidierData.@count</code> \u2014 Macro.</p> <pre><code>@count(df, exprs..., [wt], [sort])\n</code></pre> <p>Count the unique values of one or more variables, with an optional weighting.</p> <p><code>@chain df @count(a, b)</code> is roughly equivalent to <code>@chain df @group_by(a, b) @summarize(n = n())</code>. Supply <code>wt</code> to perform weighted counts, switching the summary from <code>n = n()</code> to <code>n = sum(wt)</code>. Note that if grouping columns are provided, the result will be an ungrouped data frame, which is slightly different behavior than R's <code>tidyverse</code>.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame or GroupedDataFrame.</li> <li><code>exprs...</code>: Column names, separated by commas.</li> <li><code>wt</code>: Optional parameter. Used to calculate a sum over the provided <code>wt</code> variable instead of counting the rows.</li> <li><code>sort</code>: Defaults to <code>false</code>. 
Whether the result should be sorted from highest to lowest <code>n</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = vcat(repeat([\"a\"], inner = 3),\n                           repeat([\"b\"], inner = 3),\n                           repeat([\"c\"], inner = 1),\n                           missing),\n                      b = 1:8)\n8\u00d72 DataFrame\n Row \u2502 a        b     \n     \u2502 String?  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a            1\n   2 \u2502 a            2\n   3 \u2502 a            3\n   4 \u2502 b            4\n   5 \u2502 b            5\n   6 \u2502 b            6\n   7 \u2502 c            7\n   8 \u2502 missing      8\n\njulia&gt; @chain df @count()\n1\u00d71 DataFrame\n Row \u2502 n     \n     \u2502 Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     8\n\njulia&gt; @chain df begin\n         @count(a)\n       end\n4\u00d72 DataFrame\n Row \u2502 a        n     \n     \u2502 String?  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a            3\n   2 \u2502 b            3\n   3 \u2502 c            1\n   4 \u2502 missing      1\n\njulia&gt; @chain df begin\n         @count(a, wt = b)\n       end\n4\u00d72 DataFrame\n Row \u2502 a        n     \n     \u2502 String?  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a            6\n   2 \u2502 b           15\n   3 \u2502 c            7\n   4 \u2502 missing      8\n\njulia&gt; @chain df begin\n         @count(a, wt = b, sort = true)\n       end\n4\u00d72 DataFrame\n Row \u2502 a        n     \n     \u2502 String?  
Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 b           15\n   2 \u2502 missing      8\n   3 \u2502 c            7\n   4 \u2502 a            6       \n</code></pre> <p>source</p> <p># <code>TidierData.@distinct</code> \u2014 Macro.</p> <pre><code>distinct(df, exprs...)\n</code></pre> <p>Return distinct rows of a DataFrame.</p> <p>If no columns or expressions are provided, then unique rows across all columns are returned. Otherwise, unique rows are determined based on the columns or expressions provided, and then all columns are returned.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>exprs...</code>: One or more unquoted variable names separated by commas. Variable names         can also be used as their positions in the data, like <code>x:y</code>, to select         a range of variables.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = repeat('a':'e', inner = 2), b = repeat(1:5, 2), c = 11:20);\n\njulia&gt; @chain df @distinct()\n10\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n   2 \u2502 a         2     12\n   3 \u2502 b         3     13\n   4 \u2502 b         4     14\n   5 \u2502 c         5     15\n   6 \u2502 c         1     16\n   7 \u2502 d         2     17\n   8 \u2502 d         3     18\n   9 \u2502 e         4     19\n  10 \u2502 e         5     20\n\njulia&gt; @chain df @distinct(a)\n5\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n   2 \u2502 b         
3     13\n   3 \u2502 c         5     15\n   4 \u2502 d         2     17\n   5 \u2502 e         4     19\n\njulia&gt; @chain df begin\n         @distinct(starts_with(\"a\"))\n       end\n5\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n   2 \u2502 b         3     13\n   3 \u2502 c         5     15\n   4 \u2502 d         2     17\n   5 \u2502 e         4     19\n\njulia&gt; @chain df begin\n         @distinct(a, b)\n       end\n10\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n   2 \u2502 a         2     12\n   3 \u2502 b         3     13\n   4 \u2502 b         4     14\n   5 \u2502 c         5     15\n   6 \u2502 c         1     16\n   7 \u2502 d         2     17\n   8 \u2502 d         3     18\n   9 \u2502 e         4     19\n  10 \u2502 e         5     20\n</code></pre> <p>source</p> <p># <code>TidierData.@drop_missing</code> \u2014 Macro.</p> <pre><code>@drop_missing(df, [cols...])\n</code></pre> <p>Drop all rows with missing values.</p> <p>When called without arguments, <code>@drop_missing()</code> drops all rows with missing values in any column. 
If columns are provided as an optional argument, only missing values from named columns are considered when dropping rows.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame or GroupedDataFrame.</li> <li><code>cols...</code>: An optional column, or multiple columns separated by commas or specified using selection helpers.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(\n              a = [1, 2, missing, 4],\n              b = [1, missing, 3, 4]\n            )\n4\u00d72 DataFrame\n Row \u2502 a        b       \n     \u2502 Int64?   Int64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502       1        1\n   2 \u2502       2  missing \n   3 \u2502 missing        3\n   4 \u2502       4        4\n\njulia&gt; @chain df @drop_missing()\n2\u00d72 DataFrame\n Row \u2502 a      b     \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1      1\n   2 \u2502     4      4\n\njulia&gt; @chain df @drop_missing(a)\n3\u00d72 DataFrame\n Row \u2502 a      b       \n     \u2502 Int64  Int64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1        1\n   2 \u2502     2  missing \n   3 \u2502     4        4\n\njulia&gt; @chain df @drop_missing(a, b)\n2\u00d72 DataFrame\n Row \u2502 a      b     \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1      1\n   2 \u2502     4      4\n\njulia&gt; @chain df @drop_missing(starts_with(\"a\"))\n3\u00d72 DataFrame\n Row \u2502 a      b       \n     \u2502 Int64  Int64?  
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1        1\n   2 \u2502     2  missing \n   3 \u2502     4        4\n</code></pre> <p>source</p> <p># <code>TidierData.@fill_missing</code> \u2014 Macro.</p> <p>@fill_missing(df, [columns...], direction)</p> <p>Fill missing values in a DataFrame <code>df</code> using the specified method.</p> <p>Arguments</p> <ul> <li><code>df</code>: The DataFrame or GroupedDataFrame in which you want to fill missing values.</li> <li><code>columns</code>: (Optional) The columns for which missing values need to be filled, separated by commas. If not provided, the operation is applied to all columns.</li> <li><code>direction</code>: A string containing the method to use for filling missing values. Options include: \"down\" (last observation carried forward) or \"up\" (next observation carried backward).</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(\n          dt1 = [missing, 0.2, missing, missing, 1, missing, 5, 6],\n          dt2 = [0.3, 2, missing, 3, missing, 5, 6,missing],\n          dt3 = [missing, 0.2, missing, missing, 1, missing, 5, 6],\n          dt4 = [0.3, missing, missing, 3, missing, 5, 6, missing],\n          dt5 = ['a', 'b', 'a', 'b', 'a', 'a', 'a', 'b']);\n\njulia&gt; @fill_missing(df, dt2, dt4, \"down\")\n8\u00d75 DataFrame\n Row \u2502 dt1        dt2       dt3        dt4       dt5  \n     \u2502 Float64?   Float64?  Float64?   Float64?  
Char \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 missing         0.3  missing         0.3  a\n   2 \u2502       0.2       2.0        0.2       0.3  b\n   3 \u2502 missing         2.0  missing         0.3  a\n   4 \u2502 missing         3.0  missing         3.0  b\n   5 \u2502       1.0       3.0        1.0       3.0  a\n   6 \u2502 missing         5.0  missing         5.0  a\n   7 \u2502       5.0       6.0        5.0       6.0  a\n   8 \u2502       6.0       6.0        6.0       6.0  b\n\njulia&gt; @chain df begin\n         @fill_missing(\"up\")\n       end\n8\u00d75 DataFrame\n Row \u2502 dt1       dt2        dt3       dt4        dt5  \n     \u2502 Float64?  Float64?   Float64?  Float64?   Char \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      0.2        0.3       0.2        0.3  a\n   2 \u2502      0.2        2.0       0.2        3.0  b\n   3 \u2502      1.0        3.0       1.0        3.0  a\n   4 \u2502      1.0        3.0       1.0        3.0  b\n   5 \u2502      1.0        5.0       1.0        5.0  a\n   6 \u2502      5.0        5.0       5.0        5.0  a\n   7 \u2502      5.0        6.0       5.0        6.0  a\n   8 \u2502      6.0  missing         6.0  missing    b \n\njulia&gt; @chain df begin\n         @group_by(dt5)\n         @fill_missing(dt1, \"up\")\n       end\nGroupedDataFrame with 2 groups based on key: dt5\nFirst Group (5 rows): dt5 = 'a': ASCII/Unicode U+0061 
(category Ll: Letter, lowercase)\n Row \u2502 dt1       dt2        dt3        dt4        dt5  \n     \u2502 Float64?  Float64?   Float64?   Float64?   Char \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      1.0        0.3  missing          0.3  a\n   2 \u2502      1.0  missing    missing    missing    a\n   3 \u2502      1.0  missing          1.0  missing    a\n   4 \u2502      5.0        5.0  missing          5.0  a\n   5 \u2502      5.0        6.0        5.0        6.0  a\n\u22ee\nLast Group (3 rows): dt5 = 'b': ASCII/Unicode U+0062 (category Ll: Letter, lowercase)\n Row \u2502 dt1       dt2        dt3        dt4        dt5  \n     \u2502 Float64?  Float64?   Float64?   Float64?   Char \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      0.2        2.0        0.2  missing    b\n   2 \u2502      6.0        3.0  missing          3.0  b\n   3 \u2502      6.0  missing          6.0  missing    b\n</code></pre> <p>source</p> <p># <code>TidierData.@filter</code> \u2014 Macro.</p> <pre><code>@filter(df, exprs...)\n</code></pre> <p>Subset a DataFrame and return a copy of DataFrame where specified conditions are satisfied.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>exprs...</code>: transformation(s) that produce vectors containing <code>true</code> or <code>false</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 'a':'e', b = 1:5, 
c = 11:15);\n\njulia&gt; @chain df begin\n         @filter(b &gt;= mean(b))\n       end\n3\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 c         3     13\n   2 \u2502 d         4     14\n   3 \u2502 e         5     15\n\njulia&gt; @chain df begin\n         @filter(b &gt;= 3 &amp;&amp; c &gt;= 14)\n       end\n2\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 d         4     14\n   2 \u2502 e         5     15\n\njulia&gt; @chain df begin\n         @filter(b in (1, 3))\n       end\n2\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n   2 \u2502 c         3     13\n</code></pre> <p>source</p> <p># <code>TidierData.@full_join</code> \u2014 Macro.</p> <pre><code>@full_join(df1, df2, [by])\n</code></pre> <p>Perform a full join on <code>df1</code> and <code>df2</code> with an optional <code>by</code>.</p> <p>Arguments</p> <ul> <li><code>df1</code>: A DataFrame.</li> <li><code>df2</code>: A DataFrame.</li> <li><code>by</code>: An optional column or tuple of columns. <code>by</code> supports interpolation of individual columns. 
If <code>by</code> is not supplied, then it will be inferred from shared names of columns between <code>df1</code> and <code>df2</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df1 = DataFrame(a = [\"a\", \"b\"], b = 1:2);\n\njulia&gt; df2 = DataFrame(a = [\"a\", \"c\"], c = 3:4);\n\njulia&gt; @full_join(df1, df2)\n3\u00d73 DataFrame\n Row \u2502 a       b        c       \n     \u2502 String  Int64?   Int64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a             1        3\n   2 \u2502 b             2  missing \n   3 \u2502 c       missing        4\n\njulia&gt; @full_join(df1, df2, a)\n3\u00d73 DataFrame\n Row \u2502 a       b        c       \n     \u2502 String  Int64?   Int64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a             1        3\n   2 \u2502 b             2  missing \n   3 \u2502 c       missing        4\n\njulia&gt; @full_join(df1, df2, a = a)\n3\u00d73 DataFrame\n Row \u2502 a       b        c       \n     \u2502 String  Int64?   Int64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a             1        3\n   2 \u2502 b             2  missing \n   3 \u2502 c       missing        4\n\njulia&gt; @full_join(df1, df2, \"a\")\n3\u00d73 DataFrame\n Row \u2502 a       b        c       \n     \u2502 String  Int64?   Int64?  
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a             1        3\n   2 \u2502 b             2  missing \n   3 \u2502 c       missing        4\n\njulia&gt; @full_join(df1, df2, \"a\" = \"a\")\n3\u00d73 DataFrame\n Row \u2502 a       b        c       \n     \u2502 String  Int64?   Int64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a             1        3\n   2 \u2502 b             2  missing \n   3 \u2502 c       missing        4\n</code></pre> <p>source</p> <p># <code>TidierData.@glimpse</code> \u2014 Macro.</p> <pre><code>@glimpse(df, width = 80)\n</code></pre> <p>Preview a DataFrame (or GroupedDataFrame).</p> <p>The <code>@glimpse</code> macro is used to preview a DataFrame or GroupedDataFrame. Each column is printed on a separate row, along with its data type and first few elements, with the output truncated based on the <code>width</code>.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame or GroupedDataFrame.</li> <li><code>width</code>: The width of the output, measured in the number of characters. 
Defaults to 80.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(\n               a = 1:100, \n               b = 1:100, \n               c = repeat([\"a\"], 100)\n               );\n\njulia&gt; @chain df @glimpse\nRows: 100\nColumns: 3\n.a             Int64          1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,\n.b             Int64          1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,\n.c             String         a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a,\n\njulia&gt; @chain df begin\n       @group_by(a)\n       @glimpse()\n       end\nRows: 100\nColumns: 3\nGroups: a [100]\n.a             Int64          1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,\n.b             Int64          1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,\n.c             String         a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a,\n</code></pre> <p>source</p> <p># <code>TidierData.@group_by</code> \u2014 Macro.</p> <pre><code>@group_by(df, exprs...)\n</code></pre> <p>Return a <code>GroupedDataFrame</code> where operations are performed by groups specified by unique  sets of <code>cols</code>.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>exprs...</code>: DataFrame columns to group by or tidy expressions. 
Can be a single tidy expression or multiple expressions separated by commas.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);\n\njulia&gt; @chain df begin\n         @group_by(a)\n         @summarize(b = mean(b))\n       end\n5\u00d72 DataFrame\n Row \u2502 a     b       \n     \u2502 Char  Float64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1.0\n   2 \u2502 b         2.0\n   3 \u2502 c         3.0\n   4 \u2502 d         4.0\n   5 \u2502 e         5.0  \n\njulia&gt; @chain df begin\n         @group_by(d = uppercase(a))\n         @summarize(b = mean(b))\n       end\n5\u00d72 DataFrame\n Row \u2502 d     b       \n     \u2502 Char  Float64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 A         1.0\n   2 \u2502 B         2.0\n   3 \u2502 C         3.0\n   4 \u2502 D         4.0\n   5 \u2502 E         5.0\n\njulia&gt; @chain df begin\n         @group_by(-(b, c)) # same as `a`\n         @summarize(b = mean(b))\n       end\n5\u00d72 DataFrame\n Row \u2502 a     b       \n     \u2502 Char  Float64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1.0\n   2 \u2502 b         2.0\n   3 \u2502 c         3.0\n   4 \u2502 d         4.0\n   5 \u2502 e         5.0\n\njulia&gt; @chain df begin\n         @group_by(!(b, c)) # same as `a`\n         @summarize(b = mean(b))\n       end\n5\u00d72 DataFrame\n Row \u2502 a     b       \n     \u2502 Char  Float64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1.0\n   2 \u2502 b         2.0\n   3 \u2502 c         3.0\n   4 \u2502 d         4.0\n   5 \u2502 e         5.0\n</code></pre> 
<p>source</p> <p># <code>TidierData.@head</code> \u2014 Macro.</p> <pre><code>   @head(df, value)\n</code></pre> <p>Shows the first n rows of the data frame or of each group in a grouped data frame. </p> <p>Arguments</p> <ul> <li><code>df</code>: The data frame.</li> <li><code>value</code>: number of rows to be returned. Defaults to 6 if left blank.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = vcat(repeat([\"a\"], inner = 4),\n                                  repeat([\"b\"], inner = 4)),\n                             b = 1:8)\n8\u00d72 DataFrame\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1\n   2 \u2502 a           2\n   3 \u2502 a           3\n   4 \u2502 a           4\n   5 \u2502 b           5\n   6 \u2502 b           6\n   7 \u2502 b           7\n   8 \u2502 b           8\n\njulia&gt; @head(df, 3)\n3\u00d72 DataFrame\n Row \u2502 a        b     \n     \u2502 String?  
Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a            1\n   2 \u2502 a            2\n   3 \u2502 a            3\n\njulia&gt; @head(df)\n6\u00d72 DataFrame\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1\n   2 \u2502 a           2\n   3 \u2502 a           3\n   4 \u2502 a           4\n   5 \u2502 b           5\n   6 \u2502 b           6\n\njulia&gt; @chain df begin\n         @group_by a\n         @head 2\n       end\nGroupedDataFrame with 2 groups based on key: a\nFirst Group (2 rows): a = \"a\"\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1\n   2 \u2502 a           2\n\u22ee\nLast Group (2 rows): a = \"b\"\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 b           5\n   2 \u2502 b           6\n</code></pre> <p>source</p> <p># <code>TidierData.@inner_join</code> \u2014 Macro.</p> <pre><code>@inner_join(df1, df2, [by])\n</code></pre> <p>Perform an inner join on <code>df1</code> and <code>df2</code> with an optional <code>by</code>.</p> <p>Arguments</p> <ul> <li><code>df1</code>: A DataFrame.</li> <li><code>df2</code>: A DataFrame.</li> <li><code>by</code>: An optional column or tuple of columns. <code>by</code> supports interpolation of individual columns. 
If <code>by</code> is not supplied, then it will be inferred from shared names of columns between <code>df1</code> and <code>df2</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df1 = DataFrame(a = [\"a\", \"b\"], b = 1:2);\n\njulia&gt; df2 = DataFrame(a = [\"a\", \"c\"], c = 3:4);\n\njulia&gt; @inner_join(df1, df2)\n1\u00d73 DataFrame\n Row \u2502 a       b      c     \n     \u2502 String  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1      3\n\njulia&gt; @inner_join(df1, df2, a)\n1\u00d73 DataFrame\n Row \u2502 a       b      c     \n     \u2502 String  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1      3\n\njulia&gt; @inner_join(df1, df2, a = a)\n1\u00d73 DataFrame\n Row \u2502 a       b      c     \n     \u2502 String  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1      3\n\njulia&gt; @inner_join(df1, df2, \"a\")\n1\u00d73 DataFrame\n Row \u2502 a       b      c     \n     \u2502 String  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1      3\n\njulia&gt; @inner_join(df1, df2, \"a\" = \"a\")\n1\u00d73 DataFrame\n Row \u2502 a       b      c     \n     \u2502 String  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1      3\n</code></pre> <p>source</p> 
<p># <code>TidierData.@left_join</code> \u2014 Macro.</p> <pre><code>@left_join(df1, df2, [by])\n</code></pre> <p>Perform a left join on <code>df1</code> and <code>df2</code> with an optional <code>by</code>.</p> <p>Arguments</p> <ul> <li><code>df1</code>: A DataFrame.</li> <li><code>df2</code>: A DataFrame.</li> <li><code>by</code>: An optional column or tuple of columns. <code>by</code> supports interpolation of individual columns. If <code>by</code> is not supplied, then it will be inferred from shared names of columns between <code>df1</code> and <code>df2</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df1 = DataFrame(a = [\"a\", \"b\"], b = 1:2);\n\njulia&gt; df2 = DataFrame(a = [\"a\", \"c\"], c = 3:4);\n\njulia&gt; @left_join(df1, df2)\n2\u00d73 DataFrame\n Row \u2502 a       b      c       \n     \u2502 String  Int64  Int64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1        3\n   2 \u2502 b           2  missing \n\njulia&gt; @left_join(df1, df2, a)\n2\u00d73 DataFrame\n Row \u2502 a       b      c       \n     \u2502 String  Int64  Int64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1        3\n   2 \u2502 b           2  missing\n\njulia&gt; @left_join(df1, df2, a = a)\n2\u00d73 DataFrame\n Row \u2502 a       b      c       \n     \u2502 String  Int64  Int64?  
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1        3\n   2 \u2502 b           2  missing\n\njulia&gt; @left_join(df1, df2, \"a\")\n2\u00d73 DataFrame\n Row \u2502 a       b      c       \n     \u2502 String  Int64  Int64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1        3\n   2 \u2502 b           2  missing\n\njulia&gt; @left_join(df1, df2, \"a\" = \"a\")\n2\u00d73 DataFrame\n Row \u2502 a       b      c       \n     \u2502 String  Int64  Int64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1        3\n   2 \u2502 b           2  missing\n</code></pre> <p>source</p> <p># <code>TidierData.@mutate</code> \u2014 Macro.</p> <pre><code>@mutate(df, exprs...)\n</code></pre> <p>Create new columns as functions of existing columns. 
The results have the same number of rows as <code>df</code>.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>exprs...</code>: add new columns or replace values of existed columns using        <code>new_variable = values</code> syntax.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);\n\njulia&gt; @chain df begin\n         @mutate(d = b + c,\n                 b_minus_mean_b = b - mean(b))\n       end\n5\u00d75 DataFrame\n Row \u2502 a     b      c      d      b_minus_mean_b \n     \u2502 Char  Int64  Int64  Int64  Float64        \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11     12            -2.0\n   2 \u2502 b         2     12     14            -1.0\n   3 \u2502 c         3     13     16             0.0\n   4 \u2502 d         4     14     18             1.0\n   5 \u2502 e         5     15     20             2.0\n\njulia&gt; @chain df begin\n         @mutate begin\n           d = b + c\n           b_minus_mean_b = b - mean(b)\n         end\n       end\n5\u00d75 DataFrame\n Row \u2502 a     b      c      d      b_minus_mean_b \n     \u2502 Char  Int64  Int64  Int64  Float64        \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11     12            -2.0\n   2 \u2502 b         2     12     14            -1.0\n   3 \u2502 c         3     13     16             0.0\n   4 \u2502 d         4     14     18             1.0\n   5 \u2502 e         5     
15     20             2.0\n\njulia&gt; @chain df begin\n         @mutate(d = b in (1,3))\n       end\n5\u00d74 DataFrame\n Row \u2502 a     b      c      d     \n     \u2502 Char  Int64  Int64  Bool  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11   true\n   2 \u2502 b         2     12  false\n   3 \u2502 c         3     13   true\n   4 \u2502 d         4     14  false\n   5 \u2502 e         5     15  false\n\njulia&gt; @chain df begin\n         @mutate(across((b, c), mean))\n       end\n5\u00d75 DataFrame\n Row \u2502 a     b      c      b_mean   c_mean  \n     \u2502 Char  Int64  Int64  Float64  Float64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11      3.0     13.0\n   2 \u2502 b         2     12      3.0     13.0\n   3 \u2502 c         3     13      3.0     13.0\n   4 \u2502 d         4     14      3.0     13.0\n   5 \u2502 e         5     15      3.0     13.0\n\njulia&gt; @chain df begin\n         @summarize(across(contains(\"b\"), mean))\n       end\n1\u00d71 DataFrame\n Row \u2502 b_mean  \n     \u2502 Float64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     3.0\n\njulia&gt; @chain df begin\n         @summarize(across(-contains(\"a\"), mean))\n       end\n1\u00d72 DataFrame\n Row \u2502 b_mean   c_mean  \n     \u2502 Float64  Float64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     3.0     13.0\n\njulia&gt; @chain df begin\n         
@mutate(across(where(is_number), minimum))\n       end\n5\u00d75 DataFrame\n Row \u2502 a     b      c      b_minimum  c_minimum \n     \u2502 Char  Int64  Int64  Int64      Int64     \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11          1         11\n   2 \u2502 b         2     12          1         11\n   3 \u2502 c         3     13          1         11\n   4 \u2502 d         4     14          1         11\n   5 \u2502 e         5     15          1         11\n</code></pre> <p>source</p> <p># <code>TidierData.@nest</code> \u2014 Macro.</p> <pre><code>@nest(df, new_column = nesting_columns)\n</code></pre> <p>Multiple columns are nested into one or more new columns in a DataFrame. </p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame</li> <li><code>new_column</code>: New column name</li> <li><code>nesting_columns</code>: Columns to be nested into the new_column</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = repeat('a':'e', inner = 3),\n                      b = 1:15,\n                      c_1 = 16:30,\n                      c_2 = 31:45);\n\njulia&gt; @nest(df, data = b:c_2)\n5\u00d72 DataFrame\n Row \u2502 a     data          \n     \u2502 Char  DataFrame     \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a     3\u00d73 DataFrame \n   2 \u2502 b     3\u00d73 DataFrame \n   3 \u2502 c     3\u00d73 DataFrame \n   4 \u2502 d     3\u00d73 DataFrame \n   5 \u2502 e     3\u00d73 DataFrame \n\njulia&gt; @nest(df, data_1 = b, data_2 = starts_with(\"c\"))\n5\u00d73 DataFrame\n Row \u2502 a     data_1         data_2        \n     \u2502 Char  
DataFrame      DataFrame     \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a     3\u00d71 DataFrame  3\u00d72 DataFrame \n   2 \u2502 b     3\u00d71 DataFrame  3\u00d72 DataFrame \n   3 \u2502 c     3\u00d71 DataFrame  3\u00d72 DataFrame \n   4 \u2502 d     3\u00d71 DataFrame  3\u00d72 DataFrame \n   5 \u2502 e     3\u00d71 DataFrame  3\u00d72 DataFrame \n\njulia&gt; @chain df begin\n         @nest(data = b:c_2)\n         @unnest_longer(data)\n       end\n15\u00d72 DataFrame\n Row \u2502 a     data                         \n     \u2502 Char  NamedTup\u2026                    \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a     (b = 1, c_1 = 16, c_2 = 31)\n   2 \u2502 a     (b = 2, c_1 = 17, c_2 = 32)\n   3 \u2502 a     (b = 3, c_1 = 18, c_2 = 33)\n   4 \u2502 b     (b = 4, c_1 = 19, c_2 = 34)\n   5 \u2502 b     (b = 5, c_1 = 20, c_2 = 35)\n   6 \u2502 b     (b = 6, c_1 = 21, c_2 = 36)\n   7 \u2502 c     (b = 7, c_1 = 22, c_2 = 37)\n   8 \u2502 c     (b = 8, c_1 = 23, c_2 = 38)\n   9 \u2502 c     (b = 9, c_1 = 24, c_2 = 39)\n  10 \u2502 d     (b = 10, c_1 = 25, c_2 = 40)\n  11 \u2502 d     (b = 11, c_1 = 26, c_2 = 41)\n  12 \u2502 d     (b = 12, c_1 = 27, c_2 = 42)\n  13 \u2502 e     (b = 13, c_1 = 28, c_2 = 43)\n  14 \u2502 e     (b = 14, c_1 = 29, c_2 = 44)\n  15 \u2502 e     (b = 15, c_1 = 30, c_2 = 45)\n\njulia&gt; @chain df begin\n         @nest(data = b:c_2)\n         @unnest_wider(data)\n       end\n5\u00d74 DataFrame\n Row \u2502 a     b             c_1           c_2          \n     \u2502 Char  Any           Any           
Any          \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a     [1, 2, 3]     [16, 17, 18]  [31, 32, 33]\n   2 \u2502 b     [4, 5, 6]     [19, 20, 21]  [34, 35, 36]\n   3 \u2502 c     [7, 8, 9]     [22, 23, 24]  [37, 38, 39]\n   4 \u2502 d     [10, 11, 12]  [25, 26, 27]  [40, 41, 42]\n   5 \u2502 e     [13, 14, 15]  [28, 29, 30]  [43, 44, 45]\n\njulia&gt; @chain df begin\n         @nest(data = -a)\n         @unnest_wider(data) # wider first\n         @unnest_longer(-a)  # then longer\n       end\n15\u00d74 DataFrame\n Row \u2502 a     b      c_1    c_2   \n     \u2502 Char  Int64  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     16     31\n   2 \u2502 a         2     17     32\n   3 \u2502 a         3     18     33\n   4 \u2502 b         4     19     34\n   5 \u2502 b         5     20     35\n   6 \u2502 b         6     21     36\n   7 \u2502 c         7     22     37\n   8 \u2502 c         8     23     38\n   9 \u2502 c         9     24     39\n  10 \u2502 d        10     25     40\n  11 \u2502 d        11     26     41\n  12 \u2502 d        12     27     42\n  13 \u2502 e        13     28     43\n  14 \u2502 e        14     29     44\n  15 \u2502 e        15     30     45\n\njulia&gt; @chain df begin\n         @nest(data = -a)\n         @unnest_longer(data) # longer first\n         @unnest_wider(-a)    # then wider\n       end\n15\u00d74 DataFrame\n Row \u2502 a     b      c_2    c_1   \n     \u2502 Char  Int64  Int64  Int64 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     31     16\n   2 \u2502 a         2     32     17\n   3 \u2502 a         3     33     18\n   4 \u2502 b         4     34     19\n   5 \u2502 b         5     35     20\n   6 \u2502 b         6     36     21\n   7 \u2502 c         7     37     22\n   8 \u2502 c         8     38     23\n   9 \u2502 c         9     39     24\n  10 \u2502 d        10     40     25\n  11 \u2502 d        11     41     26\n  12 \u2502 d        12     42     27\n  13 \u2502 e        13     43     28\n  14 \u2502 e        14     44     29\n  15 \u2502 e        15     45     30\n</code></pre> <p>source</p> <p># <code>TidierData.@pivot_longer</code> \u2014 Macro.</p> <p>@pivotlonger(df, cols, [namesto], [values_to])</p> <p>Reshapes the DataFrame to make it longer, increasing the number of rows and reducing the number of columns.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>cols</code>: Columns to pivot into longer format. Multiple columns can be selected but providing tuples of columns is not yet supported.</li> <li><code>names_to</code>: Optional, defaults to <code>variable</code>. The name of the newly created column whose values will contain the input DataFrame's column names.</li> <li><code>values_to</code>: Optional, defaults to <code>value</code>. 
The name of the newly created column containing the input DataFrame's cell values.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df_wide = DataFrame(id = [1, 2], A = [1, 3], B = [2, 4]);\n\njulia&gt; @pivot_longer(df_wide, A:B)\n4\u00d73 DataFrame\n Row \u2502 id     variable  value \n     \u2502 Int64  String    Int64\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1  A             1\n   2 \u2502     2  A             3\n   3 \u2502     1  B             2\n   4 \u2502     2  B             4\n\njulia&gt; @pivot_longer(df_wide, -id)\n4\u00d73 DataFrame\n Row \u2502 id     variable  value \n     \u2502 Int64  String    Int64\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1  A             1\n   2 \u2502     2  A             3\n   3 \u2502     1  B             2\n   4 \u2502     2  B             4\n\njulia&gt; @pivot_longer(df_wide, A:B, names_to = \"letter\", values_to = \"number\")\n4\u00d73 DataFrame\n Row \u2502 id     letter  number \n     \u2502 Int64  String  Int64\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1  A            1\n   2 \u2502     2  A            3\n   3 \u2502     1  B            2\n   4 \u2502     2  B            4\n\njulia&gt; @pivot_longer(df_wide, A:B, names_to = letter, values_to = number)\n4\u00d73 DataFrame\n Row \u2502 id     letter  number \n     \u2502 Int64  String  Int64\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1  A            1\n   2 \u2502    
 2  A            3\n   3 \u2502     1  B            2\n   4 \u2502     2  B            4\n\njulia&gt; @pivot_longer(df_wide, A:B, names_to = \"letter\")\n4\u00d73 DataFrame\n Row \u2502 id     letter  value \n     \u2502 Int64  String  Int64\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1  A           1\n   2 \u2502     2  A           3\n   3 \u2502     1  B           2\n   4 \u2502     2  B           4\n</code></pre> <p>source</p> <p># <code>TidierData.@pivot_wider</code> \u2014 Macro.</p> <p>@pivotwider(df, namesfrom, valuesfrom[, valuesfill])</p> <p>Reshapes the DataFrame to make it wider, increasing the number of columns and reducing the number of rows.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>names_from</code>: The name of the column to get the name of the output columns from.</li> <li><code>values_from</code>: The name of the column to get the cell values from.</li> <li><code>values_fill</code>: The value to replace a missing name/value combination (default is <code>missing</code>)</li> </ul> <p>Examples</p> <pre><code>julia&gt; df_long = DataFrame(id = [1, 1, 2, 2],\n                           variable = [\"A\", \"B\", \"A\", \"B\"],\n                           value = [1, 2, 3, 4]);\n\njulia&gt; df_long_missing = DataFrame(id = [1, 1, 2],\n                           variable = [\"A\", \"B\", \"B\"],\n                           value = [1, 2, 4]);\n\njulia&gt; @pivot_wider(df_long, names_from = variable, values_from = value)\n2\u00d73 DataFrame\n Row \u2502 id     A       B      \n     \u2502 Int64  Int64?  
Int64?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1       1       2\n   2 \u2502     2       3       4\n\njulia&gt; @pivot_wider(df_long, names_from = \"variable\", values_from = \"value\")\n2\u00d73 DataFrame\n Row \u2502 id     A       B      \n     \u2502 Int64  Int64?  Int64?\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1       1       2\n   2 \u2502     2       3       4\n\njulia&gt; @pivot_wider(df_long_missing, names_from = variable, values_from = value, values_fill = 0)\n2\u00d73 DataFrame\n Row \u2502 id     A      B     \n     \u2502 Int64  Int64  Int64\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1      1      2\n   2 \u2502     2      0      4\n</code></pre> <p>source</p> <p># <code>TidierData.@pull</code> \u2014 Macro.</p> <pre><code>@pull(df, column)\n</code></pre> <p>Pull (or extract) a column as a vector.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>column</code>: A single column, referred to either by its name or number.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);\n\njulia&gt; @chain df @pull(a)\n5-element Vector{Char}:\n 'a': ASCII/Unicode U+0061 (category Ll: Letter, lowercase)\n 'b': ASCII/Unicode U+0062 (category Ll: Letter, lowercase)\n 'c': ASCII/Unicode U+0063 (category Ll: Letter, lowercase)\n 'd': ASCII/Unicode U+0064 (category Ll: Letter, lowercase)\n 'e': ASCII/Unicode U+0065 (category Ll: Letter, lowercase)\n\njulia&gt; @chain df @pull(2)\n5-element Vector{Int64}:\n 1\n 2\n 3\n 4\n 5\n</code></pre> <p>source</p> <p># 
<code>TidierData.@relocate</code> \u2014 Macro.</p> <pre><code>@relocate(df, columns, before = nothing, after = nothing)\n</code></pre> <p>Rearranges the columns of a data frame. This function allows for moving specified columns to a new position within the data frame, either before or after a given target column. The <code>columns</code>, <code>before</code>, and <code>after</code> arguments all accept tidy selection functions. Only one of <code>before</code> or <code>after</code> should be specified. If neither are specified, the selected columns will be moved to the beginning of the data frame.</p> <p>Arguments</p> <ul> <li><code>df</code>: The data frame.</li> <li><code>columns</code>: Column or columns to to be moved.</li> <li><code>before</code>: (Optional) Column or columns before which the specified columns will be moved. If not provided or <code>nothing</code>, this argument is ignored.</li> <li><code>after</code>: (Optional) Column or columns after which the specified columns will be moved. 
If not provided or <code>nothing</code>, this argument is ignored.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(A = 1:5, B = 6:10, C = [\"A\", \"b\", \"C\", \"D\", \"E\"], D = ['A', 'B','A', 'B','C'],\n                      E = 1:5, F = [\"A\", \"b\", \"C\", \"D\", \"E\"]);\n\njulia&gt; @relocate(df, where(is_string), before = where(is_integer))\n5\u00d76 DataFrame\n Row \u2502 C       F       A      B      E      D    \n     \u2502 String  String  Int64  Int64  Int64  Char \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 A       A           1      6      1  A\n   2 \u2502 b       b           2      7      2  B\n   3 \u2502 C       C           3      8      3  A\n   4 \u2502 D       D           4      9      4  B\n   5 \u2502 E       E           5     10      5  C\n\n\njulia&gt; @relocate(df, B, C, D, after = E)\n5\u00d76 DataFrame\n Row \u2502 A      E      B      C       D     F      \n     \u2502 Int64  Int64  Int64  String  Char  String \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1      1      6  A       A     A\n   2 \u2502     2      2      7  b       B     b\n   3 \u2502     3      3      8  C       A     C\n   4 \u2502     4      4      9  D       B     D\n   5 \u2502     5      5     10  E       C     E\n\njulia&gt; @relocate(df, B, C, D, after = starts_with(\"E\"))\n5\u00d76 DataFrame\n Row \u2502 A      E      B      C       D     F      \n     \u2502 Int64  Int64  Int64  String  Char  String 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1      1      6  A       A     A\n   2 \u2502     2      2      7  b       B     b\n   3 \u2502     3      3      8  C       A     C\n   4 \u2502     4      4      9  D       B     D\n   5 \u2502     5      5     10  E       C     E\n\njulia&gt; @relocate(df, B:C) # bring columns to the front\n5\u00d76 DataFrame\n Row \u2502 B      C       A      D     E      F      \n     \u2502 Int64  String  Int64  Char  Int64  String \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     6  A           1  A         1  A\n   2 \u2502     7  b           2  B         2  b\n   3 \u2502     8  C           3  A         3  C\n   4 \u2502     9  D           4  B         4  D\n   5 \u2502    10  E           5  C         5  E\n</code></pre> <p>source</p> <p># <code>TidierData.@rename</code> \u2014 Macro.</p> <pre><code>@rename(df, exprs...)\n</code></pre> <p>Change the names of individual column names in a DataFrame. 
Users can also use <code>@select()</code> to rename and select columns.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>exprs...</code>: Use <code>new_name = old_name</code> syntax to rename selected columns.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);\n\njulia&gt; @chain df begin\n         @rename(d = b, e = c)\n       end\n5\u00d73 DataFrame\n Row \u2502 a     d      e     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n   2 \u2502 b         2     12\n   3 \u2502 c         3     13\n   4 \u2502 d         4     14\n   5 \u2502 e         5     15\n</code></pre> <p>source</p> <p># <code>TidierData.@rename_with</code> \u2014 Macro.</p> <pre><code> @rename_with(df, fn, exprs...)\n</code></pre> <p>Renames the chosen column names using a function</p> <p>Arguments</p> <ul> <li><code>df</code>: a DataFrame</li> <li><code>fn</code>: desired function to (such as strremoveall from TidierStrings)</li> <li><code>exprs</code>: One or more unquoted variable names separated by commas. Variable names</li> </ul> <p>can also be used as their positions in the data, like <code>x:y</code>, to select  a range of variables. Variables names can also be chosen with starts with. 
Defaults to all columns if empty.</p> <p>Examples</p> <pre><code>julia&gt; function str_remove_all(column, pattern::String)\n         if ismissing(column)\n             return column\n         end\n         patterns = split(pattern, '|')\n         for p in patterns\n             column = replace(column, strip(p) =&gt; \"\")\n         end\n         return column\n       end;\n\njulia&gt; df = DataFrame(\n              term_a = [\"apple\", \"banana\", \"cherry\"],\n              document_a = [\"doc_1\", \"doc2\", \"doc3\"],\n              _n_ = [1, 2, 3]\n            ); \n\njulia&gt; @rename_with(df, str -&gt; str_remove_all(str, \"_a\"), !term_a)\n3\u00d73 DataFrame\n Row \u2502 term_a  document  _n_   \n     \u2502 String  String    Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 apple   doc_1         1\n   2 \u2502 banana  doc2          2\n   3 \u2502 cherry  doc3          3\n</code></pre> <p>source</p> <p># <code>TidierData.@right_join</code> \u2014 Macro.</p> <pre><code>@right_join(df1, df2, [by])\n</code></pre> <p>Perform a right join on <code>df1</code> and <code>df2</code> with an optional <code>by</code>.</p> <p>Arguments</p> <ul> <li><code>df1</code>: A DataFrame.</li> <li><code>df2</code>: A DataFrame.</li> <li><code>by</code>: An optional column or tuple of columns. <code>by</code> supports interpolation of individual columns. If <code>by</code> is not supplied, then it will be inferred from shared names of columns between <code>df1</code> and <code>df2</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df1 = DataFrame(a = [\"a\", \"b\"], b = 1:2);\n\njulia&gt; df2 = DataFrame(a = [\"a\", \"c\"], c = 3:4);\n\njulia&gt; @right_join(df1, df2)\n2\u00d73 DataFrame\n Row \u2502 a       b        c     \n     \u2502 String  Int64?   
Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a             1      3\n   2 \u2502 c       missing      4\n\njulia&gt; @right_join(df1, df2, a)\n2\u00d73 DataFrame\n Row \u2502 a       b        c     \n     \u2502 String  Int64?   Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a             1      3\n   2 \u2502 c       missing      4\n\njulia&gt; @right_join(df1, df2, a = a)\n2\u00d73 DataFrame\n Row \u2502 a       b        c     \n     \u2502 String  Int64?   Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a             1      3\n   2 \u2502 c       missing      4\n\njulia&gt; @right_join(df1, df2, \"a\")\n2\u00d73 DataFrame\n Row \u2502 a       b        c     \n     \u2502 String  Int64?   Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a             1      3\n   2 \u2502 c       missing      4\n\njulia&gt; @right_join(df1, df2, \"a\" = \"a\")\n2\u00d73 DataFrame\n Row \u2502 a       b        c     \n     \u2502 String  Int64?   
Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a             1      3\n   2 \u2502 c       missing      4\n</code></pre> <p>source</p> <p># <code>TidierData.@select</code> \u2014 Macro.</p> <pre><code>@select(df, exprs...)\n</code></pre> <p>Select variables in a DataFrame.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>exprs...</code>: One or more unquoted variable names separated by commas. Variable names         can also be used as their positions in the data, like <code>x:y</code>, to select         a range of variables.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);\n\njulia&gt; @chain df @select(a, b, c)\n5\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n   2 \u2502 b         2     12\n   3 \u2502 c         3     13\n   4 \u2502 d         4     14\n   5 \u2502 e         5     15\n\njulia&gt; @chain df @select(a:b)\n5\u00d72 DataFrame\n Row \u2502 a     b     \n     \u2502 Char  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1\n   2 \u2502 b         2\n   3 \u2502 c         3\n   4 \u2502 d         4\n   5 \u2502 e         5\n\njulia&gt; @chain df @select(1:2)\n5\u00d72 DataFrame\n Row \u2502 a     b     \n     \u2502 Char  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1\n   2 \u2502 b         2\n   3 \u2502 c         3\n   4 \u2502 d         4\n   5 \u2502 e         5\n\njulia&gt; @chain df 
@select(-(a:b))\n5\u00d71 DataFrame\n Row \u2502 c     \n     \u2502 Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502    11\n   2 \u2502    12\n   3 \u2502    13\n   4 \u2502    14\n   5 \u2502    15\n\njulia&gt; @chain df @select(!(a:b))\n5\u00d71 DataFrame\n Row \u2502 c     \n     \u2502 Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502    11\n   2 \u2502    12\n   3 \u2502    13\n   4 \u2502    14\n   5 \u2502    15\n\njulia&gt; @chain df @select(-(a, b))\n5\u00d71 DataFrame\n Row \u2502 c     \n     \u2502 Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502    11\n   2 \u2502    12\n   3 \u2502    13\n   4 \u2502    14\n   5 \u2502    15\n\njulia&gt; @chain df @select(!(a, b))\n5\u00d71 DataFrame\n Row \u2502 c     \n     \u2502 Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502    11\n   2 \u2502    12\n   3 \u2502    13\n   4 \u2502    14\n   5 \u2502    15\n\njulia&gt; @chain df begin\n         @select(contains(\"b\"), starts_with(\"c\"))\n       end\n5\u00d72 DataFrame\n Row \u2502 b      c     \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1     11\n   2 \u2502     2     12\n   3 \u2502     3     13\n   4 \u2502     4     14\n   5 \u2502     5     15\n\njulia&gt; @chain df @select(-(1:2))\n5\u00d71 DataFrame\n Row \u2502 c     \n     \u2502 Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502    11\n   2 \u2502    12\n   3 \u2502    13\n   4 \u2502    14\n   5 \u2502    15\n\njulia&gt; @chain df @select(!(1:2))\n5\u00d71 DataFrame\n Row \u2502 c     \n     \u2502 Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502    11\n   2 \u2502    12\n   3 
\u2502    13\n   4 \u2502    14\n   5 \u2502    15\n\njulia&gt; @chain df @select(-c)\n5\u00d72 DataFrame\n Row \u2502 a     b     \n     \u2502 Char  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1\n   2 \u2502 b         2\n   3 \u2502 c         3\n   4 \u2502 d         4\n   5 \u2502 e         5\n\njulia&gt; @chain df begin\n         @select(-contains(\"a\"))\n       end\n5\u00d72 DataFrame\n Row \u2502 b      c     \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1     11\n   2 \u2502     2     12\n   3 \u2502     3     13\n   4 \u2502     4     14\n   5 \u2502     5     15\n\njulia&gt; @chain df begin\n         @select(!contains(\"a\"))\n       end\n5\u00d72 DataFrame\n Row \u2502 b      c     \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1     11\n   2 \u2502     2     12\n   3 \u2502     3     13\n   4 \u2502     4     14\n   5 \u2502     5     15\n\njulia&gt; @chain df begin\n         @select(where(is_number))\n       end\n5\u00d72 DataFrame\n Row \u2502 b      c     \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1     11\n   2 \u2502     2     12\n   3 \u2502     3     13\n   4 \u2502     4     14\n   5 \u2502     5     15\n</code></pre> <p>source</p> <p># <code>TidierData.@semi_join</code> \u2014 Macro.</p> <pre><code>@semi_join(df1, df2, [by])\n</code></pre> <p>Perform an semi-join on <code>df1</code> and <code>df2</code> with an optional <code>by</code>.</p> <p>Arguments</p> <ul> <li><code>df1</code>: A DataFrame.</li> <li><code>df2</code>: A DataFrame.</li> <li><code>by</code>: An optional column or 
tuple of columns. <code>by</code> supports interpolation of individual columns. If <code>by</code> is not supplied, then it will be inferred from shared names of columns between <code>df1</code> and <code>df2</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df1 = DataFrame(a = [\"a\", \"b\"], b = 1:2);\n\njulia&gt; df2 = DataFrame(a = [\"a\", \"c\"], c = 3:4);\n\njulia&gt; @semi_join(df1, df2)\n1\u00d72 DataFrame\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1\n\njulia&gt; @semi_join(df1, df2, a)\n1\u00d72 DataFrame\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1\n\njulia&gt; @semi_join(df1, df2, a = a)\n1\u00d72 DataFrame\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1\n\njulia&gt; @semi_join(df1, df2, \"a\")\n1\u00d72 DataFrame\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1\n\njulia&gt; @semi_join(df1, df2, \"a\" = \"a\")\n1\u00d72 DataFrame\n Row \u2502 a       b     \n     \u2502 String  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a           1\n</code></pre> <p>source</p> <p># <code>TidierData.@separate</code> \u2014 Macro.</p> <p>@separate(df, from, into, sep, extra = \"merge\")</p> <p>Separate a string column into multiple new columns based on a specified delimiter.</p> <p>Arguments</p> <ul> 
<li><code>df</code>: A DataFrame</li> <li><code>from</code>: Column that will be split</li> <li><code>into</code>: New column names, supports [] or ()</li> <li><code>sep</code>: the string or character on which to split</li> <li><code>extra</code>: \"merge\", \"warn\" and \"drop\" . If not enough columns are provided, extra determines whether additional entries will be merged into the final one or dropped. \"warn\" generates a warning message for dropped values.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = [\"1-1\", \"2-2\", \"3-3-3\"]);\n\njulia&gt; @separate(df, a, [b, c, d], \"-\")\n3\u00d73 DataFrame\n Row \u2502 b          c          d          \n     \u2502 SubStrin\u2026  SubStrin\u2026  SubStrin\u2026? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 1          1          missing    \n   2 \u2502 2          2          missing    \n   3 \u2502 3          3          3\n\njulia&gt; @chain df begin\n         @separate(a, (b, c, d), \"-\")\n       end\n3\u00d73 DataFrame\n Row \u2502 b          c          d          \n     \u2502 SubStrin\u2026  SubStrin\u2026  SubStrin\u2026? 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 1          1          missing    \n   2 \u2502 2          2          missing    \n   3 \u2502 3          3          3\n\njulia&gt; @separate(df, a, (b, c), \"-\")\n3\u00d72 DataFrame\n Row \u2502 b          c      \n     \u2502 SubStrin\u2026  String \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 1          1\n   2 \u2502 2          2\n   3 \u2502 3          3-3\n\njulia&gt; @chain df begin\n         @separate(a, (b, c), \"-\", extra = \"drop\")\n       end\n3\u00d72 DataFrame\n Row \u2502 b          c         \n     \u2502 SubStrin\u2026  SubStrin\u2026 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 1          1\n   2 \u2502 2          2\n   3 \u2502 3          3\n</code></pre> <p>source</p> <p># <code>TidierData.@separate_rows</code> \u2014 Macro.</p> <pre><code>separate_rows(df, columns..., sep)\n</code></pre> <p>Split the contents of specified columns in a DataFrame into multiple rows based on a given delimiter.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame</li> <li><code>columns</code>: A column or multiple columns to be split. 
Can be a mix of integers and column names.</li> <li><code>sep</code>: The string or character or regular expression used to split the column values.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 1:3,\n                      b = [\"a\", \"aa;bb;cc\", \"dd;ee\"],\n                      c = [\"1\", \"2;3;4\", \"5;6\"],\n                      d = [\"7\", \"8;9;10\", \"11;12\"])\n3\u00d74 DataFrame\n Row \u2502 a      b         c       d      \n     \u2502 Int64  String    String  String \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1  a         1       7\n   2 \u2502     2  aa;bb;cc  2;3;4   8;9;10\n   3 \u2502     3  dd;ee     5;6     11;12\n\njulia&gt; @separate_rows(df, 2, 4, \";\")\n6\u00d74 DataFrame\n Row \u2502 a      b          c       d         \n     \u2502 Int64  SubStrin\u2026  String  SubStrin\u2026 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1  a          1       7\n   2 \u2502     2  aa         2;3;4   8\n   3 \u2502     2  bb         2;3;4   9\n   4 \u2502     2  cc         2;3;4   10\n   5 \u2502     3  dd         5;6     11\n   6 \u2502     3  ee         5;6     12\n\njulia&gt; @separate_rows(df, b:d, \";\")\n6\u00d74 DataFrame\n Row \u2502 a      b          c          d         \n     \u2502 Int64  SubStrin\u2026  SubStrin\u2026  SubStrin\u2026 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1  a          1          7\n   2 \u2502     2  aa         2          8\n   3 \u2502     2  bb         3          9\n   4 \u2502     2  cc         4          10\n   5 \u2502     3  dd         5          11\n   6 \u2502     3  ee         6          12\n</code></pre> <p>source</p> <p># <code>TidierData.@slice</code> \u2014 Macro.</p> <pre><code>@slice(df, exprs...)\n</code></pre> <p>Select, remove or duplicate rows by indexing their integer positions.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>exprs...</code>: integer row values. Use positive values to keep the rows, or negative values to drop. Values provided must be either all positive or all negative, and they must be within the range of DataFrames' row numbers.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = repeat('a':'c', inner = 3), b = 1:9, c = 11:19);\n\njulia&gt; @chain df @slice(1:5)\n5\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n   2 \u2502 a         2     12\n   3 \u2502 a         3     13\n   4 \u2502 b         4     14\n   5 \u2502 b         5     15\n\njulia&gt; @chain df @slice(-(1:2))\n7\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         3     13\n   2 \u2502 b         4     14\n   3 \u2502 b         5     15\n   4 \u2502 b   
      6     16\n   5 \u2502 c         7     17\n   6 \u2502 c         8     18\n   7 \u2502 c         9     19\n\njulia&gt; @chain df begin\n         @group_by(a)\n         @slice(1)\n         @ungroup\n       end\n3\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n   2 \u2502 b         4     14\n   3 \u2502 c         7     17\n\njulia&gt; @chain df begin\n         @group_by(a)\n         @slice(n())\n         @ungroup\n       end\n3\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         3     13\n   2 \u2502 b         6     16\n   3 \u2502 c         9     19\n\njulia&gt; @chain df begin\n         @group_by(a)\n         @slice(-n())\n         @ungroup\n       end\n6\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n   2 \u2502 a         2     12\n   3 \u2502 b         4     14\n   4 \u2502 b         5     15\n   5 \u2502 c         7     17\n   6 \u2502 c         8     18\n\njulia&gt; @chain df begin\n         @group_by(a)\n         @slice(-(2:n()))\n         @ungroup\n       end\n3\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n   2 \u2502 b         4     14\n   3 \u2502 c         7     
17\n</code></pre> <p>source</p> <p># <code>TidierData.@slice_head</code> \u2014 Macro.</p> <pre><code>@slice_head(df; n, prop)\n</code></pre> <p>Retrieve rows from the beginning of a DataFrame or GroupedDataFrame.</p> <p>Arguments</p> <ul> <li><code>df</code>: The source data frame or grouped data frame from which to slice rows.</li> <li><code>prop</code>: The proportion of rows to slice.</li> <li><code>n</code>: An optional integer argument to specify the number of rows at the beginning of the dataframe to retrieve. Defaults to 1.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(\n           a = [missing, 0.2, missing, missing, 1, missing, 5, 6],\n           b = [0.3, 2, missing, 0.3, 6, 5, 7, 7],\n           c = [0.2, 0.2, 0.2, missing, 1, missing, 5, 6]);\n\njulia&gt; @chain df begin\n         @slice_head(n = 3)\n       end \n3\u00d73 DataFrame\n Row \u2502 a          b          c        \n     \u2502 Float64?   Float64?   Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 missing          0.3       0.2\n   2 \u2502       0.2        2.0       0.2\n   3 \u2502 missing    missing         0.2\n\njulia&gt; @chain df begin\n         @slice_head(prop = 0.25)\n       end \n2\u00d73 DataFrame\n Row \u2502 a          b         c        \n     \u2502 Float64?   Float64?  Float64? 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 missing         0.3       0.2\n   2 \u2502       0.2       2.0       0.2\n</code></pre> <p>source</p> <p># <code>TidierData.@slice_max</code> \u2014 Macro.</p> <pre><code>@slice_max(df, column; with_ties = true, n, prop, missing_rm = true)\n</code></pre> <p>Retrieve rows with the maximum value(s) from the specified column of a DataFrame or GroupedDataFrame.</p> <p>Arguments</p> <ul> <li><code>df</code>: The source data frame or grouped data frame from which to slice rows.</li> <li><code>column</code>: The column for which to slice the maximum values.</li> <li><code>with_ties</code>: Whether or not all ties will be shown, defaults to true. When false it will only show the first row.</li> <li><code>prop</code>: The proportion of rows to slice.</li> <li><code>n</code>: An optional integer argument to specify the number of maximum rows to retrieve. If with_ties = true, and the ties &gt; n, n will be overridden.</li> <li><code>missing_rm</code>: Defaults to true, skips the missing values when determining the proportion of the dataframe to slice.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(\n           a = [missing, 0.2, missing, missing, 1, missing, 5, 6],\n           b = [0.3, 2, missing, 3, 6, 5, 7, 7],\n           c = [0.2, 0.2, 0.2, missing, 1, missing, 5, 6]);\n\njulia&gt; @chain df begin\n         @slice_max(b)\n       end \n2\u00d73 DataFrame\n Row \u2502 a         b         c        \n     \u2502 Float64?  Float64?  Float64? 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      5.0       7.0       5.0\n   2 \u2502      6.0       7.0       6.0\n\njulia&gt; @chain df begin\n         @slice_max(b, with_ties = false)\n       end \n1\u00d73 DataFrame\n Row \u2502 a         b         c        \n     \u2502 Float64?  Float64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      5.0       7.0       5.0\n\njulia&gt; @chain df begin\n         @slice_max(b, n = 3)\n       end \n3\u00d73 DataFrame\n Row \u2502 a         b         c        \n     \u2502 Float64?  Float64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      5.0       7.0       5.0\n   2 \u2502      6.0       7.0       6.0\n   3 \u2502      1.0       6.0       1.0\n\njulia&gt; @chain df begin\n         @slice_max(b, prop = 0.5, missing_rm = true)\n       end\n3\u00d73 DataFrame\n Row \u2502 a         b         c        \n     \u2502 Float64?  Float64?  Float64? 
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      5.0       7.0       5.0\n   2 \u2502      6.0       7.0       6.0\n   3 \u2502      1.0       6.0       1.0\n</code></pre> <p>source</p> <p># <code>TidierData.@slice_min</code> \u2014 Macro.</p> <pre><code>@slice_min(df, column; with_ties = true, n, prop, missing_rm = true)\n</code></pre> <p>Retrieve rows with the minimum value(s) from the specified column of a DataFrame or GroupedDataFrame.</p> <p>Arguments</p> <ul> <li><code>df</code>: The source data frame or grouped data frame from which to slice rows.</li> <li><code>column</code>: The column for which to slice the minimum values.</li> <li><code>with_ties</code>: Whether or not all ties will be shown, defaults to true and shows all ties. When false it will only show the first row.</li> <li><code>prop</code>: The proportion of rows to slice.</li> <li><code>n</code>: An optional integer argument to specify the number of minimum rows to retrieve. If with_ties = true, and the ties &gt; n, n will be overridden.</li> <li><code>missing_rm</code>: Defaults to true, skips the missing values when determining the proportion of the dataframe to slice.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(\n           a = [missing, 0.2, missing, missing, 1, missing, 5, 6],\n           b = [0.3, 2, missing, 0.3, 6, 5, 7, 7],\n           c = [0.2, 0.2, 0.2, missing, 1, missing, 5, 6]);\n\njulia&gt; @chain df begin\n         @slice_min(b)\n       end \n2\u00d73 DataFrame\n Row \u2502 a         b         c         \n     \u2502 Float64?  Float64?  Float64?  
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502  missing       0.3        0.2\n   2 \u2502  missing       0.3  missing\n\njulia&gt; @chain df begin\n         @slice_min(b, with_ties = false)\n       end \n1\u00d73 DataFrame\n Row \u2502 a         b         c        \n     \u2502 Float64?  Float64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502  missing       0.3       0.2\n\njulia&gt; @chain df begin\n         @slice_min(b, n = 3)\n       end\n3\u00d73 DataFrame\n Row \u2502 a          b         c         \n     \u2502 Float64?   Float64?  Float64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 missing         0.3        0.2\n   2 \u2502 missing         0.3  missing   \n   3 \u2502       0.2       2.0        0.2  \n\njulia&gt; @chain df begin\n         @slice_min(b, prop = 0.5, missing_rm = true)\n       end\n3\u00d73 DataFrame\n Row \u2502 a          b         c         \n     \u2502 Float64?   Float64?  Float64?  
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 missing         0.3        0.2\n   2 \u2502 missing         0.3  missing   \n   3 \u2502       0.2       2.0        0.2\n</code></pre> <p>source</p> <p># <code>TidierData.@slice_sample</code> \u2014 Macro.</p> <pre><code>@slice_sample(df, [n = 1, prop, replace = false])\n</code></pre> <p>Randomly sample rows from a DataFrame <code>df</code> or from each group in a GroupedDataFrame. The default is to return 1 row. Either the number of rows (<code>n</code>) or the proportion of rows (<code>prop</code>) should be provided as a keyword argument.</p> <p>Arguments</p> <ul> <li><code>df</code>: The source data frame or grouped data frame from which to sample rows.</li> <li><code>n</code>: The number of rows to sample. Defaults to <code>1</code>.</li> <li><code>prop</code>: The proportion of rows to sample.</li> <li><code>replace</code>: Whether to sample with replacement. 
Defaults to <code>false</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 1:10, b = 11:20);\n\njulia&gt; using StableRNGs, Random\n\njulia&gt; rng = StableRNG(1);\n\njulia&gt; Random.seed!(rng, 1);\n\njulia&gt; @chain df begin \n         @slice_sample(n = 5)\n       end\n5\u00d72 DataFrame\n Row \u2502 a      b     \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     6     16\n   2 \u2502     1     11\n   3 \u2502     5     15\n   4 \u2502     4     14\n   5 \u2502     8     18\n\njulia&gt; @chain df begin \n         @slice_sample(n = 5, replace = true)\n       end\n5\u00d72 DataFrame\n Row \u2502 a      b     \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     7     17\n   2 \u2502     2     12\n   3 \u2502     1     11\n   4 \u2502     4     14\n   5 \u2502     2     12\n\njulia&gt; @chain df begin \n         @slice_sample(prop = 0.5)\n       end\n5\u00d72 DataFrame\n Row \u2502 a      b     \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     6     16\n   2 \u2502     7     17\n   3 \u2502     5     15\n   4 \u2502     9     19\n   5 \u2502     2     12\n\njulia&gt; @chain df begin \n         @slice_sample(prop = 0.5, replace = true)\n       end\n5\u00d72 DataFrame\n Row \u2502 a      b     \n     \u2502 Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502    10     20\n   2 \u2502     4     14\n   3 \u2502     9     19\n   4 \u2502     9     19\n   5 \u2502     8     18\n</code></pre> <p>source</p> <p># <code>TidierData.@slice_tail</code> \u2014 Macro.</p> <pre><code>@slice_tail(df; n, prop)\n</code></pre> 
<p>Retrieve rows from the end of a DataFrame or GroupedDataFrame.</p> <p>Arguments</p> <ul> <li><code>df</code>: The source data frame or grouped data frame from which to slice rows.</li> <li><code>prop</code>: The proportion of rows to slice.</li> <li><code>n</code>: An optional integer argument to specify the number of rows at the end of the dataframe to retrieve. Defaults to 1.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(\n           a = [missing, 0.2, missing, missing, 1, missing, 5, 6],\n           b = [0.3, 2, missing, 0.3, 6, 5, 7, 7],\n           c = [0.2, 0.2, 0.2, missing, 1, missing, 5, 6]);\n\njulia&gt; @chain df begin\n         @slice_tail(n = 3)\n       end \n3\u00d73 DataFrame\n Row \u2502 a          b         c         \n     \u2502 Float64?   Float64?  Float64?  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 missing         5.0  missing   \n   2 \u2502       5.0       7.0        5.0\n   3 \u2502       6.0       7.0        6.0\n\njulia&gt; @chain df begin\n         @slice_tail(prop = 0.25)\n       end \n2\u00d73 DataFrame\n Row \u2502 a         b         c        \n     \u2502 Float64?  Float64?  Float64? \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502      5.0       7.0       5.0\n   2 \u2502      6.0       7.0       6.0\n</code></pre> <p>source</p> <p># <code>TidierData.@summarise</code> \u2014 Macro.</p> <pre><code>@summarize(df, exprs...)\n@summarise(df, exprs...)\n</code></pre> <p>Create a new DataFrame with one row that aggregates all observations from the input DataFrame or GroupedDataFrame. 
</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>exprs...</code>: a <code>new_variable = function(old_variable)</code> pair. <code>function()</code> should be an aggregate function that returns a single value.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);\n\njulia&gt; @chain df begin\n         @summarize(mean_b = mean(b),\n                    median_b = median(b))\n       end\n1\u00d72 DataFrame\n Row \u2502 mean_b   median_b \n     \u2502 Float64  Float64  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     3.0       3.0\n\njulia&gt; @chain df begin\n         @summarize begin\n           mean_b = mean(b)\n           median_b = median(b)\n         end\n       end\n1\u00d72 DataFrame\n Row \u2502 mean_b   median_b \n     \u2502 Float64  Float64  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     3.0       3.0 \n\njulia&gt; @chain df begin\n         @summarise(mean_b = mean(b), median_b = median(b))\n       end\n1\u00d72 DataFrame\n Row \u2502 mean_b   median_b \n     \u2502 Float64  Float64  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     3.0       3.0\n\njulia&gt; @chain df begin\n         @summarize(across((b,c), (minimum, maximum)))\n       end\n1\u00d74 DataFrame\n Row \u2502 b_minimum  c_minimum  b_maximum  c_maximum \n     \u2502 Int64      Int64      Int64      Int64     
\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502         1         11          5         15\n\njulia&gt; @chain df begin\n         @summarize(across(where(is_number), minimum))\n       end\n1\u00d72 DataFrame\n Row \u2502 b_minimum  c_minimum \n     \u2502 Int64      Int64     \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502         1         11\n</code></pre> <p>source</p> <p># <code>TidierData.@summarize</code> \u2014 Macro.</p> <pre><code>@summarize(df, exprs...)\n@summarise(df, exprs...)\n</code></pre> <p>Create a new DataFrame with one row that aggregates all observations from the input DataFrame or GroupedDataFrame. </p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>exprs...</code>: a <code>new_variable = function(old_variable)</code> pair. 
<code>function()</code> should be an aggregate function that returns a single value.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);\n\njulia&gt; @chain df begin\n         @summarize(mean_b = mean(b),\n                    median_b = median(b))\n       end\n1\u00d72 DataFrame\n Row \u2502 mean_b   median_b \n     \u2502 Float64  Float64  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     3.0       3.0\n\njulia&gt; @chain df begin\n         @summarize begin\n           mean_b = mean(b)\n           median_b = median(b)\n         end\n       end\n1\u00d72 DataFrame\n Row \u2502 mean_b   median_b \n     \u2502 Float64  Float64  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     3.0       3.0 \n\njulia&gt; @chain df begin\n         @summarise(mean_b = mean(b), median_b = median(b))\n       end\n1\u00d72 DataFrame\n Row \u2502 mean_b   median_b \n     \u2502 Float64  Float64  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     3.0       3.0\n\njulia&gt; @chain df begin\n         @summarize(across((b,c), (minimum, maximum)))\n       end\n1\u00d74 DataFrame\n Row \u2502 b_minimum  c_minimum  b_maximum  c_maximum \n     \u2502 Int64      Int64      Int64      Int64     \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502         1         11          5         15\n\njulia&gt; @chain df begin\n         
@summarize(across(where(is_number), minimum))\n       end\n1\u00d72 DataFrame\n Row \u2502 b_minimum  c_minimum \n     \u2502 Int64      Int64     \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502         1         11\n</code></pre> <p>source</p> <p># <code>TidierData.@summary</code> \u2014 Macro.</p> <pre><code>   @summary(df, cols...)\n</code></pre> <p>For numerical columns, returns a dataframe with the Q1, Q3, min, max, mean, median, and number of missing values</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame</li> <li><code>cols</code>: columns on which summary will be performed. This is an optional argument, without which summary will be performed on all numerical columns</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = [1, 2, 3, 4, 5],\n                      b = [missing, 7, 8, 9, 10],\n                      c = [11, missing, 13, 14, missing],\n                      d = [16, 17, 18, 19, 20]);\n\njulia&gt; @summary(df);\n\njulia&gt; @summary(df, (b:d));\n\njulia&gt; @chain df begin\n         @summary(b:d)\n       end;\n</code></pre> <p>source</p> <p># <code>TidierData.@tally</code> \u2014 Macro.</p> <pre><code>@tally(df, [wt], [sort])\n</code></pre> <p>Tally the unique values of one or more variables, with an optional weighting.</p> <p><code>@tally()</code> is a low-level helper macro for <code>@count()</code> that assumes that any grouping has already been performed. <code>@chain df @tally()</code> is roughly equivalent to <code>@chain df @summarize(n = n())</code>. Supply <code>wt</code> to perform weighted counts, switching the summary from <code>n = n()</code> to <code>n = sum(wt)</code>.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame or GroupedDataFrame.</li> <li><code>wt</code>: Optional parameter. 
Used to calculate a sum over the provided <code>wt</code> variable instead of counting the rows.</li> <li><code>sort</code>: Defaults to <code>false</code>. Whether the result should be sorted from highest to lowest <code>n</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = vcat(repeat([\"a\"], inner = 3),\n                           repeat([\"b\"], inner = 3),\n                           repeat([\"c\"], inner = 1),\n                           missing),\n                      b = 1:8)\n8\u00d72 DataFrame\n Row \u2502 a        b     \n     \u2502 String?  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a            1\n   2 \u2502 a            2\n   3 \u2502 a            3\n   4 \u2502 b            4\n   5 \u2502 b            5\n   6 \u2502 b            6\n   7 \u2502 c            7\n   8 \u2502 missing      8\n\njulia&gt; @chain df @tally()\n1\u00d71 DataFrame\n Row \u2502 n     \n     \u2502 Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     8\n\njulia&gt; @chain df begin\n         @group_by(a)\n         @tally()\n       end\n4\u00d72 DataFrame\n Row \u2502 a        n     \n     \u2502 String?  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a            3\n   2 \u2502 b            3\n   3 \u2502 c            1\n   4 \u2502 missing      1\n\njulia&gt; @chain df begin\n         @group_by(a)\n         @tally(wt = b)\n       end\n4\u00d72 DataFrame\n Row \u2502 a        n     \n     \u2502 String?  
Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a            6\n   2 \u2502 b           15\n   3 \u2502 c            7\n   4 \u2502 missing      8\n\njulia&gt; @chain df begin\n         @group_by(a)\n         @tally(wt = b, sort = true)\n       end\n4\u00d72 DataFrame\n Row \u2502 a        n     \n     \u2502 String?  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 b           15\n   2 \u2502 missing      8\n   3 \u2502 c            7\n   4 \u2502 a            6       \n</code></pre> <p>source</p> <p># <code>TidierData.@transmute</code> \u2014 Macro.</p> <pre><code>@transmute(df, exprs...)\n</code></pre> <p>Create a new DataFrame with only computed columns.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>exprs...</code>: add new columns or replace values of existed columns using        <code>new_variable = values</code> syntax.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);\n\njulia&gt; @chain df begin\n         @transmute(d = b + c)\n       end\n5\u00d71 DataFrame\n Row \u2502 d     \n     \u2502 Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502    12\n   2 \u2502    14\n   3 \u2502    16\n   4 \u2502    18\n   5 \u2502    20\n</code></pre> <p>source</p> <p># <code>TidierData.@ungroup</code> \u2014 Macro.</p> <pre><code>@ungroup(df)\n</code></pre> <p>Return a <code>DataFrame</code> with all groups removed.</p> <p>If this is applied to a <code>GroupedDataFrame</code>, then it removes the grouping. 
If this is applied to a <code>DataFrame</code> (without any groups), then it returns the <code>DataFrame</code> unchanged.</p> <p>Arguments</p> <ul> <li><code>df</code>: A <code>GroupedDataFrame</code> or <code>DataFrame</code>.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a = 'a':'e', b = 1:5, c = 11:15);\n\njulia&gt; @chain df begin\n         @group_by(a)\n       end\nGroupedDataFrame with 5 groups based on key: a\nFirst Group (1 row): a = 'a': ASCII/Unicode U+0061 (category Ll: Letter, lowercase)\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n\u22ee\nLast Group (1 row): a = 'e': ASCII/Unicode U+0065 (category Ll: Letter, lowercase)\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 e         5     15\n\njulia&gt; @chain df begin\n         @group_by(a)\n         @ungroup\n       end\n5\u00d73 DataFrame\n Row \u2502 a     b      c     \n     \u2502 Char  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 a         1     11\n   2 \u2502 b         2     12\n   3 \u2502 c         3     13\n   4 \u2502 d         4     14\n   5 \u2502 e         5     15\n</code></pre> <p>source</p> <p># <code>TidierData.@unite</code> \u2014 Macro.</p> <pre><code>  @unite(df, new_cols, from_cols, sep, remove = true)\n</code></pre> <p>Unite multiple columns into one new column using a specific delimiter</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame</li> <li><code>new_col</code>: New column that will receive the combination</li> 
<li><code>from_cols</code>: Column names that it will combine, supports [] or ()</li> <li><code>sep</code>: the string or character that will separate the values in the new column</li> <li><code>remove</code>: defaults to <code>true</code>, removes input columns from data frame</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame( b = [\"1\", \"2\", \"3\"], c = [\"1\", \"2\", \"3\"], d = [missing, missing, \"3\"]);\n\njulia&gt; @unite(df, new_col, (b, c, d), \"-\")\n3\u00d71 DataFrame\n Row \u2502 new_col \n     \u2502 String  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 1-1\n   2 \u2502 2-2\n   3 \u2502 3-3-3\n\njulia&gt; @unite(df, new_col, (b, c, d), \"-\", remove = false)\n3\u00d74 DataFrame\n Row \u2502 b       c       d        new_col \n     \u2502 String  String  String?  String  \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 1       1       missing  1-1\n   2 \u2502 2       2       missing  2-2\n   3 \u2502 3       3       3        3-3-3\n</code></pre> <p>source</p> <p># <code>TidierData.@unnest_longer</code> \u2014 Macro.</p> <pre><code>@unnest_longer(df, columns, indices_include=false)\n</code></pre> <p>Unnest arrays in columns from a DataFrame to create a longer DataFrame with one row for each entry of the array.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>columns</code>: Columns to unnest. Can be a column symbols or a range of columns if they align for number of values.</li> <li><code>indices_include</code>: Optional. When set to <code>true</code>, adds an index column for each unnested column, which logs the position of each array entry.</li> <li><code>keep_empty</code>: Optional. 
When set to <code>true</code>, rows with empty arrays are kept, not skipped, and unnested as missing.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(a=[1, 2], b=[[1, 2], [3, 4]], c=[[5, 6], [7, 8]])\n2\u00d73 DataFrame\n Row \u2502 a      b       c      \n     \u2502 Int64  Array\u2026  Array\u2026 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1  [1, 2]  [5, 6]\n   2 \u2502     2  [3, 4]  [7, 8]\n\njulia&gt; @unnest_longer(df, 2)\n4\u00d73 DataFrame\n Row \u2502 a      b      c      \n     \u2502 Int64  Int64  Array\u2026 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1      1  [5, 6]\n   2 \u2502     1      2  [5, 6]\n   3 \u2502     2      3  [7, 8]\n   4 \u2502     2      4  [7, 8]\n\njulia&gt; @unnest_longer(df, b:c, indices_include = true)\n4\u00d75 DataFrame\n Row \u2502 a      b      c      b_id   c_id  \n     \u2502 Int64  Int64  Int64  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1      1      5      1      1\n   2 \u2502     1      2      6      2      2\n   3 \u2502     2      3      7      1      1\n   4 \u2502     2      4      8      2      2\n\njulia&gt; df2 = DataFrame(x = 1:4, y = [[], [1, 2, 3], [4, 5], Int[]])\n4\u00d72 DataFrame\n Row \u2502 x      y            \n     \u2502 Int64  Array\u2026       \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1  Any[]\n   2 \u2502     2  Any[1, 2, 
3]\n   3 \u2502     3  Any[4, 5]\n   4 \u2502     4  Any[]\n\njulia&gt; @unnest_longer(df2, y, keep_empty = true)\n7\u00d72 DataFrame\n Row \u2502 x      y       \n     \u2502 Int64  Any     \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1  missing \n   2 \u2502     2  1\n   3 \u2502     2  2\n   4 \u2502     2  3\n   5 \u2502     3  4\n   6 \u2502     3  5\n   7 \u2502     4  missing \n</code></pre> <p>source</p> <p># <code>TidierData.@unnest_wider</code> \u2014 Macro.</p> <pre><code>@unnest_wider(df, columns, names_sep)\n</code></pre> <p>Unnest specified columns of arrays or dictionaries into a wider-format DataFrame with individual columns.</p> <p>Arguments</p> <ul> <li><code>df</code>: A DataFrame.</li> <li><code>columns</code>: Columns to be unnested. These columns should contain arrays, dictionaries, dataframes, or tuples. Dictionary keys will be converted to column names.</li> <li><code>names_sep</code>: An optional string to specify the separator for creating new column names. 
If not provided, defaults to no separator.</li> </ul> <p>Examples</p> <pre><code>julia&gt; df = DataFrame(name = [\"Zaki\", \"Farida\"], attributes = [\n               Dict(\"age\" =&gt; 25, \"city\" =&gt; \"New York\"),\n               Dict(\"age\" =&gt; 30, \"city\" =&gt; \"Los Angeles\")]);\n\njulia&gt; @unnest_wider(df, attributes)\n2\u00d73 DataFrame\n Row \u2502 name    city         age   \n     \u2502 String  String       Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502 Zaki    New York        25\n   2 \u2502 Farida  Los Angeles     30\n\njulia&gt; df2 = DataFrame(a=[1, 2], b=[[1, 2], [3, 4]], c=[[5, 6], [7, 8]])\n2\u00d73 DataFrame\n Row \u2502 a      b       c      \n     \u2502 Int64  Array\u2026  Array\u2026 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1  [1, 2]  [5, 6]\n   2 \u2502     2  [3, 4]  [7, 8]\n\njulia&gt; @unnest_wider(df2, b:c, names_sep = \"_\")\n2\u00d75 DataFrame\n Row \u2502 a      b_1    b_2    c_1    c_2   \n     \u2502 Int64  Int64  Int64  Int64  Int64 \n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     1      1      2      5      6\n   2 \u2502     2      3      4      7      8\n</code></pre> <p>source</p> <p></p> <p></p>"},{"location":"reference/#reference-internal-functions","title":"Reference - Internal 
functions","text":""},{"location":"examples/generated/Contributors/Howto/","title":"Contribute","text":""},{"location":"examples/generated/Contributors/Howto/#contribute-to-documentation","title":"Contribute to Documentation","text":"<p>Contributing with examples can be done by first creating a new file example here</p> <p>Info</p> <ul> <li><code>your_new_file.jl</code> at <code>docs/examples/UserGuide/</code></li> </ul> <p>Once this is done you need to add a new entry here at the bottom and the appropriate level.</p> <p>Info</p> <p>Your new entry should look like:</p> <ul> <li><code>\"Your title example\" : \"examples/generated/UserGuide/your_new_file.md\"</code></li> </ul> <p></p> <p></p>"},{"location":"examples/generated/Contributors/Howto/#build-docs-locally","title":"Build docs locally","text":"<p>If you want to take a look at the docs locally before doing a PR follow the next steps:</p> <p>build docs locally</p> <p>Install the following dependencies in your system via pip, i.e.</p> <ul> <li><code>pip install mkdocs pygments python-markdown-math</code></li> <li><code>pip install mkdocs-material pymdown-extensions mkdocstrings</code></li> <li><code>pip install mknotebooks pytkdocs_tweaks mkdocs_include_exclude_files jinja2 mkdocs-video</code></li> </ul> <p>Then simply go to your <code>docs</code> env and activate it, i.e.</p> <p><code>docs&gt; julia</code></p> <p><code>julia&gt; ]</code></p> <p><code>(docs) pkg&gt; activate .</code></p> <p>Next, run the scripts:</p> <p>Info</p> <p>Generate files and build docs by running:</p> <ul> <li><code>genfiles.jl</code></li> <li><code>make.jl</code></li> </ul> <p>Now go to your <code>terminal</code> in the same path <code>docs&gt;</code> and run:</p> <p><code>mkdocs serve</code></p> <p>This should output <code>http://127.0.0.1:8000</code>, copy/paste this into your browser and you are all set.</p> <p>This page was generated using 
Literate.jl.</p>"},{"location":"examples/generated/UserGuide/across/","title":"across","text":"<p><code>across()</code> is a helper function that is typically used inside <code>@mutate()</code> or <code>@summarize</code> to operate on multiple columns and/or multiple functions. Notice that <code>across()</code> accepts two arguments, a set of variables and a set of functions. If providing multiple variables or functions, these should be provided as a tuple \u2013 in other words, wrapped in parentheses and separated by commas. If you want to skip missing values, you can \"fuse\" the summary function (such as <code>mean()</code>) with the <code>skipmissing()</code> function by using the function composition operator, which you can type out in Julia by typing <code>\\circ</code> and then pressing <code>[Tab]</code> such that it reads <code>mean\u2218skipmissing</code>.</p> <pre><code>using TidierData\nusing RDatasets\n\nmovies = dataset(\"ggplot2\", \"movies\");\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/across/#one-variable-one-function","title":"One variable, one function","text":"<pre><code>@chain movies begin\n  @mutate(Budget = Budget / 1_000_000)\n  @summarize(across(Budget, mean\u2218skipmissing))\nend\n</code></pre> 1\u00d71 DataFrame RowBudget_mean_skipmissingFloat64113.4125"},{"location":"examples/generated/UserGuide/across/#one-variable-one-anonymous-function","title":"One variable, one anonymous function","text":"<pre><code>@chain movies begin\n  @mutate(Budget = Budget / 1_000_000)\n  @summarize(across(Budget, (x -&gt; mean(skipmissing(x)))))\nend\n</code></pre> 1\u00d71 DataFrame RowBudget_functionFloat64113.4125 <p>Note: compound functions are not correctly supported inside of anonymous functions. As of right now, the above function works, but <code>(x -&gt; mean\u2218skipmissing(x))</code> does not work. 
This is a known bug and will be fixed in a future update.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/across/#multiple-variables-multiple-functions","title":"Multiple variables, multiple functions","text":"<pre><code>@chain movies begin\n    @mutate(Budget = Budget / 1_000_000)\n    @summarize(across((Rating, Budget), (mean\u2218skipmissing, median\u2218skipmissing)))\nend\n</code></pre> 1\u00d74 DataFrame RowRating_mean_skipmissingBudget_mean_skipmissingRating_median_skipmissingBudget_median_skipmissingFloat64Float64Float64Float6415.9328513.41256.13.0"},{"location":"examples/generated/UserGuide/across/#multiple-selection-helpers-multiple-functions","title":"Multiple selection helpers, multiple functions","text":"<pre><code>@chain movies begin\n  @mutate(Budget = Budget / 1_000_000)\n  @summarize(across((starts_with(\"Bud\"), ends_with(\"ting\")), (mean\u2218skipmissing, median\u2218skipmissing)))\nend\n</code></pre> 1\u00d74 DataFrame RowBudget_mean_skipmissingRating_mean_skipmissingBudget_median_skipmissingRating_median_skipmissingFloat64Float64Float64Float64113.41255.932853.06.1 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/arrange/","title":"@arrange","text":"<p>Arranging is the way to sort a data frame. <code>@arrange()</code> can take multiple arguments. Arguments refer to columns that are sorted in ascending order by default. If you want to sort in descending order, make sure to wrap the column name in <code>desc()</code> as shown below.</p> <p><code>DataFrames.jl</code> does not currently support the <code>sort()</code> function on grouped data frames. 
In order to make this work in <code>TidierData.jl</code>, if you apply <code>@arrange()</code> to a GroupedDataFrame, <code>@arrange()</code> will temporarily ungroup the data, perform the <code>sort()</code>, and then re-group by the original grouping variables.</p> <pre><code>using TidierData\nusing RDatasets\n\nmovies = dataset(\"ggplot2\", \"movies\");\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/arrange/#sort-both-variables-in-ascending-order","title":"Sort both variables in ascending order","text":"<pre><code>@chain movies begin\n  @arrange(Year, Rating)\n  @select(1:5)\n  @slice(1:5)\nend\n</code></pre> 5\u00d75 DataFrame RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float641Blacksmith Scene18931missing7.02Hadj Cheriff18941missing4.13Glenroy Bros., No. 218941missing4.24Leonard-Cushing Fight18941missing4.45Sioux Ghost Dance18941missing4.4"},{"location":"examples/generated/UserGuide/arrange/#sort-in-a-mix-of-ascending-and-descending-order","title":"Sort in a mix of ascending and descending order","text":"<p>To sort in descending order, make sure to wrap the variable inside of <code>desc()</code>.</p> <pre><code>@chain movies begin\n  @arrange(Year, desc(Rating))\n  @select(1:5)\n  @slice(1:5)\nend\n</code></pre> 5\u00d75 DataFrame RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float641Blacksmith Scene18931missing7.02Luis Martinetti, Contortionist18941missing6.13Caicedo (with Pole)18941missing5.84Glenroy Brothers (Comic Boxing)18941missing5.45Buffalo Dance18941missing5.0 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/autovec/","title":"Auto-vectorization","text":"<p>TidierData.jl uses a lookup table to decide which functions not to vectorize. For example, <code>mean()</code> is listed as a function that should never be vectorized. Also, any function used inside of <code>across()</code> is also not automatically vectorized. 
Any function that is not included in this list and is used in a context other than <code>across()</code> is automatically vectorized.</p> <p>Which functions are not vectorized? The set of non-vectorized functions is contained in the array <code>TidierData.not_vectorized[]</code>. Let's take a look at this array. We will wrap it in a <code>string()</code> to make the output easier to read.</p> <pre><code>using TidierData\n\nstring(TidierData.not_vectorized[])\n</code></pre> <pre><code>\"[:getindex, :rand, :esc, :Ref, :Set, :Cols, :collect, :(:), :\u2218, :lag, :lead, :ntile, :repeat, :across, :desc, :mean, :std, :var, :median, :mad, :first, :last, :minimum, :maximum, :sum, :length, :skipmissing, :quantile, :passmissing, :cumsum, :cumprod, :accumulate, :is_float, :is_integer, :is_string, :cat_rev, :cat_relevel, :cat_infreq, :cat_lump, :cat_reorder, :cat_collapse, :cat_lump_min, :cat_lump_prop, :categorical, :as_categorical, :is_categorical, :unique, :iqr, :cat_other, :cat_replace_missing, :cat_recode]\"\n</code></pre> <p>This \"auto-vectorization\" makes working with TidierData.jl more R-like and convenient. However, if you ever define your own function and try to use it, TidierData.jl may unintentionally vectorize it for you. To prevent auto-vectorization, you can prefix your function with a <code>~</code>.</p> <pre><code>df = DataFrame(a = repeat('a':'e', inner = 2), b = [1,1,1,2,2,2,3,3,3,4], c = 11:20)\n</code></pre> 10\u00d73 DataFrame RowabcCharInt64Int641a1112a1123b1134b2145c2156c2167d3178d3189e31910e420 <p>For example, let's define a function <code>new_mean()</code> that calculates a mean.</p> <pre><code>new_mean(exprs...) = mean(exprs...)\n</code></pre> <pre><code>new_mean (generic function with 1 method)\n</code></pre> <p>If we try to use <code>new_mean()</code> inside of <code>@mutate()</code>, it will give us the wrong result. 
This is because <code>new_mean()</code> is vectorized, which results in the mean being calculated element-wise, which is almost never what we actually want.</p> <pre><code>@chain df begin\n    @mutate(d = c - new_mean(c))\nend\n</code></pre> 10\u00d74 DataFrame RowabcdCharInt64Int64Float641a1110.02a1120.03b1130.04b2140.05c2150.06c2160.07d3170.08d3180.09e3190.010e4200.0 <p>To prevent <code>new_mean()</code> from being vectorized, we need to prefix it with a <code>~</code> like this:</p> <pre><code>@chain df begin\n    @mutate(d = c - ~new_mean(c))\nend\n</code></pre> 10\u00d74 DataFrame RowabcdCharInt64Int64Float641a111-4.52a112-3.53b113-2.54b214-1.55c215-0.56c2160.57d3171.58d3182.59e3193.510e4204.5 <p>Or you can modify the do-not-vectorize list like this:</p> <pre><code>push!(TidierData.not_vectorized[], :new_mean)\n</code></pre> <pre><code>52-element Vector{Symbol}:\n :getindex\n :rand\n :esc\n :Ref\n :Set\n :Cols\n :collect\n :(:)\n :\u2218\n :lag\n \u22ee\n :categorical\n :as_categorical\n :is_categorical\n :unique\n :iqr\n :cat_other\n :cat_replace_missing\n :cat_recode\n :new_mean\n</code></pre> <p>Now <code>new_mean()</code> should behave just like <code>mean()</code> in that it is treated as non-vectorized.</p> <pre><code>@chain df begin\n    @mutate(d = c - new_mean(c))\nend\n</code></pre> 10\u00d74 DataFrame RowabcdCharInt64Int64Float641a111-4.52a112-3.53b113-2.54b214-1.55c215-0.56c2160.57d3171.58d3182.59e3193.510e4204.5 <p>This gives us the correct answer. 
Notice that adding a <code>~</code> is not needed with <code>mean()</code> because <code>mean()</code> is already included on our look-up table of functions not requiring vectorization.</p> <pre><code>@chain df begin\n    @mutate(d = c - mean(c))\nend\n</code></pre> 10\u00d74 DataFrame RowabcdCharInt64Int64Float641a111-4.52a112-3.53b113-2.54b214-1.55c215-0.56c2160.57d3171.58d3182.59e3193.510e4204.5 <p>If you're not sure if a function is vectorized and want to prevent it from being vectorized, you can always prefix it with a ~ to prevent vectorization. Even though <code>mean()</code> is not vectorized anyway, prefixing it with a ~ will not cause any harm.</p> <pre><code>@chain df begin\n    @mutate(d = c - ~mean(c))\nend\n</code></pre> 10\u00d74 DataFrame RowabcdCharInt64Int64Float641a111-4.52a112-3.53b113-2.54b214-1.55c215-0.56c2160.57d3171.58d3182.59e3193.510e4204.5 <p>If for some crazy reason, you did want to vectorize <code>mean()</code>, you are always allowed to vectorize it, and TidierData.jl won't un-vectorize it.</p> <pre><code>@chain df begin\n    @mutate(d = c - mean.(c))\nend\n</code></pre> 10\u00d74 DataFrame RowabcdCharInt64Int64Float641a1110.02a1120.03b1130.04b2140.05c2150.06c2160.07d3170.08d3180.09e3190.010e4200.0 <p>Note: <code>~</code> also works with operators, so if you want to not vectorize an operator, you can prefix it with <code>~</code>, for example, <code>a ~* b</code> will perform a matrix multiplication rather than element-wise multiplication.</p> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/benchmark/","title":"Benchmark","text":"<p>The goal of this benchmarking is to gauge how Tidier.jl performs in comparison to DataFrames.jl. 
Ultimately, from this benchmarking, we can check that Tidier.jl is comparable in speed to DataFrames.jl.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/benchmark/#why-function-wrap","title":"Why function wrap?","text":"<p>Wrapping code in a function allows it to compile just once, which more closely reflects the reality of production workflows. For a more robust explanation, please see @kdpsingh comment here: https://github.com/TidierOrg/TidierData.jl/issues/24#issuecomment-1682718061</p> <pre><code>using TidierData\nusing RDatasets\nusing BenchmarkTools\n\nmovies = dataset(\"ggplot2\", \"movies\");\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/benchmark/#filtering","title":"filtering","text":"<pre><code>function filtering_tidier()\n@chain movies begin\n    @filter(Year &gt; 1939 &amp;&amp; Votes &gt; 40)\nend\nend\n\n@benchmark filtering_tidier()\n\n@benchmark filter(row -&gt; row.Year &gt; 1939 &amp;&amp; row.Votes &gt; 40, movies)\n</code></pre> <pre><code>BenchmarkTools.Trial: 493 samples with 1 evaluation.\n Range (min \u2026 max):   9.672 ms \u2026  19.010 ms  \u250a GC (min \u2026 max): 0.00% \u2026 4.76%\n Time  (median):      9.973 ms               \u250a GC (median):    0.00%\n Time  (mean \u00b1 \u03c3):   10.144 ms \u00b1 714.436 \u03bcs  \u250a GC (mean \u00b1 \u03c3):  1.19% \u00b1 2.63%\n\n       \u2584\u2586\u2587\u2588\u2587\u2586\u2584\u2583\u2581\u2582\u2582                                              \n  \u2582\u2583\u2582\u2585\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2585\u2586\u2583\u2584\u2581\u2583\u2583\u2581\u2581\u2581\u2581\u2581\u2581\u2582\u2583\u2583\u2582\u2583\u2584\u2583\u2585\u2583\u2583\u2584\u2584\u2583\u2584\u2583\u2583\u2584\u2583\u2582\u2581\u2583\u2582\u2581\u2583\u2581\u2581\u2581\u2581\u2581\u2581\u2582 \u2583\n  9.67 ms         Histogram: frequency by time         11.3 ms &lt;\n\n Memory estimate: 7.76 MiB, allocs estimate: 
287668.\n</code></pre>"},{"location":"examples/generated/UserGuide/benchmark/#group_by-summarize","title":"group_by summarize","text":"<pre><code>function groupbysummarize_tidier()\n@chain movies begin\n    @group_by(MPAA)\n    @summarise(n=n())\nend\nend\n\n@benchmark groupbysummarize_tidier()\n\n@benchmark combine(groupby(movies, :MPAA), nrow =&gt; :n)\n</code></pre> <pre><code>BenchmarkTools.Trial: 10000 samples with 1 evaluation.\n Range (min \u2026 max):  419.333 \u03bcs \u2026  3.638 ms  \u250a GC (min \u2026 max): 0.00% \u2026 16.92%\n Time  (median):     426.235 \u03bcs              \u250a GC (median):    0.00%\n Time  (mean \u00b1 \u03c3):   438.110 \u03bcs \u00b1 74.097 \u03bcs  \u250a GC (mean \u00b1 \u03c3):  1.22% \u00b1  5.06%\n\n  \u2586\u2588\u2587\u2586\u2584\u2584\u2584\u2583\u2582\u2582\u2581\u2581\u2581                                                \u2582\n  \u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2587\u2587\u2586\u2586\u2586\u2587\u2585\u2585\u2584\u2585\u2584\u2583\u2586\u2586\u2586\u2584\u2584\u2585\u2583\u2585\u2583\u2584\u2583\u2583\u2581\u2584\u2584\u2586\u2587\u2586\u2587\u2586\u2581\u2583\u2584\u2583\u2584\u2585\u2583\u2581\u2583\u2581\u2587\u2586\u2585 \u2588\n  419 \u03bcs        Histogram: log(frequency) by time       619 \u03bcs &lt;\n\n Memory estimate: 474.87 KiB, allocs estimate: 270.\n</code></pre>"},{"location":"examples/generated/UserGuide/benchmark/#one-mutate","title":"one mutate","text":"<pre><code>function mutate_1_tidier()\n@chain movies begin\n    @mutate(new_col = Votes * R1)\nend\nend\n\n@benchmark mutate_1_tidier()\n\n@benchmark transform(movies, [:Votes, :R1] =&gt; ((v, r) -&gt; v .* r) =&gt; :new_col)\n</code></pre> <pre><code>BenchmarkTools.Trial: 6516 samples with 1 evaluation.\n Range (min \u2026 max):  557.359 \u03bcs \u2026   7.220 ms  \u250a GC (min \u2026 max): 0.00% \u2026  9.33%\n Time  (median):     686.274 \u03bcs               \u250a GC (median):    
0.00%\n Time  (mean \u00b1 \u03c3):   763.019 \u03bcs \u00b1 243.408 \u03bcs  \u250a GC (mean \u00b1 \u03c3):  9.17% \u00b1 14.37%\n\n    \u2582\u2585\u2586\u2587\u2588\u2588\u2588\u2588\u2587\u2586\u2585\u2584\u2584\u2582\u2582                    \u2581\u2582\u2581\u2581\u2581\u2581\u2581 \u2581\u2581 \u2581     \u2581 \u2581\u2581    \u2582\n  \u2583\u2587\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2587\u2588\u2587\u2587\u2586\u2585\u2584\u2585\u2583\u2583\u2585\u2585\u2583\u2585\u2585\u2585\u2585\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2587\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588 \u2588\n  557 \u03bcs        Histogram: log(frequency) by time       1.45 ms &lt;\n\n Memory estimate: 8.42 MiB, allocs estimate: 223.\n</code></pre>"},{"location":"examples/generated/UserGuide/benchmark/#mutate-6-new-columns","title":"mutate 6 new columns","text":"<pre><code>function mutate6_tidier()\n    @chain movies begin\n        @mutate(\n        Votes_R1_Product = Votes .* R1,\n        Rating_Year_Ratio = Rating ./ Year,\n        R1_to_R5_Sum = R1 + R2 + R3 + R4 + R5,\n        High_Budget_Flag = if_else(ismissing(Budget), \"NA\", Budget .&gt; 50000),\n        R6_to_R8_Avg = (R6 + R7 + R8) / 3,\n        year_Minus_Length = Year - Length)\n    end\nend\n\n@benchmark mutate6_tidier()\n\n@benchmark transform(movies, [:Votes, :R1] =&gt; ((v, r) -&gt; v .* r) =&gt; :Votes_R1_Product, [:Rating, :Year] =&gt; ((r, y) -&gt; r ./ y) =&gt; :Rating_Year_Ratio, [:R1, :R2, :R3, :R4, :R5] =&gt; ((a, b, c, d, e) -&gt; a + b + c + d + e) =&gt; :R1_to_R5_Sum, :Budget =&gt; (b -&gt; ifelse.(ismissing.(b), missing, b .&gt; 50000)) =&gt; :High_Budget_Flag, [:R6, :R7, :R8] =&gt; ((f, g, h) -&gt; (f + g + h) / 3) =&gt; :R6_to_R8_Avg, [:Year, :Length] =&gt; ((y, l) -&gt; y - l) =&gt; :Year_Minus_Length )\n</code></pre> <pre><code>BenchmarkTools.Trial: 4012 samples with 1 
evaluation.\n Range (min \u2026 max):  1.022 ms \u2026   6.252 ms  \u250a GC (min \u2026 max): 0.00% \u2026 10.10%\n Time  (median):     1.143 ms               \u250a GC (median):    0.00%\n Time  (mean \u00b1 \u03c3):   1.241 ms \u00b1 282.196 \u03bcs  \u250a GC (mean \u00b1 \u03c3):  7.04% \u00b1 11.85%\n\n       \u2583\u2588\u2588\u2585\u2582                                                   \n  \u2582\u2582\u2583\u2584\u2587\u2588\u2588\u2588\u2588\u2588\u2587\u2585\u2583\u2583\u2583\u2582\u2582\u2582\u2582\u2581\u2582\u2581\u2582\u2582\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2582\u2582\u2581\u2581\u2581\u2582\u2582\u2582\u2582\u2582\u2582\u2582\u2582\u2582\u2582\u2583\u2583\u2583\u2584\u2583\u2583\u2583\u2583\u2583\u2583 \u2583\n  1.02 ms         Histogram: frequency by time        1.92 ms &lt;\n\n Memory estimate: 10.56 MiB, allocs estimate: 581.\n</code></pre>"},{"location":"examples/generated/UserGuide/benchmark/#groupby-then-2-mutates","title":"groupby then 2 mutates","text":"<pre><code>function groupby1_2mutate_tidier()\n@chain movies begin\n    @group_by(MPAA)\n    @mutate(ace = R1 -&gt; R1/2 * 4)\n    @mutate(Bace = Votes^R1)\nend\nend\n\n@benchmark groupby1_2mutate_tidier()\n\n@benchmark transform( transform( groupby(movies, :MPAA), :R1 =&gt; (x -&gt; x/2 * 4) =&gt; :ace, ungroup = false), [:Votes, :R1] =&gt; ((a, b) -&gt; b .^ a) =&gt; :Bace, ungroup = false)\n</code></pre> <pre><code>BenchmarkTools.Trial: 683 samples with 1 evaluation.\n Range (min \u2026 max):  6.629 ms \u2026  12.749 ms  \u250a GC (min \u2026 max): 0.00% \u2026 7.02%\n Time  (median):     7.068 ms               \u250a GC (median):    0.00%\n Time  (mean \u00b1 \u03c3):   7.318 ms \u00b1 541.471 \u03bcs  \u250a GC (mean \u00b1 \u03c3):  2.98% \u00b1 4.19%\n\n      \u2581 \u2584\u2588\u2585\u2584\u2582\u2583                                                 \n  
\u2582\u2583\u2584\u2587\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2586\u2584\u2584\u2583\u2584\u2582\u2583\u2583\u2584\u2583\u2584\u2585\u2585\u2586\u2585\u2586\u2586\u2585\u2586\u2587\u2586\u2584\u2586\u2585\u2584\u2583\u2583\u2583\u2583\u2582\u2582\u2582\u2582\u2583\u2582\u2583\u2582\u2582\u2581\u2582\u2583\u2582\u2582\u2581\u2581\u2582\u2583 \u2583\n  6.63 ms         Histogram: frequency by time        8.81 ms &lt;\n\n Memory estimate: 26.17 MiB, allocs estimate: 2449.\n</code></pre>"},{"location":"examples/generated/UserGuide/benchmark/#select-5-columns","title":"select 5 columns","text":"<pre><code>function select5_tidier()\n    @chain movies begin\n        @select(R1:R5)\n    end\nend\n\n@benchmark select5_tidier()\n\n@benchmark select(movies, :R1, :R2, :R3, :R4, :R5)\n</code></pre> <pre><code>BenchmarkTools.Trial: 10000 samples with 1 evaluation.\n Range (min \u2026 max):  173.423 \u03bcs \u2026  4.715 ms  \u250a GC (min \u2026 max): 0.00% \u2026 8.59%\n Time  (median):     221.673 \u03bcs              \u250a GC (median):    0.00%\n Time  (mean \u00b1 \u03c3):   237.430 \u03bcs \u00b1 95.591 \u03bcs  \u250a GC (mean \u00b1 \u03c3):  4.87% \u00b1 9.96%\n\n      \u2585\u2588\u2584                                                       \n  \u2582\u2582\u2584\u2588\u2588\u2588\u2588\u2586\u2585\u2584\u2583\u2582\u2582\u2582\u2582\u2582\u2582\u2582\u2582\u2582\u2582\u2582\u2582\u2581\u2581\u2582\u2581\u2582\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2582\u2581\u2581\u2582\u2581\u2582\u2582\u2582\u2582\u2581\u2582\u2582\u2582\u2582\u2582\u2582\u2582\u2582\u2582\u2582\u2582\u2582\u2582\u2582\u2582 \u2583\n  173 \u03bcs          Histogram: frequency by time          688 \u03bcs &lt;\n\n Memory estimate: 2.25 MiB, allocs estimate: 200.\n</code></pre> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/binding/","title":"Binding","text":"<p>Whereas joins are useful for combining data frames based on matching keys, 
another way to combine data frames is to bind them together, which can be done either by rows or by columns. <code>TidierData.jl</code> implements these actions using <code>@bind_rows()</code> and <code>@bind_cols()</code>, respectively.</p> <p>Let's generate three data frames to combine.</p> <pre><code>using TidierData\n\ndf1 = DataFrame(a=1:3, b=1:3);\n\ndf2 = DataFrame(a=4:6, b=4:6);\n\ndf3 = DataFrame(a=7:9, c=7:9);\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/binding/#bind_rows","title":"<code>@bind_rows()</code>","text":"<pre><code>@bind_rows(df1, df2)\n</code></pre> 6\u00d72 DataFrame RowabInt64Int64111222333444555666 <p><code>@bind_rows()</code> keeps columns that are present in at least one of the provided data frames. Any missing columns will be filled with <code>missing</code> values.</p> <pre><code>@bind_rows(df1, df3)\n</code></pre> 6\u00d73 DataFrame RowabcInt64Int64?Int64?111missing222missing333missing47missing758missing869missing9 <p>There is an optional <code>id</code> argument to add an identifier for combined data frames. 
Note that both <code>@bind_rows</code> and <code>@bind_cols</code> accept multiple (i.e., more than 2) data frames, as in the example below.</p> <pre><code>@bind_rows(df1, df2, df3, id = \"id\")\n</code></pre> 9\u00d74 DataFrame RowabcidInt64Int64?Int64?Int64111missing1222missing1333missing1444missing2555missing2666missing277missing7388missing8399missing93 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/binding/#bind_cols","title":"<code>@bind_cols()</code>","text":"<p><code>@bind_cols</code> works similarly to R's <code>tidyverse</code> although the <code>.name_repair</code> argument is not supported.</p> <pre><code>@bind_cols(df1, df2)\n</code></pre> 3\u00d74 DataFrame Rowaba_1b_1Int64Int64Int64Int64111442225533366 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/column_names/","title":"Column names","text":"<p>When referring to column names, TidierData.jl is a bit unusual for a Julia package in that it does not use symbols. This is because TidierData.jl uses tidy expressions, which in R lingo equates to a style of programming referred to as \"non-standard evaluation.\" If you are creating a new column <code>a</code> containing a value that is the mean of column <code>b</code>, you would simply write <code>a = mean(b)</code>.</p> <p>However, there may be times when you wish to create or refer to a column containing a space in it. Let's start by creating some column names containing a space in their name.</p> <pre><code>using TidierData\n\ndf = DataFrame(var\"my name\" = [\"Ada\", \"Twist\"],\n               var\"my age\" = [40, 50])\n</code></pre> 2\u00d72 DataFrame Rowmy namemy ageStringInt641Ada402Twist50 <p>To create a column name containing a space, we used the <code>var\"column name\"</code> notation. 
Because <code>DataFrame()</code> is a regular Julia function, this is the standard way to refer to a variable containing a space, which is why we need to use this here.</p> <p>This notation also works inside of TidierData.jl.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/column_names/#varcolumn-name-notation","title":"<code>var\"column name\"</code> notation","text":"<p>If we want to figure out the age for the people in our dataset a decade from today, we could use this same <code>var\"column name\"</code> notation inside of <code>@mutate</code>.</p> <pre><code>@chain df begin\n  @mutate(var\"age in 10 years\" = var\"my age\" + 10)\nend\n</code></pre> 2\u00d73 DataFrame Rowmy namemy ageage in 10 yearsStringInt64Int641Ada40502Twist5060 <p>However, typing out the <code>var\"column name\"</code> can become cumbersome. TidierData.jl also supports another shorthand notation to refer to column names containing spaces or other special characters: backticks.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/column_names/#backtick-notation","title":"Backtick notation","text":"<p>This same code could be written more concisely like this:</p> <pre><code>@chain df begin\n  @mutate(`age in 10 years` = `my age` + 10)\nend\n</code></pre> 2\u00d73 DataFrame Rowmy namemy ageage in 10 yearsStringInt64Int641Ada40502Twist5060 <p>Backticks are an R convention. While they are not specific to tidyverse, they are a convenient way to refer to column names that otherwise would not parse correctly as a single entity. Backticks are supported in all TidierData.jl functions where column names may be referenced.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/column_names/#cleaning-up-column-names","title":"Cleaning up column names","text":"<p>Another option is to clean up the column names so that you do not have spaces to begin with. In R, this is usually accomplished using the <code>janitor</code> package. 
In Julia, the Cleaner.jl package provides this functionality, which we have wrapped inside of TidierData.jl.</p> <pre><code>@chain df begin\n  @clean_names\nend\n</code></pre> 2\u00d72 DataFrame Rowmy_namemy_ageStringInt641Ada402Twist50 <p>Although the default value for the <code>case</code> argument is \"snake_case\", you can also set this to \"camelCase\".</p> <pre><code>@chain df begin\n  @clean_names(case = \"camelCase\")\nend\n</code></pre> 2\u00d72 DataFrame RowmyNamemyAgeStringInt641Ada402Twist50 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/conditionals/","title":"Conditionals","text":"<p>Conditional functions are a useful tool to update or create new columns conditional on the values of a column of data. When continuous variables are converted to categories, this is sometimes referred to as \"recoding\" a column.</p> <p>TidierData.jl provides two functions to recode data: <code>if_else()</code> and <code>case_when()</code>.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/conditionals/#if_else","title":"<code>if_else()</code>","text":"<p>Why do we need another <code>if_else()</code> function if base Julia already comes with an <code>ifelse()</code> function? Similar to R, the base Julia implementation of <code>ifelse()</code> does not include a way to designate what value to return if the enclosed vector contains a missing value. Additionally, the base Julia implementation of <code>ifelse()</code> produces an error if presented with a <code>missing</code> value in the condition. The TidierData.jl <code>if_else()</code> can handle missing values and includes an optional 4th argument that is used to designate what to return in the event of a <code>missing</code> value for the condition. 
Let's take a look at some examples.</p> <pre><code>using TidierData\n\ndf = DataFrame(a = [1, 2, missing, 4, 5])\n</code></pre> 5\u00d71 DataFrame RowaInt64?11223missing4455 <p>Here, we have created a <code>DataFrame</code> containing a single column <code>a</code> with 5 values, for which the 3rd value is missing.</p> <p>Now, let's create a new column <code>b</code> that contains a \"yes\" if <code>a</code> is greater than or equal to 3, and a \"no\" otherwise. Notice that when we do this, the <code>missing</code> values remains as <code>missing</code>.</p> <pre><code>@chain df begin\n  @mutate(b = if_else(a &gt;= 3, \"yes\", \"no\"))\nend\n</code></pre> 5\u00d72 DataFrame RowabInt64?String?11no22no3missingmissing44yes55yes <p>What if we wanted to fill in the missing value with \"unknown\"? All we need to do is provide an optional 4th argument containing the value to return in the event of a missing condition. When we run this version, <code>missing</code> values in <code>a</code> are converted to \"unknown\" in <code>b</code>.</p> <pre><code>@chain df begin\n  @mutate(b = if_else(a &gt;= 3, \"yes\", \"no\", \"unknown\"))\nend\n</code></pre> 5\u00d72 DataFrame RowabInt64?String11no22no3missingunknown44yes55yes <p>Although both of these examples showed how to return a single value (like \"yes\" and \"no\"), you can also return a vector of values, which is useful for updating only a subset of the values of a column. For example, if we wanted to create a column <code>b</code> that contains a 3 when <code>a</code> is greater than or equal to 3 but otherwise remains unchanged, we could provide a 3 for the <code>yes</code> condition and a vector (column) <code>a</code> in the <code>no</code> condition. 
If we do not provide the optional 4th argument, <code>missing</code> values remain <code>missing</code>.</p> <pre><code>@chain df begin\n  @mutate(b = if_else(a &gt;= 3, 3, a))\nend\n</code></pre> 5\u00d72 DataFrame RowabInt64?Int64?1112223missingmissing443553 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/conditionals/#case_when","title":"<code>case_when()</code>","text":"<p>Although <code>if_else()</code> is convenient when evaluating a single condition, it can be cumbersome when evaluating multiple conditions because subsequent conditions need to be nested within the <code>no</code> condition for the preceding argument. For situations where multiple conditions need to be evaluated, <code>case_when()</code> is more convenient.</p> <p>Let's first consider a similar example from above and recreate it using <code>case_when()</code>. The following code creates a column <code>b</code> that assigns a value of 3 if <code>a &gt;= 3</code> and otherwise leaves the value unchanged.</p> <pre><code>@chain df begin\n  @mutate(b = case_when(a &gt;= 3  =&gt;  3,\n                        true    =&gt;  a))\nend\n</code></pre> 5\u00d72 DataFrame RowabInt64?Int64?1112223missingmissing443553 <p>What is going on here? <code>case_when()</code> uses a <code>condition =&gt; return_value</code> syntax, which are encoded as pairs in Julia. You can provide a single pair, or multiple pairs separated by commas. Because the pairs operator (<code>=&gt;</code>) might be confused with a greater than or equal to sign (<code>&gt;=</code>), we have padded two spaces on either side of the <code>=&gt;</code> to make sure that the pair remains visually distinct. We do not use a <code>~</code> operator in <code>case_when()</code> (as is used in R) because the <code>~</code> operator is used to denote de-vectorized functions in TidierData.jl.</p> <p>There are 2 other things to note above. 
First, the <code>true</code> condition evaluates to <code>true</code> for all remaining values of <code>a</code>. The only reason that the <code>b</code> contains a <code>missing</code> value here is that the <code>true</code> condition was met, leading to the value of <code>a</code> (in this case, <code>missing</code>) to be assigned to <code>b</code>. Second, we were able to return a single value (3) in the first condition, and a vector (column) of data (<code>a</code>) in the second condition.</p> <p>What if we wanted to fill in the missing values with something else? In this case, we would need to create an explicit condition that checks for missing values and assigns a return value to that condition.</p> <pre><code>@chain df begin\n  @mutate(b = case_when(a &gt;= 3        =&gt;  3,\n                        ismissing(a)  =&gt;  0,\n                        true          =&gt;  a))\nend\n</code></pre> 5\u00d72 DataFrame RowabInt64?Int641112223missing0443553 <p>Do our conditions have to be mutually exclusive? No. The return value for the first matching condition is assigned to <code>b</code> because the conditions are evaluated sequentially from first to last.</p> <pre><code>@chain df begin\n  @mutate(b = case_when(a &gt; 4  =&gt;  \"hi\",\n                        a &gt; 2  =&gt;  \"medium\",\n                        a &gt; 0  =&gt;  \"low\"))\nend\n</code></pre> 5\u00d72 DataFrame RowabInt64?String?11low22low3missingmissing44medium55hi <p>Again, if we want to fill in remaining values (which in this case are the <code>missing</code> ones), we can map the final condition <code>true</code> to the value of \"unknown\". 
Because the ordering of the conditions matters, the <code>true</code> condition should always be listed last if it is included.</p> <pre><code>@chain df begin\n  @mutate(b = case_when(a &gt; 4  =&gt;  \"hi\",\n                        a &gt; 2  =&gt;  \"medium\",\n                        a &gt; 0  =&gt;  \"low\",\n                        true   =&gt;  \"unknown\"))\nend\n</code></pre> 5\u00d72 DataFrame RowabInt64?String11low22low3missingunknown44medium55hi <p></p> <p></p>"},{"location":"examples/generated/UserGuide/conditionals/#do-these-functions-work-outside-of-tidierdatajl","title":"Do these functions work outside of TidierData.jl?","text":"<p>Yes, both <code>if_else()</code> and <code>case_when()</code> work outside of TidierData.jl. However, you'll need to remember that if working with vectors, both the functions and conditions will need to be vectorized, and in the case of <code>case_when()</code>, the <code>=&gt;</code> will need to be written as <code>.=&gt;</code>. The reason this is not needed when using these functions inside of TidierData.jl is because they are auto-vectorized.</p> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/dataset_movies/","title":"Movies dataset","text":"<p>To get started, we will load the <code>movies</code> dataset from the <code>RDatasets.jl</code> package.</p> <pre><code>using TidierData\nusing RDatasets\n\nmovies = dataset(\"ggplot2\", \"movies\");\n</code></pre> <p>To work with this dataset, we will use the <code>@chain</code> macro. This macro initiates a pipe, and every function or macro provided to it between the <code>begin</code> and <code>end</code> blocks modifies the dataframe mentioned at the beginning of the pipe. You don't have to necessarily spread a chain over multiple lines of code, but when working with data frames it's often easiest to do so. 
Before going further, take a look at the Chain.jl GitHub page to see all the cool things that are possible with this, including mid-chain side effects using <code>@aside</code> and mid-chain assignment of variables.</p> <p>Let's take a look at the first 5 rows of the <code>movies</code> dataset using <code>@slice()</code>.</p> <pre><code>@chain movies begin\n    @slice(1:5)\nend\n</code></pre> 5\u00d724 DataFrame RowTitleYearLengthBudgetRatingVotesR1R2R3R4R5R6R7R8R9R10MPAAActionAnimationComedyDramaDocumentaryRomanceShortStringInt32Int32Int32?Float64Int32Float64Float64Float64Float64Float64Float64Float64Float64Float64Float64Cat\u2026Int32Int32Int32Int32Int32Int32Int321$1971121missing6.43484.54.54.54.514.524.524.514.54.54.500110002$1000 a Touchdown193971missing6.0200.014.54.524.514.514.514.54.54.514.500100003$21 a Day Once a Month19417missing8.250.00.00.00.00.024.50.044.524.524.501000014$40,000199670missing8.2614.50.00.00.00.00.00.00.034.545.500100005$50,000 Climax Show, The197571missing3.41724.54.50.014.514.54.50.00.00.024.50000000 <p>Let's use <code>@glimpse()</code> to preview the dataset.</p> <pre><code>@glimpse(movies)\n</code></pre> <pre><code>Rows: 58788\nColumns: 24\n.Title         String         $, $1000 a Touchdown, $21 a Day Once a Month, $40,\n.Year          Int32          1971, 1939, 1941, 1996, 1975, 2000, 2002, 2002, 19\n.Length        Int32          121, 71, 7, 70, 71, 91, 93, 25, 97, 61, 99, 96, 10\n.Budget        Union{Missing, Int32}missing, missing, missing, missing, missing,\n.Rating        Float64        6.4, 6.0, 8.2, 8.2, 3.4, 4.3, 5.3, 6.7, 6.6, 6.0,\n.Votes         Int32          348, 20, 5, 6, 17, 45, 200, 24, 18, 51, 23, 53, 44\n.R1            Float64        4.5, 0.0, 0.0, 14.5, 24.5, 4.5, 4.5, 4.5, 4.5, 4.5\n.R2            Float64        4.5, 14.5, 0.0, 0.0, 4.5, 4.5, 0.0, 4.5, 4.5, 0.0,\n.R3            Float64        4.5, 4.5, 0.0, 0.0, 0.0, 4.5, 4.5, 4.5, 4.5, 4.5,\n.R4            Float64        4.5, 24.5, 0.0, 0.0, 14.5, 14.5, 4.5, 4.5, 
0.0, 4.\n.R5            Float64        14.5, 14.5, 0.0, 0.0, 14.5, 14.5, 24.5, 4.5, 0.0,\n.R6            Float64        24.5, 14.5, 24.5, 0.0, 4.5, 14.5, 24.5, 14.5, 0.0,\n.R7            Float64        24.5, 14.5, 0.0, 0.0, 0.0, 4.5, 14.5, 14.5, 34.5,\n.R8            Float64        14.5, 4.5, 44.5, 0.0, 0.0, 4.5, 4.5, 14.5, 14.5, 4\n.R9            Float64        4.5, 4.5, 24.5, 34.5, 0.0, 14.5, 4.5, 4.5, 4.5, 4.\n.R10           Float64        4.5, 14.5, 24.5, 45.5, 24.5, 14.5, 14.5, 14.5, 24.\n.MPAA          CategoricalArrays.CategoricalValue{String, UInt8}, , , , , , R, ,\n.Action        Int32          0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,\n.Animation     Int32          0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n.Comedy        Int32          1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0,\n.Drama         Int32          1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1,\n.Documentary   Int32          0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n.Romance       Int32          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n.Short         Int32          0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,\n</code></pre> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/distinct/","title":"@distinct","text":"<p>The <code>@distinct()</code> macro in <code>TidierData.jl</code> is useful to select distinct rows. Like its R counterpart, it can be used with or without arguments. When arguments are provided, it behaves slightly differently than the R version. 
Whereas the R function only returns the provided columns, the TidierData.jl version returns all columns, where the first match is returned for the non-selected columns.</p> <pre><code>using TidierData\n\ndf = DataFrame(a = 1:10, b = repeat('a':'e', inner = 2))\n</code></pre> 10\u00d72 DataFrame RowabInt64Char11a22a33b44b55c66c77d88d99e1010e <p></p> <p></p>"},{"location":"examples/generated/UserGuide/distinct/#select-distinct-values-overall","title":"Select distinct values overall","text":"<p>Since there are no duplicate rows, this will return all rows.</p> <pre><code>@chain df begin\n    @distinct()\nend\n</code></pre> 10\u00d72 DataFrame RowabInt64Char11a22a33b44b55c66c77d88d99e1010e <p></p> <p></p>"},{"location":"examples/generated/UserGuide/distinct/#select-distinct-values-based-on-column-b","title":"Select distinct values based on column <code>b</code>","text":"<p>Notice that the first matching row for column <code>a</code> is returned for every distinct value of column <code>b</code>. This is slightly different behavior than R's tidyverse, which would have returned only column <code>b</code>.</p> <pre><code>@chain df begin\n  @distinct(b)\nend\n</code></pre> 5\u00d72 DataFrame RowabInt64Char11a23b35c47d59e <p>In TidierData.jl, <code>@distinct()</code> works with grouped data frames. If grouped, <code>@distinct()</code> will ignore the grouping when determining distinct values but will return the data frame in grouped form based on the original groupings.</p> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/fill_missing/","title":"Fill missing","text":"<p>The @fill_missing macro is a reimplementation of fill(). 
To mirror the syntax in R, the methods available are \"up\" (fill from bottom up) and \"down\" (fill from top down).</p> <pre><code>using TidierData\n\ndf = DataFrame(\n    a = [missing, 2, 3, missing, 5],\n    b = [missing, 1, missing, 4, 5],\n    c = ['a', 'b', missing, 'd', 'e'],\n    group = ['A', 'A', 'B', 'B', 'A']\n);\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/fill_missing/#fill-all-columns","title":"Fill all columns","text":"<p>Fill missing values for the whole DataFrame using the \"down\" method (top to bottom).</p> <pre><code>@chain df begin\n    @fill_missing(\"down\")\nend\n\n@fill_missing(df, \"down\")\n</code></pre> 5\u00d74 DataFrame RowabcgroupInt64?Int64?Char?Char1missingmissingaA221bA331bB434dB555eA <p></p> <p></p>"},{"location":"examples/generated/UserGuide/fill_missing/#fill-specifc-columns","title":"Fill specific columns","text":"<p>This fills missing values in columns <code>a</code> and <code>c</code> going from bottom to top.</p> <pre><code>@chain df begin\n    @fill_missing(a, c, \"up\")\nend\n</code></pre> 5\u00d74 DataFrame RowabcgroupInt64?Int64?Char?Char12missingaA221bA33missingdB454dB555eA <p></p> <p></p>"},{"location":"examples/generated/UserGuide/fill_missing/#fill-with-grouped-dataframes","title":"Fill with Grouped DataFrames","text":"<p>When grouping by the <code>group</code> column, this fills missing values in column <code>a</code> within each group going from top to bottom within that group.</p> <pre><code>@chain df begin\n    @group_by(group)\n    @fill_missing(a, \"down\")\nend\n</code></pre> <p>GroupedDataFrame with 2 groups based on key: group</p> First Group (3 rows): group = 'A': ASCII/Unicode U+0041 (category Lu: Letter, uppercase) RowabcgroupInt64?Int64?Char?Char1missingmissingaA221bA355eA <p>&amp;vellip;</p> Last Group (2 rows): group = 'B': ASCII/Unicode U+0042 (category Lu: Letter, uppercase) RowabcgroupInt64?Int64?Char?Char13missingmissingB234dB <p></p> 
<p></p>"},{"location":"examples/generated/UserGuide/fill_missing/#replace_missing","title":"<code>replace_missing()</code>","text":"<p>The <code>replace_missing</code> function facilitates the replacement of <code>missing</code> values with a specified replacement.</p> <pre><code>@chain df begin\n    @mutate(b = replace_missing(b, 2))\nend\n</code></pre> 5\u00d74 DataFrame RowabcgroupInt64?Int64Char?Char1missing2aA221bA332missingB4missing4dB555eA <p></p> <p></p>"},{"location":"examples/generated/UserGuide/fill_missing/#missing_if","title":"<code>missing_if()</code>","text":"<p>The <code>missing_if</code> function is used to introduce <code>missing</code> values under specific conditions.</p> <pre><code>@chain df begin\n    @mutate(b = missing_if(b, 5))\nend\n</code></pre> 5\u00d74 DataFrame RowabcgroupInt64?Int64?Char?Char1missingmissingaA221bA33missingmissingB4missing4dB55missingeA <p>Both <code>missing_if</code> and <code>replace_missing</code> are not type-specific.</p> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/filter/","title":"@filter","text":"<p>Filtering is a mechanism to indicate which rows you want to keep in a dataset based on criteria. This is also referred to as subsetting. Filtering rows is normally a bit tricky in <code>DataFrames.jl</code> because comparison operators like <code>&gt;=</code> actually need to be vectorized as <code>.&gt;=</code>, which can catch new Julia users by surprise. <code>@filter()</code> mimics R's <code>tidyverse</code> behavior by auto-vectorizing the code and then only selecting those rows that evaluate to <code>true</code>. 
Similar to <code>dplyr</code>, rows that evaluate to <code>missing</code> are skipped.</p> <pre><code>using TidierData\nusing RDatasets\n\nmovies = dataset(\"ggplot2\", \"movies\");\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/filter/#lets-take-a-look-at-the-movies-whose-budget-was-more-than-average-we-will-select-only-the-first-5-rows-for-the-sake-of-brevity","title":"Let\u2019s take a look at the movies whose budget was more than average. We will select only the first 5 rows for the sake of brevity.","text":"<pre><code>@chain movies begin\n  @mutate(Budget = Budget / 1_000_000)\n  @filter(Budget &gt;= mean(skipmissing(Budget)))\n  @select(Title, Budget)\n  @slice(1:5)\nend\n</code></pre> 5\u00d72 DataFrame RowTitleBudgetStringFloat64?1'Til There Was You23.0210 Things I Hate About You16.03102 Dalmatians85.0413 Going On 3037.0513th Warrior, The85.0"},{"location":"examples/generated/UserGuide/filter/#lets-search-for-movies-that-have-at-least-200-votes-and-a-rating-of-greater-than-or-equal-to-8-there-are-3-ways-you-can-specify-an-and-condition-inside-of-tidierdatajl","title":"Let's search for movies that have at least 200 votes and a rating of greater than or equal to 8. There are 3 ways you can specify an \"and\" condition inside of <code>TidierData.jl</code>.","text":""},{"location":"examples/generated/UserGuide/filter/#the-first-option-is-to-use-the-short-circuiting-operator-as-shown-below-this-is-the-preferred-approach-because-the-second-expression-is-only-evaluated-per-element-if-the-first-one-is-true","title":"The first option is to use the short-circuiting <code>&amp;&amp;</code> operator as shown below. 
This is the preferred approach because the second expression is only evaluated (per element) if the first one is true.","text":"<pre><code>@chain movies begin\n  @filter(Votes &gt;= 200 &amp;&amp; Rating &gt;= 8)\n  @select(Title, Votes, Rating)\n  @slice(1:5)\nend\n</code></pre> 5\u00d73 DataFrame RowTitleVotesRatingStringInt32Float64112 Angry Men292788.7212 stulev2528.932001: A Space Odyssey649828.3421 Grams218578.0539 Steps, The79318.0"},{"location":"examples/generated/UserGuide/filter/#the-second-option-is-to-use-the-bitwise-operator-note-that-there-is-a-key-difference-in-syntax-between-and-because-the-operator-takes-a-higher-operator-precedence-than-you-have-to-wrap-the-comparison-expressions-inside-of-parentheses-to-ensure-that-the-overall-expression-is-evaluated-correctly","title":"The second option is to use the bitwise <code>&amp;</code> operator. Note that there is a key difference in syntax between <code>&amp;</code> and <code>&amp;&amp;</code>. Because the <code>&amp;</code> operator takes a higher operator precedence than <code>&gt;=</code>, you have to wrap the comparison expressions inside of parentheses to ensure that the overall expression is evaluated correctly.","text":"<pre><code>@chain movies begin\n  @filter((Votes &gt;= 200) &amp; (Rating &gt;= 8))\n  @select(Title, Votes, Rating)\n  @slice(1:5)\nend\n</code></pre> 5\u00d73 DataFrame RowTitleVotesRatingStringInt32Float64112 Angry Men292788.7212 stulev2528.932001: A Space Odyssey649828.3421 Grams218578.0539 Steps, The79318.0"},{"location":"examples/generated/UserGuide/filter/#the-third-option-for-and-conditions-only-is-to-separate-the-expressions-with-commas-this-is-similar-to-the-behavior-of-filter-in-tidyverse","title":"The third option for \"and\" conditions only is to separate the expressions with commas. 
This is similar to the behavior of <code>filter()</code> in <code>tidyverse</code>.","text":"<pre><code>@chain movies begin\n  @filter(Votes &gt;= 200, Rating &gt;= 8)\n  @select(Title, Votes, Rating)\n  @slice(1:5)\nend\n</code></pre> 5\u00d73 DataFrame RowTitleVotesRatingStringInt32Float64112 Angry Men292788.7212 stulev2528.932001: A Space Odyssey649828.3421 Grams218578.0539 Steps, The79318.0"},{"location":"examples/generated/UserGuide/filter/#now-lets-see-how-to-use-filter-with-in-heres-an-example-with-a-tuple","title":"Now let's see how to use <code>@filter()</code> with <code>in</code>. Here's an example with a tuple.","text":"<pre><code>@chain movies begin\n  @filter(Title in (\"101 Dalmatians\",\n                    \"102 Dalmatians\"))\n  @select(1:5)\nend\n</code></pre> 2\u00d75 DataFrame RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float641101 Dalmatians1996103missing5.52102 Dalmatians2000100850000004.7"},{"location":"examples/generated/UserGuide/filter/#we-can-also-use-filter-with-in-using-a-vector-denoted-by-a","title":"We can also use <code>@filter()</code> with <code>in</code> using a vector, denoted by a <code>[]</code>.","text":"<pre><code>@chain movies begin\n  @filter(Title in [\"101 Dalmatians\",\n                    \"102 Dalmatians\"])\n  @select(1:5)\nend\n</code></pre> 2\u00d75 DataFrame RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float641101 Dalmatians1996103missing5.52102 Dalmatians2000100850000004.7"},{"location":"examples/generated/UserGuide/filter/#finally-we-can-combine-filter-with-row_number-to-retrieve-the-first-5-rows-which-can-be-used-to-mimic-the-functionality-provided-by-slice","title":"Finally, we can combine <code>@filter</code> with <code>row_number()</code> to retrieve the first 5 rows, which can be used to mimic the functionality provided by <code>@slice</code>.","text":"<pre><code>@chain movies begin\n  @filter(row_number() &lt;= 5)\n  @select(1:5)\nend\n</code></pre> 5\u00d75 DataFrame 
RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float641$1971121missing6.42$1000 a Touchdown193971missing6.03$21 a Day Once a Month19417missing8.24$40,000199670missing8.25$50,000 Climax Show, The197571missing3.4 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/group_by/","title":"@group_by","text":"<p>Grouping and ungrouping behavior is one of the nicest parts of using R's tidyverse. Once a data frame is grouped, all verbs applied to that data frame respect the grouping, including but not limited to <code>@mutate()</code>, <code>@summarize()</code>, <code>@slice()</code> and <code>@filter</code>, which allows for really powerful abstractions. For example, with <code>@group_by()</code> followed by <code>@filter()</code>, you can limit the rows of a dataset to the maximum or minimum values for each group.</p> <p>Exactly as in R's <code>tidyverse</code>, once a data frame is grouped, it remains grouped until either <code>@summarize()</code> is called (which \"peels off\" one layer of grouping) or <code>@ungroup()</code> is called, which removes all layers of grouping. Also as in R's <code>tidyverse</code>, <code>@group_by()</code> sorts the groups in ascending order. Unlike in R, there is never any question about whether a data frame is currently grouped because GroupedDataFrames print out in a very different form than DataFrames, making them easy to tell apart.</p> <p>When using <code>@chain</code>, note that you can write either <code>@ungroup</code> or <code>@ungroup()</code>. 
Both are considered valid.</p> <pre><code>using TidierData\nusing RDatasets\n\nmovies = dataset(\"ggplot2\", \"movies\");\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/group_by/#combining-group_by-with-mutate","title":"Combining <code>@group_by()</code> with <code>@mutate()</code>","text":"<pre><code>@chain movies begin\n    @group_by(Year)\n    @mutate(Mean_Yearly_Rating = mean(skipmissing(Rating)))\n    @select(Year, Rating, Mean_Yearly_Rating)\n    @ungroup\n    @slice(1:5)\nend\n</code></pre> 5\u00d73 DataFrame RowYearRatingMean_Yearly_RatingInt32Float64Float64119716.45.66517219396.06.35041319418.26.34107419968.25.74712519753.45.62908"},{"location":"examples/generated/UserGuide/group_by/#combining-group_by-with-summarize","title":"Combining @group_by() with @summarize()","text":"<pre><code>@chain movies begin\n    @group_by(Year)\n    @summarize(Mean_Yearly_Rating = mean(skipmissing(Rating)),\n        Median_Yearly_Rating = median(skipmissing(Rating)))\n    @slice(1:5)\nend\n</code></pre> 5\u00d73 DataFrame RowYearMean_Yearly_RatingMedian_Yearly_RatingInt32Float64Float64119715.665175.8219396.350416.4319416.341076.4419965.747125.9519755.629085.7"},{"location":"examples/generated/UserGuide/group_by/#grouping-by-multiple-columns","title":"Grouping by multiple columns","text":"<pre><code>@chain movies begin\n  @group_by(Year, Comedy)\n  @summarize(Mean_Yearly_Rating = mean(skipmissing(Rating)),\n      Median_Yearly_Rating = median(skipmissing(Rating)))\n  @ungroup # Need to ungroup to peel off grouping by Year\n  @arrange(desc(Year), Comedy)\n  @slice(1:5)\nend\n</code></pre> 5\u00d74 DataFrame RowYearComedyMean_Yearly_RatingMedian_Yearly_RatingInt32Int32Float64Float641200506.627886.752200516.300816.13200406.765216.94200416.428986.65200306.404096.6"},{"location":"examples/generated/UserGuide/group_by/#combining-group_by-with-filter","title":"Combining @group_by() with @filter()","text":"<pre><code>@chain movies begin\n    
@group_by(Year)\n    @filter(Rating == minimum(Rating))\n    @ungroup\n    @select(Year, Rating)\n    @arrange(desc(Year))\n    @slice(1:10)\nend\n</code></pre> 10\u00d72 DataFrame RowYearRatingInt32Float64120051.8220041.0320041.0420041.0520041.0620041.0720041.0820041.0920031.01020031.0 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/interpolation/","title":"Interpolation","text":"<p>The <code>!!</code> (\"bang bang\") operator can be used to interpolate values of variables from the parent environment into your code. This operator is borrowed from the R <code>rlang</code> package. At some point, we may switch to using native Julia interpolation, but for a variety of reasons that introduce some complexity with native interpolation, we plan to continue to support <code>!!</code> interpolation.</p> <p>To interpolate multiple variables, the <code>rlang</code> R package uses the <code>!!!</code> \"triple bang\" operator. However, in <code>TidierData.jl</code>, the <code>!!</code> \"bang bang\" operator can be used to interpolate either single or multiple values as shown in the examples below.</p> <p>Note: You can only interpolate values from variables in the parent environment. If you would like to interpolate column names, you have two options: you can either use <code>across()</code> or you can use <code>@aside</code> with <code>@pull()</code> to create variables in the parent environment containing the values of those columns which can then be accessed using interpolation.</p> <p><code>myvar = :b</code> and <code>myvar = Cols(:a, :b)</code> both refer to <em>columns</em> with those names. On the other hand, <code>myvar = \"b\"</code>, <code>myvar = (\"a\", \"b\")</code> and <code>myvar = [\"a\", \"b\"]</code> will interpolate the <em>values</em>. 
If you intend to interpolate column names, the preferred way is to use <code>Cols()</code> as in the examples below.</p> <pre><code>using TidierData\n\ndf = DataFrame(a = string.(repeat('a':'e', inner = 2)),\n               b = [1,1,1,2,2,2,3,3,3,4],\n               c = 11:20)\n</code></pre> 10\u00d73 DataFrame RowabcStringInt64Int641a1112a1123b1134b2145c2156c2167d3178d3189e31910e420 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/interpolation/#select-the-column-because-myvar-contains-a-symbol","title":"Select the column (because <code>myvar</code> contains a symbol)","text":"<pre><code>myvar = :b\n\n@chain df begin\n  @select(!!myvar)\nend\n</code></pre> 10\u00d71 DataFrame RowbInt64112131425262738393104"},{"location":"examples/generated/UserGuide/interpolation/#select-multiple-variables","title":"Select multiple variables","text":"<p>You can also use a vector as in <code>[:a, :b]</code>, but <code>Cols()</code> is preferred because it lets you mix and match numbers.</p> <pre><code>myvars = Cols(:a, :b)\n\n@chain df begin\n  @select(!!myvars)\nend\n</code></pre> 10\u00d72 DataFrame RowabStringInt641a12a13b14b25c26c27d38d39e310e4 <p>This is the same as this...</p> <pre><code>myvars = Cols(:a, 2)\n\n@chain df begin\n  @select(!!myvars)\nend\n</code></pre> 10\u00d72 DataFrame RowabStringInt641a12a13b14b25c26c27d38d39e310e4 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/interpolation/#filter-rows-containing-the-value-of-myvar_string","title":"Filter rows containing the value of <code>myvar_string</code>","text":"<pre><code>myvar_string = \"b\"\n\n@chain df begin\n  @filter(a == !!myvar_string)\nend\n</code></pre> 2\u00d73 DataFrame RowabcStringInt64Int641b1132b214"},{"location":"examples/generated/UserGuide/interpolation/#filtering-rows-works-similarly-using-in","title":"Filtering rows works similarly using <code>in</code>.","text":"<p>Note that for <code>in</code> to work here, we have to wrap it in <code>[]</code> because otherwise, the string 
will be converted into a collection of characters, which are a different data type.</p> <pre><code>myvar_string = \"b\"\n\n@chain df begin\n  @filter(a in [!!myvar_string])\nend\n</code></pre> 2\u00d73 DataFrame RowabcStringInt64Int641b1132b214 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/interpolation/#you-can-also-use-this-for-a-vector-or-tuple-of-strings","title":"You can also use this for a vector (or tuple) of strings.","text":"<pre><code>myvars_string = [\"a\", \"b\"]\n\n@chain df begin\n  @filter(a in !!myvars_string)\nend\n</code></pre> 4\u00d73 DataFrame RowabcStringInt64Int641a1112a1123b1134b214"},{"location":"examples/generated/UserGuide/interpolation/#mutate-one-variable","title":"Mutate one variable","text":"<p>Remember: You cannot interpolate column names into <code>@mutate()</code> expressions. However, you can create a temporary variable containing the values of the column in question or you can use <code>@mutate()</code> with <code>across()</code>.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/interpolation/#option-1-create-a-temporary-variable-containing-the-values-of-the-column","title":"Option 1: Create a temporary variable containing the values of the column.","text":"<pre><code>myvar = :b\n\n@chain df begin\n  @aside(myvar_values = @pull(_, !!myvar))\n  @mutate(d = !!myvar_values + 1)\nend\n</code></pre> 10\u00d74 DataFrame RowabcdStringInt64Int64Int641a11122a11223b11324b21435c21536c21637d31748d31849e319410e4205"},{"location":"examples/generated/UserGuide/interpolation/#option-2-use-mutate-with-across","title":"Option 2: Use <code>@mutate()</code> with <code>across()</code>","text":"<p>Note: when using <code>across()</code>, anonymous functions are not vectorized. 
This is intentional to allow users to specify their function exactly as desired.</p> <pre><code>@chain df begin\n  @mutate(across(!!myvar, x -&gt; x .+ 1))\n  @rename(d = b_function)\nend\n</code></pre> 10\u00d74 DataFrame RowabcdStringInt64Int64Int641a11122a11223b11324b21435c21536c21637d31748d31849e319410e4205 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/interpolation/#summarize-across-one-variable","title":"Summarize across one variable","text":"<pre><code>myvar = :b\n\n@chain df begin\n  @summarize(across(!!myvar, mean))\nend\n</code></pre> 1\u00d71 DataFrame Rowb_meanFloat6412.2"},{"location":"examples/generated/UserGuide/interpolation/#summarize-across-multiple-variables","title":"Summarize across multiple variables","text":"<pre><code>myvars = Cols(:b, :c)\n\n@chain df begin\n  @summarize(across(!!myvars, (mean, minimum, maximum)))\nend\n</code></pre> 1\u00d76 DataFrame Rowb_meanc_meanb_minimumc_minimumb_maximumc_maximumFloat64Float64Int64Int64Int64Int6412.215.5111420"},{"location":"examples/generated/UserGuide/interpolation/#group-by-one-interpolated-variable","title":"Group by one interpolated variable","text":"<pre><code>myvar = :a\n\n@chain df begin\n  @group_by(!!myvar)\n  @summarize(c = mean(c))\nend\n</code></pre> 5\u00d72 DataFrame RowacStringFloat641a11.52b13.53c15.54d17.55e19.5"},{"location":"examples/generated/UserGuide/interpolation/#group-by-multiple-interpolated-variables","title":"Group by multiple interpolated variables","text":"<p>Once again, you can mix and match column selectors within <code>Cols()</code></p> <pre><code>myvars = Cols(:a, 2)\n\n@chain df begin\n  @group_by(!!myvars)\n  @summarize(c = mean(c))\nend\n</code></pre> <p>GroupedDataFrame with 5 groups based on key: a</p> First Group (1 row): a = \"a\" RowabcStringInt64Float641a111.5 <p>&amp;vellip;</p> Last Group (2 rows): a = \"e\" RowabcStringInt64Float641e319.02e420.0 <p>Notice that <code>df</code> remains grouped by <code>a</code> because the 
<code>@summarize()</code> peeled off one layer of grouping.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/interpolation/#global-constants","title":"Global constants","text":"<p>You can also use <code>!!</code> interpolation to access global variables like <code>pi</code>.</p> <pre><code>df = DataFrame(radius = 1:5)\n\n@chain df begin\n  @mutate(area = !!pi * radius^2)\nend\n</code></pre> 5\u00d72 DataFrame RowradiusareaInt64Float64113.141592212.56643328.27434450.26555578.5398 <p>As of v0.14.0, global constants defined within the Base or Core modules (like <code>missing</code>, <code>pi</code>, and <code>Real</code>) can be directly referenced without any <code>!!</code>.</p> <pre><code>@chain df begin\n  @mutate(area = pi * radius^2)\nend\n</code></pre> 5\u00d72 DataFrame RowradiusareaInt64Float64113.141592212.56643328.27434450.26555578.5398 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/interpolation/#alternative-interpolation-syntax","title":"Alternative interpolation syntax","text":"<p>Since we know that <code>pi</code> is defined in the <code>Main</code> module, we can also access it using <code>Main.pi</code>.</p> <pre><code>@chain df begin\n  @mutate(area = Main.pi * radius^2)\nend\n</code></pre> 5\u00d72 DataFrame RowradiusareaInt64Float64113.141592212.56643328.27434450.26555578.5398 <p>The key lesson with interpolation is that any bare unquoted variable is assumed to refer to a column name in the DataFrame. 
If you are referring to any variable outside of the DataFrame, you need to either use <code>!!variable</code> or <code>[Module_name_here].variable</code> syntax to refer to this variable.</p> <p>Note: You can use <code>!!</code> interpolation anywhere, including inside of functions and loops.</p> <pre><code>df = DataFrame(a = string.(repeat('a':'e', inner = 2)),\n               b = [1,1,1,2,2,2,3,3,3,4],\n               c = 11:20)\n\nfor col in [:b, :c]\n  @chain df begin\n    @summarize(across(!!col, mean))\n    println\n  end\nend\n</code></pre> <pre><code>1\u00d71 DataFrame\n Row \u2502 b_mean\n     \u2502 Float64\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502     2.2\n1\u00d71 DataFrame\n Row \u2502 c_mean\n     \u2502 Float64\n\u2500\u2500\u2500\u2500\u2500\u253c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n   1 \u2502    15.5\n</code></pre> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/joins/","title":"Joins","text":"<p>One really nice thing about the R <code>tidyverse</code> implementation of joins is that they support natural joins. If you don't specify which columns to join on, these column names are inferred from the overlapping columns. While you can override this behavior by specifying which columns to join on, it's convenient that this is not strictly required. We have adopted a similar approach to joins in <code>TidierData.jl</code>.</p> <p>Here, we will only show examples of natural joins. For additional ways to join, take a look at the examples in the Reference.</p> <pre><code>using TidierData\n</code></pre> <p>Let's generate two data frames to join on. 
Here's the first one.</p> <pre><code>df1 = DataFrame(a = [\"a\", \"b\"], b = 1:2);\n</code></pre> <p>And here's the second one.</p> <pre><code>df2 = DataFrame(a = [\"a\", \"c\"], c = 3:4);\n</code></pre> <p>All the joins work similarly to R's <code>tidyverse</code> although the new <code>join_by</code> syntax for non-equijoins is not (yet) supported.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/joins/#left-join","title":"Left join","text":"<pre><code>@left_join(df1, df2)\n</code></pre> 2\u00d73 DataFrame RowabcStringInt64Int64?1a132b2missing"},{"location":"examples/generated/UserGuide/joins/#right-join","title":"Right join","text":"<pre><code>@right_join(df1, df2)\n</code></pre> 2\u00d73 DataFrame RowabcStringInt64?Int641a132cmissing4"},{"location":"examples/generated/UserGuide/joins/#inner-join","title":"Inner join","text":"<pre><code>@inner_join(df1, df2)\n</code></pre> 1\u00d73 DataFrame RowabcStringInt64Int641a13"},{"location":"examples/generated/UserGuide/joins/#full-join","title":"Full join","text":"<pre><code>@full_join(df1, df2)\n</code></pre> 3\u00d73 DataFrame RowabcStringInt64?Int64?1a132b2missing3cmissing4 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/mutate_transmute/","title":"@mutate","text":"<p>The primary purpose of <code>@mutate()</code> is to either create a new column or to update an existing column without changing the number of rows in the dataset. If you only plan to select the mutated columns, then you can use <code>@transmute()</code> instead of <code>@mutate()</code>. However, in <code>TidierData.jl</code>, <code>@select()</code> can also be used to create and select new columns (unlike R's <code>tidyverse</code>), which means that <code>@transmute()</code> is a redundant function in that it has the same functionality as <code>@select()</code>. 
<code>@transmute</code> is included in <code>TidierData.jl</code> for convenience but is not strictly required.</p> <pre><code>using TidierData\nusing RDatasets\n\nmovies = dataset(\"ggplot2\", \"movies\");\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/mutate_transmute/#using-mutate-to-add-a-new-column","title":"Using <code>@mutate()</code> to add a new column","text":"<p>Let's create a new column that contains the budget for each movie expressed in millions of dollars, and then select a handful of columns and rows for the sake of brevity. Notice that the underscores in <code>1_000_000</code> are strictly optional and included only for the sake of readability. Underscores within numbers are ignored by Julia, such that <code>1_000_000</code> is read by Julia exactly the same as <code>1000000</code>.</p> <pre><code>@chain movies begin\n  @filter(!ismissing(Budget))\n  @mutate(Budget_Millions = Budget/1_000_000)\n  @select(Title, Budget, Budget_Millions)\n  @slice(1:5)\nend\n</code></pre> 5\u00d73 DataFrame RowTitleBudgetBudget_MillionsStringInt32?Float641'G' Men4500000.452'Manos' the Hands of Fate190000.0193'Til There Was You2300000023.04.com for Murder50000005.0510 Things I Hate About You1600000016.0 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/mutate_transmute/#using-mutate-to-update-an-existing-column","title":"Using <code>@mutate()</code> to update an existing column","text":"<p>Here we will repeat the same exercise, except that we will overwrite the existing <code>Budget</code> column.</p> <pre><code>@chain movies begin\n    @filter(!ismissing(Budget))\n    @mutate(Budget = Budget/1_000_000)\n    @select(Title, Budget)\n    @slice(1:5)\nend\n</code></pre> 5\u00d72 DataFrame RowTitleBudgetStringFloat641'G' Men0.452'Manos' the Hands of Fate0.0193'Til There Was You23.04.com for Murder5.0510 Things I Hate About You16.0 <p></p> 
<p></p>"},{"location":"examples/generated/UserGuide/mutate_transmute/#using-mutate-with-in","title":"Using <code>@mutate()</code> with <code>in</code>","text":"<p>Here's an example of using <code>@mutate</code> with <code>in</code>.</p> <pre><code>@chain movies begin\n  @filter(!ismissing(Budget))\n  @mutate(Nineties = Year in 1990:1999)\n  @select(Title, Year, Nineties)\n  @slice(1:5)\nend\n</code></pre> 5\u00d73 DataFrame RowTitleYearNinetiesStringInt32Bool1'G' Men1935false2'Manos' the Hands of Fate1966false3'Til There Was You1997true4.com for Murder2002false510 Things I Hate About You1999true <p></p> <p></p>"},{"location":"examples/generated/UserGuide/mutate_transmute/#using-mutate-with-n-and-row_number","title":"Using <code>@mutate</code> with <code>n()</code> and <code>row_number()</code>","text":"<p>Here's an example of using <code>@mutate</code> with both <code>n()</code> and <code>row_number()</code>. Within the context of <code>mutate()</code>, <code>n()</code> and <code>row_number()</code> are turned into temporary columns, which means that they can be used inside of expressions.</p> <pre><code>@chain movies begin\n  @mutate(Row_Num = row_number(),\n          Total_Rows = n())\n  @filter(!ismissing(Budget))\n  @select(Title, Year, Row_Num, Total_Rows)\n  @slice(1:5)\nend\n</code></pre> 5\u00d74 DataFrame RowTitleYearRow_NumTotal_RowsStringInt32Int64Int641'G' Men193522587882'Manos' the Hands of Fate196635587883'Til There Was You199748587884.com for Murder20029158788510 Things I Hate About You199911258788 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/mutate_transmute/#using-transmute-to-update-and-select-columns","title":"Using <code>@transmute</code> to update and select columns.","text":"<p>If we knew we wanted to select only the <code>Title</code> and <code>Budget</code> columns, we could have also used <code>@transmute()</code>, which (again) is just an alias for <code>@select()</code>.</p> <pre><code>@chain movies begin\n    
@filter(!ismissing(Budget))\n    @transmute(Title = Title, Budget = Budget/1_000_000)\n    @slice(1:5)\nend\n</code></pre> 5\u00d72 DataFrame RowTitleBudgetStringFloat641'G' Men0.452'Manos' the Hands of Fate0.0193'Til There Was You23.04.com for Murder5.0510 Things I Hate About You16.0 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/nesting/","title":"Nesting","text":""},{"location":"examples/generated/UserGuide/nesting/#nest","title":"<code>@nest</code>","text":"<p>Nest columns into a dataframe nested into a new column</p> <pre><code>using TidierData\n\ndf4 = DataFrame(x = [\"a\", \"b\", \"a\", \"b\", \"C\", \"a\"], y = 1:6, yz = 13:18, a = 7:12, ab = 12:-1:7)\n\nnested_df = @nest(df4, n2 = starts_with(\"a\"), n3 = y:yz)\n</code></pre> 3\u00d73 DataFrame Rowxn3n2StringDataFrameDataFrame1a3\u00d72 DataFrame3\u00d72 DataFrame2b2\u00d72 DataFrame2\u00d72 DataFrame3C1\u00d72 DataFrame1\u00d72 DataFrame <p>To return to the original dataframe, you can unnest wider and then longer.</p> <pre><code>@chain nested_df begin\n    @unnest_wider(n3:n2)\n    @unnest_longer(y:ab)\nend\n</code></pre> 6\u00d75 DataFrame RowxyyzaabStringInt64Int64Int64Int641a1137122a3159103a6181274b2148115b4161096C517118 <p>Or you can unnest longer and then wider.</p> <pre><code>@chain nested_df begin\n  @unnest_longer(n3:n2)\n  @unnest_wider(n3:n2)\nend\n</code></pre> 6\u00d75 DataFrame RowxyzyaabStringInt64Int64Int64Int641a1317122a1539103a1861274b1428115b1641096C175118 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/nesting/#unnest_longer","title":"<code>@unnest_longer</code>","text":"<p><code>@unnest_longer</code> adds one row per entry of an array or dataframe, lengthening dataframe by flattening the column or columns.</p> <pre><code>df = DataFrame(x = 1:4, y = [[], [1, 2, 3], [4, 5], Int[]]);\n\n@chain df begin\n    @unnest_longer(y)\nend\n</code></pre> 5\u00d72 DataFrame RowxyInt64Any121222323434535 <p>If there are rows with empty arrays, 
<code>keep_empty</code> will prevent these rows from being dropped. <code>indices_include</code> will add a new column for each flattened column that logs the position of each entry in the array.</p> <pre><code>@chain df begin\n    @unnest_longer(y, keep_empty = true, indices_include = true)\nend\n</code></pre> 7\u00d73 DataFrame Rowxyy_idInt64AnyInt6411missing12211322242335341635274missing1 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/nesting/#unnest_wider","title":"<code>@unnest_wider</code>","text":"<p><code>@unnest_wider</code> will widen a column or column(s) of Dicts, Arrays, Tuples or Dataframes into multiple columns.</p> <pre><code>df2 = DataFrame(\n           name = [\"Zaki\", \"Farida\"],\n           attributes = [\n               Dict(\"age\" =&gt; 25, \"city\" =&gt; \"New York\"),\n               Dict(\"age\" =&gt; 30, \"city\" =&gt; \"Los Angeles\")]);\n\n@chain df2 begin\n    @unnest_wider(attributes)\nend\n</code></pre> 2\u00d73 DataFrame RownamecityageStringStringInt641ZakiNew York252FaridaLos Angeles30 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/nesting/#unnesting-nested-dataframes-with-different-lengths-which-contains-arrays","title":"Unnesting nested Dataframes with different lengths which contains arrays","text":"<pre><code>df3 = DataFrame(\n    x = 1:3,\n    y = Any[\n        DataFrame(),\n        DataFrame(a = [\"A\"], b = [14]),\n        DataFrame(a = [\"A\", \"B\", \"C\"], b = [13, 12, 11], c = [4, 4, 4])\n    ]\n)\n</code></pre> 3\u00d72 DataFrame RowxyInt64Any110\u00d70 DataFrame221\u00d72 DataFrame333\u00d73 DataFrame <p><code>df3</code> contains dataframes with different widths that also contain arrays. 
Chaining together <code>@unnest_wider</code> and <code>@unnest_longer</code> will unnest the columns to tuples first and then they will be fully unnested after.</p> <pre><code>@chain df3 begin\n    @unnest_wider(y)\n    @unnest_longer(a:c, keep_empty = true)\nend\n</code></pre> 5\u00d74 DataFrame RowxabcInt64AnyInt64?Int64?11missingmissingmissing22A14missing33A13443B12453C114 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/piping/","title":"Piping","text":"<p>The easiest way to use TidierData.jl for complex data transformation operations is to connect them together using pipes. Julia comes with the built-in <code>|&gt;</code> pipe operator, but TidierData.jl also includes and re-exports the <code>@chain</code> macro from the Chain.jl package. On this page, we will show you how to use both approaches.</p> <p>First, let's load a dataset.</p> <pre><code>using TidierData\nusing RDatasets\n\nmovies = dataset(\"ggplot2\", \"movies\");\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/piping/#julias-built-in-pipe","title":"Julia's built-in <code>|&gt;</code> pipe","text":"<p>If we wanted to figure out the number of rows in the <code>movies</code> data frame, one way to do this is to apply the <code>nrow()</code> function to movies. The most straightforward way is to write it like this:</p> <pre><code>nrow(movies)\n</code></pre> <pre><code>58788\n</code></pre> <p>Another perfectly valid way to write this expression is by piping <code>movies</code> into <code>nrow</code> using the <code>|&gt;</code> pipe operator.</p> <pre><code>movies |&gt; nrow\n</code></pre> <pre><code>58788\n</code></pre> <p>Why might we want to do this? Well, whereas the first expression would naturally be read as \"Calculate the number of rows of movies,\" the second expression reads as \"Start with movies, then calculate the number of rows.\" For a simple expression, these are easy enough to reason about. 
However, as we start to pipe more and more functions in a single expression, the piped version becomes much easier to reason about.</p> <p>One quick note about Julia's built-in pipe: writing <code>movies |&gt; nrow()</code> would not be considered valid. This is because Julia's built-in pipe always expects a function and not a function call. Writing <code>nrow</code> by itself is naming the function, whereas writing <code>nrow()</code> is calling the function. This quickly becomes an issue once we want to supply arguments to the function we are calling.</p> <p>Consider another approach to calculating the number of rows:</p> <pre><code>size(movies, 1)\n</code></pre> <pre><code>58788\n</code></pre> <p>In this case, the <code>size()</code> function returns a tuple of <code>(rows, columns)</code>, and if you supply an optional second argument specifying the index of the tuple, it returns only that dimension. In this case, we called <code>size()</code> with a second argument of <code>1</code>, indicating that we only wanted the function to return the number of rows.</p> <p>How would we write this using Julia's built-in pipe?</p> <pre><code>movies |&gt;\n  x -&gt; size(x, 1)\n</code></pre> <pre><code>58788\n</code></pre> <p>You might have wanted to write <code>movies |&gt; size(1)</code>, but because <code>size(1)</code> would represent a function call, we have to wrap the function call within an anonymous function, which is easily accomplished using the <code>x -&gt; func(x, arg1, arg2)</code> syntax, where <code>func()</code> refers to any function and <code>arg1</code> and <code>arg2</code> refer to any additional arguments that are needed.</p> <p>Another way we could have accomplished this is to calculate <code>size</code>, which returns a tuple of <code>(rows, columns)</code>, and then to use an anonymous function to grab the first value. Since we are calculating <code>size</code> without any arguments, we can simply write <code>size</code> within the pipe. 
However, to grab the first value using the <code>x[1]</code> syntax, we have to define an anonymous function. Putting it all together, we get this approach to piping:</p> <pre><code>movies |&gt;\n  size |&gt;\n  x -&gt; x[1]\n</code></pre> <pre><code>58788\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/piping/#using-the-chain-macro","title":"Using the <code>@chain</code> macro","text":"<p>The <code>@chain</code> macro comes from the Chain.jl package and is included and re-exported by TidierData.jl. Let's do this same series of exercises using <code>@chain</code>.</p> <p>Let's calculate the number of rows using <code>@chain</code>.</p> <pre><code>@chain movies nrow\n</code></pre> <pre><code>58788\n</code></pre> <p>One of the reasons we prefer the use of <code>@chain</code> in TidierData.jl is that it is so concise. There is no need for any operator. Another interesting thing is that <code>@chain</code> doesn't care whether you use a function name or a function call. Both approaches work. As a result, writing <code>nrow()</code> instead of <code>nrow</code> is equally valid using <code>@chain</code>.</p> <pre><code>@chain movies nrow()\n</code></pre> <pre><code>58788\n</code></pre> <p>There are two options for writing out multi-row chains. The preferred approach is as follows, where the starting item is listed, followed by a <code>begin-end</code> block.</p> <pre><code>@chain movies begin\n  nrow\nend\n</code></pre> <pre><code>58788\n</code></pre> <p><code>@chain</code> also comes with a built-in placeholder, which is <code>_</code>. To calculate the <code>size</code> and extract the first value, we can use this approach:</p> <pre><code>@chain movies begin\n  size\n  _[1]\nend\n</code></pre> <pre><code>58788\n</code></pre> <p>You don't have to list the data frame before the <code>begin-end</code> block. 
This is equally valid:</p> <pre><code>@chain begin\n  movies\n  size\n  _[1]\nend\n</code></pre> <pre><code>58788\n</code></pre> <p>The only time this approach is preferred is when instead of simply naming the data frame, you are using a function to read in the data frame from a file or database. Because this function call may include the path of the file, which could be quite long, it's easier to write this on its own line within the <code>begin-end</code> block.</p> <p>While the documentation for TidierData.jl follows the convention of placing piped functions on separate lines of code using <code>begin-end</code> blocks, this is purely convention for ease of readability. You could rewrite the code above without the <code>begin-end</code> block as follows:</p> <pre><code>@chain movies size _[1]\n</code></pre> <pre><code>58788\n</code></pre> <p>For simple transformations, this approach is both concise and readable.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/piping/#using-chain-with-tidierdatajl","title":"Using <code>@chain</code> with TidierData.jl","text":"<p>Returning to our convention of multi-line pipes, let's grab the first five movies that were released since 2000 and had a rating of at least 9 out of 10. Here is one way that we could write this:</p> <pre><code>@chain movies begin\n    @filter(Year &gt;= 2000 &amp;&amp; Rating &gt;= 9)\n    @slice(1:5)\n    @select(1:5)\nend\n</code></pre> 5\u00d75 DataFrame RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float64113 Lakes2004135missing9.022wks, 1yr2002104missing9.43500 Years Later2005106missing9.349020051440009.15Able's House Is Green, The20031368009.4 <p>Note: we generally prefer using <code>&amp;&amp;</code> in Julia because it is a \"short-cut\" operator. 
If the first condition evaluates to <code>false</code>, then the second condition is not even evaluated, which makes it faster (because it takes a short-cut).</p> <p>In the case of <code>@filter</code>, multiple conditions can be written out as separate expressions.</p> <pre><code>@chain movies begin\n  @filter(Year &gt;= 2000, Rating &gt;= 9)\n  @slice(1:5)\n  @select(1:5)\nend\n</code></pre> 5\u00d75 DataFrame RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float64113 Lakes2004135missing9.022wks, 1yr2002104missing9.43500 Years Later2005106missing9.349020051440009.15Able's House Is Green, The20031368009.4 <p>Another way to write this expression is to take advantage of the fact that Julia macros can be called without parentheses. In this case, we will add back the <code>&amp;&amp;</code> for the sake of readability.</p> <pre><code>@chain movies begin\n  @filter Year &gt;= 2000 &amp;&amp; Rating &gt;= 9\n  @slice 1:5\n  @select 1:5\nend\n</code></pre> 5\u00d75 DataFrame RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float64113 Lakes2004135missing9.022wks, 1yr2002104missing9.43500 Years Later2005106missing9.349020051440009.15Able's House Is Green, The20031368009.4 <p>Lastly, TidierData.jl also supports multi-line expressions within each of the macros that accept multiple expressions. So you could also write this as follows:</p> <pre><code>@chain movies begin\n  @filter begin\n    Year &gt;= 2000\n    Rating &gt;= 9\n  end\n  @slice 1:5\n  @select 1:5\nend\n</code></pre> 5\u00d75 DataFrame RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float64113 Lakes2004135missing9.022wks, 1yr2002104missing9.43500 Years Later2005106missing9.349020051440009.15Able's House Is Green, The20031368009.4 <p>What's nice about this approach is that if you want to remove some criteria, you can easily comment out the relevant parts. 
For example, if you're willing to consider older movies, just comment out the <code>Year &gt;= 2000</code>.</p> <pre><code>@chain movies begin\n  @filter begin\n    # Year &gt;= 2000\n    Rating &gt;= 9\n  end\n  @slice 1:5\n  @select 1:5\nend\n</code></pre> 5\u00d75 DataFrame RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float641+1 -119877missing9.42100 Years at the Movies19949missing9.2313 Lakes2004135missing9.042wks, 1yr2002104missing9.45500 Years Later2005106missing9.3 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/piping/#which-approach-to-use","title":"Which approach to use?","text":"<p>The purpose of this page was to show you that both Julia's native pipes and the <code>@chain</code> macro are perfectly valid and capable. We prefer the use of <code>@chain</code> because it is a bit more flexible and concise, with a syntax that makes it easy to comment out individual operations. We have adopted a similar <code>begin-end</code> block functionality within TidierData.jl itself, so that you can spread arguments out over multiple lines if you prefer. In the end, the choice is up to you!</p> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/pivots/","title":"Pivoting","text":"<p>Pivoting a dataset is needed when information sitting inside of cell values needs to be converted into column names (to make the dataset wider) or vice versa (to make the dataset longer). Either action can be referred to as \"reshaping\" a dataset, and various frameworks refer to the actions as unstacking/stacking or spreading/gathering. 
In R's tidyverse, these actions are referred to as pivoting, where the two accompanying actions are <code>@pivot_wider()</code> and <code>@pivot_longer()</code>.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/pivots/#pivot_wider","title":"<code>@pivot_wider()</code>","text":"<p>Pivoting a dataset to make it wider is needed when information sitting inside of cell values needs to be converted into column names. The wider format is sometimes required for the purposes of calculating correlations or running statistical tests.</p> <p>Let's start with a \"long\" DataFrame and make it wide. Why would we want to make it wide? Well, if we wanted to calculate a correlation between <code>A</code> and <code>B</code> for rows with corresponding <code>id</code> numbers, we may need to first make sure that <code>A</code> and <code>B</code> are represented in adjacent columns.</p> <pre><code>using TidierData\n\ndf_long = DataFrame(id = [1, 1, 2, 2],\n                    variable = [\"A\", \"B\", \"A\", \"B\"],\n                    value = [1, 2, 3, 4])\n</code></pre> 4\u00d73 DataFrame RowidvariablevalueInt64StringInt6411A121B232A342B4 <p>To make this dataset wider, we can do the following:</p> <pre><code>@pivot_wider(df_long, names_from = variable, values_from = value)\n</code></pre> 2\u00d73 DataFrame RowidABInt64Int64?Int64?11122234 <p>In <code>@pivot_wider()</code>, both the <code>names_from</code> and <code>values_from</code> arguments are required. 
<code>@pivot_wider()</code> also supports string values for the <code>names_from</code> and <code>values_from</code> arguments.</p> <pre><code>@pivot_wider(df_long, names_from = \"variable\", values_from = \"value\")\n</code></pre> 2\u00d73 DataFrame RowidABInt64Int64?Int64?11122234 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/pivots/#pivot_longer","title":"<code>@pivot_longer()</code>","text":"<p>For calculating summary statistics (e.g., mean) by groups, or for plotting purposes, DataFrames often need to be converted to their longer form. For this, we can use <code>@pivot_longer</code>. First, let's start with a \"wide\" DataFrame.</p> <pre><code>df_wide = DataFrame(id = [1, 2], A = [1, 3], B = [2, 4])\n</code></pre> 2\u00d73 DataFrame RowidABInt64Int64Int6411122234 <p>Now, let's transform this wide dataset into the longer form. Unlike <code>@pivot_wider()</code>, where providing the <code>names_from</code> and <code>values_from</code> arguments is required, the only item that's required in <code>@pivot_longer()</code> is a set of columns to pivot. The <code>names_to</code> and <code>values_to</code> arguments are optional, and if not provided, they will default to \"variable\" and \"value\", respectively.</p> <p>We can recreate the original long dataset by doing the following. Multiple columns must be provided using selection syntax or a selection helper. 
Tuples containing multiple columns are not yet supported.</p> <pre><code>@pivot_longer(df_wide, A:B)\n</code></pre> 4\u00d73 DataFrame RowidvariablevalueInt64StringInt6411A122A331B242B4 <p>Here is another way of providing the same result using a different type of selection syntax.</p> <pre><code>@pivot_longer(df_wide, -id)\n</code></pre> 4\u00d73 DataFrame RowidvariablevalueInt64StringInt6411A122A331B242B4 <p>The selected columns can also be included as an array</p> <pre><code>@pivot_longer(df_wide, [id, B])\n</code></pre> 4\u00d73 DataFrame RowAvariablevalueInt64StringInt6411id123id231B243B4 <p>or excluded</p> <pre><code>@pivot_longer(df_wide, -[id, B])\n</code></pre> 2\u00d74 DataFrame RowidBvariablevalueInt64Int64StringInt64112A1224A3 <p>If all columns should be included, they can be specified by either <code>everything()</code>, <code>:</code>, or by leaving the argument blank</p> <pre><code>@pivot_longer(df_wide, everything())\n</code></pre> 6\u00d72 DataFrame RowvariablevalueStringInt641id12id23A14A35B26B4 <p>In this example, we set the <code>names_to</code> and <code>values_to</code> arguments. Either argument can be left out and will revert to the default value. 
The <code>names_to</code> and <code>values_to</code> arguments can be provided as strings or as bare unquoted variable names.</p> <p>Here is an example with <code>names_to</code> and <code>values_to</code> containing strings:</p> <pre><code>@pivot_longer(df_wide, A:B, names_to = \"letter\", values_to = \"number\")\n</code></pre> 4\u00d73 DataFrame RowidletternumberInt64StringInt6411A122A331B242B4 <p>And here is an example with <code>names_to</code> and <code>values_to</code> containing bare unquoted variables:</p> <pre><code>@pivot_longer(df_wide, A:B, names_to = letter, values_to = number)\n</code></pre> 4\u00d73 DataFrame RowidletternumberInt64StringInt6411A122A331B242B4 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/rename/","title":"@rename","text":"<p>Renaming columns follows the same syntax as in R's <code>tidyverse</code>, where the \"tidy expression\" is <code>new_name = old_name</code>. While the main function to rename columns is <code>@rename()</code>, you can also use <code>@select()</code> if you additionally plan to select only the renamed columns.</p> <pre><code>using TidierData\nusing RDatasets\n\nmovies = dataset(\"ggplot2\", \"movies\");\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/rename/#rename-using-rename","title":"Rename using <code>@rename()</code>","text":"<p>If you only want to rename the columns without selecting them, then this is where <code>@rename()</code> comes in handy. 
For the sake of brevity, we are selecting the first 5 columns and rows after performing the <code>@rename()</code>.</p> <pre><code>@chain movies begin\n    @rename(title = Title, Minutes = Length)\n    @select(1:5)\n    @slice(1:5)\nend\n</code></pre> 5\u00d75 DataFrame RowtitleYearMinutesBudgetRatingStringInt32Int32Int32?Float641$1971121missing6.42$1000 a Touchdown193971missing6.03$21 a Day Once a Month19417missing8.24$40,000199670missing8.25$50,000 Climax Show, The197571missing3.4 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/rename/#rename-using-select","title":"Rename using <code>@select()</code>","text":"<p>If you plan to only select those columns that you would like to rename, then you can use <code>@select()</code> to both rename and select the columns of interest.</p> <pre><code>@chain movies begin\n  @select(title = Title, Minutes = Length)\n  @slice(1:5)\nend\n</code></pre> 5\u00d72 DataFrame RowtitleMinutesStringInt321$1212$1000 a Touchdown713$21 a Day Once a Month74$40,000705$50,000 Climax Show, The71 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/select/","title":"@select","text":"<p>The <code>@select()</code> macro in <code>TidierData.jl</code> supports many of the nuances of the R <code>tidyverse</code> implementation, including indexing columns individually by name or number, indexing by ranges of columns using the <code>:</code> operator between column names or numbers, and negative selection using negated column names or numbers. 
Selection helpers such as <code>starts_with()</code>, <code>ends_with()</code>, <code>matches()</code>, and <code>contains()</code> are also supported.</p> <pre><code>using TidierData\nusing RDatasets\n\nmovies = dataset(\"ggplot2\", \"movies\");\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/select/#select-the-first-5-columns-individually-by-name","title":"Select the first 5 columns individually by name","text":"<pre><code>@chain movies begin\n    @select(Title, Year, Length, Budget, Rating)\n    @slice(1:5)\nend\n</code></pre> 5\u00d75 DataFrame RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float641$1971121missing6.42$1000 a Touchdown193971missing6.03$21 a Day Once a Month19417missing8.24$40,000199670missing8.25$50,000 Climax Show, The197571missing3.4"},{"location":"examples/generated/UserGuide/select/#select-the-first-5-columns-individually-by-number","title":"Select the first 5 columns individually by number","text":"<pre><code>@chain movies begin\n    @select(1, 2, 3, 4, 5)\n    @slice(1:5)\nend\n</code></pre> 5\u00d75 DataFrame RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float641$1971121missing6.42$1000 a Touchdown193971missing6.03$21 a Day Once a Month19417missing8.24$40,000199670missing8.25$50,000 Climax Show, The197571missing3.4"},{"location":"examples/generated/UserGuide/select/#select-the-first-5-columns-by-name-using-a-range","title":"Select the first 5 columns by name (using a range)","text":"<pre><code>@chain movies begin\n    @select(Title:Rating)\n    @slice(1:5)\nend\n</code></pre> 5\u00d75 DataFrame RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float641$1971121missing6.42$1000 a Touchdown193971missing6.03$21 a Day Once a Month19417missing8.24$40,000199670missing8.25$50,000 Climax Show, The197571missing3.4"},{"location":"examples/generated/UserGuide/select/#select-the-first-5-columns-by-number-using-a-range","title":"Select the first 5 columns by number (using a range)","text":"<pre><code>@chain 
movies begin\n    @select(1:5)\n    @slice(1:5)\nend\n</code></pre> 5\u00d75 DataFrame RowTitleYearLengthBudgetRatingStringInt32Int32Int32?Float641$1971121missing6.42$1000 a Touchdown193971missing6.03$21 a Day Once a Month19417missing8.24$40,000199670missing8.25$50,000 Climax Show, The197571missing3.4"},{"location":"examples/generated/UserGuide/select/#select-all-but-the-first-5-columns-by-name","title":"Select all but the first 5 columns by name","text":"<p>Here we will limit the results to the first 5 remaining columns and the first 5 rows for the sake of brevity.</p> <pre><code>@chain movies begin\n    @select(-(Title:Rating))\n    @select(1:5)\n    @slice(1:5)\nend\n</code></pre> 5\u00d75 DataFrame RowVotesR1R2R3R4Int32Float64Float64Float64Float6413484.54.54.54.52200.014.54.524.5350.00.00.00.04614.50.00.00.051724.54.50.014.5 <p>We can also use <code>!</code> for inverted selection instead of <code>-</code>.</p> <pre><code>@chain movies begin\n  @select(!(Title:Rating))\n  @select(1:5)\n  @slice(1:5)\nend\n</code></pre> 5\u00d75 DataFrame RowVotesR1R2R3R4Int32Float64Float64Float64Float6413484.54.54.54.52200.014.54.524.5350.00.00.00.04614.50.00.00.051724.54.50.014.5 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/select/#select-all-but-the-first-5-columns-by-number","title":"Select all but the first 5 columns by number","text":"<p>We will again limit the results to the first 5 remaining columns and the first 5 rows for the sake of brevity.</p> <pre><code>@chain movies begin\n    @select(-(1:5))\n    @select(1:5)\n    @slice(1:5)\nend\n</code></pre> 5\u00d75 DataFrame RowVotesR1R2R3R4Int32Float64Float64Float64Float6413484.54.54.54.52200.014.54.524.5350.00.00.00.04614.50.00.00.051724.54.50.014.5 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/select/#mix-and-match-selection","title":"Mix and match selection","text":"<p>Just like in R's <code>tidyverse</code>, you can separate multiple selections with commas and mix and match different ways 
of selecting columns.</p> <pre><code>@chain movies begin\n    @select(1, Budget:Rating)\n    @slice(1:5)\nend\n</code></pre> 5\u00d73 DataFrame RowTitleBudgetRatingStringInt32?Float641$missing6.42$1000 a Touchdownmissing6.03$21 a Day Once a Monthmissing8.24$40,000missing8.25$50,000 Climax Show, Themissing3.4 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/sep_unite/","title":"Separating","text":"<p>Following the tidyverse syntax, the <code>@separate()</code> macro in <code>TidierData.jl</code> separates a single column into multiple columns. This is particularly useful for splitting a column containing delimited values into individual columns.</p> <pre><code>using TidierData\n\ndf = DataFrame(a = [\"1-1\", \"2-2\", \"3-3-3\"]);\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/sep_unite/#separate","title":"<code>@separate</code>","text":"<p>Separate the \"a\" column into \"b\", \"c\", and \"d\" columns based on the dash delimiter</p> <pre><code>@chain df begin\n    @separate(a, (b, c, d), \"-\")\nend\n</code></pre> 3\u00d73 DataFrame RowbcdSubStrin\u2026SubStrin\u2026SubStrin\u2026?111missing222missing3333 <p>The <code>into</code> columns can also be designated as follows:</p> <pre><code>new_names = [\"x$(i)\" for i in 1:3]; # or new_names = [\"b\", \"c\", \"d\"], or new_names = [:b, :c, :d]\n\n@separate(df, a, !!new_names, \"-\")\n</code></pre> 3\u00d73 DataFrame Rowx1x2x3SubStrin\u2026SubStrin\u2026SubStrin\u2026?111missing222missing3333 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/sep_unite/#unite","title":"<code>@unite</code>","text":"<p>The <code>@unite</code> macro brings together multiple columns into one, separating the values with a user-specified delimiter. Here, the <code>@unite</code> macro combines the \"b\", \"c\", and \"d\" columns into a single new \"new_col\" column using the \"/\" delimiter</p> <pre><code>df = DataFrame(\n       b = [\"1\", \"2\", \"3\"],\n 
      c = [\"1\", \"2\", \"3\"],\n       d = [missing, missing, \"3\"]);\n\n@chain df begin\n    @unite(new_col, (b, c, d), \"/\")\nend\n</code></pre> 3\u00d71 DataFrame Rownew_colString11/122/233/3/3 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/sep_unite/#separate_rows","title":"<code>@separate_rows</code>","text":"<p>Separate rows into multiple rows based on a chosen delimiter.</p> <pre><code>df = DataFrame(\n       a = 1:3,\n       b = [\"a\", \"aa;bb;cc\", \"dd;ee\"],\n       c = [\"1\", \"2;3;4\", \"5;6\"],\n       d = [\"7\", \"8;9;10\", \"11;12\"],\n       e = [\"11\", \"22;33;44\", \"55;66\"]);\n\n@separate_rows(df, b:e, \";\")\n</code></pre> 6\u00d75 DataFrame RowabcdeInt64SubStrin\u2026SubStrin\u2026SubStrin\u2026SubStrin\u202611a171122aa282232bb393342cc4104453dd5115563ee61266 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/slice/","title":"@slice","text":"<p>Slicing rows is similar to filtering rows, except that slicing is performed based on row numbers rather than filter criteria. In <code>TidierData.jl</code>, slicing works similarly to R's <code>tidyverse</code> in that both positive (which rows to keep) and negative (which rows to remove) slicing is supported. For <code>@slice()</code>, any valid <code>UnitRange</code> of integers is considered valid; this is not the case for <code>@select()</code> or <code>across()</code>.</p> <p>Remember: Just like every other <code>TidierData.jl</code> top-level macro, <code>@slice()</code> respects groups. 
This means that in a grouped data frame, <code>@slice(1:2)</code> will select the first 2 rows from each group.</p> <pre><code>using TidierData\n\ndf = DataFrame(row_num = 1:10,\n               a = string.(repeat('a':'e', inner = 2)),\n               b = [1,1,1,2,2,2,3,3,3,4])\n</code></pre> 10\u00d73 DataFrame Rowrow_numabInt64StringInt6411a122a133b144b255c266c277d388d399e31010e4 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/slice/#slicing-using-a-range-of-numbers","title":"Slicing using a range of numbers","text":"<p>This is an easy way of retrieving 5 consecutive rows.</p> <pre><code>@chain df begin\n    @slice(1:5)\nend\n</code></pre> 5\u00d73 DataFrame Rowrow_numabInt64StringInt6411a122a133b144b255c2 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/slice/#slicing-using-a-more-complex-unitrange-of-numbers","title":"Slicing using a more complex UnitRange of numbers","text":"<p>How would we obtain every other row from 1 to 7 (counting up by 2)? Note that <code>range()</code> is similar to <code>seq()</code> in R.</p> <pre><code>@chain df begin\n  @slice(range(start = 1, step = 2, stop = 7))\nend\n</code></pre> 4\u00d73 DataFrame Rowrow_numabInt64StringInt6411a123b135c247d3 <p>This same code can also be written using Julia's shorthand syntax for unit ranges.</p> <pre><code>@chain df begin\n  @slice(1:2:7)\nend\n</code></pre> 4\u00d73 DataFrame Rowrow_numabInt64StringInt6411a123b135c247d3 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/slice/#separate-multiple-row-selections-with-commas","title":"Separate multiple row selections with commas","text":"<p>If you have multiple different row selections, you can separate them with commas.</p> <pre><code>@chain df begin\n    @slice(1:5, 10)\nend\n</code></pre> 6\u00d73 DataFrame Rowrow_numabInt64StringInt6411a122a133b144b255c2610e4 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/slice/#use-n-as-short-hand-to-indicate-the-number-of-rows","title":"Use <code>n()</code> as 
short-hand to indicate the number of rows","text":"<p>Select the last 2 rows.</p> <pre><code>@chain df begin\n  @slice(n()-1, n())\nend\n</code></pre> 2\u00d73 DataFrame Rowrow_numabInt64StringInt6419e3210e4 <p>You can even use <code>n()</code> inside of UnitRanges, just like in R. Notice that the order of operations is slightly different in Julia as compared to R, so you don't have to wrap the <code>n()-1</code> expression inside of parentheses.</p> <pre><code>@chain df begin\n  @slice(n()-1:n())\nend\n</code></pre> 2\u00d73 DataFrame Rowrow_numabInt64StringInt6419e3210e4 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/slice/#inverted-selection-using-negative-numbers","title":"Inverted selection using negative numbers","text":"<p>This line selects all rows except the first 5 rows.</p> <pre><code>@chain df begin\n    @slice(-(1:5))\nend\n</code></pre> 5\u00d73 DataFrame Rowrow_numabInt64StringInt6416c227d338d349e3510e4 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/slice/#sample-5-random-rows-in-the-data-frame","title":"Sample 5 random rows in the data frame","text":"<pre><code>@chain df begin\n  @slice_sample(n = 5)\nend\n</code></pre> 5\u00d73 DataFrame Rowrow_numabInt64StringInt6416c229e337d343b158d3"},{"location":"examples/generated/UserGuide/slice/#slice-the-min","title":"Slice the min","text":"<p>This line selects all rows with the minimum value of the desired column</p> <pre><code>@chain df begin\n  @slice_min(b)\nend\n</code></pre> 3\u00d73 DataFrame Rowrow_numabInt64StringInt6411a122a133b1 <p>This line will only show the first row.</p> <pre><code>@chain df begin\n  @slice_min(b, with_ties = false)\nend\n</code></pre> 1\u00d73 DataFrame Rowrow_numabInt64StringInt6411a1 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/slice/#slice-the-max","title":"Slice the max","text":"<p>The optional prop argument will slice a proportion of the full dataframe.</p> <pre><code>@chain df begin\n  @slice_max(b, prop = 
0.5)\nend\n</code></pre> 5\u00d73 DataFrame Rowrow_numabInt64StringInt64110e427d338d349e354b2 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/slice/#slice-the-tail","title":"Slice the tail","text":"<pre><code>@chain df begin\n  @slice_tail(prop = 0.5)\nend\n</code></pre> 5\u00d73 DataFrame Rowrow_numabInt64StringInt6416c227d338d349e3510e4"},{"location":"examples/generated/UserGuide/slice/#slice-the-head","title":"Slice the head","text":"<pre><code>@chain df begin\n  @slice_head(n = 3)\nend\n</code></pre> 3\u00d73 DataFrame Rowrow_numabInt64StringInt6411a122a133b1 <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/summarize/","title":"@summarize","text":"<p>Summarizing a dataset involves aggregating multiple rows down to (usually) a single row of data. This can be performed across the entire dataset, or if the dataset is grouped, then for each group in the dataset. This is implemented similarly to R's tidyverse using <code>@summarize()</code>. 
Out of admiration for Hadley Wickham, and to be consistent with the R <code>tidyverse</code>, both <code>@summarize()</code> and <code>@summarise()</code> are supported.</p> <p>Note that summarization is different from other verbs in the <code>TidierData.jl</code> in 2 respects:</p> <ol> <li>No auto-vectorization is performed when using <code>@summarize()</code></li> <li>One layer of grouping is removed after each <code>@summarize()</code> function.</li> </ol> <p>If you require further changes to grouping beyond the defaults, you can either <code>@ungroup()</code> or call <code>@group_by()</code> to regroup by a different set of variables.</p> <pre><code>using TidierData\nusing RDatasets\n\nmovies = dataset(\"ggplot2\", \"movies\");\n</code></pre> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/summarize/#using-summarize-with-n-to-count-the-number-of-movies-in-the-dataset","title":"Using <code>@summarize()</code> with <code>n()</code> to count the number of movies in the dataset.","text":"<p>Within the context of <code>@summarize()</code> only, <code>n()</code> is converted to DataFrames.jl's <code>nrow()</code> function.</p> <pre><code>@chain movies begin\n    @summarize(n = n())\nend\n</code></pre> 1\u00d71 DataFrame RownInt64158788 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/summarize/#using-summarize-to-calculate-average-budget-of-movies-in-the-dataset","title":"Using <code>@summarize()</code> to calculate average budget of movies in the dataset.","text":"<p>The median budget in this dataset is 3 million, and the mean budget is 13 million! 
Making movies must be way more lucrative than making Julia packages.</p> <pre><code>@chain movies begin\n  @mutate(Budget = Budget / 1_000_000)\n  @summarize(median_budget = median(skipmissing(Budget)),\n             mean_budget = mean(skipmissing(Budget)))\nend\n</code></pre> 1\u00d72 DataFrame Rowmedian_budgetmean_budgetFloat64Float6413.013.4125 <p></p> <p></p>"},{"location":"examples/generated/UserGuide/summarize/#combining-group_by-with-summarise","title":"Combining <code>@group_by()</code> with <code>@summarise()</code>","text":"<p>How many movies came out in each of the last 5 years?</p> <pre><code>@chain movies begin\n  @group_by(Year)\n  @summarise(n = n())\n  @arrange(desc(Year))\n  @slice(1:5)\nend\n</code></pre> 5\u00d72 DataFrame RowYearnInt32Int6412005349220041945320032158420022168520012121 <p>Notice that there was no need to explicitly <code>@ungroup()</code> the dataset after summarizing here. The <code>@summarise()</code> function removed one layer of grouping. Since this dataset was only grouped by one variable (<code>Year</code>), it was no longer grouped after the <code>@summarise</code> was performed.</p> <p>This page was generated using Literate.jl.</p>"},{"location":"examples/generated/UserGuide/summary/","title":"@summary","text":"<p>The <code>@summary()</code> macro in <code>TidierData.jl</code> provides a concise way to compute summary statistics on data. 
Similar to its R counterpart, it will provide the mean, median, Q1, Q3, minimum, maximum, and number of missing values in a numerical column or columns.</p> <p></p> <p></p>"},{"location":"examples/generated/UserGuide/summary/#summary-for-the-whole-dataframe","title":"Summary for the whole dataframe","text":"<pre><code>using TidierData\n\ndf = DataFrame( A = [1, 2, 3, 4, 5], B = [missing, 7, 8, 9, 10], C = [11, missing, 13, 14, missing], D = [16, 17, 18, 19, 20]);\n\n@chain df begin\n    @summary()\nend\n\n@summary(df)\n</code></pre> 4\u00d79 DataFrame RowColumnMinQ1MedianMeanQ3MaxCountMissing_CountStringInt64Float64Float64Float64Float64Int64Int64Int641A12.03.03.04.05502B77.758.58.59.2510413C1112.013.012.666713.514324D1617.018.018.019.02050"},{"location":"examples/generated/UserGuide/summary/#you-can-specify-columns-for-which-you-want-to-compute-the-summary-this-is-useful-if-the-dataframe-has-a-large-number-of-columns-and-youre-interested-in-only-a-subset-of-them","title":"You can specify columns for which you want to compute the summary. This is useful if the DataFrame has a large number of columns and you're interested in only a subset of them.","text":"<pre><code>@chain df begin\n    @summary(B)\nend\n\n@summary(df, B)\n</code></pre> 1\u00d79 DataFrame RowColumnMinQ1MedianMeanQ3MaxCountMissing_CountStringInt64Float64Float64Float64Float64Int64Int64Int641B77.758.58.59.251041"},{"location":"examples/generated/UserGuide/summary/#or-for-a-range-of-columns","title":"or for a range of columns","text":"<pre><code>@chain df begin\n    @select(B:D)\n    @summary() # you can also write this @summary(2:4)\nend\n</code></pre> 3\u00d79 DataFrame RowColumnMinQ1MedianMeanQ3MaxCountMissing_CountStringInt64Float64Float64Float64Float64Int64Int64Int641B77.758.58.59.2510412C1112.013.012.666713.514323D1617.018.018.019.02050 <p>This page was generated using Literate.jl.</p>"}]}
\ No newline at end of file

Row	b	c	d	new_col
	String	String	String?	String
1	1	1	missing	1/1
2	2	2	missing	2/2
3	3	3	3	3/3/3