forked from wbuchanan/stataConference2023
-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.html
3078 lines (2331 loc) · 110 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Stata Conference 2023 - Metaprogramming</title>
<meta name="description" content="Talk about Metaprogramming in Stata">
<meta name="author" content="William Buchanan">
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="stylesheet" href="dist/reset.css">
<link rel="stylesheet" href="dist/reveal.css">
<link rel="stylesheet" href="dist/theme/black.css" id="theme">
<!-- Theme used for syntax highlighting of code -->
<link rel="stylesheet" href="plugin/highlight/monokai.css">
<style>
.tex sub, .latex sub, .latex sup {
text-transform: uppercase;
}
.tex sub, .latex sub {
vertical-align: -0.5ex;
margin-left: -0.1667em;
margin-right: -0.125em;
}
.tex, .latex, .tex sub, .latex sub {
font-size: 1em;
}
.latex sup {
font-size: 0.85em;
vertical-align: 0.15em;
margin-left: -0.36em;
margin-right: -0.15em;
}
</style>
</head>
<body>
<div class="reveal">
<div class="slides">
<section>
<h2>Metaprogramming</h2>
<h3>What it is, how to use it, and why you should care</h3>
<h4><a href="https://github.com/wbuchanan" target="_blank">Billy Buchanan</a></h4>
<h4 style="margin-bottom: 0;">Sr Research Scientist, <a href="https://www.sagcorp.com" target="_blank">SAG Corporation</a></h4>
<span style="margin-top: -25%;">
<img src="img/saglogo.png" alt="Logo for SAG Corporation" width="150px">
<img src="img/sdvosblogo.png" alt="Service Disabled Veteran Owned Small Business Emblem" width="100px">
</span>
<p style="margin-top: -2.5%;">Slides available at: <a href="https://wbuchanan.github.io/stataConference2023" target="_blank">https://wbuchanan.github.io/stataConference2023</a></p>
<aside class="notes">
<p>Disclaimer: These are my personal views and do not reflect any official views or positions of my employer or any of our clients.</p>
</aside>
</section>
<section>
<section>
<h2>What it is</h2>
<aside class="notes">
<ul>
<li>Stata Linter is likely an example of a meta-program for static code analysis</li>
<li>Please raise your hand if you are already familiar with metaprogramming.</li>
<li>Please raise your hand if you've ever used `estout` to create tables in LaTeX.</li>
<li>Please raise your hand if you've ever created a Mata library or saved Mata code in a .mo file.</li>
</ul>
</aside>
</section>
<section>
<ul>
<li>Programs that write or modify programs</li>
<li>Inputs are data, metadata, and/or sourcecode</li>
<li>Process concept classes used <a href="https://www.researchgate.net/publication/267704068_Taxonomy_of_The_Fundamental_Concepts_of_Meta-Programming" target="_blank">(see Damaševičius &amp; Štuikys, 2008 for details)</a>:</li>
<ul>
<li>Code Generation</li>
<li>Transformation</li>
<li>Reflection</li>
<li>Generalization</li>
</ul>
<li>Output is static or dynamic</li>
</ul>
<aside class="notes">
<ul>
<li>In Stata the first three process concept classes are most likely.</li>
<li>Code generation can be static or dynamic. The Mata compiler is an example of static generation. Dynamic generation means constructing and executing code at runtime.</li>
<li>Transformation is when the program modifies its behavior or output based on input and logic statements</li>
<li>Reflection is when the program inspects its own properties, or the properties of the input, to modify its behavior or output</li>
<li>Generalization can be based on the use of generics (Java) or templates (C++); think transmorphic classes in Mata.</li>
<li>A lot of this functionality can be implemented in one way or another using macros in Stata or Mata objects/variables.</li>
</ul>
</aside>
</section>
<section data-autoslide="3500">
<h2>How is it implemented in Stata?</h2>
</section>
<section>
<h2>Macros</h2>
<ul>
<li>Macros can be used to compose syntax</li>
<li>Evaluation of macros enables flexible composition</li>
<li>Control evaluation with \ or <code>macval()</code></li>
<li>Access metadata with extended macro functions</li>
</ul>
<aside class="notes">
<ul>
<li>Macros are basically the secret sauce of metaprogramming in Stata</li>
<li>Since macros can store strings and contain references to other macros themselves, you can use them to compose new syntax.</li>
<li>It is possible to compose complex code by controlling evaluation</li>
<li>Remember local macros are evaluated from the inside out and globals are evaluated from left to right, unless using curly braces.</li>
<li>The macval() function will expand the outer macro while leaving the inner macro in place</li>
<li>Pseudo-reflection can be implemented by accessing metadata</li>
</ul>
</aside>
</section>
<section>
<h2>Mata Strings</h2>
<ul>
<li>Use string variables to create and modify syntax</li>
<li>Incorporate values stored in Stata macros as needed</li>
<li>Mata function <code>stata()</code> can execute your code</li>
<li><b>Mata classes persist across Stata commands</b></li>
</ul>
<aside class="notes">
<ul>
<li>The last point is important.</li>
<li>While local macros vanish outside their scope, or get overwritten by other commands, the same is not true for a Mata class you define.</li>
<li>Using a Mata class allows you to persist some information in memory while also being able to add new information at runtime.</li>
<li>I'll show an example using a Mata class in a bit.</li>
</ul>
</aside>
</section>
</section>
<section>
<section data-autoslide="3500">
<h2>How to use it</h2>
</section>
<section>
<h2>Dynamic Code Generation</h2>
<pre data-id="dynamic-animation" style="width: 65vw; height: 125%; margin-left: -5%;">
<code class="hljs" data-trim data-line-numbers="167-179|181-196|272-294">
<script type="text/template">
mata
// Clears objects, methods, and functions from Mata
mata clear
/*
Defines the Drbs class used by the commands in drb.ado to store/persist info
across the user session. An instance holds the focal variable's values and
labels, the names used to reference estimates and cell sizes, and the strings
of Stata syntax that are generated and executed at runtime.
*/
class Drbs {
// Public member functions (constructor plus the three methods defined below)
public:
void new(), makefr(), drbstore(), vallabs()
// String form of the unique values of the focal (disclosure) variable
string colvector values
// Factor-variable stubs (value + "." + variable name) used to reference the
// estimates in the coefficient legend
string colvector fvstubs
// Names (variable name + value) of the local macros that hold the cell sizes
string colvector csnames
// All of the _b[] and _se[] references for the focal variable's levels
string colvector bse
// Names of the variables in the storage frame (b#, se#, and n# for each
// value)
string colvector stovar
// Text of the value labels associated with the focal variable
string colvector vallab
// Numeric values corresponding to the labels associated with the
// focal variable
real colvector labval
// Name of the focal variable whose coefficients will be stored
string scalar drbvar
// String scalar that is not assigned by any method shown here
// NOTE(review): purpose not visible from this code - TODO confirm
string scalar cspref
// The "frame create" command that defines the frame where results are stored
string scalar frdef
// String intended to hold the command that stores results in the frame;
// not assigned by any method shown here - TODO confirm whether still used
string scalar store
// Name of the frame where results get stored
string scalar frname
// Name of the frame that was current when the object was initialized
string scalar stframe
// Number of unique values in the values member
real scalar levs
// Directory where output will be saved
string scalar outdir
// Name of the value label attached to the focal variable ("" if none)
string scalar vallabname
// Variable label of the focal variable
string scalar varlabel
} // End of class definition
/*
Class constructor. Populates the member variables from the calling program's
local macros (varlist, touse, framename, and outdir) and from the data in
memory, then creates the frame where results will be stored.

@returns void. It initializes the object when instantiated
*/
void Drbs::new() {

    // Start with empty label vectors so they are well defined even when the
    // focal variable has no value label attached
    this.vallab = J(0, 1, "")
    this.labval = J(0, 1, .)

    // Name of the focal variable
    this.drbvar = st_local("varlist")

    // Name of the value label attached to the focal variable ("" if none)
    this.vallabname = st_varvaluelabel(this.drbvar)

    // When a value label exists, keep the value/text mapping so that it can
    // be reconstructed later by vallabs()
    if (this.vallabname != "") st_vlload(this.vallabname, this.labval, this.vallab)

    // String form of every distinct value of the focal variable among the
    // observations selected by the touse variable
    this.values = strofreal(uniqrows(st_data(., this.drbvar, st_local("touse"))))

    // Number of distinct values; used for iteration later
    this.levs = rows(this.values)

    // Name of the frame where results will be stored
    this.frname = st_local("framename")

    // Name of the frame that is current when the object is created
    this.stframe = st_framecurrent()

    // Factor-variable stubs (e.g., 2.varname) used to reference coefs/SEs
    this.fvstubs = this.values + J(this.levs, 1, "." + this.drbvar)

    // All _b[] references stacked on top of all _se[] references
    this.bse = (J(this.levs, 1, "_b[") + this.fvstubs + J(this.levs, 1, "]") \ ///
                J(this.levs, 1, "_se[") + this.fvstubs + J(this.levs, 1, "]"))

    // Variable names for the storage frame: b#, then se#, then n#
    this.stovar = ( J(this.levs, 1, "b") + this.values \ ///
                    J(this.levs, 1, "se") + this.values \ ///
                    J(this.levs, 1, "n") + this.values )

    // Names of the local macros that will hold the cell sizes
    this.csnames = ( J(this.levs, 1, this.drbvar) + this.values )

    // Directory where the output will be saved
    this.outdir = st_local("outdir")

    // Variable label of the focal variable
    this.varlabel = st_varlabel(this.drbvar)

    // Builds the frame definition and creates the storage frame
    this.makefr()

    // The former call to this.coefrefs() was removed: that member function is
    // neither declared in the class nor defined, so the class could not
    // compile. The storage command is built at runtime by drbstore() instead.

} // End of constructor definition
/*
Drbs object method used to reconstruct the value label associated with the
focal variable prior to writing the results to a file.

The name of the value label is returned to the caller in the local macro
labname; it is empty when the focal variable had no value label.

@returns void. Defines a value label in Stata that can be applied to a variable.
*/
void Drbs::vallabs() {

    // Only rebuild the label when the focal variable actually had one;
    // st_vlmodify() cannot be called with an empty label name
    if (this.vallabname != "") {
        st_vlmodify(this.vallabname, this.labval, this.vallab)
    }

    // Add a local with the label name (empty if there is no value label)
    st_local("labname", this.vallabname)

} // End of vallabs method
/*
Drbs object method that builds the "frame create" command describing the
storage frame and then executes it.

The frame holds an int identifier (dc), two str2045 variables (model and cmd),
the coefficient and standard error variables stored as doubles, and the cell
size variables stored as longs. The generated command is kept in the frdef
member.

@returns void. Creates the frame named by the frname member.
*/
void Drbs::makefr() {

    // firstn is the position of the first cell-size variable in stovar;
    // k iterates over the variable names
    real scalar firstn, k

    // Fixed leading part of the frame definition
    this.frdef = "frame create " + this.frname + " int(dc) "
    this.frdef = this.frdef + "str2045(model) str2045(cmd) double("

    // stovar holds levs coefficients, then levs standard errors, then the
    // cell sizes, so the cell sizes begin right after the first 2 * levs names
    firstn = (this.levs * 2) + 1

    // Append each variable name, closing the double() group and opening the
    // long() group immediately before the first cell-size variable
    for (k = 1; k <= rows(this.stovar); k++) {
        this.frdef = this.frdef + (k == firstn ? ") long(" : "") + this.stovar[k, 1] + " "
    }

    // Close the last type group
    this.frdef = this.frdef + ")"

    // Create the frame that will store the results
    stata(this.frdef)

} // End makefr method definition
/*
Defines a member function used to store the estimation results and the
corresponding cell sizes.

Builds a "frame post" command at runtime: the identifier, model, and command
line first, then one entry per level for the coefficients, one entry per level
for the standard errors, and one entry per level for the cell sizes. Levels
that do not appear in the column stripe of e(b) are posted as the extended
missing value .o.

@returns Void. All results are stored in the frame initialized by the Drbs
object.
*/
void Drbs::drbstore() {

    // Stub of the counting command and the storage command being assembled
    string scalar countcmd, postcmd

    // Parameter names from the column stripe of e(b) and the two reference
    // prefixes (coefficients first, standard errors second)
    string colvector stripe, prefix

    // Iterators over the levels and over the two kinds of estimates
    real scalar lev, kind

    // Names of the estimated parameters
    stripe = st_matrixcolstripe("e(b)")[., 2]

    // The storage layout requires every coefficient before any standard error
    prefix = ("_b[" \ "_se[")

    // Start constructing the storage command
    postcmd = `"frame post "' + this.frname + `" (\`dc')("\`model'")("\`e(cmdline)'") "'

    // First pass adds the coefficients, second pass the standard errors
    for (kind = 1; kind <= 2; kind++) {

        // Loop over the factor variable stubs that ID the parameters we want
        for (lev = 1; lev <= rows(this.fvstubs); lev++) {

            // Reference the estimate when the parameter was estimated
            if (anyof(stripe, this.fvstubs[lev, 1])) {
                postcmd = postcmd + "(" + prefix[kind] + this.fvstubs[lev, 1] + "])"
            }

            // Otherwise store an extended missing value to indicate that it
            // was omitted for some reason
            else postcmd = postcmd + "(.o)"

        } // End Loop over the parameters of interest

    } // End Loop over coefficients vs standard errors

    // Everything in the counting command except the value being counted
    countcmd = "qui: count if e(sample) & " + this.drbvar + " == "

    // Iterate over the levels to get the cell sizes
    for (lev = 1; lev <= this.levs; lev++) {

        // Count the records in the estimation sample with this value
        stata(countcmd + this.values[lev, 1])

        // Put the count in the local macro that the storage command
        // references for this level
        st_local(this.csnames[lev, 1], strofreal(st_numscalar("r(N)")))

        // Add the reference to that local macro to the storage command
        postcmd = postcmd + "(\`" + this.csnames[lev, 1] + "')"

    } // End Loop to get the cell sizes

    // Run the command constructed in this method
    stata(postcmd)

} // End of drbstore method
end
</script>
</code>
</pre>
<aside class="notes">
<ul>
<li>We need to store model metadata, an identifier, parameter estimates, standard errors, and cell sizes.</li>
<li>Given the nature of the data, the number of parameter estimates can vary between models.</li>
<li>So, we need to dynamically generate the code needed to create the storage frame.</li>
<li>We also need some checking/coding of results and to generate cell sizes which requires additional dynamic code generation and execution.</li>
<li>The result is orders of magnitude fewer I/O operations, which means a ton of time savings on getting output.</li>
</ul>
</aside>
</section>
<section>
<h2>Static Code Generation</h2>
<pre data-id="static-animation" style="width: 65vw; height: 125%; margin-left: -5%;">
<code class="hljs" data-trim data-line-numbers="1-11|62-75|101-120|123-148">
<script type="text/template">
// Start Python interpreter
python
# Imports the document class object from the python-docx module from pip
from docx.api import Document
# Imports the pandas library with the alias pd
import pandas as pd
# Imports the regular expression module
import re
# Location where the file is stored
filepath = '/Users/billy/Desktop/Programs/JavaScript/stataConference2023/fs002-19-1.docx'
# Loads the MS Word document into a Document class object
doc = Document(filepath)
# Creates an empty dict to store any of the tables in the document
tables = dict()
# Approach based on function illustrated at:
# https://medium.com/@karthikeyan.eaganathan/read-tables-from-docx-file-to-pandas-dataframes-f7e409401370
for i in range(len(doc.tables)):
# Gets an individual instance of a table in the MS Word Document
table = doc.tables[i]
# Parses the table contents into a list of lists
data = [[cell.text for cell in row.cells] for row in table.rows]
# Converts to a Pandas DataFrame object
df = pd.DataFrame(data)
# Assigns the first record as column names and drops the first record
df = df.rename(columns = df.iloc[0]).drop(df.index[0]).reset_index(drop = True)
# Adds the table to the dict of tables
tables['table' + str(i)] = df
# Gets the table with the file specification of interest
filespec = tables['table16']
# Recasts the start position to a numeric value
filespec['Start Position'] = filespec['Start Position'].astype('int', copy = False)
# Strips new line characters from data element name and the permitted values columns
filespec['Data Element Name'] = filespec['Data Element Name'].apply(lambda x: re.sub('\n', ' ', x))
filespec['Permitted Values\nAbbreviations'] = filespec['Permitted Values\nAbbreviations'].apply(lambda x: re.sub('\n', '; ', x))
filespec['Definition / Comments'] = filespec['Definition / Comments'].apply(lambda x: re.sub('\n', ' ', x))
# Strips unnecessary info from the type column
filespec['Type'] = filespec['Type'].apply(lambda x: re.sub('\nRevised!', '', x).lower()[:3])
# Renames the columns to Stata friendly names
filespec.columns = ['name', 'start', 'length', 'type', 'pop', 'comments', 'vallab' ]
# Get the column names and types to create Stata variables
varntypes = dict([(i, filespec[i].dtype.name) for i in filespec.columns ])
# Load module for Stata API
from sfi import Data
# Create variables in Stata
{ Data.addVarStrL(k) if v == 'object' else Data.addVarLong(k) for k, v in varntypes.items() }
# Allocate the number of observations
Data.addObs(filespec.shape[0], nofill = True)
# Load the file spec into Stata for further processing
Data.store(None, None, val = filespec.values.tolist())
# Ends the Python interpreter and goes back to Stata
end
// Compress the data
compress

// Creates generic variable names; the storage type is left to Stata so that
// names are not truncated when there are 100 or more fields (var100, ...)
g varn = "var" + strofreal(_n)

// Creates the end column value: one less than the start of the next field.
// The last record has no next field, so its end is derived from its length.
g long end = cond(_n < _N, start[_n + 1] - 1, start + real(length) - 1)

// Create Stata types based on the type and length values. An int only holds
// values up to 32,740, so integer fields of 5 or more digits need a long, and
// fields of 10 or more digits exceed the range of a long and need a double.
g str6 statatype = type
replace statatype = "long" if type == "int" & real(length) > 4
replace statatype = "double" if type == "int" & real(length) > 9

// Clean up the variables
replace vallab = itrim(trim(vallab))
replace name = itrim(trim(name))
replace comments = itrim(trim(comments))

// Cast the start and end values as strings
tostring start end, replace

// Remove carriage return character entry if it just represents the end of the
// line of data
drop if ustrregexm(name, "(Carriage Return)|(Line Feed)|(CR)|(LF)", 1) & _n == `c(N)'

// Create a file connection for the dictionary file
file open fh using filespec.dct, w t replace

// Create first line
file write fh "infix dictionary using sampleFile.txt {" _n

// Loop over the records in the file specification
forv i = 1/`c(N)' {

    // Writes the parsing specification to the file for the variable. The
    // whole string expression is wrapped in one set of parentheses, matching
    // the file write statements used for the do file below.
    file write fh _tab (statatype[`i'] + " " + varn[`i'] + " " +            ///
        start[`i'] + "-" + end[`i']) _n

} // End of Loop over specification

// Closes the curly brace
file write fh "}" _n(2)

// Closes the file connection
file close fh

// Create a second file connection
file open fh using filespec.do, w t replace

// Write the line to parse the file and load into Stata
file write fh "infix using filespec.dct" _n

// Start loop over the file specification to add metadata to the data set
forv i = 1/`c(N)' {

    // Adds variable labels
    file write fh ("la var " + "`: di varn[`i']'" + " " + `""`: di name[`i']'""') _n

    // Adds comments as characteristics
    file write fh ("char " + varn[`i'] + "[comments] " + comments[`i']) _n

    // Test for value labels being present for the variable
    if ustrregexm(vallab[`i'], "\w\d", 1) {

        // If there is something, add it as a characteristic as well
        file write fh ("char " + varn[`i'] + "[decodes] " + vallab[`i']) _n

    } // End IF Block testing for decode values

} // End Loop over file specification

// Closes the do file
file close fh
</script>
</code>
</pre>
<aside class="notes">
<ul>
<li>We can't read MS Word files directly into Stata, so this example uses the Python API in conjunction with Stata.</li>
<li>After reading the MS Word file, finding the table with the file specification, doing a bit of cleaning, the data set gets loaded into Stata.</li>
<li>I did a bit of cleaning and created a few additional variables in Stata, and then started writing the dictionary file by looping over the data set and substituting the info from the data set into the output code.</li>
<li>But that wasn't enough, so I then created the .do file that would load the data, add variable labels, and add characteristics to the file automatically.</li>
</ul>
</aside>
</section>
<section>
<h2>Transformation</h2>
<pre data-id="transformation-animation" style="width: 65vw; height: 125%; margin-left: -5%;">
<code class="hljs" data-trim data-line-numbers="1-16|19-49">
<script type="text/template">
// Load example data
webuse cattaneo2.dta, clear

// Classify variables into categorical, continuous, and binary
vl set, dummy cat(7) clear

// Maps vl lists to locals
loc vlbin $vldummy
loc vlcont $vlcontinuous $vluncertain
loc vlcat $vlcategorical

// Get all variable names
qui: ds

// Use tuples to create variable lists of different variables
tuples `r(varlist)', max(5) min(2) di

// Loop over output from tuples
forv i = 1/`ntuples' {

    // Get DV token and covariates. gettoken takes the NAME of the macro to
    // parse (tuple1, tuple2, ...), not its contents.
    gettoken dv cvs : tuple`i'

    // Gets mappings between covariates and types
    loc catvars : list cvs & vlcat
    loc binvars : list cvs & vlbin
    loc contvars : list cvs & vlcont

    // Create updated covariate list
    loc covariates `contvars'

    // Loop over types of covariates that need fv notation
    foreach v in catvars binvars {

        // Prefix each variable in the list with i. ; nothing is added when
        // the list is empty. The terms are appended without quotation marks
        // so that the finished list is a valid varlist.
        foreach x of loc `v' {
            loc covariates `covariates' i.`x'
        }

    } // End loop over variables needing factor variable notation

    // If the DV is continuous fit a linear regression
    if `: list dv in vlcont' reg `dv' `covariates'

    // If it is multivalued and categorical fit an mlogit model
    else if `: list dv in vlcat' mlogit `dv' `covariates'

    // If it is binary fit a logit
    else if `: list dv in vlbin' logit `dv' `covariates'

} // End loop over variable combinations
</script>
</code>
</pre>
<aside class="notes">
<p>Remember that this is a type of Behavior Modification</p>
<ul>
<li>In this example, I'll modify the modeling behavior based on properties of the data that aren't known ahead of time.</li>
<li>Using the variable list (<code>vl</code>) command, the variables are classified into types, and then arbitrary combinations of the variables are built.</li>
<li>Next, determine what types of covariates exist in the model and the type of dependent variable. For categorical covariates, add factor variable notation.</li>
<li>Lastly, fit a statistical model based on the type of DV.</li>
</ul>
</aside>
</section>
<!-- brewscheme Templates -->
<section style="">
<h2>Code Templates</h2>
<pre data-id="template-animation" style="width: 65vw; height: 125%; margin-left: -5%;">
<code class="hljs" data-trim data-line-numbers="220-244|800-825|887-914">
<script type="text/template">
// Use a tempname for the scheme file filehandle
tempname scheme1 scheme2 scheme3 scheme4 scheme5
// Root file path to theme files
loc themeroot `"`c(sysdir_personal)'b/theme/theme"'
// Root file path for brewscheme created scheme files
loc schemeroot `"`c(sysdir_plus)'/s/scheme"'
// Write the scheme file to a location on the path
qui: file open `scheme1' using `"`schemeroot'-`schemename'.scheme"', w replace
qui: file open `scheme2' using `"`schemeroot'-`schemename'_achromatopsia.scheme"', w replace
qui: file open `scheme3' using `"`schemeroot'-`schemename'_protanopia.scheme"', w replace
qui: file open `scheme4' using `"`schemeroot'-`schemename'_deuteranopia.scheme"', w replace
qui: file open `scheme5' using `"`schemeroot'-`schemename'_tritanopia.scheme"', w replace
// Find maximum number of colors to set the recycle parameter
loc pcycles = max( `barcolors', `scatcolors', `areacolors', ///
`linecolors', `boxcolors', `dotcolors', ///
`piecolors', `suncolors', `histcolors', ///
`cicolors', `matcolors', `reflcolors', ///
`refmcolors')
// Recycle the number of symbols
qui: mata: recycle(`numsymbols', `pcycles')
// Loop over the sequence of symbols
foreach symb in `sequence' {
// Build a string with each of the symbols corresponding to the
// appropriate cycle number
loc symbolseq `"`symbolseq' "`: word `symb' of `symbols''""'
} // End Loop over symbol sequence
// Check to see if start and end contour color palettes are the same
if `"`constart'"' == `"`conend'"' {
// Get version of palette w/minimum number of colors
qui: su pcolor if palette == `"`constart'"'
// Get the RGB values for the given palette and number of colors
mata: brewc.getPalette(`"`constart'"', `r(min)')
// Overwrite the local macro with the RGB value
loc constart `: word 1 of `rgbs''
// Overwrite the local macro with the RGB value
loc conend `: word 2 of `rgbs''
} // End IF Block for case where contour start/end use same palette
// If they use different palettes
else {
// Get version of palette with minimum number of colors for start
qui: su pcolor if palette == `"`constart'"'
// Get the RGB values for the given palette and number of colors
mata: brewc.getPalette(`"`constart'"', `r(min)')
// Overwrite the local macro with the RGB value
loc constart `: word 1 of `rgbs''
// Get version of palette with minimum number of colors for start
qui: su pcolor if palette == `"`conend'"'
// Get the RGB values for the ending color for contour plots for
// the given color palette
mata: brewc.getPalette(`"`conend'"', `r(min)')
// Overwrite the local macro with the RGB value second word used here
// to prevent same color issue if the allstyle option is used.
loc conend `: word 2 of `rgbs''
} // End ELSE Block for separate start/end contour palettes
// Loop over color macros
foreach color in bar scat area line box dot pie hist ci mat ///
refl refm sun {
/* Create the sequence of color ids for each graph type based on
the maximum number of colors in any listed color argument. */
qui: mata: recycle(``color'colors', `pcycles')
// Assign the id sequence to a local with seq as suffix
loc `color'seq = `"`sequence'"'
// Get the colors for the specified palette and number of colors
mata: brewc.getPalette("``color'style'", ``color'colors')
// Loop over the rgb values to construct the graph specific
// rgb values
foreach c of loc `color'seq {
// Construct macro with RGB values for lookup
loc `color'rgb `"``color'rgb' "`: word `c' of `rgbs''" "'
} // End Loop
// Create marker for graph type with maximum number of colors
if ``color'colors' == `pcycles' {
// Set the generic color macro to reference macro w/max colors
loc gencolor `"``color'rgb'"'
} // End of IF Block to define generic color macro
} // End Loop over number of colors for graph types
// Stubs to use for line references to theme files
loc linerefs theme1 theme2 theme3 theme4 theme5
// Tempnames
tempname theme1 theme2 theme3 theme4 theme5
// Decide which set of theme files to read: the theme requested by the user
// or, when none was requested, the default theme
if `"`themefile'"' != "" {

    // Use the user specified theme
    loc themestem `"`themefile'"'

} // End IF Block for user specified theme file

// If user does not specify a file
else {

    // Check for default file
    cap confirm file `"`themeroot'-default.theme"'

    // If the default file does NOT exist
    if _rc != 0 {

        // Create the default brewtheme files
        qui: brewtheme

    } // End IF Block to create the default theme files

    // Use the default theme
    loc themestem default

} // End ELSE Block for null theme file

// Themefile names (base theme plus the four color sight impairment variants)
loc themerefs `"`themeroot'-`themestem'.theme"'                              ///
`"`themeroot'-`themestem'_achromatopsia.theme"'                              ///
`"`themeroot'-`themestem'_protanopia.theme"'                                 ///
`"`themeroot'-`themestem'_deuteranopia.theme"'                               ///
`"`themeroot'-`themestem'_tritanopia.theme"'

// Loop over theme files
forv thf = 1/5 {

    // Open the theme file for this iteration for reading
    file open `theme`thf'' using `"`: word `thf' of `"`themerefs'"''"', r

    // Line counter; starts at 1 for the first line of the file
    loc x = 1

    // Read the first line of the file into the local theme#_1
    file read `theme`thf'' theme`thf'_`x'

    // Loop until end of file
    while r(eof) == 0 {

        // Increment line counter
        loc x = `x' + 1

        // Read line into local macro
        file read `theme`thf'' theme`thf'_`x'

    } // End Loop over theme file

    // Close the file connection
    file close `theme`thf''

} // End Loop over themefiles
// Write the fixed header and the theme-derived settings to each of the five
// scheme files. File j is written through the handle stored in the local
// scheme<j> (NOTE(review): these handles are opened outside this section -
// confirm they are open for writing at this point).
// Name suffixes for the five schemes; the first (base) scheme has no suffix
loc nameext "" "_achromatopsia" "_protanopia" "_deuteranopia" "_tritanopia"
// Loop over the theme/scheme file pairs
forv j = 1/5 {
// The base scheme is labelled with the bare scheme name
if `j' == 1 loc schemelabel `"label "`schemename'""'
// All other schemes get the j-th suffix appended to the scheme name
else loc schemelabel `"label "`schemename'`: word `j' of `nameext''""'
// Static comment header of the generated scheme file
file write `scheme`j'' `"* s2color.scheme"' _n
file write `scheme`j'' `""' _n
file write `scheme`j'' `"* s2 scheme family with a naturally white background (white plotregions and"' _n
file write `scheme`j'' `"* lightly colored background) and color foreground (lines, symbols, text, etc)."' _n
file write `scheme`j'' `""' _n
file write `scheme`j'' `"* For p[#][stub] scheme references the corresponding style is resolved by"' _n
file write `scheme`j'' `"* searching the scheme ids with the following preference ordering:"' _n
file write `scheme`j'' `"*"' _n
file write `scheme`j'' `"* p#stub"' _n
file write `scheme`j'' `"* pstub"' _n
file write `scheme`j'' `"* p#"' _n
file write `scheme`j'' `"* p"' _n
file write `scheme`j'' `"*"' _n
file write `scheme`j'' `"* Thus it is possible to control the selected style to great detail, or let it"' _n
file write `scheme`j'' `"* default to common defaults. In particular -p- or -pstub- without"' _n
file write `scheme`j'' `"* # can be used to designate a common plotting symbol, or back plotting"' _n
file write `scheme`j'' `"* symbol, or for that matter common color or sizes."' _n
file write `scheme`j'' `"*"' _n
file write `scheme`j'' `"* "style"s designated "special" are not styles at all, but direct signals to"' _n
file write `scheme`j'' `"* graphs, plots, or other classes and their parsers. Their contents are"' _n
file write `scheme`j'' `"* specific to the use and may only be understood by the caller."' _n
file write `scheme`j'' `""' _n
// Version line, sequence number, scheme label and system setting
file write `scheme`j'' `"*! version 1.2.5 16jun2011"' _n(2)
file write `scheme`j'' `"sequence 1299"' _n
file write `scheme`j'' `"`schemelabel'"' _n(2)
file write `scheme`j'' `"system naturally_white 1"' _n(3)
// NOTE(review): the theme lines below are handed to -file write- unquoted,
// so each stored line presumably already carries its own quoted text and
// newline directive - confirm against the files written by brewtheme.
// Loop over first 10 lines of theme file
forv i = 1/10 {
// Write each line to the scheme file
file write `scheme`j'' `theme`j'_`i''
} // End Loop over lines 1-10 of theme file
// Number of plot styles to cycle through, inserted between theme lines
file write `scheme`j'' `"numstyle pcycle `pcycles'"' _n(2)
// Loop over lines 11-16 of the theme file
forv i = 11/16 {
// Write each line to the scheme file
file write `scheme`j'' `theme`j'_`i''
} // End loop over lines 11-16 of the theme file
// Number of contours, set to the same value as the plot cycle count
file write `scheme`j'' `"numstyle contours `pcycles'"' _n(2)
// Loop over lines 17-179 of the theme file
forv i = 17/179 {
// Write each line to the scheme file
file write `scheme`j'' `theme`j'_`i''
} // End loop over lines 17-179 of the theme file
} // End Loop over scheme/theme file pairs
// Names of the local macros that are read after each color lookup: the plain
// RGB value followed by its four color-sight-impairment translations.
// NOTE(review): these macros are presumably filled by brewColorSearch() -
// confirm against the Mata source.
local ctyperefs rgb achromatopsia protanopia deuteranopia tritanopia

// Confidence interval area color: look up the first area RGB value
mata: brewc.brewColorSearch("`: word 1 of `areargb''")

// Copy each looked-up value into ci_area1 ... ci_area5
forvalues rfs = 1/5 {

	// Name of the macro holding the rfs-th type of translated color
	local cvdkind : word `rfs' of `ctyperefs'

	// Store the value that macro currently holds
	local ci_area`rfs' ``cvdkind''

} // End Loop over area color reassignments

// Confidence interval symbol color: look up the first ci RGB value
mata: brewc.brewColorSearch("`: word 1 of `cirgb''")

// Copy each looked-up value into ci_symbol1 ... ci_symbol5
forvalues rfs = 1/5 {

	// Name of the macro holding the rfs-th type of translated color
	local cvdkind : word `rfs' of `ctyperefs'

	// Store the value that macro currently holds
	local ci_symbol`rfs' ``cvdkind''

} // End Loop over symbol color reassignments