-
Notifications
You must be signed in to change notification settings - Fork 7
/
LUCI_Model.ecl
172 lines (170 loc) · 6.76 KB
/
LUCI_Model.ecl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
IMPORT $ AS LR;
IMPORT LR.Types;
IMPORT Std.Str;
IMPORT Std.System.ThorLib;
// Convenience alias: a short local name for the Types.External_model
// definition, which this file uses as a DATASET record layout.
External_model := Types.External_model;
/**
 * Build the lines of a LUCI model description file from the external
 * (field-named) form of one or more models.
 *
 * <p>LUCI is a proprietary format used within LexisNexis.
 *
 * <p>Each request is matched to the models that have the same response
 * field and whose work item appears in the request's work item list.
 *
 * <p>Model id and model name: a "$" in either string is replaced by the
 * work item.  When only one of the two strings carries a "$", the work
 * item is appended to the other one (directly for the id, after a blank
 * for the name).  When neither carries a "$", both are used unchanged and
 * every matching work item becomes a score card of the same model.
 *
 * <p>Score card name: when a model has more than one score card, a "$" in
 * the requested name is replaced by the work item; without a "$" the work
 * item is appended, so a blank name yields the work item itself.  A model
 * with a single score card always names that card for the work item.
 *
 * <p>For models with several score cards, an L2SE (score card election)
 * line is written per card that selects on the work item field alone.  If
 * the selection depends on anything else, the L2SE lines must be patched.
 *
 * <p>LUCI data fields may not contain comma characters.  The work item
 * identification strings must not contain characters that would need
 * special handling in CSV data; no escaping is done here.
 *
 * @param rqst the mapping of work items to models, in LUCI_Model_Rqst
 * format.
 * @param mod the model with external field names applied, in
 * External_Model format as returned from Named_Model.
 * @param wi_field the name of the field that holds the work item
 * identification string; used in the L2SE lines.
 * @return the lines of the LUCI file in LUCI_Rec format.
 * @see Types.External_Model
 * @see Named_Model
 * @see Types.LUCI_Model_Rqst
 * @see Types.LUCI_Rec
 */
EXPORT DATASET(Types.LUCI_Rec)
       LUCI_Model(DATASET(Types.LUCI_Model_Rqst) rqst,
                  DATASET(Types.External_Model) mod,
                  STRING wi_field='work_item') := FUNCTION
  // ---- Stage 1: number the requests ---------------------------------
  Numbered_Rqst_Rec := RECORD(Types.LUCI_Model_Rqst)
    UNSIGNED4 rq_nominal;
  END;
  Numbered_Rqst_Rec number_rqst(Types.LUCI_Model_Rqst src, UNSIGNED idx) := TRANSFORM
    // The local counter is spread by the node count and offset by the node
    // number, so numbers handed out on different nodes are distinct.
    SELF.rq_nominal := (idx-1)*ThorLib.nodes() + 1 + ThorLib.node();
    SELF := src;
  END;
  numbered_rqst := PROJECT(rqst, number_rqst(LEFT, COUNTER), LOCAL);

  // ---- Stage 2: pair each model with its request(s) ------------------
  Rq_Model_Rec := RECORD
    UNSIGNED4 rq_nominal;
    STRING model_id;
    STRING model_name;
    STRING sc_name;
    DATASET(Types.External_Model) mods;
  END;
  Rq_Model_Rec attach_rqst(Types.External_Model one_mod, Numbered_Rqst_Rec rq) := TRANSFORM
    id_marked := Str.Find(rq.model_id, '$', 1) > 0;
    name_marked := Str.Find(rq.model_name, '$', 1) > 0;
    SELF.rq_nominal := rq.rq_nominal;
    // a "$" in either string makes both the id and the name work item specific
    SELF.model_id := IF(id_marked,
                        Str.FindReplace(rq.model_id,'$', one_mod.work_item),
                        IF(name_marked,
                           rq.model_id + one_mod.work_item,
                           rq.model_id));
    SELF.model_name := IF(name_marked,
                          Str.FindReplace(rq.model_name, '$', one_mod.work_item),
                          IF(id_marked,
                             rq.model_name + ' ' + one_mod.work_item,
                             rq.model_name));
    SELF.sc_name := rq.score_card_name;
    SELF.mods := DATASET([one_mod], Types.External_Model);
  END;
  paired := JOIN(mod, numbered_rqst,
                 LEFT.response_field=RIGHT.response_field
                 AND LEFT.work_item IN RIGHT.wi_list,
                 attach_rqst(LEFT, RIGHT), LOOKUP, MANY);

  // ---- Stage 3: one record per (request, model id) -------------------
  Rq_Model_Rec fold_mods(Rq_Model_Rec acc, Rq_Model_Rec nxt) := TRANSFORM
    SELF.mods := MERGE(SORTED(acc.mods,wi), SORTED(nxt.mods,wi), SORTED(wi));
    SELF := acc;
  END;
  per_model := ROLLUP(SORT(paired, rq_nominal, model_id),
                      fold_mods(LEFT, RIGHT), rq_nominal, model_id);
  // every rolled up model gets its own number (local counter)
  Rq_Model_Rec renumber(Rq_Model_Rec src, UNSIGNED idx) := TRANSFORM
    SELF.rq_nominal := idx;
    SELF := src;
  END;
  ready_models := PROJECT(per_model, renumber(LEFT, COUNTER), LOCAL);

  // ---- Stage 4: LUCI lines, labelled for the final merge -------------
  Line_Rec := RECORD(Types.LUCI_Rec)
    UNSIGNED4 rq_nominal;
    UNSIGNED2 seq;
  END;

  // L1MD: the model header, always the first line of a model
  l1md_lines := PROJECT(ready_models,
                        TRANSFORM(Line_Rec,
                                  SELF.rq_nominal := LEFT.rq_nominal,
                                  SELF.seq := 1,
                                  SELF.line := 'L1MD,'
                                             + LEFT.model_id + ',' + LEFT.model_name + ','
                                             + IF(COUNT(LEFT.mods)>1, 'multi,,', 'single,,')));

  // One record per score card, carrying the sequence numbers of its lines.
  // Layout of seq within a model with N cards: 1 is L1MD, 2..N+1 are L2SC,
  // N+2..2N+1 are L2SE, and the L3AT lines follow after 2N+1.
  Card_Rec := RECORD
    UNSIGNED4 rq_nominal;
    STRING model_id;
    STRING score_card;
    STRING work_item;
    BOOLEAN need_se;
    UNSIGNED2 sc_seq;
    UNSIGNED2 se_seq;
    UNSIGNED2 coef_seq;
    DATASET(Types.External_Coef) coef;
  END;
  Card_Rec make_card(Rq_Model_Rec rq, Types.External_Model one_mod, UNSIGNED idx) := TRANSFORM
    card_cnt := COUNT(rq.mods);
    several := card_cnt > 1;
    sc_marked := Str.Find(rq.sc_name, '$', 1) > 0;
    SELF.rq_nominal := rq.rq_nominal;
    SELF.model_id := rq.model_id;
    SELF.work_item := one_mod.work_item;
    // the requested name only matters when the model has several cards
    SELF.score_card := IF(several,
                          IF(sc_marked,
                             Str.FindReplace(rq.sc_name,'$',one_mod.work_item),
                             rq.sc_name + one_mod.work_item),
                          one_mod.work_item);
    SELF.need_se := several;
    SELF.sc_seq := 1 + idx;
    SELF.se_seq := 1 + card_cnt + idx;
    SELF.coef_seq := 1 + 2 * card_cnt; // base for the first card's coefficients
    SELF.coef := one_mod.coef;
  END;
  raw_cards := NORMALIZE(ready_models, LEFT.mods, make_card(LEFT, RIGHT, COUNTER));
  Card_Rec chain_coef_seq(Card_Rec prior_card, Card_Rec this_card) := TRANSFORM
    // the first card of a group sees an empty prior record and keeps its
    // own base; later cards continue after the prior card's coefficients
    base_seq := IF(prior_card.coef_seq=0, this_card.coef_seq, prior_card.coef_seq);
    SELF.coef_seq := base_seq + COUNT(prior_card.coef);
    SELF := this_card;
  END;
  ready_cards := UNGROUP(ITERATE(GROUP(raw_cards, rq_nominal, LOCAL),
                                 chain_coef_seq(LEFT, RIGHT)));

  // L2SC: one score card line per card, carrying the intercept weight
  Line_Rec card_line(Card_Rec card) := TRANSFORM
    intercept_w := (STRING)card.coef(isIntercept)[1].w;
    SELF.rq_nominal := card.rq_nominal;
    SELF.seq := card.sc_seq;
    SELF.line := 'L2SC,' + card.model_id + ','
               + card.score_card
               + ',' + intercept_w
               + ',1.0/(1.0 + EXP(-Raw_point)),Y,Y,0,Y,1';
  END;
  l2sc_lines := PROJECT(ready_cards, card_line(LEFT));

  // L2SE: score card election, only for models with several score cards
  Line_Rec election_line(Card_Rec card) := TRANSFORM
    SELF.rq_nominal := card.rq_nominal;
    SELF.seq := card.se_seq;
    SELF.line := 'L2SE,' + card.model_id + ','
               + card.score_card
               + ',' + wi_field + '=' + '\'' + card.work_item + '\'';
  END;
  l2se_lines := PROJECT(ready_cards(need_se), election_line(LEFT));

  // L3AT: one line per coefficient other than the intercept
  Line_Rec coef_line(Card_Rec card, Types.External_Coef one_coef, UNSIGNED idx) := TRANSFORM
    SELF.rq_nominal := card.rq_nominal;
    SELF.seq := card.coef_seq + idx;
    SELF.line := 'L3AT,' + card.model_id + ',' + card.score_card + ','
               + 'REAL,' + one_coef.field_name + ',,HIGH,0,'
               + (STRING)one_coef.w + ',FORMULA,0,0,0000';
  END;
  l3at_lines := NORMALIZE(ready_cards, LEFT.coef(NOT isIntercept),
                          coef_line(LEFT, RIGHT, COUNTER));

  // ---- Stage 5: interleave the line types and drop the labels --------
  all_lines := MERGE(l1md_lines, l2sc_lines, l2se_lines, l3at_lines,
                     SORTED(rq_nominal, seq), LOCAL);
  RETURN PROJECT(all_lines, Types.LUCI_rec);
END;