From 5f16be60d54f79c3edb1bbfb3b762405494a9e3a Mon Sep 17 00:00:00 2001 From: Tim <50997599+TimBosman@users.noreply.github.com> Date: Tue, 26 Nov 2024 20:07:54 +0100 Subject: [PATCH] Add Sankey chart for Adding Filter Capabilities to Vega Sankey Visualizations in Kibana blog Co-Authored-By: Milos Mandic <milos-mandic@users.noreply.github.com> --- .../sankey.hjson | 476 ++++++++++++++++++ 1 file changed, 476 insertions(+) create mode 100644 supporting-blog-content/add-filter-capabilities-to-vega-sankey-chart/sankey.hjson diff --git a/supporting-blog-content/add-filter-capabilities-to-vega-sankey-chart/sankey.hjson b/supporting-blog-content/add-filter-capabilities-to-vega-sankey-chart/sankey.hjson new file mode 100644 index 00000000..fa838e14 --- /dev/null +++ b/supporting-blog-content/add-filter-capabilities-to-vega-sankey-chart/sankey.hjson @@ -0,0 +1,476 @@ +{ + $schema: https://vega.github.io/schema/vega/v5.json + data: [ + { + // query ES based on the currently selected time range and filter string + name: rawData + url: { + %context%: true + %timefield%: @timestamp + index: kibana_sample_data_logs + body: { + size: 0 + aggs: { + table: { + composite: { + size: 10000 + sources: [ + { + stk1: { + terms: { + field: machine.os.keyword + } + } + } + { + stk2: { + terms: { + field: geo.dest + } + } + } + ] + } + } + } + } + } + // From the result, take just the data we are interested in + format: { + property: aggregations.table.buckets + } + // Convert key.stk1 -> stk1 for simpler access below + transform: [ + { + type: formula + expr: datum.key.stk1 + as: stk1 + } + { + type: formula + expr: datum.key.stk2 + as: stk2 + } + { + type: formula + expr: datum.doc_count + as: size + } + ] + } + { + name: nodes + source: rawData + transform: [ + // Set new key for later lookups - identifies each node + { + type: formula + expr: datum.stk1+datum.stk2 + as: key + } + // instead of each table row, create two new rows, + // one for the source (stack=stk1) and one for destination node (stack=stk2). + // The country code stored in stk1 and stk2 fields is placed into grpId field. + { + type: fold + fields: [ + stk1 + stk2 + ] + as: [ + stack + grpId + ] + } + // Create a sortkey, different for stk1 and stk2 stacks. + // Space separator ensures proper sort order in some corner cases. + { + type: formula + expr: datum.stack == 'stk1' ? datum.stk1+' '+datum.stk2 : datum.stk2+' '+datum.stk1 + as: sortField + } + // Calculate y0 and y1 positions for stacking nodes one on top of the other, + // independently for each stack, and ensuring they are in the proper order, + // alphabetical from the top (reversed on the y axis) + { + type: stack + groupby: [ + stack + ] + sort: { + field: sortField + order: descending + } + field: size + } + // calculate vertical center point for each node, used to draw edges + { + type: formula + expr: (datum.y0+datum.y1)/2 + as: yc + } + ] + } + { + name: groups + source: nodes + transform: [ + // combine all nodes into country groups, summing up the doc counts + { + type: aggregate + groupby: [ + stack + grpId + ] + fields: [ + size + ] + ops: [ + sum + ] + as: [ + total + ] + } + // re-calculate the stacking y0,y1 values + { + type: stack + groupby: [ + stack + ] + sort: { + field: grpId + order: descending + } + field: total + } + // project y0 and y1 values to screen coordinates + // doing it once here instead of doing it several times in marks + { + type: formula + expr: scale('y', datum.y0) + as: scaledY0 + } + { + type: formula + expr: scale('y', datum.y1) + as: scaledY1 + } + // boolean flag if the label should be on the right of the stack + { + type: formula + expr: datum.stack == 'stk1' + as: rightLabel + } + // Calculate traffic percentage for this country using "y" scale + // domain upper bound, which represents the total traffic + { + type: formula + expr: datum.total/domain('y')[1] + as: percentage + } + ] + } + { + // This is a temp lookup table with all the 'stk2' stack nodes + name: destinationNodes + source: nodes + transform: [ + { + type: filter + expr: datum.stack == 'stk2' + } + ] + } + { + name: edges + source: nodes + transform: [ + // we only want nodes from the left stack + { + type: filter + expr: datum.stack == 'stk1' + } + // find corresponding node from the right stack, keep it as "target" + { + type: lookup + from: destinationNodes + key: key + fields: [ + key + ] + as: [ + target + ] + } + // calculate SVG link path between stk1 and stk2 stacks for the node pair + { + type: linkpath + orient: horizontal + shape: diagonal + sourceY: { + expr: scale('y', datum.yc) + } + sourceX: { + expr: scale('x', 'stk1') + bandwidth('x') + } + targetY: { + expr: scale('y', datum.target.yc) + } + targetX: { + expr: scale('x', 'stk2') + } + } + // A little trick to calculate the thickness of the line. + // The value needs to be the same as the hight of the node, but scaling + // size to screen's height gives inversed value because screen's Y + // coordinate goes from the top to the bottom, whereas the graph's Y=0 + // is at the bottom. So subtracting scaled doc count from screen height + // (which is the "lower" bound of the "y" scale) gives us the right value + { + type: formula + expr: range('y')[0]-scale('y', datum.size) + as: strokeWidth + } + // Tooltip needs individual link's percentage of all traffic + { + type: formula + expr: datum.size/domain('y')[1] + as: percentage + } + ] + } + ] + scales: [ + { + // calculates horizontal stack positioning + name: x + type: band + range: width + domain: [ + stk1 + stk2 + ] + paddingOuter: 0.05 + paddingInner: 0.95 + } + { + // this scale goes up as high as the highest y1 value of all nodes + name: y + type: linear + range: height + domain: { + data: nodes + field: y1 + } + } + { + // use rawData to ensure the colors stay the same when clicking. + name: color + type: ordinal + range: category + domain: { + data: rawData + fields: [ + stk1 + stk2 + ] + } + } + { + // this scale is used to map internal ids (stk1, stk2) to stack names + name: stackNames + type: ordinal + range: [ + Source + Destination + ] + domain: [ + stk1 + stk2 + ] + } + ] + axes: [ + { + // x axis should use custom label formatting to print proper stack names + orient: bottom + scale: x + encode: { + labels: { + update: { + text: { + scale: stackNames + field: value + } + } + } + } + } + { + orient: left + scale: y + } + ] + marks: [ + { + // draw the connecting line between stacks + type: path + name: edgeMark + from: { + data: edges + } + // this prevents some autosizing issues with large strokeWidth for paths + clip: true + encode: { + update: { + // By default use color of the left node, except when showing traffic + // from just one country, in which case use destination color. + stroke: [ + { + test: groupSelector && groupSelector.stack=='stk1' + scale: color + field: stk2 + } + { + scale: color + field: stk1 + } + ] + strokeWidth: { + field: strokeWidth + } + path: { + field: path + } + // when showing all traffic, and hovering over a country, + // highlight the traffic from that country. + strokeOpacity: { + signal: !groupSelector && (groupHover.stk1 == datum.stk1 || groupHover.stk2 == datum.stk2) ? 0.9 : 0.3 + } + // Ensure that the hover-selected edges show on top + zindex: { + signal: !groupSelector && (groupHover.stk1 == datum.stk1 || groupHover.stk2 == datum.stk2) ? 1 : 0 + } + // format tooltip string + tooltip: { + signal: datum.stk1 + ' → ' + datum.stk2 + ' ' + format(datum.size, ',.0f') + ' (' + format(datum.percentage, '.1%') + ')' + } + } + // Simple mouseover highlighting of a single line + hover: { + strokeOpacity: { + value: 1 + } + } + } + } + { + // draw stack groups (countries) + type: rect + name: groupMark + from: { + data: groups + } + encode: { + enter: { + fill: { + scale: color + field: grpId + } + width: { + scale: x + band: 1 + } + } + update: { + x: { + scale: x + field: stack + } + y: { + field: scaledY0 + } + y2: { + field: scaledY1 + } + fillOpacity: { + value: 0.6 + } + tooltip: { + signal: datum.grpId + ' ' + format(datum.total, ',.0f') + ' (' + format(datum.percentage, '.1%') + ')' + } + } + hover: { + fillOpacity: { + value: 1 + } + } + } + } + { + // draw country code labels on the inner side of the stack + type: text + from: { + data: groups + } + // don't process events for the labels - otherwise line mouseover is unclean + interactive: false + encode: { + update: { + // depending on which stack it is, position x with some padding + x: { + signal: scale('x', datum.stack) + (datum.rightLabel ? bandwidth('x') + 8 : -8) + } + // middle of the group + yc: { + signal: (datum.scaledY0 + datum.scaledY1)/2 + } + align: { + signal: datum.rightLabel ? 'left' : 'right' + } + baseline: { + value: middle + } + fontWeight: { + value: bold + } + // only show text label if the group's height is large enough + text: { + signal: abs(datum.scaledY0-datum.scaledY1) > 13 ? datum.grpId : '' + } + } + } + } + ] + signals: [ + { + // used to highlight traffic to/from the same country + name: groupHover + value: { + } + on: [ + { + events: @groupMark:mouseover + update: "{stk1:datum.stack=='stk1' && datum.grpId, stk2:datum.stack=='stk2' && datum.grpId}" + } + { + events: mouseout + update: "{}" + } + ] + } + // used to filter only the data related to the selected country + { + name: groupSelector + value: false + on: [ + { + // Clicking groupMark sets this signal to the filter values + events: @groupMark:click! + update: '''{stack:datum.stack == "stk1" ? kibanaAddFilter({"match_phrase": {"machine.os.keyword":datum.grpId}}) : kibanaAddFilter({"match_phrase": {"geo.dest":datum.grpId}}) }''' + } + ] + } + ] +}