Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

Commit

Permalink
remove sensitive info from physical plan (#860)
Browse files Browse the repository at this point in the history
  • Loading branch information
FelixYBW authored and zhouyuan committed Apr 19, 2022
1 parent 35e20fc commit 5036b5c
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 18 deletions.
16 changes: 16 additions & 0 deletions tools/gazelle_analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,22 @@
"# if you need to open the traceview from hostip instead of 127.0.0.1, hack catapult :-)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The command generate the traceview as json format. It can be open by traceviewer in https://chromium.googlesource.com/catapult.\n",
"# 1. clone catapult\n",
"# 2. copy/generate the traceview json file to a folder like /home/xxx/trace_result\n",
"# 3. cd catapult/bin/\n",
"# 4. python2.7 ./run_dev_server --no-install-hooks -d /home/xxx/trace_result -p1088\n",
"# 5. open like in browser: http://127.0.0.1:1088/tracing_examples/trace_viewer.html#/tracing/test_data/application_1647347981137_0221_traceview.json\n",
"\n",
"# if you need to open the traceview from hostip instead of 127.0.0.1, hack catapult :-)"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down
66 changes: 48 additions & 18 deletions tools/sparklog.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,8 @@
" queryid=kwargs.get('queryid',None)\n",
" shownops=kwargs.get(\"shownops\",['ArrowRowToColumnarExec','ColumnarToRow','RowToArrowColumnar','ArrowColumnarToRow','Filter','HashAggregate','Project','SortAggregate','SortMergeJoin','window'])\n",
" \n",
" desensitization=kwargs.get('desensitization',True)\n",
" \n",
" def get_fields(colss):\n",
" lvls=0\n",
" colns=[]\n",
Expand Down Expand Up @@ -590,9 +592,10 @@
" funcs[opname+str(len(columns))].extend(colns)\n",
" for c in colns:\n",
" if \" AS \" in c:\n",
" c=re.sub(\"#\\d+L*\",\"\",c)\n",
" colname=re.search(r\" AS (.+)\",c).group(1)\n",
" if colname not in columns:\n",
" columns[colname]=prefix+str(len(columns)) \n",
" columns[colname]=prefix\n",
" \n",
" plans=appals.queryplans.select('real_queryid','physicalPlanDescription').collect() if queryid is None else appals.queryplans.where(f\"real_queryid='{queryid}'\").select(\"physicalPlanDescription\").collect()\n",
" \n",
Expand Down Expand Up @@ -623,22 +626,29 @@
" idv=re.search(\"^\\(\\d+\\)\",l).group(0)\n",
" if idv in nodes:\n",
" desc=\"\"\n",
" while l!=\"\":\n",
" while l.strip()!=\"\":\n",
" desc+=l+\"\\n\"\n",
" idx+=1\n",
" l=lines[idx]\n",
" desc=re.sub(r\"#\\d+L*\",r\"\",desc)\n",
" desc=re.sub(r\"= [^)]+\",r\"=\",desc)\n",
" desc=re.sub(r\"IN \\([^)]\\)\",r\"IN ()\",desc)\n",
" desc=re.sub(r\"In\\([^)]\\)\",r\"In()\",desc)\n",
" desc=re.sub(r\"EqualTo\\(([^,]+),[^)]+\\)\",r\"EqualTo(\\1,)\",desc)\n",
" ## add all keyword replace here\n",
" nodes[idv].append(desc)\n",
" tables={}\n",
" columns={}\n",
" functions={}\n",
" for s in nodes.values():\n",
" p=re.search(r\"Scan arrow default\\.([^ ]+)\",s[0])\n",
" p=re.search(r\"Scan arrow [^.]*\\.([^ ]+)\",s[0])\n",
" if p:\n",
" tn=p.group(1)\n",
" if not tn in tables:\n",
" tables[tn]=\"t_\"+str(len(tables))\n",
" s[0]=s[0].replace(tn,tables[tn])\n",
" s[1]=s[1].replace(tn,tables[tn])\n",
" tables[tn]=\"table\"\n",
" if desensitization:\n",
" s[0]=s[0].replace(tn,tables[tn])\n",
" s[1]=s[1].replace(tn,tables[tn])\n",
" colsv=[]\n",
" schema=[]\n",
" for v in s[1].split(\"\\n\"):\n",
Expand All @@ -652,21 +662,40 @@
" if not ct in columns:\n",
" if len(cts)==2:\n",
" cts[1]=cts[1]\n",
" columns[ct]=cts[1]+\"_\"+str(len(columns))\n",
" columns[ct]=cts[1]+\"_\"\n",
" else:\n",
" columns[ct]=\"c_\"+str(len(columns))\n",
" columns[ct]=\"c_\"\n",
" if v.startswith(\"Location\") and desensitization:\n",
" s[1]=s[1].replace(v+\"\\n\",\"\")\n",
" \n",
" get_column_names(s, \"Project\", \"Output\", \"proj_\", columns, functions)\n",
" get_column_names(s, \"HashAggregate\", \"Results\", \"shagg_\", columns, functions)\n",
" get_column_names(s, \"SortAggregate\", \"Results\", \"stagg_\", columns, functions)\n",
"\n",
" get_column_names(s, \"ColumnarConditionProject\", \"Arguments\", \"cproj_\", columns, functions)\n",
" get_column_names(s, \"ColumnarHashAggregate\", \"Results\", \"cshagg_\", columns, functions)\n",
" get_column_names(s, \"Window\", \"Arguments\", \"window_\", columns, functions)\n",
"\n",
" keys=[]\n",
" ckeys=list(columns.keys())\n",
" for l in range(0,len(ckeys)):\n",
" k1=ckeys[l]\n",
" for k in range(0,len(keys)):\n",
" if keys[k] in k1:\n",
" keys.insert(k,k1)\n",
" break\n",
" else:\n",
" keys.append(k1)\n",
" \n",
" for s in nodes.values():\n",
" s[1]=html.escape(s[1])\n",
" for c,v in columns.items():\n",
" if v.startswith(\"array\") or v.startswith(\"map\") or v.startswith(\"struct\"):\n",
" s[1]=s[1].replace(c,'<span style=\"color:red;background-color:yellow\">'+html.escape(v)+\"</span>\")\n",
" else:\n",
" s[1]=s[1].replace(c,\"<font color=#33cc33>\"+html.escape(v)+\"</font>\")\n",
" if desensitization:\n",
" for c in keys:\n",
" v=columns[c]\n",
" if v.startswith(\"array\") or v.startswith(\"map\") or v.startswith(\"struct\"):\n",
" s[1]=re.sub(c, '<span style=\"color:red;background-color:yellow\">'+html.escape(v)+\"</span>\",s[1])\n",
" else:\n",
" s[1]=re.sub(c, \"<font color=#33cc33>\"+html.escape(v)+\"</font>\",s[1])\n",
"\n",
"\n",
" htmls=['''<table style=\"table-layout:fixed;max-width: 100%;\">''']\n",
" qid=pr+1 if queryid is None else queryid\n",
Expand Down Expand Up @@ -696,7 +725,7 @@
" colss=cols.group(1)\n",
" colns=get_fields(colss)\n",
" t=re.sub(\"\\[([^0-9].+)\\]\",\"\",t)\n",
" t+=\"[\"+'<span style=\"background-color:#ededed;\">;</span>'.join(colns)+\"]\"\n",
" t+=\"[\"+'<span style=\"background-color:#ededed;\">;</span>'.join(colns)+\"]\" \n",
" if \":\" in t:\n",
" lsx.append(re.sub(r'^([^:]+:)',r'<font color=blue>\\1</font>',t))\n",
" else:\n",
Expand All @@ -711,11 +740,12 @@
" functions[k]=[l for l in v if \"(\" in l]\n",
" for f in functions.values():\n",
" for idx in range(0,len(f)):\n",
" for c,v in columns.items():\n",
" for c in keys:\n",
" v=columns[c]\n",
" if v.startswith(\"array\") or v.startswith(\"map\") or v.startswith(\"struct\"):\n",
" f[idx]=f[idx].replace(c,'<span style=\"color:red;background-color:yellow\">'+html.escape(v)+\"</span>\")\n",
" f[idx]=re.sub(c, '<span style=\"color:red;background-color:yellow\">'+html.escape(v)+\"</span>\",f[idx])\n",
" else:\n",
" f[idx]=f[idx].replace(c,\"<font color=#33cc33>\"+html.escape(v)+\"</font>\")\n",
" f[idx]=re.sub(c, \"<font color=#33cc33>\"+html.escape(v)+\"</font>\",f[idx])\n",
" funchtml=\"<table>\"\n",
" for k,v in functions.items():\n",
" if shownops is not None:\n",
Expand Down

0 comments on commit 5036b5c

Please sign in to comment.