Skip to content

Commit

Permalink
feat: add use duckdb to compute datas
Browse files Browse the repository at this point in the history
  • Loading branch information
longxiaofei committed Aug 8, 2023
1 parent 4c9dff0 commit fc80561
Show file tree
Hide file tree
Showing 20 changed files with 726 additions and 23 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -139,5 +139,6 @@ poetry.lock

pygwalker/templates/graphic-walker.umd.js
pygwalker/templates/graphic-walker.iife.js
pygwalker/templates/dsl_to_sql.wasm

tests/*.csv
2 changes: 1 addition & 1 deletion app/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"@headlessui/react": "^1.7.14",
"@heroicons/react": "^2.0.8",
"@kanaries/auth-wrapper": "file:./lib/auth-wrapper-v0.1.4.tgz",
"@kanaries/graphic-walker": "0.3.15",
"@kanaries/graphic-walker": "0.4.1",
"autoprefixer": "^10.3.5",
"html-to-image": "^1.11.11",
"mobx": "^6.9.0",
Expand Down
2 changes: 1 addition & 1 deletion app/src/components/codeExportModal/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ const CodeExport: React.FC<ICodeExport> = observer((props) => {

useEffect(() => {
if (props.open) {
const res = props.globalStore.current?.vizStore.exportViewSpec()!;
const res = props.globalStore.current?.vizStore.exportViewSpec()! as IVisSpec[];
setCode(res);
}
}, [props.open]);
Expand Down
7 changes: 5 additions & 2 deletions app/src/components/options.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,17 @@ const Solution: React.FC<ISolutionProps> = (props) => {
);
};

const RAND_HASH = Math.random().toString(16).split(".").at(1);
const RAND_HASH = Math.random().toString(16).split(".").at(1) + new Date().getTime().toString(16).padStart(16, "0");
const Options: React.FC<IAppProps> = (props: IAppProps) => {
const [outdated, setOutDated] = useState<Boolean>(false);
const [appMeta, setAppMeta] = useState<any>({});
const [showUpdateHint, setShowUpdateHint] = useState(false);
if (window.localStorage.getItem("HASH") === null) {
window.localStorage.setItem("HASH", RAND_HASH);
}
const UPDATE_URL = "https://5agko11g7e.execute-api.us-west-1.amazonaws.com/default/check_updates";
const VERSION = (window as any)?.__GW_VERSION || "current";
const HASH = (window as any)?.__GW_HASH || RAND_HASH;
const HASH = window.localStorage.getItem("HASH");
useEffect(() => {
if (props.userConfig?.privacy !== "offline") {
const req = `${UPDATE_URL}?pkg=pygwalker-app&v=${VERSION}&hashcode=${HASH}&env=${process.env.NODE_ENV}`;
Expand Down
18 changes: 17 additions & 1 deletion app/src/dataSource/index.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
import type { IDataSourceProps } from "../interfaces";
import type { IRow } from "@kanaries/graphic-walker/dist/interfaces";
import type { IRow, IDataQueryPayload } from "@kanaries/graphic-walker/dist/interfaces";
import commonStore from "../store/common";
import communicationStore from "../store/communication"

declare global {
export interface Window {
dslToSql: (datasetStr: string, PayloadStr: string) => string;
}
}

interface MessagePayload extends IDataSourceProps {
action: "requestData" | "postData" | "finishData";
dataSourceId: string;
Expand Down Expand Up @@ -75,3 +82,12 @@ export function finishDataService(msg: any) {
"*"
)
}

export async function getDatasFromKernel(payload: IDataQueryPayload) {
const sql = window.dslToSql(
JSON.stringify({type: "table", source: "__mid_df"}),
JSON.stringify(payload)
);
const result = await communicationStore.comm?.sendMsg("get_datas", {"sql": sql});
return result["data"]["datas"] as IRow[];
}
14 changes: 12 additions & 2 deletions app/src/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@ import Options from './components/options';
import { IAppProps } from './interfaces';
import NotificationWrapper from "./notify";

import { loadDataSource, postDataService, finishDataService } from './dataSource';
import { loadDataSource, postDataService, finishDataService, getDatasFromKernel } from './dataSource';

import { useNotification } from "./notify";
import commonStore from "./store/common";
import initCommunication from "./utils/communication";
import communicationStore from "./store/communication"
Expand Down Expand Up @@ -59,6 +60,7 @@ const App: React.FC<IAppProps> = observer((propsIn) => {
const [mounted, setMounted] = useState(false);
const [exportOpen, setExportOpen] = useState(false);
const specList = props.visSpec ? JSON.parse(props.visSpec) : [];
const { notify } = useNotification();

const setData = (data?: IRow[], rawFields?: IMutField[]) => {
if (specList.length !== 0) {
Expand Down Expand Up @@ -102,6 +104,14 @@ const App: React.FC<IAppProps> = observer((propsIn) => {
commonStore.setShowCloudTool(props.showCloudTool);
updateDataSource();
if (userConfig) setConfig(userConfig);
// temporary notifcation
if (props.useKernelCalc) {
notify({
type: "info",
title: "Tips",
message: "in `useKernelCalc` mode, If your dataset too big, not suitable for some non-aggregated charts, such as scatter.",
}, 6_000);
}
}, []);

const exportTool = getExportTool(setExportOpen);
Expand All @@ -128,7 +138,7 @@ const App: React.FC<IAppProps> = observer((propsIn) => {
mounted && checkUploadPrivacy() && commonStore.showCloudTool && <AuthWrapper id={props["id"]} wrapRef={wrapRef} />
}
<CodeExportModal open={exportOpen} setOpen={setExportOpen} globalStore={storeRef} sourceCode={props["sourceInvokeCode"] || ""} />
<GraphicWalker {...props} storeRef={storeRef} ref={gwRef} toolbar={toolbarConfig} />
<GraphicWalker {...props} storeRef={storeRef} ref={gwRef} toolbar={toolbarConfig} computation={props.useKernelCalc ? getDatasFromKernel : undefined} />
<InitModal />
<Options {...props} toolbar={toolbarConfig} />
</React.StrictMode>
Expand Down
1 change: 1 addition & 0 deletions app/src/interfaces/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ export interface IAppProps extends IGWProps {
specType?: string;
showCloudTool: boolean;
needInitChart: boolean;
useKernelCalc: boolean;
}

export interface IDataSourceProps {
Expand Down
2 changes: 1 addition & 1 deletion app/src/utils/communication.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ const initCommunication = (gid: string) => {
}

if (jupyterEnv === "datalore") {
const kernel = window.parent.Jupyter.notebook.kernel;
const kernel = (window.parent as any).Jupyter.notebook.kernel;
if (kernel.__pre_can_handle_message === undefined) {
kernel.__pre_can_handle_message = kernel._can_handle_message;
}
Expand Down
8 changes: 4 additions & 4 deletions app/yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -440,10 +440,10 @@
tailwindcss "^3.2.4"
uuid "^8.3.2"

"@kanaries/graphic-walker@0.3.15":
version "0.3.15"
resolved "https://registry.yarnpkg.com/@kanaries/graphic-walker/-/graphic-walker-0.3.15.tgz#546f7ef52eafff4bce3263a11ed1af5f8b4f3e99"
integrity sha512-2PhWTB/23vgiNdraYDtBDG/DYXdQ+nuUfhNN9QP+zfnSDGRUxXAHS9z7ntdcRZjNQYY41T4HqYmK/Bn+KRctuA==
"@kanaries/graphic-walker@0.4.1":
version "0.4.1"
resolved "https://registry.yarnpkg.com/@kanaries/graphic-walker/-/graphic-walker-0.4.1.tgz#fbc0e54243cae1ce4ae66a443313597f2056bfb4"
integrity sha512-9hIORY0KUchgXVTltJ23PYBw+zgVgZQWfIx7cA5dfarRz9VCvgK385Yaki0f+ad6Q0jAmdDa3ONVE3XIehUHCA==
dependencies:
"@headlessui/react" "^1.7.12"
"@heroicons/react" "^2.0.8"
Expand Down
1 change: 1 addition & 0 deletions pygwalker/api/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def to_html(
False,
False,
False,
False,
**kwargs
)

Expand Down
29 changes: 25 additions & 4 deletions pygwalker/api/pygwalker.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,15 @@ def __init__(
show_cloud_tool: bool,
use_preview: bool,
store_chart_data: bool,
use_kernel_calc: bool,
**kwargs
):
if gid is None:
self.gid = GlobalVarManager.get_global_gid()
else:
self.gid = gid
self.df = df
self._init_data_source(df, field_specs)
self.df = get_parser(df).get_inited_dataframe()
self._init_data_source(df, field_specs, use_kernel_calc)
self.spec = spec
self.source_invoke_code = source_invoke_code
self.hidedata_source_config = hidedata_source_config
Expand All @@ -64,8 +65,16 @@ def __init__(
self.use_preview = use_preview
self.store_chart_data = store_chart_data
self._init_spec(spec)
self.use_kernel_calc = use_kernel_calc

def _init_data_source(self, df: DataFrame, field_specs: Dict[str, Any]) -> None:
def _init_data_source(
self,
df: DataFrame,
field_specs: Dict[str, Any],
use_kernel_calc: bool
) -> None:
if use_kernel_calc:
df = df[:500]
data_parser = get_parser(df)
self.origin_data_source = data_parser.to_records()
self.field_specs = data_parser.raw_fields(field_specs=field_specs)
Expand Down Expand Up @@ -266,6 +275,17 @@ def update_spec(data: Dict[str, Any]):
comm.register("update_spec", update_spec)
comm.register("save_chart", save_chart_endpoint)

if self.use_kernel_calc:
# pylint: disable=import-outside-toplevel
from pygwalker.services.calculation import get_datas_from_dataframe
# pylint: enable=import-outside-toplevel

def _get_datas(data: Dict[str, Any]):
return {
"datas": get_datas_from_dataframe(self.df, data["sql"])
}
comm.register("get_datas", _get_datas)

def _get_props(
self,
env: str = "",
Expand Down Expand Up @@ -294,9 +314,10 @@ def _get_props(
},
"env": env,
"specType": self.spec_type,
"needLoadDatas": need_load_datas,
"needLoadDatas": not self.use_kernel_calc and need_load_datas,
"showCloudTool": self.show_cloud_tool,
"needInitChart": not (self.store_chart_data and self._chart_map),
"useKernelCalc": self.use_kernel_calc,
**self.other_props,
}

Expand Down
2 changes: 2 additions & 0 deletions pygwalker/api/walker.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def walk(
show_cloud_tool: bool = False,
use_preview: bool = False,
store_chart_data: bool = False,
use_kernel_calc: bool = False,
**kwargs
):
"""Walk through pandas.DataFrame df with Graphic Walker
Expand Down Expand Up @@ -63,6 +64,7 @@ def walk(
show_cloud_tool,
use_preview,
store_chart_data,
use_kernel_calc,
**kwargs
)

Expand Down
7 changes: 7 additions & 0 deletions pygwalker/data_parsers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,19 @@ def to_records(self) -> List[Dict[str, Any]]:
"""get records"""
raise NotImplementedError

@abc.abstractmethod
def get_inited_dataframe(self) -> DataFrame:
"""get records"""
raise NotImplementedError

class BaseDataFrameDataParser(Generic[DataFrame], BaseDataParser):
"""DataFrame property getter"""
def __init__(self, df: DataFrame):
self.df = self._init_dataframe(df)

def get_inited_dataframe(self) -> DataFrame:
return self.df

def raw_fields(self, field_specs: Dict[str, FieldSpec]) -> List[Dict[str, str]]:
return [
self._infer_prop(col, field_specs)
Expand Down
18 changes: 18 additions & 0 deletions pygwalker/services/calculation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from typing import List, Dict, Any

import duckdb

from pygwalker._typing import DataFrame


def get_datas_from_dataframe(df: DataFrame, sql: str) -> List[Dict[str, Any]]:
"""
Get datas from dataframe by sql(duckdb).
"""
mid_table_name = "__mid_df"
sql = sql.encode('utf-8').decode('unicode_escape')
result = duckdb.query_df(df, mid_table_name, sql)
return [
dict(zip(result.columns, row))
for row in result.fetchall()
]
33 changes: 31 additions & 2 deletions pygwalker/services/render.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import json
import base64
from typing import Dict, List, Any

from jinja2 import Environment, PackageLoader
Expand Down Expand Up @@ -38,10 +39,38 @@ def render_gwalker_iframe(gid: int, srcdoc: str) -> str:
)


def render_calc_wasm_js(use_kernel_calc: bool) -> str:
if not use_kernel_calc:
return """
const initCalcWasm = async () => {};
""".strip("\n")

wasm_js_template = jinja_env.get_template("init_calc_wasm.js")
wasm_exec_file_path = os.path.join(ROOT_DIR, 'templates', 'wasm_exec.js')
wasm_file_path = os.path.join(ROOT_DIR, 'templates', 'dsl_to_sql.wasm')

with open(wasm_exec_file_path, 'r', encoding='utf8') as f:
exec_wasm_js = f.read()
with open(wasm_file_path, 'rb') as f:
wasm_content = f.read()

js_content = wasm_js_template.render(
wasm_exec_js=exec_wasm_js,
file_base64=(base64.b64encode(wasm_content)).decode(),
)

return js_content


def render_gwalker_html(gid: int, props: Dict) -> str:
walker_template = jinja_env.get_template("walk.js")
js = walker_template.render(gwalker={'id': gid, 'props': json.dumps(props, cls=DataFrameEncoder)})
js = "var exports={}, module={};" + gwalker_script() + js
js_list = [
"var exports={}, module={};",
render_calc_wasm_js(props.get('useKernelCalc', False)),
gwalker_script(),
walker_template.render(gwalker={'id': gid, 'props': json.dumps(props, cls=DataFrameEncoder)})
]
js = "\n".join(js_list)
template = jinja_env.get_template("index.html")
html = f"{template.render(gwalker={'id': gid, 'script': js})}"
return html
13 changes: 13 additions & 0 deletions pygwalker/templates/init_calc_wasm.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
const initCalcWasm = async () => {
{{ wasm_exec_js }}
let binaryString = atob("{{file_base64}}");
let bytes = new Uint8Array(binaryString.length);
for (var i = 0; i < binaryString.length; i++) {
bytes[i] = binaryString.charCodeAt(i);
}
let arrayBuffer = bytes.buffer;

const go = new window.Go();
const result = await WebAssembly.instantiate(arrayBuffer, go.importObject);
go.run(result.instance);
}
17 changes: 13 additions & 4 deletions pygwalker/templates/walk.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
var gw_id = "gwalker-div-{{ gwalker.id }}";
var props = {{ gwalker.props }};
console.log(PyGWalkerApp, props, gw_id);

const initFunc = async () => {
await initCalcWasm();
};

try{
window.__GW_HASH=props.hashcode;
window.__GW_VERSION=props.version;
PyGWalkerApp.GWalker(props, gw_id);
}catch(e){ console.error(e); }
window.__GW_HASH=props.hashcode;
window.__GW_VERSION=props.version;
initFunc().then(() => {
PyGWalkerApp.GWalker(props, gw_id);
});
} catch(e) {
console.error(e);
}
Loading

0 comments on commit fc80561

Please sign in to comment.