Skip to content

Commit

Permalink
fix: Catalog init introduces significant overhead (georgia-tech-db#1270)
Browse files (browse the repository at this point in the history)
  • Loading branch information
gaurav274 authored and a0x8o committed Nov 22, 2023
1 parent e93d7af commit b593884
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 0 deletions.
3 changes: 3 additions & 0 deletions evadb/executor/apply_and_merge_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def exec(self, *args, **kwargs) -> Iterator[Batch]:
for batch in child_executor.exec(**kwargs):
func_result = self.func_expr.evaluate(batch)

<<<<<<< HEAD
<<<<<<< HEAD
=======
# persist stats of function expression
Expand All @@ -55,6 +56,8 @@ def exec(self, *args, **kwargs) -> Iterator[Batch]:
)

>>>>>>> 2dacff69 (feat: sync master staging (#1050))
=======
>>>>>>> 842cc5f8 (fix: Catalog init introduces significant overhead (#1270))
output = Batch.merge_column_wise([batch, func_result])
if self.do_unnest:
output.unnest(func_result.columns)
Expand Down
9 changes: 9 additions & 0 deletions evadb/executor/executor_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,13 @@
import os
from pathlib import Path
from typing import TYPE_CHECKING, Generator, List, Union
<<<<<<< HEAD

from evadb.catalog.catalog_utils import xform_column_definitions_to_catalog_entries
from evadb.catalog.models.utils import TableCatalogEntry
from evadb.parser.create_statement import ColumnDefinition
=======
>>>>>>> 842cc5f8 (fix: Catalog init introduces significant overhead (#1270))

from evadb.catalog.catalog_utils import xform_column_definitions_to_catalog_entries
from evadb.catalog.models.utils import TableCatalogEntry
Expand Down Expand Up @@ -73,6 +76,7 @@ def apply_project(batch: Batch, project_list: List[AbstractExpression]):
batches = [expr.evaluate(batch) for expr in project_list]
batch = Batch.merge_column_wise(batches)
<<<<<<< HEAD
<<<<<<< HEAD
=======
# persist stats of function expression
Expand All @@ -86,6 +90,8 @@ def apply_project(batch: Batch, project_list: List[AbstractExpression]):
func_expr._stats.prev_cost,
)
>>>>>>> 2dacff69 (feat: sync master staging (#1050))
=======
>>>>>>> 842cc5f8 (fix: Catalog init introduces significant overhead (#1270))
return batch
Expand All @@ -95,6 +101,7 @@ def apply_predicate(batch: Batch, predicate: AbstractExpression) -> Batch:
batch.drop_zero(outcomes)
batch.reset_index()
<<<<<<< HEAD
<<<<<<< HEAD
=======
# persist stats of function expression
Expand All @@ -105,6 +112,8 @@ def apply_predicate(batch: Batch, predicate: AbstractExpression) -> Batch:
function_id, func_expr.function_obj.name, func_expr._stats.prev_cost
)
>>>>>>> 2dacff69 (feat: sync master staging (#1050))
=======
>>>>>>> 842cc5f8 (fix: Catalog init introduces significant overhead (#1270))
return batch
Expand Down
3 changes: 3 additions & 0 deletions evadb/executor/function_scan_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def exec(self, *args, **kwargs) -> Iterator[Batch]:
if not lateral_input.empty():
res = self.func_expr.evaluate(lateral_input)

<<<<<<< HEAD
<<<<<<< HEAD
=======
# persist stats of function expression
Expand All @@ -54,6 +55,8 @@ def exec(self, *args, **kwargs) -> Iterator[Batch]:
)

>>>>>>> 2dacff69 (feat: sync master staging (#1050))
=======
>>>>>>> 842cc5f8 (fix: Catalog init introduces significant overhead (#1270))
if not res.empty():
if self.do_unnest:
res.unnest(res.columns)
Expand Down
20 changes: 20 additions & 0 deletions evadb/executor/project_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,20 @@
from evadb.database import EvaDBDatabase
from evadb.executor.abstract_executor import AbstractExecutor
<<<<<<< HEAD
<<<<<<< HEAD
=======
>>>>>>> 842cc5f8 (fix: Catalog init introduces significant overhead (#1270))
from evadb.executor.executor_utils import (
ExecutorError,
apply_project,
instrument_function_expression_cost,
)
<<<<<<< HEAD
=======
from evadb.executor.executor_utils import ExecutorError, apply_project
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
=======
>>>>>>> 842cc5f8 (fix: Catalog init introduces significant overhead (#1270))
from evadb.models.storage.batch import Batch
from evadb.plan_nodes.project_plan import ProjectPlan

Expand All @@ -43,29 +49,43 @@ def exec(self, *args, **kwargs) -> Iterator[Batch]:
if len(self.children) == 0:
# Create a dummy batch with size 1
dummy_batch = Batch(pd.DataFrame([0]))
<<<<<<< HEAD
<<<<<<< HEAD
batch = apply_project(dummy_batch, self.target_list)
=======
batch = apply_project(dummy_batch, self.target_list, self.catalog())
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
=======
batch = apply_project(dummy_batch, self.target_list)
>>>>>>> 842cc5f8 (fix: Catalog init introduces significant overhead (#1270))
if not batch.empty():
yield batch
# SELECT expr FROM table;
elif len(self.children) == 1:
child_executor = self.children[0]
for batch in child_executor.exec(**kwargs):
<<<<<<< HEAD
<<<<<<< HEAD
batch = apply_project(batch, self.target_list)
=======
batch = apply_project(batch, self.target_list, self.catalog())
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
=======
batch = apply_project(batch, self.target_list)
>>>>>>> 842cc5f8 (fix: Catalog init introduces significant overhead (#1270))
if not batch.empty():
yield batch
else:
raise ExecutorError("ProjectExecutor has more than 1 children.")
<<<<<<< HEAD
<<<<<<< HEAD

# instrument required stats
instrument_function_expression_cost(self.target_list, self.catalog())
=======
>>>>>>> 40a10ce1 (Bump v0.3.4+ dev)
=======

# instrument required stats
instrument_function_expression_cost(self.target_list, self.catalog())
>>>>>>> 842cc5f8 (fix: Catalog init introduces significant overhead (#1270))

0 comments on commit b593884

Please sign in to comment.