Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added aggregation expression with tests #38

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions src/expression/abstract_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,16 @@ class ExpressionType(IntEnum):
ARITHMETIC_ADD = 12,
ARITHMETIC_SUBTRACT = 13,
ARITHMETIC_MULTIPLY = 14,
ARITHMETIC_DIVIDE = 15
ARITHMETIC_DIVIDE = 15,

FUNCTION_EXPRESSION = 16,

AGGREGATION_COUNT = 17,
AGGREGATION_SUM = 18,
AGGREGATION_MIN = 19,
AGGREGATION_MAX = 20,
AGGREGATION_AVG = 21,

FUNCTION_EXPRESSION = 16
# add other types


Expand Down
33 changes: 33 additions & 0 deletions src/expression/aggregation_expression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import numpy as np

from src.expression.abstract_expression import AbstractExpression, \
ExpressionType, \
ExpressionReturnType
import statistics
from src.models.storage.batch import FrameBatch

class AggregationExpression(AbstractExpression):
    """Expression that reduces the values produced by its child to one value.

    Supported expression types are AGGREGATION_SUM, AGGREGATION_COUNT,
    AGGREGATION_AVG, AGGREGATION_MIN and AGGREGATION_MAX.
    """

    def __init__(self, exp_type: ExpressionType, left: AbstractExpression,
                 right: AbstractExpression):
        """Build the expression from up to two operand expressions.

        Args:
            exp_type: which aggregation to perform.
            left: first operand, or None.
            right: second operand, or None.

        Operands that are None are dropped, so a single operand always ends
        up as child 0 no matter which of the two slots it was passed in.
        """
        children = [child for child in (left, right) if child is not None]
        # NOTE(review): the return type is declared INTEGER, but AVG (and
        # SUM/MIN/MAX over float data) can yield a float.
        super().__init__(exp_type, rtype=ExpressionReturnType.INTEGER,
                         children=children)

    def evaluate(self, batch: FrameBatch):
        """Evaluate child 0 on the batch's frame data and aggregate it.

        Args:
            batch: the frames to aggregate over; the data of every frame is
                collected into a list and handed to the child expression.

        Returns:
            For COUNT, ``len()`` of the child's result. For SUM, AVG, MIN
            and MAX, the NumPy reduction over every element of the child's
            result.

        Raises:
            ValueError: if this expression's type is not one of the
                supported aggregation types.
        """
        args = [frame._data for frame in batch._frames]
        values = self.get_child(0).evaluate(args)

        if self.etype == ExpressionType.AGGREGATION_COUNT:
            return len(values)

        # Reduce with NumPy for every numeric aggregate so the result is a
        # scalar regardless of how many trailing dimensions the child
        # returns (the builtins sum/min/max iterate only the first axis).
        data = np.asarray(values)
        if self.etype == ExpressionType.AGGREGATION_SUM:
            return np.sum(data)
        if self.etype == ExpressionType.AGGREGATION_AVG:
            return data.mean()
        if self.etype == ExpressionType.AGGREGATION_MIN:
            return np.min(data)
        if self.etype == ExpressionType.AGGREGATION_MAX:
            return np.max(data)

        # Previously an unknown type fell through and returned None.
        raise ValueError(
            "Unsupported aggregation expression type: {}".format(self.etype))
8 changes: 5 additions & 3 deletions src/expression/arithmetic_expression.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from src.expression.abstract_expression import AbstractExpression, \
ExpressionType, ExpressionReturnType
from src.models.storage.batch import FrameBatch


class ArithmeticExpression(AbstractExpression):
Expand All @@ -14,9 +15,10 @@ def __init__(self, exp_type: ExpressionType, left: AbstractExpression,
super().__init__(exp_type, rtype=ExpressionReturnType.FLOAT,
children=children)

def evaluate(self, *args):
vl = self.get_child(0).evaluate(*args)
vr = self.get_child(1).evaluate(*args)
def evaluate(self, batch:FrameBatch):
frames = [frame._data for frame in batch._frames]
vl = self.get_child(0).evaluate(frames)
vr = self.get_child(1).evaluate(frames)

if (self.etype == ExpressionType.ARITHMETIC_ADD):
return vl + vr
Expand Down
9 changes: 6 additions & 3 deletions src/expression/comparison_expression.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from src.expression.abstract_expression import AbstractExpression, \
ExpressionType, \
ExpressionReturnType
from src.models.storage.batch import FrameBatch


class ComparisonExpression(AbstractExpression):
Expand All @@ -14,9 +15,11 @@ def __init__(self, exp_type: ExpressionType, left: AbstractExpression,
super().__init__(exp_type, rtype=ExpressionReturnType.BOOLEAN,
children=children)

def evaluate(self, *args):
left_values = self.get_child(0).evaluate(*args)
right_values = self.get_child(1).evaluate(*args)
def evaluate(self, batch: FrameBatch):
frames = [frame._data for frame in batch._frames]

left_values = self.get_child(0).evaluate(frames)
right_values = self.get_child(1).evaluate(frames)

# Broadcasting scalars
if type(right_values) is not list:
Expand Down
10 changes: 6 additions & 4 deletions src/expression/logical_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
ExpressionType, \
ExpressionReturnType

from src.models.storage.batch import FrameBatch


class LogicalExpression(AbstractExpression):
def __init__(self, exp_type: ExpressionType, left: AbstractExpression,
Expand All @@ -14,11 +16,11 @@ def __init__(self, exp_type: ExpressionType, left: AbstractExpression,
super().__init__(exp_type, rtype=ExpressionReturnType.BOOLEAN,
children=children)

def evaluate(self, *args):
def evaluate(self, batch: FrameBatch):
if self.get_children_count() == 2:
outcomes = []
left_values = self.get_child(0).evaluate(*args)
right_values = self.get_child(1).evaluate(*args)
left_values = self.get_child(0).evaluate(batch)
right_values = self.get_child(1).evaluate(batch)
for value_left, value_right in zip(left_values, right_values):
if self.etype == ExpressionType.LOGICAL_AND:
outcomes.append(value_left and value_right)
Expand All @@ -27,7 +29,7 @@ def evaluate(self, *args):
return outcomes

else:
values = self.get_child(0).evaluate(*args)
values = self.get_child(0).evaluate(batch)

if self.etype == ExpressionType.LOGICAL_NOT:
return [not value for value in values]
8 changes: 6 additions & 2 deletions src/expression/tuple_value_expression.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from .abstract_expression import AbstractExpression, ExpressionType, \
ExpressionReturnType

import numpy as np


class TupleValueExpression(AbstractExpression):
def __init__(self, col_idx: int = None, col_name: str = None):
Expand All @@ -27,8 +29,10 @@ def evaluate(self, *args):
if args is None:
# error Handling
pass
tuple1 = args[0]
return tuple1[(self._col_idx)]
frames = args

frames_np = [np.array(tuple1)[:, self._col_idx] for tuple1 in frames]
return frames_np[0]

# ToDo
# implement other boilerplate functionality
142 changes: 142 additions & 0 deletions test/expression/test_aggregation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
import unittest
import numpy as np

from src.expression.abstract_expression import ExpressionType
from src.expression.comparison_expression import ComparisonExpression
from src.expression.aggregation_expression import AggregationExpression
from src.expression.constant_value_expression import ConstantValueExpression
from src.expression.tuple_value_expression import TupleValueExpression
from src.models.storage.batch import FrameBatch
from src.models.storage.frame import Frame
from src.models.inference.classifier_prediction import Prediction


class LogicalExpressionsTest(unittest.TestCase):
    """Checks every aggregation type against one shared three-frame batch.

    NOTE(review): the class name looks carried over from the logical
    expression tests; it is kept unchanged so existing test IDs still match.
    """

    def setUp(self):
        """Build a batch of three frames whose 1x1 data hold 1, 2 and 3."""
        frames = [
            Frame(index, index * np.ones((1, 1)), None)
            for index in (1, 2, 3)
        ]
        self.input_batch = FrameBatch(frames=frames, info=None)

    def _aggregate(self, exp_type):
        """Evaluate an aggregation of the given type over column 0."""
        aggr_expr = AggregationExpression(
            exp_type,
            None,
            TupleValueExpression(0)
        )
        return aggr_expr.evaluate(self.input_batch)

    def test_aggregation_sum(self):
        self.assertEqual(
            6, self._aggregate(ExpressionType.AGGREGATION_SUM))

    def test_aggregation_count(self):
        self.assertEqual(
            3, self._aggregate(ExpressionType.AGGREGATION_COUNT))

    def test_aggregation_avg(self):
        self.assertEqual(
            2, self._aggregate(ExpressionType.AGGREGATION_AVG))

    def test_aggregation_min(self):
        self.assertEqual(
            1, self._aggregate(ExpressionType.AGGREGATION_MIN))

    def test_aggregation_max(self):
        self.assertEqual(
            3, self._aggregate(ExpressionType.AGGREGATION_MAX))
Loading