Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Aggregate with key is none #541

Merged
merged 5 commits into from
Feb 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ Changes
Version 1.7.2
-------------

* Allow aggregation over the entire table (without a key)
By :user:`bmaggard`.

* Allow specifying output field name for simple aggregation
By :user:`bmaggard`, :issue:`370`.

Expand Down
184 changes: 172 additions & 12 deletions petl/test/transform/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,46 @@ def test_aggregate_simple():
('c', 1))
ieq(expect5, table5)
ieq(expect5, table5)



def test_aggregate_simple_key_is_None():
    """Simple aggregation with ``key=None`` spans the entire table.

    Each call form of the simple-aggregation signature is exercised and
    is expected to yield a header plus exactly one data row.
    """
    data = (('foo', 'bar', 'baz'),
            ('a', 3, True),
            ('a', 7, False),
            ('b', 2, True),
            ('b', 2, False),
            ('b', 9, False),
            ('c', 4, True))

    # Whole rows are aggregated when no value field is given.
    row_count = aggregate(data, None, len)
    want_count = (('value',), (6,))
    for _ in range(2):  # compared twice, so the result is iterated twice
        ieq(want_count, row_count)

    # A single value field, passed positionally.
    bar_total = aggregate(data, None, sum, 'bar')
    want_total = (('value',), (27,))
    for _ in range(2):
        ieq(want_total, bar_total)

    # The same kind of call spelled entirely with keyword arguments,
    # this time selecting two value fields.
    paired = aggregate(data, key=None, aggregation=list,
                       value=('bar', 'baz'))
    all_pairs = [(3, True), (7, False),
                 (2, True), (2, False), (9, False),
                 (4, True)]
    want_pairs = (('value',), (all_pairs,))
    for _ in range(2):
        ieq(want_pairs, paired)

    # The output field name can be overridden via ``field``.
    named = aggregate(data, None, len, field='nrows')
    want_named = (('nrows',), (6,))
    for _ in range(2):
        ieq(want_named, named)

def test_aggregate_multifield():

table1 = (('foo', 'bar'),
Expand Down Expand Up @@ -186,10 +225,57 @@ def test_aggregate_multifield():
table4 = aggregate(table1, 'foo', aggregators)
ieq(expect2, table4)
ieq(expect2, table4) # check can iterate twice



def test_aggregate_multifield_key_is_None():
    """Multi-field aggregation with ``key=None`` spans the entire table.

    The aggregators are supplied three ways (ordered dict, item
    assignment on the result, list of tuples) and all three are compared
    against the same expected single-row table.
    """
    data = (('foo', 'bar'),
            ('a', 3),
            ('a', 7),
            ('b', 2),
            ('b', 1),
            ('b', 9),
            ('c', 4))

    want = (('count', 'minbar', 'maxbar', 'sumbar', 'listbar', 'bars'),
            (6, 1, 9, 26, [3, 7, 2, 1, 9, 4], '3, 7, 2, 1, 9, 4'))

    # Aggregators given as an ordered mapping.
    spec_map = OrderedDict([
        ('count', len),
        ('minbar', ('bar', min)),
        ('maxbar', ('bar', max)),
        ('sumbar', ('bar', sum)),
        ('listbar', ('bar', list)),
        ('bars', ('bar', strjoin(', '))),
    ])
    from_map = aggregate(data, None, spec_map)
    for _ in range(2):  # compared twice to check it can be iterated twice
        ieq(want, from_map)

    # Aggregators attached afterwards via item assignment.
    from_items = aggregate(data, None)
    from_items['count'] = len
    from_items['minbar'] = 'bar', min
    from_items['maxbar'] = 'bar', max
    from_items['sumbar'] = 'bar', sum
    from_items['listbar'] = 'bar'  # default aggregation is list
    from_items['bars'] = 'bar', strjoin(', ')
    ieq(want, from_items)

    # Aggregators given as a list of tuples.
    spec_list = [
        ('count', len),
        ('minbar', 'bar', min),
        ('maxbar', 'bar', max),
        ('sumbar', 'bar', sum),
        ('listbar', 'bar', list),
        ('bars', 'bar', strjoin(', ')),
    ]
    from_list = aggregate(data, None, spec_list)
    for _ in range(2):  # compared twice to check it can be iterated twice
        ieq(want, from_list)

def test_aggregate_more():

table1 = (('foo', 'bar'),
('aa', 3),
('aa', 7),
Expand All @@ -198,7 +284,7 @@ def test_aggregate_more():
('bb', 9),
('cc', 4),
('dd', 3))

aggregators = OrderedDict()
aggregators['minbar'] = 'bar', min
aggregators['maxbar'] = 'bar', max
Expand All @@ -214,18 +300,51 @@ def test_aggregate_more():
('dd', 3, 3, 3, [3], '3'))
ieq(expect2, table2)
ieq(expect2, table2) # check can iterate twice

table3 = aggregate(table1, 'foo')
table3['minbar'] = 'bar', min
table3['maxbar'] = 'bar', max
table3['sumbar'] = 'bar', sum
table3['listbar'] = 'bar' # default aggregation is list
table3['bars'] = 'bar', strjoin(', ')
ieq(expect2, table3)



def test_aggregate_more_key_is_None():
    """Further multi-field checks with ``key=None`` on a seven-row table.

    Covers an aggregator entry that names only a source field (no
    function), supplied both up front in an ordered dict and afterwards
    by item assignment.
    """
    data = (('foo', 'bar'),
            ('aa', 3),
            ('aa', 7),
            ('bb', 2),
            ('bb', 1),
            ('bb', 9),
            ('cc', 4),
            ('dd', 3))

    want = (('minbar', 'maxbar', 'sumbar', 'listbar', 'bars'),
            (1, 9, 29,
             [3, 7, 2, 1, 9, 4, 3],
             '3, 7, 2, 1, 9, 4, 3'))

    spec_map = OrderedDict([
        ('minbar', ('bar', min)),
        ('maxbar', ('bar', max)),
        ('sumbar', ('bar', sum)),
        ('listbar', 'bar'),  # default aggregation is list
        ('bars', ('bar', strjoin(', '))),
    ])
    from_map = aggregate(data, None, spec_map)
    for _ in range(2):  # compared twice to check it can be iterated twice
        ieq(want, from_map)

    from_items = aggregate(data, None)
    from_items['minbar'] = 'bar', min
    from_items['maxbar'] = 'bar', max
    from_items['sumbar'] = 'bar', sum
    from_items['listbar'] = 'bar'  # default aggregation is list
    from_items['bars'] = 'bar', strjoin(', ')
    ieq(want, from_items)

def test_aggregate_multiple_source_fields():

table = (('foo', 'bar', 'baz'),
('a', 3, True),
('a', 7, False),
Expand Down Expand Up @@ -255,11 +374,40 @@ def test_aggregate_multiple_source_fields():
ieq(expect, actual)
ieq(expect, actual)


def test_aggregate_multiple_source_fields_key_is_None():
    """Aggregate two source fields at once with ``key=None``.

    The ``('bar', 'baz')`` value pairs of every row are expected to be
    collected into one list, whichever call form is used.
    """
    data = (('foo', 'bar', 'baz'),
            ('a', 3, True),
            ('a', 7, False),
            ('b', 2, True),
            ('b', 2, False),
            ('b', 9, False),
            ('c', 4, True))

    all_pairs = [(3, True), (7, False),
                 (2, True), (2, False), (9, False),
                 (4, True)]
    want = (('value',), (all_pairs,))

    # Positional arguments.
    positional = aggregate(data, None, list, ('bar', 'baz'))
    for _ in range(2):  # compared twice, so the result is iterated twice
        ieq(want, positional)

    # Keyword arguments.
    by_keyword = aggregate(data, key=None, aggregation=list,
                           value=('bar', 'baz'))
    for _ in range(2):
        ieq(want, by_keyword)

    # Aggregator attached afterwards via item assignment.
    by_item = aggregate(data, key=None)
    by_item['value'] = ('bar', 'baz'), list
    for _ in range(2):
        ieq(want, by_item)


def test_aggregate_empty():

table = (('foo', 'bar'),)

aggregators = OrderedDict()
aggregators['minbar'] = 'bar', min
aggregators['maxbar'] = 'bar', max
Expand All @@ -269,6 +417,18 @@ def test_aggregate_empty():
expect = (('foo', 'minbar', 'maxbar', 'sumbar'),)
ieq(expect, actual)

def test_aggregate_empty_key_is_None():
    """A header-only table aggregated with ``key=None`` gives only a header."""
    header_only = (('foo', 'bar'),)

    spec_map = OrderedDict([
        ('minbar', ('bar', min)),
        ('maxbar', ('bar', max)),
        ('sumbar', ('bar', sum)),
    ])

    result = aggregate(header_only, None, spec_map)
    # No data rows are expected, only the aggregator names as the header.
    want = (('minbar', 'maxbar', 'sumbar'),)
    ieq(want, result)

def test_mergeduplicates():

Expand Down
Loading