-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #3 from mc-digital/mhiro2/setup-ci-workflow
Setup CI workflow
- Loading branch information
Showing
20 changed files
with
1,035 additions
and
597 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# CI workflow: runs the tox environments (py, black, flake8, isort) on pull requests.
name: CI

on:
  pull_request:
    # Only trigger when the package sources or this workflow file change.
    paths:
      - carling/**
      - .github/workflows/ci.yml

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Versions are quoted so YAML keeps them as strings; an unquoted
        # 3.10 would be parsed as the float 3.1 and select the wrong Python.
        python: ["3.7", "3.8", "3.9"]
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python }}
      - name: Install dependencies
        run: |
          python -m pip install poetry tox
      - name: Test with tox
        run: poetry run tox -e py,black,flake8,isort
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,30 +1,59 @@ | ||
# isort: skip_file | ||
__version__ = "0.3.1" | ||
|
||
from .categorical import ( | ||
CreateCategoricalDicts, | ||
DigestCategoricalColumns, | ||
PairWithIndexNumber, | ||
ReplaceCategoricalColumns, | ||
) | ||
from .group import ( | ||
UniqueOnly, | ||
SingletonOnly, | ||
Intersection, | ||
DifferencePerKey, | ||
FilterByKey, | ||
FilterByKeyUsingSideInput, | ||
DifferencePerKey, | ||
Intersection, | ||
MaxSelectPerKey, | ||
PartitionRowsContainingNone, | ||
SingletonOnly, | ||
UniqueOnly, | ||
) | ||
from .mapping import ( | ||
Label, | ||
Select, | ||
Project, | ||
Exclude, | ||
IndexBy, | ||
Stringify, | ||
IndexBySingle, | ||
Label, | ||
Project, | ||
RenameFromTo, | ||
Exclude, | ||
Select, | ||
Stringify, | ||
) | ||
from .categorical import ( | ||
PairWithIndexNumber, | ||
DigestCategoricalColumns, | ||
CreateCategoricalDicts, | ||
ReplaceCategoricalColumns, | ||
from .util import LogSample, MemoizedValueProviderWrapper, ReifyMultiValueOption | ||
|
||
__all__ = ( | ||
# categorical | ||
"CreateCategoricalDicts", | ||
"DigestCategoricalColumns", | ||
"PairWithIndexNumber", | ||
"ReplaceCategoricalColumns", | ||
# group | ||
"DifferencePerKey", | ||
"FilterByKey", | ||
"FilterByKeyUsingSideInput", | ||
"Intersection", | ||
"MaxSelectPerKey", | ||
"PartitionRowsContainingNone", | ||
"SingletonOnly", | ||
"UniqueOnly", | ||
# mapping | ||
"Exclude", | ||
"IndexBy", | ||
"IndexBySingle", | ||
"Label", | ||
"Project", | ||
"RenameFromTo", | ||
"Select", | ||
"Stringify", | ||
# util | ||
"LogSample", | ||
"MemoizedValueProviderWrapper", | ||
"ReifyMultiValueOption", | ||
) | ||
from .util import LogSample, ReifyMultiValueOption, MemoizedValueProviderWrapper |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,21 +1,9 @@ | ||
""" | ||
Generic grouping transform utils | ||
Author: Tsuyoki Kumazaki ([email protected]) | ||
""" | ||
from functools import reduce | ||
|
||
import apache_beam as beam | ||
|
||
from carling.iter_utils import is_none, is_some, take_as_singleton, take_top, unwrap_or_none | ||
from carling.mapping import IndexBy | ||
from carling.iter_utils import ( | ||
take_top, | ||
is_none, | ||
is_some, | ||
unwrap, | ||
unwrap_or_none, | ||
take_as_singleton, | ||
) | ||
|
||
|
||
def _merge_two(x, y): | ||
|
@@ -50,7 +38,6 @@ def expand(self, pcoll): | |
|
||
|
||
class UniqueOnly(beam.PTransform): | ||
|
||
"""Produces elements that are the only elements per key after deduplication. | ||
Given a `PCollection` of `(K, V)`, | ||
|
@@ -76,7 +63,6 @@ def expand(self, pcoll): | |
|
||
|
||
class SingletonOnly(beam.PTransform): | ||
|
||
"""Produces elements that are the only elements per key. | ||
Given a `PCollection` of `(K, V)`, | ||
|
@@ -93,8 +79,7 @@ def expand(self, pcoll): | |
return ( | ||
pcoll | ||
| "Group" >> beam.GroupByKey() | ||
| "Remove Non-singleton Elements" | ||
>> beam.Map(lambda kv: take_as_singleton(kv[1])) | ||
| "Remove Non-singleton Elements" >> beam.Map(lambda kv: take_as_singleton(kv[1])) | ||
| "Remove None" >> beam.Filter(lambda v: len(v) > 0) | ||
| "Unwrap Values" >> beam.Map(lambda v: v[0]) | ||
) | ||
|
@@ -115,7 +100,6 @@ def process(self, row): | |
|
||
|
||
class Intersection(beam.PTransform): | ||
|
||
"""Produces the intersection of given `PCollection`s. | ||
Given a list of `PCollection`s, | ||
|
@@ -152,7 +136,6 @@ def process(self, row): | |
|
||
|
||
class FilterByKey(beam.PTransform): | ||
|
||
"""Filters elements by their keys. | ||
The constructor receives one or more `PCollection`s of `K`s, | ||
|
@@ -179,8 +162,7 @@ def expand(self, pcoll): | |
|
||
@beam.ptransform_fn | ||
def FilterByKeyUsingSideInput(pcoll, lookup_entries, filter_key): | ||
""" | ||
Filters a single collection by a single lookup collection, using a common key. | ||
"""Filters a single collection by a single lookup collection, using a common key. | ||
Given: | ||
- a `PCollection` (lookup_entries) of `(V)`, as a lookup collection | ||
|
@@ -307,7 +289,6 @@ def process(self, row): | |
|
||
|
||
class DifferencePerKey(beam.PTransform): | ||
|
||
"""Produces the difference per key between two `PCollection`s. | ||
Given two `PCollection`s of `V`, | ||
|
@@ -351,20 +332,18 @@ def MaxSelectPerKey(pcoll, index_keys, sort_key_fn, reverse=False): | |
return ( | ||
pcoll | ||
| f"Index by {index_keys}" >> IndexBy(*index_keys) | ||
| f"Top 1 per key" | ||
>> beam.combiners.Top.PerKey(1, key=sort_key_fn, reverse=reverse) | ||
| "Top 1 per key" >> beam.combiners.Top.PerKey(1, key=sort_key_fn, reverse=reverse) | ||
| "De-Index" >> beam.Map(lambda k_v: k_v[1][0]) | ||
) | ||
|
||
|
||
@beam.ptransform_fn | ||
def PartitionRowsContainingNone(pcoll): | ||
""" | ||
Emits two tagged pcollections: | ||
"""Emits two tagged pcollections: | ||
- None: Default emitted collection. | ||
Rows are guaranteed not to have any `None` values | ||
- contains_none: At least one column in the row had a `None` value | ||
- None: Default emitted collection. | ||
Rows are guaranteed not to have any `None` values | ||
- contains_none: At least one column in the row had a `None` value | ||
""" | ||
|
||
def _separator(row): | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,6 @@ | ||
#!/usr/bin/env python3 | ||
from .avro_schema import generate_avro_schema_from_template, load_avro_schema | ||
|
||
from .avro_schema import load_avro_schema, generate_avro_schema_from_template | ||
__all__ = ( | ||
"generate_avro_schema_from_template", | ||
"load_avro_schema", | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,10 @@ | ||
from .iter_utils import ( | ||
take_top, | ||
is_some, | ||
is_none, | ||
unwrap, | ||
unwrap_or_none, | ||
take_as_singleton, | ||
from .iter_utils import is_none, is_some, take_as_singleton, take_top, unwrap, unwrap_or_none | ||
|
||
__all__ = ( | ||
"is_none", | ||
"is_some", | ||
"take_as_singleton", | ||
"take_top", | ||
"unwrap", | ||
"unwrap_or_none", | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,3 @@ | ||
""" | ||
Generic iter utils | ||
Author: Tsuyoki Kumazaki ([email protected]) | ||
""" | ||
|
||
import itertools | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,29 +1,20 @@ | ||
""" | ||
Generic mapping transform utils | ||
Author: Tsuyoki Kumazaki ([email protected]) | ||
""" | ||
|
||
import json | ||
|
||
import apache_beam as beam | ||
|
||
|
||
def Label(**labels):
    """Build a labelled `beam.Map` step that adds `labels` to every element.

    Each element is unpacked as a mapping into a new dict and then overlaid
    with the keyword arguments given here, so on a key clash the label value
    replaces the element's own value.
    """

    def attach_labels(row):
        # Later unpacking wins, so `labels` overrides same-named columns.
        return {**row, **labels}

    return "Label" >> beam.Map(attach_labels)
|
||
|
||
def Select(*keys):
    """Build a labelled `beam.Map` step that keeps only the columns in `*keys`.

    Every element is rebuilt as a dict containing exactly the requested keys,
    each read from the element with `row[key]`.
    """

    def pick_columns(row):
        return {column: row[column] for column in keys}

    return "Select" >> beam.Map(pick_columns)
|
||
|
||
def Project(*keys):
    """Build a labelled `beam.Map` step that turns each element into a tuple.

    The tuple holds the element's values for the columns in `*keys`, in the
    order the keys were given; each value is read with `row[key]`.
    """

    def to_value_tuple(row):
        return tuple(row[column] for column in keys)

    return "Project" >> beam.Map(to_value_tuple)
|
||
|
||
|
@@ -45,8 +36,7 @@ def _decimal_default_proc(obj): | |
|
||
|
||
def Stringify(): | ||
"""Transforms each element into its JSON representation. | ||
""" | ||
"""Transforms each element into its JSON representation.""" | ||
|
||
def s(obj): | ||
return json.dumps(obj, default=_decimal_default_proc) | ||
|
@@ -66,8 +56,7 @@ def IndexBySingle(key): | |
|
||
|
||
def RenameFromTo(from_to_key_mapping): | ||
"""Rename columns according to `from_to_key_mapping`. | ||
""" | ||
"""Rename columns according to `from_to_key_mapping`.""" | ||
|
||
def rename(row): | ||
res = dict(row) | ||
|
@@ -81,8 +70,7 @@ def rename(row): | |
|
||
|
||
def Exclude(*keys): | ||
"""Removes all columns specified in `*keys`. | ||
""" | ||
"""Removes all columns specified in `*keys`.""" | ||
|
||
def exclude(row): | ||
res = dict(row) | ||
|
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,3 @@ | ||
from .test_utils import pprint_equal_to | ||
|
||
__all__ = ("pprint_equal_to",) |
Oops, something went wrong.