-
Notifications
You must be signed in to change notification settings. Fork: 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Setup CI workflow #3
Merged
Merged
Changes from all commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
3b1d46f
update package configurations
mhiro2 a3bd1bf
update python packages
mhiro2 5f6ca92
add py.typed file for PEP-561 support
mhiro2 1884638
apply linter and formatters
mhiro2 b8c5466
fix intersection tests to support the latest apache-beam version
mhiro2 2e623c0
setup CI workflow with tox
mhiro2 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
name: CI | ||
|
||
on: | ||
pull_request: | ||
paths: | ||
- carling/** | ||
- .github/workflows/ci.yml | ||
|
||
jobs: | ||
test: | ||
runs-on: ubuntu-latest | ||
strategy: | ||
matrix: | ||
python: [3.7, 3.8, 3.9] | ||
steps: | ||
- name: Checkout | ||
uses: actions/checkout@v2 | ||
- name: Set up Python | ||
uses: actions/setup-python@v2 | ||
with: | ||
python-version: ${{ matrix.python }} | ||
- name: Install dependencies | ||
run: | | ||
python -m pip install poetry tox | ||
- name: Test with tox | ||
run: poetry run tox -e py,black,flake8,isort | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,30 +1,59 @@ | ||
# isort: skip_file | ||
__version__ = "0.3.1" | ||
|
||
from .categorical import ( | ||
CreateCategoricalDicts, | ||
DigestCategoricalColumns, | ||
PairWithIndexNumber, | ||
ReplaceCategoricalColumns, | ||
) | ||
from .group import ( | ||
UniqueOnly, | ||
SingletonOnly, | ||
Intersection, | ||
DifferencePerKey, | ||
FilterByKey, | ||
FilterByKeyUsingSideInput, | ||
DifferencePerKey, | ||
Intersection, | ||
MaxSelectPerKey, | ||
PartitionRowsContainingNone, | ||
SingletonOnly, | ||
UniqueOnly, | ||
) | ||
from .mapping import ( | ||
Label, | ||
Select, | ||
Project, | ||
Exclude, | ||
IndexBy, | ||
Stringify, | ||
IndexBySingle, | ||
Label, | ||
Project, | ||
RenameFromTo, | ||
Exclude, | ||
Select, | ||
Stringify, | ||
) | ||
from .categorical import ( | ||
PairWithIndexNumber, | ||
DigestCategoricalColumns, | ||
CreateCategoricalDicts, | ||
ReplaceCategoricalColumns, | ||
from .util import LogSample, MemoizedValueProviderWrapper, ReifyMultiValueOption | ||
|
||
__all__ = ( | ||
# categorical | ||
"CreateCategoricalDicts", | ||
"DigestCategoricalColumns", | ||
"PairWithIndexNumber", | ||
"ReplaceCategoricalColumns", | ||
# group | ||
"DifferencePerKey", | ||
"FilterByKey", | ||
"FilterByKeyUsingSideInput", | ||
"Intersection", | ||
"MaxSelectPerKey", | ||
"PartitionRowsContainingNone", | ||
"SingletonOnly", | ||
"UniqueOnly", | ||
# mapping | ||
"Exclude", | ||
"IndexBy", | ||
"IndexBySingle", | ||
"Label", | ||
"Project", | ||
"RenameFromTo", | ||
"Select", | ||
"Stringify", | ||
# util | ||
"LogSample", | ||
"MemoizedValueProviderWrapper", | ||
"ReifyMultiValueOption", | ||
) | ||
from .util import LogSample, ReifyMultiValueOption, MemoizedValueProviderWrapper |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,21 +1,9 @@ | ||
""" | ||
Generic grouping transform utils | ||
|
||
Author: Tsuyoki Kumazaki ([email protected]) | ||
""" | ||
from functools import reduce | ||
|
||
import apache_beam as beam | ||
|
||
from carling.iter_utils import is_none, is_some, take_as_singleton, take_top, unwrap_or_none | ||
from carling.mapping import IndexBy | ||
from carling.iter_utils import ( | ||
take_top, | ||
is_none, | ||
is_some, | ||
unwrap, | ||
unwrap_or_none, | ||
take_as_singleton, | ||
) | ||
|
||
|
||
def _merge_two(x, y): | ||
|
@@ -50,7 +38,6 @@ def expand(self, pcoll): | |
|
||
|
||
class UniqueOnly(beam.PTransform): | ||
|
||
"""Produces elements that are the only elements per key after deduplication. | ||
|
||
Given a `PCollection` of `(K, V)`, | ||
|
@@ -76,7 +63,6 @@ def expand(self, pcoll): | |
|
||
|
||
class SingletonOnly(beam.PTransform): | ||
|
||
"""Produces elements that are the only elements per key. | ||
|
||
Given a `PCollection` of `(K, V)`, | ||
|
@@ -93,8 +79,7 @@ def expand(self, pcoll): | |
return ( | ||
pcoll | ||
| "Group" >> beam.GroupByKey() | ||
| "Remove Non-singleton Elements" | ||
>> beam.Map(lambda kv: take_as_singleton(kv[1])) | ||
| "Remove Non-singleton Elements" >> beam.Map(lambda kv: take_as_singleton(kv[1])) | ||
| "Remove None" >> beam.Filter(lambda v: len(v) > 0) | ||
| "Unwrap Values" >> beam.Map(lambda v: v[0]) | ||
) | ||
|
@@ -115,7 +100,6 @@ def process(self, row): | |
|
||
|
||
class Intersection(beam.PTransform): | ||
|
||
"""Produces the intersection of given `PCollection`s. | ||
|
||
Given a list of `PCollection`s, | ||
|
@@ -152,7 +136,6 @@ def process(self, row): | |
|
||
|
||
class FilterByKey(beam.PTransform): | ||
|
||
"""Filters elements by their keys. | ||
|
||
The constructor receives one or more `PCollection`s of `K`s, | ||
|
@@ -179,8 +162,7 @@ def expand(self, pcoll): | |
|
||
@beam.ptransform_fn | ||
def FilterByKeyUsingSideInput(pcoll, lookup_entries, filter_key): | ||
""" | ||
Filters a single collection by a single lookup collection, using a common key. | ||
"""Filters a single collection by a single lookup collection, using a common key. | ||
|
||
Given: | ||
- a `PCollection` (lookup_entries) of `(V)`, as a lookup collection | ||
|
@@ -307,7 +289,6 @@ def process(self, row): | |
|
||
|
||
class DifferencePerKey(beam.PTransform): | ||
|
||
"""Produces the difference per key between two `PCollection`s. | ||
|
||
Given two `PCollection`s of `V`, | ||
|
@@ -351,20 +332,18 @@ def MaxSelectPerKey(pcoll, index_keys, sort_key_fn, reverse=False): | |
return ( | ||
pcoll | ||
| f"Index by {index_keys}" >> IndexBy(*index_keys) | ||
| f"Top 1 per key" | ||
>> beam.combiners.Top.PerKey(1, key=sort_key_fn, reverse=reverse) | ||
| "Top 1 per key" >> beam.combiners.Top.PerKey(1, key=sort_key_fn, reverse=reverse) | ||
| "De-Index" >> beam.Map(lambda k_v: k_v[1][0]) | ||
) | ||
|
||
|
||
@beam.ptransform_fn | ||
def PartitionRowsContainingNone(pcoll): | ||
""" | ||
Emits two tagged pcollections: | ||
"""Emits two tagged pcollections: | ||
|
||
- None: Default emitted collection. | ||
Rows are guaranteed not to have any `None` values | ||
- contains_none: At least one column in the row had a `None` value | ||
- None: Default emitted collection. | ||
Rows are guaranteed not to have any `None` values | ||
- contains_none: At least one column in the row had a `None` value | ||
""" | ||
|
||
def _separator(row): | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,6 @@ | ||
#!/usr/bin/env python3 | ||
from .avro_schema import generate_avro_schema_from_template, load_avro_schema | ||
|
||
from .avro_schema import load_avro_schema, generate_avro_schema_from_template | ||
__all__ = ( | ||
"generate_avro_schema_from_template", | ||
"load_avro_schema", | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,10 @@ | ||
from .iter_utils import ( | ||
take_top, | ||
is_some, | ||
is_none, | ||
unwrap, | ||
unwrap_or_none, | ||
take_as_singleton, | ||
from .iter_utils import is_none, is_some, take_as_singleton, take_top, unwrap, unwrap_or_none | ||
|
||
__all__ = ( | ||
"is_none", | ||
"is_some", | ||
"take_as_singleton", | ||
"take_top", | ||
"unwrap", | ||
"unwrap_or_none", | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,3 @@ | ||
""" | ||
Generic iter utils | ||
Author: Tsuyoki Kumazaki ([email protected]) | ||
""" | ||
|
||
import itertools | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,29 +1,20 @@ | ||
""" | ||
Generic mapping transform utils | ||
|
||
Author: Tsuyoki Kumazaki ([email protected]) | ||
""" | ||
|
||
import json | ||
|
||
import apache_beam as beam | ||
|
||
|
||
def Label(**labels): | ||
"""Labels all elements. | ||
""" | ||
"""Labels all elements.""" | ||
return "Label" >> beam.Map(lambda r: {**r, **labels}) | ||
|
||
|
||
def Select(*keys): | ||
"""Removes all columns which are not specified in `*keys`. | ||
""" | ||
"""Removes all columns which are not specified in `*keys`.""" | ||
return "Select" >> beam.Map(lambda r: {k: r[k] for k in keys}) | ||
|
||
|
||
def Project(*keys): | ||
"""Transforms each element into a tuple of values of the specified columns. | ||
""" | ||
"""Transforms each element into a tuple of values of the specified columns.""" | ||
return "Project" >> beam.Map(lambda r: tuple(r[k] for k in keys)) | ||
|
||
|
||
|
@@ -45,8 +36,7 @@ def _decimal_default_proc(obj): | |
|
||
|
||
def Stringify(): | ||
"""Transforms each element into its JSON representation. | ||
""" | ||
"""Transforms each element into its JSON representation.""" | ||
|
||
def s(obj): | ||
return json.dumps(obj, default=_decimal_default_proc) | ||
|
@@ -66,8 +56,7 @@ def IndexBySingle(key): | |
|
||
|
||
def RenameFromTo(from_to_key_mapping): | ||
"""Rename columns according to `from_to_key_mapping`. | ||
""" | ||
"""Rename columns according to `from_to_key_mapping`.""" | ||
|
||
def rename(row): | ||
res = dict(row) | ||
|
@@ -81,8 +70,7 @@ def rename(row): | |
|
||
|
||
def Exclude(*keys): | ||
"""Removes all columns specified in `*keys`. | ||
""" | ||
"""Removes all columns specified in `*keys`.""" | ||
|
||
def exclude(row): | ||
res = dict(row) | ||
|
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,3 @@ | ||
from .test_utils import pprint_equal_to | ||
|
||
__all__ = ("pprint_equal_to",) |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
conf-platform