Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DoltTable Plugin #461

Merged
merged 35 commits into from
May 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
60a94f3
Fix serialization of pod specs in pod plugin (#433)
jeevb Mar 25, 2021
908df48
Remove beta version from readme install instructions (#434)
jeevb Mar 25, 2021
39e6226
Move configuraiton of a few tools to pyproject.toml (#428)
honnix Mar 26, 2021
928e865
Pass through FlyteFile and FlyteDirectory if created from a remote so…
wild-endeavor Mar 26, 2021
d8345d2
Spark2 CI (#435)
honnix Mar 26, 2021
2f05198
add requests and limits parameter to ContainerTask (#438)
migueltol22 Mar 29, 2021
438c5f0
update flytekit docs theme, fix index links (#439)
cosmicBboy Mar 30, 2021
b37cc0b
dark theme updates (#441)
cosmicBboy Mar 31, 2021
b2ee938
Added a missing test for != `ne` condition (#443)
kumare3 Apr 7, 2021
8a17786
Propagate required envs to executors and fix bug to set configs in Sp…
rubenbarragan Apr 7, 2021
6a745c1
Add a lot more user friendly launch plan creating function (#442)
wild-endeavor Apr 12, 2021
5ee9bf0
Fast register for dynamic tasks (#437)
Apr 13, 2021
68fa2ca
Do not omit variables without defaults from parameter map when constr…
jeevb Apr 13, 2021
fa063ff
Sqlalchemy Task (#445)
max-hoffman Apr 22, 2021
86af4cf
Fix 0.0 value floats (#452)
wild-endeavor Apr 26, 2021
dd0767b
Unit test for dynamic create node (#457)
wild-endeavor Apr 27, 2021
a30e4c7
add new control plane classes (#425)
cosmicBboy Apr 28, 2021
59ffbb5
Use default arguments in addition to kwargs in local wf execution (#458)
ajsalow Apr 28, 2021
04090f0
fix control_plane imports (#459)
cosmicBboy Apr 28, 2021
01a9a95
Initial DoltTable implementation
max-hoffman Apr 29, 2021
db370c6
Update reqs
max-hoffman Apr 29, 2021
03c67d8
Fix fmt
max-hoffman Apr 29, 2021
3d6bf25
Different namespace setup, dolt post-install
max-hoffman Apr 29, 2021
d3944ad
Bad merge
max-hoffman Apr 29, 2021
4a08b2e
Fix reqs
max-hoffman Apr 29, 2021
ff8ac75
Another merge error
max-hoffman Apr 29, 2021
3da4213
Flake error
max-hoffman Apr 29, 2021
aa8fb30
Flake error
max-hoffman Apr 29, 2021
2565c4e
Flake error
max-hoffman Apr 29, 2021
6da7359
Fix plugin name
max-hoffman Apr 29, 2021
51b1c18
Dep change
max-hoffman Apr 29, 2021
c2a8df4
Bump dolt version for bug fix
max-hoffman Apr 30, 2021
07e90a5
Fix config bug
max-hoffman Apr 30, 2021
3dc4800
Merge master
max-hoffman Apr 30, 2021
fa298da
Make reqs
max-hoffman Apr 30, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 2 additions & 13 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@ black==21.4b2
# -c requirements.txt
# -r dev-requirements.in
# flake8-black
cached-property==1.5.2
# via docker-compose
certifi==2020.12.5
# via
# -c requirements.txt
Expand Down Expand Up @@ -100,7 +98,7 @@ flake8==3.9.1
# -r dev-requirements.in
# flake8-black
# flake8-isort
flyteidl==0.18.40
flyteidl==0.18.41
# via
# -c requirements.txt
# flytekit
Expand All @@ -115,11 +113,7 @@ idna==2.10
importlib-metadata==4.0.1
# via
# -c requirements.txt
# flake8
# jsonschema
# keyring
# pluggy
# pytest
iniconfig==1.1.1
# via pytest
isort==5.8.0
Expand Down Expand Up @@ -316,15 +310,10 @@ toml==0.10.2
# coverage
# pytest
typed-ast==1.4.3
# via
# -c requirements.txt
# black
# mypy
# via mypy
typing-extensions==3.7.4.3
# via
# -c requirements.txt
# black
# importlib-metadata
# mypy
# typing-inspect
typing-inspect==0.6.0
Expand Down
21 changes: 6 additions & 15 deletions doc-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@ black==21.4b2
# via papermill
bleach==3.3.0
# via nbconvert
boto3==1.17.61
boto3==1.17.62
# via sagemaker-training
botocore==1.20.61
botocore==1.20.62
# via
# boto3
# s3transfer
Expand Down Expand Up @@ -88,7 +88,7 @@ entrypoints==0.3
# via
# nbconvert
# papermill
flyteidl==0.18.40
flyteidl==0.18.41
# via flytekit
git+git://github.com/flyteorg/furo@main
# via -r doc-requirements.in
Expand All @@ -107,9 +107,7 @@ idna==2.10
imagesize==1.2.0
# via sphinx
importlib-metadata==4.0.1
# via
# jsonschema
# keyring
# via keyring
inotify_simple==1.2.1
# via sagemaker-training
ipykernel==5.5.3
Expand Down Expand Up @@ -252,7 +250,7 @@ python-dateutil==2.8.1
# flytekit
# jupyter-client
# pandas
python-slugify[unidecode]==4.0.1
python-slugify[unidecode]==5.0.0
# via sphinx-material
pytimeparse==1.1.8
# via flytekit
Expand Down Expand Up @@ -381,15 +379,8 @@ traitlets==5.0.5
# nbclient
# nbconvert
# nbformat
typed-ast==1.4.3
# via
# astroid
# black
typing-extensions==3.7.4.3
# via
# black
# importlib-metadata
# typing-inspect
# via typing-inspect
typing-inspect==0.6.0
# via dataclasses-json
unidecode==1.2.0
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .schema import DoltConfig, DoltTable, DoltTableNameTransformer
105 changes: 105 additions & 0 deletions plugins/flytekitplugins.dolt/flytekitplugins/dolt/schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import tempfile
import typing
from dataclasses import dataclass
from typing import Type

import dolt_integrations.core as dolt_int
import doltcli as dolt
import pandas
from dataclasses_json import dataclass_json
from google.protobuf.struct_pb2 import Struct

from flytekit import FlyteContext
from flytekit.extend import TypeEngine, TypeTransformer
from flytekit.models import types as _type_models
from flytekit.models.literals import Literal, Scalar
from flytekit.models.types import LiteralType


@dataclass_json
@dataclass
class DoltConfig:
db_path: str
tablename: typing.Optional[str] = None
sql: typing.Optional[str] = None
io_args: typing.Optional[dict] = None
branch_conf: typing.Optional[dolt_int.Branch] = None
meta_conf: typing.Optional[dolt_int.Meta] = None
remote_conf: typing.Optional[dolt_int.Remote] = None


@dataclass_json
@dataclass
class DoltTable:
config: DoltConfig
data: typing.Optional[pandas.DataFrame] = None


class DoltTableNameTransformer(TypeTransformer[DoltTable]):
def __init__(self):
super().__init__(name="DoltTable", t=DoltTable)

def get_literal_type(self, t: Type[DoltTable]) -> LiteralType:
return LiteralType(simple=_type_models.SimpleType.STRUCT, metadata={})

def to_literal(
self,
ctx: FlyteContext,
python_val: DoltTable,
python_type: typing.Type[DoltTable],
expected: LiteralType,
) -> Literal:

if not isinstance(python_val, DoltTable):
raise AssertionError(f"Value cannot be converted to a table: {python_val}")

conf = python_val.config
if python_val.data is not None and python_val.config.tablename is not None:
db = dolt.Dolt(conf.db_path)
with tempfile.NamedTemporaryFile() as f:
python_val.data.to_csv(f.name, index=False)
dolt_int.save(
db=db,
tablename=conf.tablename,
filename=f.name,
branch_conf=conf.branch_conf,
meta_conf=conf.meta_conf,
remote_conf=conf.remote_conf,
save_args=conf.io_args,
)

s = Struct()
s.update(python_val.to_dict())
return Literal(Scalar(generic=s))

def to_python_value(
self,
ctx: FlyteContext,
lv: Literal,
expected_python_type: typing.Type[DoltTable],
) -> DoltTable:

if not (lv and lv.scalar and lv.scalar.generic and lv.scalar.generic["config"]):
return pandas.DataFrame()

conf = DoltConfig(**lv.scalar.generic["config"])
db = dolt.Dolt(conf.db_path)

with tempfile.NamedTemporaryFile() as f:
dolt_int.load(
db=db,
tablename=conf.tablename,
sql=conf.sql,
filename=f.name,
branch_conf=conf.branch_conf,
meta_conf=conf.meta_conf,
remote_conf=conf.remote_conf,
load_args=conf.io_args,
)
df = pandas.read_csv(f)
lv.data = df

return lv


TypeEngine.register(DoltTableNameTransformer())
12 changes: 12 additions & 0 deletions plugins/flytekitplugins.dolt/scripts/flytekit_install_dolt.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash

# Fetches and install Dolt. To be invoked by the Dockerfile

# echo commands to the terminal output
set -eox pipefail

# Install Dolt

apt-get update -y \
&& apt-get install curl \
&& sudo bash -c 'curl -L https://github.com/dolthub/dolt/releases/latest/download/install.sh | sudo bash'
66 changes: 66 additions & 0 deletions plugins/flytekitplugins.dolt/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import shlex
import subprocess
import urllib.request

from setuptools import setup
from setuptools.command.develop import develop

PLUGIN_NAME = "dolt"

microlib_name = f"flytekitplugins.{PLUGIN_NAME}"

plugin_requires = ["flytekit>=0.16.0b0,<1.0.0", "dolt_integrations>=0.1.3"]

__version__ = "0.0.0+develop"


class PostDevelopCommand(develop):
"""Post-installation for development mode."""

def run(self):
develop.run(self)
install, _ = urllib.request.urlretrieve(
"https://github.com/liquidata-inc/dolt/releases/latest/download/install.sh"
)
subprocess.call(shlex.split(f"chmod +x {install}"))
subprocess.call(shlex.split(f"sudo {install}"))

pref = "dolt config --global --add"
subprocess.call(
shlex.split(f"{pref} user.email [email protected]"),
)
subprocess.call(
shlex.split(f"{pref} user.name 'Bojack Horseman'"),
)
subprocess.call(
shlex.split(f"{pref} metrics.host eventsapi.awsdev.ld-corp.com"),
)
subprocess.call(shlex.split(f"{pref} metrics.port 443"))


setup(
name=microlib_name,
version=__version__,
author="dolthub",
author_email="[email protected]",
description="Dolt plugin for flytekit",
namespace_packages=["flytekitplugins"],
packages=[f"flytekitplugins.{PLUGIN_NAME}"],
install_requires=plugin_requires,
cmdclass=dict(develop=PostDevelopCommand),
license="apache2",
python_requires=">=3.7",
classifiers=[
"Intended Audience :: Science/Research",
"Intended Audience :: Developers",
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Topic :: Scientific/Engineering",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Software Development",
"Topic :: Software Development :: Libraries",
"Topic :: Software Development :: Libraries :: Python Modules",
],
scripts=["scripts/flytekit_install_dolt.sh"],
)
1 change: 1 addition & 0 deletions plugins/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"flytekitplugins-kftensorflow": "kftensorflow",
"flytekitplugins-pandera": "pandera",
"flytekitplugins-sqlalchemy": "sqlalchemy",
"flytekitplugins-dolt": "flytekitplugins.dolt",
}


Expand Down
Empty file.
Loading