Skip to content

Commit

Permalink
Initial DoltTable implementation
Browse files Browse the repository at this point in the history
Signed-off-by: Max Hoffman <[email protected]>
  • Loading branch information
max-hoffman committed Apr 29, 2021
1 parent bfdf77c commit bbf0a25
Show file tree
Hide file tree
Showing 11 changed files with 318 additions and 52 deletions.
8 changes: 2 additions & 6 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ attrs==20.3.0
# via
# -c requirements.txt
# pytest
black==20.8b1
black==21.4b2
# via
# -c requirements.txt
# -r dev-requirements.in
Expand Down Expand Up @@ -86,12 +86,8 @@ toml==0.10.2
# coverage
# pytest
typed-ast==1.4.3
# via
# -c requirements.txt
# black
# mypy
# via mypy
typing-extensions==3.7.4.3
# via
# -c requirements.txt
# black
# mypy
32 changes: 14 additions & 18 deletions doc-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ appnope==0.1.2
# via
# ipykernel
# ipython
astroid==2.5.3
astroid==2.5.6
# via sphinx-autoapi
async-generator==1.10
# via nbclient
attrs==20.3.0
# via
# jsonschema
# scantree
babel==2.9.0
babel==2.9.1
# via sphinx
backcall==0.2.0
# via ipython
Expand All @@ -35,13 +35,13 @@ beautifulsoup4==4.9.3
# furo
# sphinx-code-include
# sphinx-material
black==20.8b1
black==21.4b2
# via papermill
bleach==3.3.0
# via nbconvert
boto3==1.17.55
boto3==1.17.60
# via sagemaker-training
botocore==1.20.55
botocore==1.20.60
# via
# boto3
# s3transfer
Expand All @@ -68,7 +68,7 @@ cryptography==3.4.7
# paramiko
css-html-js-minify==2.5.5
# via sphinx-material
dataclasses-json==0.5.2
dataclasses-json==0.5.3
# via flytekit
decorator==5.0.7
# via
Expand All @@ -88,7 +88,7 @@ entrypoints==0.3
# via
# nbconvert
# papermill
flyteidl==0.18.37
flyteidl==0.18.39
# via flytekit
furo==2021.4.11b34
# via -r doc-requirements.in
Expand Down Expand Up @@ -277,19 +277,19 @@ requests==2.25.1
# papermill
# responses
# sphinx
responses==0.13.2
responses==0.13.3
# via flytekit
retry==0.9.2
# via flytekit
retrying==1.3.3
# via sagemaker-training
s3transfer==0.4.1
s3transfer==0.4.2
# via boto3
sagemaker-training==3.9.1
sagemaker-training==3.9.2
# via flytekit
scantree==0.0.1
# via dirhash
scipy==1.6.2
scipy==1.6.3
# via sagemaker-training
six==1.15.0
# via
Expand All @@ -314,13 +314,13 @@ sortedcontainers==2.3.0
# via flytekit
soupsieve==2.2.1
# via beautifulsoup4
sphinx-autoapi==1.8.0
sphinx-autoapi==1.8.1
# via -r doc-requirements.in
sphinx-code-include==1.1.1
# via -r doc-requirements.in
sphinx-copybutton==0.3.1
# via -r doc-requirements.in
sphinx-gallery==0.8.2
sphinx-gallery==0.9.0
# via -r doc-requirements.in
sphinx-material==0.0.32
# via -r doc-requirements.in
Expand Down Expand Up @@ -379,12 +379,8 @@ traitlets==5.0.5
# nbclient
# nbconvert
# nbformat
typed-ast==1.4.3
# via black
typing-extensions==3.7.4.3
# via
# black
# typing-inspect
# via typing-inspect
typing-inspect==0.6.0
# via dataclasses-json
unidecode==1.2.0
Expand Down
1 change: 1 addition & 0 deletions plugins/dolt/flytekitplugins/dolt/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .schema import DoltConfig, DoltTable, DoltTableNameTransformer
105 changes: 105 additions & 0 deletions plugins/dolt/flytekitplugins/dolt/schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import tempfile
import typing
from dataclasses import dataclass
from typing import Type

import dolt_integrations.core as dolt_int
import doltcli as dolt
import pandas
from dataclasses_json import dataclass_json
from google.protobuf.struct_pb2 import Struct

from flytekit import FlyteContext
from flytekit.extend import TypeEngine, TypeTransformer
from flytekit.models import types as _type_models
from flytekit.models.literals import Literal, Scalar
from flytekit.models.types import LiteralType


@dataclass_json
@dataclass
class DoltConfig:
db_path: str
tablename: typing.Optional[str] = None
sql: typing.Optional[str] = None
io_args: typing.Optional[dict] = None
branch_conf: typing.Optional[dolt_int.Branch] = None
meta_conf: typing.Optional[dolt_int.Meta] = None
remote_conf: typing.Optional[dolt_int.Remote] = None


@dataclass_json
@dataclass
class DoltTable:
config: DoltConfig
data: typing.Optional[pandas.DataFrame] = None


class DoltTableNameTransformer(TypeTransformer[DoltTable]):
def __init__(self):
super().__init__(name="DoltTable", t=DoltTable)

def get_literal_type(self, t: Type[DoltTable]) -> LiteralType:
return LiteralType(simple=_type_models.SimpleType.STRUCT, metadata={})

def to_literal(
self,
ctx: FlyteContext,
python_val: DoltTable,
python_type: typing.Type[DoltTable],
expected: LiteralType,
) -> Literal:

if not isinstance(python_val, DoltTable):
raise AssertionError(f"Value cannot be converted to a table: {python_val}")

conf = python_val.config
if python_val.data is not None and python_val.tablename is not None:
db = dolt.Dolt(conf.db_path)
with tempfile.NamedTemporaryFile() as f:
python_val.data.to_csv(f.name, index=False)
dolt_int.save(
db=db,
tablename=conf.tablename,
filename=f.name,
branch_conf=conf.branch_conf,
meta_conf=conf.meta_conf,
remote_conf=conf.remote_conf,
save_args=conf.io_args,
)

s = Struct()
s.update(python_val.to_dict())
return Literal(Scalar(generic=s))

def to_python_value(
self,
ctx: FlyteContext,
lv: Literal,
expected_python_type: typing.Type[DoltTable],
) -> DoltTable:

if not (lv and lv.scalar and lv.scalar.generic and lv.scalar.generic["config"]):
return pandas.DataFrame()

conf = DoltConfig(**lv.scalar.generic["config"])
db = dolt.Dolt(conf.db_path)

with tempfile.NamedTemporaryFile() as f:
dolt_int.load(
db=db,
tablename=conf.tablename,
sql=conf.sql,
filename=f.name,
branch_conf=conf.branch_conf,
meta_conf=conf.meta_conf,
remote_conf=conf.remote_conf,
load_args=conf.io_args,
)
df = pandas.read_csv(f)
lv.data = df

return lv


TypeEngine.register(DoltTableNameTransformer())
12 changes: 12 additions & 0 deletions plugins/dolt/scripts/flytekit_install_dolt.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash

# Fetches and install Dolt. To be invoked by the Dockerfile

# echo commands to the terminal output
set -eox pipefail

# Install Dolt

apt-get update -y \
&& apt-get install curl \
&& sudo bash -c 'curl -L https://github.com/dolthub/dolt/releases/latest/download/install.sh | sudo bash'
35 changes: 35 additions & 0 deletions plugins/dolt/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from setuptools import setup

PLUGIN_NAME = "dolt"

microlib_name = f"flytekitplugins-{PLUGIN_NAME}"

plugin_requires = ["flytekit>=0.16.0b0,<1.0.0", "dolt_integrations>=0.1.3"]

__version__ = "0.0.0+develop"

setup(
name=microlib_name,
version=__version__,
author="dolthub",
author_email="[email protected]",
description="Dolt plugin for flytekit",
namespace_packages=["flytekitplugins"],
packages=[f"flytekitplugins.{PLUGIN_NAME}"],
install_requires=plugin_requires,
license="apache2",
python_requires=">=3.7",
classifiers=[
"Intended Audience :: Science/Research",
"Intended Audience :: Developers",
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Topic :: Scientific/Engineering",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Software Development",
"Topic :: Software Development :: Libraries",
"Topic :: Software Development :: Libraries :: Python Modules",
],
scripts=["scripts/flytekit_install_dolt.sh"],
)
1 change: 1 addition & 0 deletions plugins/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"flytekitplugins-kftensorflow": "kftensorflow",
"flytekitplugins-pandera": "pandera",
"flytekitplugins-sqlalchemy": "sqlalchemy",
"flytekitplugins-dolt": "dolt",
}


Expand Down
Empty file added plugins/tests/dolt/__init__.py
Empty file.
Loading

0 comments on commit bbf0a25

Please sign in to comment.