From 059645fad40cfc728c4029316151c0acd7974b73 Mon Sep 17 00:00:00 2001 From: Maksym Pavlenok Date: Wed, 20 Oct 2021 12:28:26 +0300 Subject: [PATCH] =?UTF-8?q?=20=F0=9F=8E=89=20=20Source=20File:=20separate?= =?UTF-8?q?=20secure=20fork=20(#6768)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * create a fork for the source-file connector * update docs and tests * update docs and tests * add secrets Co-authored-by: George Claireaux * switching among auth methods * refactoring after reviews * correction of doc * update spec file Co-authored-by: Maksym Pavlenok Co-authored-by: George Claireaux --- .github/workflows/test-command.yml | 1 + .../source-file-secure/.dockerignore | 2 + .../connectors/source-file-secure/Dockerfile | 14 ++ .../connectors/source-file-secure/README.md | 95 ++++++++++ .../acceptance-test-config.yml | 29 +++ .../acceptance-test-docker.sh | 16 ++ .../source-file-secure/build.gradle | 15 ++ .../integration_tests/__init__.py | 0 .../integration_tests/acceptance.py | 14 ++ .../configured_https_catalog.json | 13 ++ .../integration_tests/https_config.json | 10 + .../integration_tests/invalid_config.json | 10 + .../integration_tests/local_config.json | 9 + .../integration_tests/spec.json | 177 ++++++++++++++++++ .../connectors/source-file-secure/main_dev.py | 12 ++ .../source-file-secure/requirements.txt | 4 + .../connectors/source-file-secure/setup.cfg | 5 + .../connectors/source-file-secure/setup.py | 44 +++++ .../source_file_secure/__init__.py | 3 + .../source_file_secure/source.py | 80 ++++++++ .../unit_tests/unit_test.py | 34 ++++ docs/integrations/sources/file.md | 2 +- tools/bin/ci_credentials.sh | 1 + 23 files changed, 589 insertions(+), 1 deletion(-) create mode 100644 airbyte-integrations/connectors/source-file-secure/.dockerignore create mode 100644 airbyte-integrations/connectors/source-file-secure/Dockerfile create mode 100644 airbyte-integrations/connectors/source-file-secure/README.md create mode 
100644 airbyte-integrations/connectors/source-file-secure/acceptance-test-config.yml create mode 100644 airbyte-integrations/connectors/source-file-secure/acceptance-test-docker.sh create mode 100644 airbyte-integrations/connectors/source-file-secure/build.gradle create mode 100644 airbyte-integrations/connectors/source-file-secure/integration_tests/__init__.py create mode 100644 airbyte-integrations/connectors/source-file-secure/integration_tests/acceptance.py create mode 100644 airbyte-integrations/connectors/source-file-secure/integration_tests/configured_https_catalog.json create mode 100644 airbyte-integrations/connectors/source-file-secure/integration_tests/https_config.json create mode 100644 airbyte-integrations/connectors/source-file-secure/integration_tests/invalid_config.json create mode 100644 airbyte-integrations/connectors/source-file-secure/integration_tests/local_config.json create mode 100644 airbyte-integrations/connectors/source-file-secure/integration_tests/spec.json create mode 100644 airbyte-integrations/connectors/source-file-secure/main_dev.py create mode 100644 airbyte-integrations/connectors/source-file-secure/requirements.txt create mode 100644 airbyte-integrations/connectors/source-file-secure/setup.cfg create mode 100644 airbyte-integrations/connectors/source-file-secure/setup.py create mode 100644 airbyte-integrations/connectors/source-file-secure/source_file_secure/__init__.py create mode 100644 airbyte-integrations/connectors/source-file-secure/source_file_secure/source.py create mode 100644 airbyte-integrations/connectors/source-file-secure/unit_tests/unit_test.py diff --git a/.github/workflows/test-command.yml b/.github/workflows/test-command.yml index 6c6f98940e34..e85e8efbfe3f 100644 --- a/.github/workflows/test-command.yml +++ b/.github/workflows/test-command.yml @@ -92,6 +92,7 @@ jobs: EXCHANGE_RATES_TEST_CREDS: ${{ secrets.EXCHANGE_RATES_TEST_CREDS }} FACEBOOK_MARKETING_TEST_INTEGRATION_CREDS: ${{ 
secrets.FACEBOOK_MARKETING_TEST_INTEGRATION_CREDS }} FACEBOOK_PAGES_INTEGRATION_TEST_CREDS: ${{ secrets.FACEBOOK_PAGES_INTEGRATION_TEST_CREDS }} + FILE_SECURE_HTTPS_TEST_CREDS: ${{ secrets.FILE_SECURE_HTTPS_TEST_CREDS }} FRESHDESK_TEST_CREDS: ${{ secrets.FRESHDESK_TEST_CREDS }} GITLAB_INTEGRATION_TEST_CREDS: ${{ secrets.GITLAB_INTEGRATION_TEST_CREDS }} GH_NATIVE_INTEGRATION_TEST_CREDS: ${{ secrets.GH_NATIVE_INTEGRATION_TEST_CREDS }} diff --git a/airbyte-integrations/connectors/source-file-secure/.dockerignore b/airbyte-integrations/connectors/source-file-secure/.dockerignore new file mode 100644 index 000000000000..9ef96044faba --- /dev/null +++ b/airbyte-integrations/connectors/source-file-secure/.dockerignore @@ -0,0 +1,2 @@ +build + diff --git a/airbyte-integrations/connectors/source-file-secure/Dockerfile b/airbyte-integrations/connectors/source-file-secure/Dockerfile new file mode 100644 index 000000000000..a0187b0ffcc6 --- /dev/null +++ b/airbyte-integrations/connectors/source-file-secure/Dockerfile @@ -0,0 +1,14 @@ +FROM airbyte/source-file:0.2.6 + +WORKDIR /airbyte/integration_code + +ENV CODE_PATH="source_file_secure" +ENV AIRBYTE_IMPL_MODULE="source_file_secure" +ENV AIRBYTE_IMPL_PATH="SourceFileSecure" + +COPY $CODE_PATH ./$CODE_PATH +RUN sed -i 's/source_file/source_file_secure/g' setup.py +RUN pip install . + +LABEL io.airbyte.name=airbyte/source-file-secure +LABEL io.airbyte.version=0.1.0 diff --git a/airbyte-integrations/connectors/source-file-secure/README.md b/airbyte-integrations/connectors/source-file-secure/README.md new file mode 100644 index 000000000000..6819056530a5 --- /dev/null +++ b/airbyte-integrations/connectors/source-file-secure/README.md @@ -0,0 +1,95 @@ +# File Source Secure +This is the repository for the File source connector, written in Python. +This is modificaion of another connector Source File. 
This version has only one difference with the origin version is this one doesn't support local file storages and is orientated for cloud and cluster platforms. +More details about dependencies and requirement are available [here](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-file/README.md) + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Building via Gradle +From the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-file-secure:build +``` + +#### Create credentials +Details are explained [here](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-file/README.md#create-credentials) + +Note that the `secrets` directory is gitignored by default, so there is no danger of accidentally checking in sensitive information. +See `sample_files/sample_config.json` for a sample config file. + + + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source file test creds` +and place them into `secrets/config.json`. 
+ + +### Locally running the connector +``` +python main_dev.py spec +python main_dev.py check --config secrets/config.json +python main_dev.py discover --config secrets/config.json +python main_dev.py read --config secrets/config.json --catalog sample_files/configured_catalog.json +``` + +### Unit Tests +To run unit tests locally, from the connector directory run: +``` +python -m pytest unit_tests +``` + +### Locally running the connector docker image + +#### Build +First, make sure you build the latest Docker image: +``` +docker build . -t airbyte/source-file-secure:dev +``` + +You can also build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-file-secure:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-file-secure:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-file-secure:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-file-secure:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/sample_files:/sample_files airbyte/source-file-secure:dev read --config /secrets/config.json --catalog /sample_files/configured_catalog.json +``` + +### Integration Tests +1. From the airbyte project root, run `./gradlew :airbyte-integrations:connectors:source-file-secure:integrationTest` to run the standard integration test suite. +1. To run additional integration tests, place your integration tests in a new directory `integration_tests` and run them with `python -m pytest -s ../source-file/integration_tests`. + Make sure to familiarize yourself with [pytest test discovery](https://docs.pytest.org/en/latest/goodpractices.html#test-discovery) to know how your test files and methods should be named. 
+ +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use SemVer). +1. Create a Pull Request +1. Pat yourself on the back for being an awesome contributor +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master diff --git a/airbyte-integrations/connectors/source-file-secure/acceptance-test-config.yml b/airbyte-integrations/connectors/source-file-secure/acceptance-test-config.yml new file mode 100644 index 000000000000..ddddb348377e --- /dev/null +++ b/airbyte-integrations/connectors/source-file-secure/acceptance-test-config.yml @@ -0,0 +1,29 @@ +# See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) +# for more information about how to configure these tests + +# Here we tries to test a basic tests only. 
+# The main part of tests should be executed for the source-file connector +connector_image: airbyte/source-file-secure:dev +tests: + spec: + - spec_path: "integration_tests/spec.json" + connection: + - config_path: "integration_tests/invalid_config.json" + status: "failed" + # for https + - config_path: "integration_tests/https_config.json" + status: "succeed" + # for local should be failed + - config_path: "integration_tests/local_config.json" + status: "exception" + + discovery: + # for https + - config_path: "integration_tests/https_config.json" + + basic_read: + # for https + - config_path: "integration_tests/https_config.json" + configured_catalog_path: "integration_tests/configured_https_catalog.json" + + diff --git a/airbyte-integrations/connectors/source-file-secure/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-file-secure/acceptance-test-docker.sh new file mode 100644 index 000000000000..5c40c48bd154 --- /dev/null +++ b/airbyte-integrations/connectors/source-file-secure/acceptance-test-docker.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env sh + +# Build latest connector image +source_image=$(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2-) +echo "try to build the source image: ${source_image}" +docker build -t ${source_image} . 
+ +# Pull latest acctest image +docker pull airbyte/source-acceptance-test:latest + +docker run --rm -it \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /tmp:/tmp \ + -v $(pwd):/test_input \ + airbyte/source-acceptance-test \ + --acceptance-test-config /test_input diff --git a/airbyte-integrations/connectors/source-file-secure/build.gradle b/airbyte-integrations/connectors/source-file-secure/build.gradle new file mode 100644 index 000000000000..e4fbfaf782f2 --- /dev/null +++ b/airbyte-integrations/connectors/source-file-secure/build.gradle @@ -0,0 +1,15 @@ + +plugins { + id 'airbyte-python' + id 'airbyte-docker' + id 'airbyte-source-acceptance-test' +} + +airbytePython { + moduleDirectory 'source_file_secure' +} + +dependencies { + implementation files(project(':airbyte-integrations:bases:source-acceptance-test').airbyteDocker.outputs) + implementation files(project(':airbyte-integrations:bases:base-python').airbyteDocker.outputs) +} diff --git a/airbyte-integrations/connectors/source-file-secure/integration_tests/__init__.py b/airbyte-integrations/connectors/source-file-secure/integration_tests/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/airbyte-integrations/connectors/source-file-secure/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-file-secure/integration_tests/acceptance.py new file mode 100644 index 000000000000..108075487440 --- /dev/null +++ b/airbyte-integrations/connectors/source-file-secure/integration_tests/acceptance.py @@ -0,0 +1,14 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +import pytest + +pytest_plugins = ("source_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """ This fixture is a placeholder for external resources that acceptance test might require.""" + yield diff --git a/airbyte-integrations/connectors/source-file-secure/integration_tests/configured_https_catalog.json b/airbyte-integrations/connectors/source-file-secure/integration_tests/configured_https_catalog.json new file mode 100644 index 000000000000..d46f2b60bde3 --- /dev/null +++ b/airbyte-integrations/connectors/source-file-secure/integration_tests/configured_https_catalog.json @@ -0,0 +1,13 @@ +{ + "streams": [ + { + "stream": { + "name": "test", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-file-secure/integration_tests/https_config.json b/airbyte-integrations/connectors/source-file-secure/integration_tests/https_config.json new file mode 100644 index 000000000000..2a659d71b1f7 --- /dev/null +++ b/airbyte-integrations/connectors/source-file-secure/integration_tests/https_config.json @@ -0,0 +1,10 @@ +{ + "dataset_name": "test", + "format": "csv", + "reader_options": "{\"sep\": \",\", \"nrows\": 20}", + "url": "https://www.stats.govt.nz/assets/Uploads/Business-price-indexes/Business-price-indexes-September-2020-quarter/Download-data/business-price-indexes-september-2020-quarter-corrections-to-previously-published-statistics.csv", + "provider": { + "storage": "HTTPS", + "reader_impl": "gcsfs" + } +} diff --git a/airbyte-integrations/connectors/source-file-secure/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-file-secure/integration_tests/invalid_config.json new file mode 100644 index 000000000000..c2d398ed6c58 --- /dev/null +++ b/airbyte-integrations/connectors/source-file-secure/integration_tests/invalid_config.json @@ 
-0,0 +1,10 @@ +{ + "dataset_name": "fake_name", + "format": "csv", + "reader_options": "{\"bla\": \",\", \"nrows\": 20}", + "url": "https://fake-fake.com", + "provider": { + "storage": "fake", + "reader_impl": "fake" + } +} diff --git a/airbyte-integrations/connectors/source-file-secure/integration_tests/local_config.json b/airbyte-integrations/connectors/source-file-secure/integration_tests/local_config.json new file mode 100644 index 000000000000..81c8a8c48808 --- /dev/null +++ b/airbyte-integrations/connectors/source-file-secure/integration_tests/local_config.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "test", + "format": "csv", + "reader_options": "{\"bla\": \",\", \"nrows\": 20}", + "url": "file:///tmp/fake_file.csv", + "provider": { + "storage": "local" + } +} diff --git a/airbyte-integrations/connectors/source-file-secure/integration_tests/spec.json b/airbyte-integrations/connectors/source-file-secure/integration_tests/spec.json new file mode 100644 index 000000000000..8fedbff4838e --- /dev/null +++ b/airbyte-integrations/connectors/source-file-secure/integration_tests/spec.json @@ -0,0 +1,177 @@ +{ + "documentationUrl": "https://docs.airbyte.io/integrations/sources/file", + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "File Source Spec", + "type": "object", + "additionalProperties": false, + "required": ["dataset_name", "format", "url", "provider"], + "properties": { + "dataset_name": { + "type": "string", + "description": "Name of the final table where to replicate this file (should include only letters, numbers dash and underscores)" + }, + "format": { + "type": "string", + "enum": ["csv", "json", "jsonl", "excel", "feather", "parquet"], + "default": "csv", + "description": "File Format of the file to be replicated (Warning: some format may be experimental, please refer to docs)." 
+ }, + "reader_options": { + "type": "string", + "description": "This should be a valid JSON string used by each reader/parser to provide additional options and tune its behavior", + "examples": ["{}", "{'sep': ' '}"] + }, + "url": { + "type": "string", + "description": "URL path to access the file to be replicated" + }, + "provider": { + "type": "object", + "description": "Storage Provider or Location of the file(s) to be replicated.", + "default": "Public Web", + "oneOf": [ + { + "title": "HTTPS: Public Web", + "required": ["storage"], + "properties": { + "storage": { + "type": "string", + "const": "HTTPS" + } + } + }, + { + "title": "GCS: Google Cloud Storage", + "required": ["storage"], + "properties": { + "storage": { + "type": "string", + "const": "GCS" + }, + "service_account_json": { + "type": "string", + "description": "In order to access private Buckets stored on Google Cloud, this connector would need a service account json credentials with the proper permissions as described here. Please generate the credentials.json file and copy/paste its content to this field (expecting JSON formats). If accessing publicly available data, this field is not necessary." + } + } + }, + { + "title": "S3: Amazon Web Services", + "required": ["storage"], + "properties": { + "storage": { + "type": "string", + "const": "S3" + }, + "aws_access_key_id": { + "type": "string", + "description": "In order to access private Buckets stored on AWS S3, this connector would need credentials with the proper permissions. If accessing publicly available data, this field is not necessary." + }, + "aws_secret_access_key": { + "type": "string", + "description": "In order to access private Buckets stored on AWS S3, this connector would need credentials with the proper permissions. 
If accessing publicly available data, this field is not necessary.", + "airbyte_secret": true + } + } + }, + { + "title": "AzBlob: Azure Blob Storage", + "required": ["storage", "storage_account"], + "properties": { + "storage": { + "type": "string", + "const": "AzBlob" + }, + "storage_account": { + "type": "string", + "description": "The globally unique name of the storage account that the desired blob sits within. See here for more details." + }, + "sas_token": { + "type": "string", + "description": "To access Azure Blob Storage, this connector would need credentials with the proper permissions. One option is a SAS (Shared Access Signature) token. If accessing publicly available data, this field is not necessary.", + "airbyte_secret": true + }, + "shared_key": { + "type": "string", + "description": "To access Azure Blob Storage, this connector would need credentials with the proper permissions. One option is a storage account shared key (aka account key or access key). If accessing publicly available data, this field is not necessary.", + "airbyte_secret": true + } + } + }, + { + "title": "SSH: Secure Shell", + "required": ["storage", "user", "host"], + "properties": { + "storage": { + "type": "string", + "const": "SSH" + }, + "user": { + "type": "string" + }, + "password": { + "type": "string", + "airbyte_secret": true + }, + "host": { + "type": "string" + }, + "port": { + "type": "string", + "default": "22" + } + } + }, + { + "title": "SCP: Secure copy protocol", + "required": ["storage", "user", "host"], + "properties": { + "storage": { + "type": "string", + "const": "SCP" + }, + "user": { + "type": "string" + }, + "password": { + "type": "string", + "airbyte_secret": true + }, + "host": { + "type": "string" + }, + "port": { + "type": "string", + "default": "22" + } + } + }, + { + "title": "SFTP: Secure File Transfer Protocol", + "required": ["storage", "user", "host"], + "properties": { + "storage": { + "type": "string", + "const": "SFTP" + }, + "user": { + 
"type": "string" + }, + "password": { + "type": "string", + "airbyte_secret": true + }, + "host": { + "type": "string" + }, + "port": { + "type": "string", + "default": "22" + } + } + } + ] + } + } + } +} diff --git a/airbyte-integrations/connectors/source-file-secure/main_dev.py b/airbyte-integrations/connectors/source-file-secure/main_dev.py new file mode 100644 index 000000000000..4d942018cb52 --- /dev/null +++ b/airbyte-integrations/connectors/source-file-secure/main_dev.py @@ -0,0 +1,12 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +import sys + +from base_python.entrypoint import launch +from source_file_secure import SourceFileSecure + +if __name__ == "__main__": + launch(SourceFileSecure(), sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-file-secure/requirements.txt b/airbyte-integrations/connectors/source-file-secure/requirements.txt new file mode 100644 index 000000000000..16b5e8bf302a --- /dev/null +++ b/airbyte-integrations/connectors/source-file-secure/requirements.txt @@ -0,0 +1,4 @@ +# This file is autogenerated -- only edit if you know what you are doing. Use setup.py for declaring dependencies. +-e ../../bases/airbyte-protocol +-e ../../bases/base-python +-e ../source-file diff --git a/airbyte-integrations/connectors/source-file-secure/setup.cfg b/airbyte-integrations/connectors/source-file-secure/setup.cfg new file mode 100644 index 000000000000..672fb95c5976 --- /dev/null +++ b/airbyte-integrations/connectors/source-file-secure/setup.cfg @@ -0,0 +1,5 @@ +[aliases] +test='pytest' + +[tool:pytest] +testpaths = unit_tests diff --git a/airbyte-integrations/connectors/source-file-secure/setup.py b/airbyte-integrations/connectors/source-file-secure/setup.py new file mode 100644 index 000000000000..15636060002a --- /dev/null +++ b/airbyte-integrations/connectors/source-file-secure/setup.py @@ -0,0 +1,44 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-protocol", + "base-python", + "gcsfs==0.7.1", + "genson==1.2.2", + "google-cloud-storage==1.35.0", + "pandas==1.2.0", + "paramiko==2.7.2", + "s3fs==0.4.2", + "smart-open[all]==4.1.2", + "lxml==4.6.3", + "html5lib==1.1", + "beautifulsoup4==4.9.3", + "pyarrow==3.0.0", + "xlrd==2.0.1", + "openpyxl==3.0.6", + "pyxlsb==1.0.8", +] + +TEST_REQUIREMENTS = [ + "boto3==1.16.57", + "pytest==6.1.2", + "pytest-docker==0.10.1", +] + +setup( + name="source_file", + description="Source implementation for File", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/source-file-secure/source_file_secure/__init__.py b/airbyte-integrations/connectors/source-file-secure/source_file_secure/__init__.py new file mode 100644 index 000000000000..2107707a1cb1 --- /dev/null +++ b/airbyte-integrations/connectors/source-file-secure/source_file_secure/__init__.py @@ -0,0 +1,3 @@ +from .source import SourceFileSecure + +__all__ = ["SourceFileSecure"] diff --git a/airbyte-integrations/connectors/source-file-secure/source_file_secure/source.py b/airbyte-integrations/connectors/source-file-secure/source_file_secure/source.py new file mode 100644 index 000000000000..323615f59d4a --- /dev/null +++ b/airbyte-integrations/connectors/source-file-secure/source_file_secure/source.py @@ -0,0 +1,80 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +import json +import os +import sys + +from airbyte_protocol import ConnectorSpecification +from base_python.logger import AirbyteLogger + +# some integration tests doesn't setup dependences from +# requirements.txt file and Python can return a exception. 
# Thus the parent "source_file" package is imported manually: when it is not
# installed, the sibling "source-file" connector folder is added to sys.path.
try:
    import source_file.source
except ModuleNotFoundError:
    current_dir = os.path.dirname(os.path.abspath(__file__))
    parent_source_local = os.path.join(current_dir, "../../source-file")
    if os.path.isdir(parent_source_local):
        sys.path.append(parent_source_local)
    else:
        raise RuntimeError("not found parent source folder")
    import source_file.source

# import original classes of the native Source File
from source_file import SourceFile as ParentSourceFile
from source_file.client import Client
from source_file.client import URLFile as ParentURLFile

LOCAL_STORAGE_NAME = "local"


class URLFileSecure(ParentURLFile):
    """File reader that refuses to work with the local filesystem.

    Everything else is delegated to the parent ``URLFile`` reader.
    """

    def __init__(self, url: str, provider: dict):
        """Validate the target and initialise the parent reader.

        Args:
            url: location of the file to replicate.
            provider: provider settings; must contain the "storage" key.

        Raises:
            RuntimeError: if the URL uses the ``file://`` scheme or the
                provider storage is the local one (compared case-insensitively).
        """
        storage_name = provider["storage"].lower()
        # URL schemes are case-insensitive, so "FILE://..." must be rejected too.
        is_local_url = url.lower().startswith("file://")
        if is_local_url or storage_name == LOCAL_STORAGE_NAME:
            raise RuntimeError("the local file storage is not supported by this connector.")
        super().__init__(url, provider)


class SourceFileSecure(ParentSourceFile):
    """Variant of the "source-file" connector without local file support.

    The base logic is implemented in the parent "source-file" connector; this
    class only swaps the reader class and removes the local storage option
    from the published spec.
    """

    @property
    def client_class(self):
        """Return a ``Client`` subclass whose reader is ``URLFileSecure``."""

        # replace a standard class variable to the new one
        class ClientSecure(Client):
            reader_class = URLFileSecure

        return ClientSecure

    def spec(self, logger: AirbyteLogger) -> ConnectorSpecification:
        """Load the parent connector's spec and strip the local storage provider.

        Args:
            logger: unused here; kept for interface compatibility.

        Returns:
            The parent spec (read from ``spec.json`` next to the parent
            ``source_file.source`` module) in which every provider's
            "storage" property is a ``const`` and the provider whose const
            equals ``LOCAL_STORAGE_NAME`` has been removed.

        Raises:
            KeyError: if a provider's "storage" property ends up without a
                "const" value, so an unclassifiable provider is never
                silently exposed.
        """
        parent_code_dir = os.path.dirname(source_file.source.__file__)
        parent_spec_file = os.path.join(parent_code_dir, "spec.json")
        with open(parent_spec_file, "r") as f:
            spec = ConnectorSpecification.parse_obj(json.load(f))

        providers = spec.connectionSpecification["properties"]["provider"]["oneOf"]

        # correction of the "storage" property to const type
        for provider in providers:
            storage = provider["properties"]["storage"]
            if "enum" in storage:
                storage.pop("enum")
                storage["const"] = storage.pop("default")

        # Rebuild the list instead of pop()-ing by index while iterating:
        # popping shrinks the list, which raised IndexError (or skipped the
        # next entry) whenever the local provider was not the last element.
        # Slice assignment keeps the same list object inside the spec.
        providers[:] = [
            provider
            for provider in providers
            if provider["properties"]["storage"]["const"] != LOCAL_STORAGE_NAME
        ]
        return spec
+# + +import pytest +from base_python.logger import AirbyteLogger +from source_file_secure import SourceFileSecure +from source_file_secure.source import LOCAL_STORAGE_NAME + +local_storage_config = { + "dataset_name": "test", + "format": "csv", + "reader_options": '{"sep": ",", "nrows": 20}', + "url": "file:///tmp/fake_file.csv", + "provider": { + "storage": LOCAL_STORAGE_NAME.upper(), + }, +} + + +def test_local_storage_spec(): + """Checks spec properties""" + source = SourceFileSecure() + spec = source.spec(logger=AirbyteLogger()) + for provider in spec.connectionSpecification["properties"]["provider"]["oneOf"]: + assert provider["properties"]["storage"]["const"] != LOCAL_STORAGE_NAME, "This connector shouldn't work with local files." + + +def test_local_storage_check(): + """Checks working with a local options""" + source = SourceFileSecure() + with pytest.raises(RuntimeError) as exc: + source.check(logger=AirbyteLogger(), config=local_storage_config) + assert "not supported" in str(exc.value) diff --git a/docs/integrations/sources/file.md b/docs/integrations/sources/file.md index c2469f77e108..835cdc86c612 100644 --- a/docs/integrations/sources/file.md +++ b/docs/integrations/sources/file.md @@ -21,7 +21,7 @@ This source produces a single table for the target file as it replicates only on | Amazon Web Services S3 | Yes | | SFTP | Yes | | SSH / SCP | Yes | -| local filesystem | Experimental | +| local filesystem | Local use only (inaccessible for Airbyte Cloud) | ### File / Stream Compression diff --git a/tools/bin/ci_credentials.sh b/tools/bin/ci_credentials.sh index e9daf7f1e7f6..f0d434c5c169 100755 --- a/tools/bin/ci_credentials.sh +++ b/tools/bin/ci_credentials.sh @@ -68,6 +68,7 @@ write_standard_creds source-exchange-rates "$EXCHANGE_RATES_TEST_CREDS" write_standard_creds source-file "$GOOGLE_CLOUD_STORAGE_TEST_CREDS" "gcs.json" write_standard_creds source-file "$AWS_S3_INTEGRATION_TEST_CREDS" "aws.json" write_standard_creds source-file 
"$AZURE_STORAGE_INTEGRATION_TEST_CREDS" "azblob.json" +write_standard_creds source-file-secure "$FILE_SECURE_HTTPS_TEST_CREDS" write_standard_creds source-freshdesk "$FRESHDESK_TEST_CREDS" write_standard_creds source-facebook-marketing "$FACEBOOK_MARKETING_TEST_INTEGRATION_CREDS" write_standard_creds source-facebook-pages "$FACEBOOK_PAGES_INTEGRATION_TEST_CREDS"