diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 6443c29c51c8..ed3f9374c21b 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -410,7 +410,7 @@ - name: Google Search Console sourceDefinitionId: eb4c9e00-db83-4d63-a386-39cfa91012a8 dockerRepository: airbyte/source-google-search-console - dockerImageTag: 0.1.16 + dockerImageTag: 0.1.17 documentationUrl: https://docs.airbyte.io/integrations/sources/google-search-console icon: googlesearchconsole.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 3150c8db90f3..bbbc350c84e9 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -4217,7 +4217,7 @@ - - "client_secret" oauthFlowOutputParameters: - - "refresh_token" -- dockerImage: "airbyte/source-google-search-console:0.1.16" +- dockerImage: "airbyte/source-google-search-console:0.1.17" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/google-search-console" connectionSpecification: @@ -4238,8 +4238,8 @@ \ Read more here." examples: - - "https://example1.com" - - "https://example2.com" + - "https://example1.com/" + - "https://example2.com/" order: 0 start_date: type: "string" @@ -4257,7 +4257,7 @@ \ will not be replicated. Must be greater or equal to the start date field." examples: - "2021-12-12" - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" + pattern: "^$|^[0-9]{4}-[0-9]{2}-[0-9]{2}$" order: 2 authorization: type: "object" diff --git a/airbyte-integrations/connectors/source-google-search-console/Dockerfile b/airbyte-integrations/connectors/source-google-search-console/Dockerfile index f8d9e0e92c15..1276245292c8 100755 --- a/airbyte-integrations/connectors/source-google-search-console/Dockerfile +++ b/airbyte-integrations/connectors/source-google-search-console/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.16 +LABEL io.airbyte.version=0.1.17 LABEL io.airbyte.name=airbyte/source-google-search-console diff --git a/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/source.py b/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/source.py index 82f608c557a9..06729e9db31d 100755 --- a/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/source.py +++ b/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/source.py @@ -4,7 +4,9 @@ import json from typing import Any, List, Mapping, Optional, Tuple +from urllib.parse import urlparse +import jsonschema import pendulum import requests from airbyte_cdk.logger import AirbyteLogger @@ -12,7 +14,6 @@ from airbyte_cdk.sources import AbstractSource from airbyte_cdk.sources.streams import Stream from airbyte_cdk.sources.streams.http.auth import Oauth2Authenticator -from jsonschema import validate from source_google_search_console.exceptions import InvalidSiteURLValidationError from source_google_search_console.service_account_authenticator import ServiceAccountAuthenticator from source_google_search_console.streams import ( @@ -41,11 +42,42 @@ class SourceGoogleSearchConsole(AbstractSource): + @staticmethod + def normalize_url(url): + parse_result = urlparse(url) + if parse_result.path == "": + parse_result = parse_result._replace(path="/") + return parse_result.geturl() + + def _validate_and_transform(self, config: Mapping[str, Any]): + authorization = config["authorization"] + if authorization["auth_type"] == "Service": + try: + authorization["service_account_info"] = json.loads(authorization["service_account_info"]) + except ValueError: + raise Exception("authorization.service_account_info is not valid JSON") + + if "custom_reports" in config: + try: + config["custom_reports"] = json.loads(config["custom_reports"]) + except ValueError: + raise Exception("custom_reports is not valid JSON") + jsonschema.validate(config["custom_reports"], custom_reports_schema) + + pendulum.parse(config["start_date"]) + end_date = config.get("end_date") + if end_date: + pendulum.parse(end_date) + config["end_date"] = end_date or pendulum.now().to_date_string() + + config["site_urls"] = [self.normalize_url(url) for url in config["site_urls"]] + return config + def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> Tuple[bool, Any]: try: + config = self._validate_and_transform(config) stream_kwargs = self.get_stream_kwargs(config) - self.validate_site_urls(config, stream_kwargs) - + self.validate_site_urls(config["site_urls"], stream_kwargs["authenticator"]) sites = Sites(**stream_kwargs) stream_slice = sites.stream_slices(SyncMode.full_refresh) @@ -56,7 +88,7 @@ def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> next(sites_gen) return True, None - except InvalidSiteURLValidationError as e: + except (InvalidSiteURLValidationError, jsonschema.ValidationError) as e: return False, repr(e) except Exception as error: return ( @@ -65,9 +97,7 @@ def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> ) @staticmethod - def validate_site_urls(config, stream_kwargs): - auth = stream_kwargs["authenticator"] - + def validate_site_urls(site_urls, auth): if isinstance(auth, ServiceAccountAuthenticator): request = auth(requests.Request(method="GET", url="https://www.googleapis.com/webmasters/v3/sites")) with requests.Session() as s: @@ -76,10 +106,8 @@ def validate_site_urls(config, stream_kwargs): response = requests.get("https://www.googleapis.com/webmasters/v3/sites", headers=auth.get_auth_header()) response_data = response.json() - site_urls = set([s["siteUrl"] for s in response_data["siteEntry"]]) - provided_by_client = set(config["site_urls"]) - - invalid_site_url = provided_by_client - site_urls + remote_site_urls = {s["siteUrl"] for s in response_data["siteEntry"]} + invalid_site_url = set(site_urls) - remote_site_urls if invalid_site_url: raise InvalidSiteURLValidationError(f'The following URLs are not permitted: {", ".join(invalid_site_url)}') @@ -87,7 +115,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: """ :param config: A Mapping of the user input configuration as defined in the connector spec. """ - + config = self._validate_and_transform(config) stream_config = self.get_stream_kwargs(config) streams = [ @@ -106,40 +134,32 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: return streams def get_custom_reports(self, config: Mapping[str, Any], stream_config: Mapping[str, Any]) -> List[Optional[Stream]]: - if "custom_reports" not in config: - return [] - - reports = json.loads(config["custom_reports"]) - validate(reports, custom_reports_schema) - return [ type(report["name"], (SearchAnalyticsByCustomDimensions,), {})(dimensions=report["dimensions"], **stream_config) - for report in reports + for report in config.get("custom_reports", []) ] - @staticmethod - def get_stream_kwargs(config: Mapping[str, Any]) -> Mapping[str, Any]: - authorization = config.get("authorization", {}) - - stream_kwargs = { - "site_urls": config.get("site_urls"), - "start_date": config.get("start_date"), - "end_date": config.get("end_date") or pendulum.now().to_date_string(), + def get_stream_kwargs(self, config: Mapping[str, Any]) -> Mapping[str, Any]: + return { + "site_urls": config["site_urls"], + "start_date": config["start_date"], + "end_date": config["end_date"], + "authenticator": self.get_authenticator(config), } - auth_type = authorization.get("auth_type") + def get_authenticator(self, config): + authorization = config["authorization"] + auth_type = authorization["auth_type"] + if auth_type == "Client": - stream_kwargs["authenticator"] = Oauth2Authenticator( + return Oauth2Authenticator( token_refresh_endpoint="https://oauth2.googleapis.com/token", - client_secret=authorization.get("client_secret"), - client_id=authorization.get("client_id"), - refresh_token=authorization.get("refresh_token"), + client_secret=authorization["client_secret"], + client_id=authorization["client_id"], + refresh_token=authorization["refresh_token"], ) elif auth_type == "Service": - stream_kwargs["authenticator"] = ServiceAccountAuthenticator( - service_account_info=json.loads(authorization.get("service_account_info")), email=authorization.get("email") + return ServiceAccountAuthenticator( + service_account_info=authorization["service_account_info"], + email=authorization["email"], ) - else: - raise Exception(f"Invalid auth type: {auth_type}") - - return stream_kwargs diff --git a/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/spec.json b/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/spec.json index 21f8f82020e7..383e755b1864 100755 --- a/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/spec.json +++ b/airbyte-integrations/connectors/source-google-search-console/source_google_search_console/spec.json @@ -13,7 +13,7 @@ }, "title": "Website URL Property", "description": "The URLs of the website property attached to your GSC account. Read more here.", - "examples": ["https://example1.com", "https://example2.com"], + "examples": ["https://example1.com/", "https://example2.com/"], "order": 0 }, "start_date": { @@ -29,7 +29,7 @@ "title": "End Date", "description": "UTC date in the format 2017-01-25. Any data after this date will not be replicated. Must be greater or equal to the start date field.", "examples": ["2021-12-12"], - "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}$", + "pattern": "^$|^[0-9]{4}-[0-9]{2}-[0-9]{2}$", "order": 2 }, "authorization": { diff --git a/airbyte-integrations/connectors/source-google-search-console/unit_tests/conftest.py b/airbyte-integrations/connectors/source-google-search-console/unit_tests/conftest.py index 77ccfd4c7b05..9c3b1bff14af 100644 --- a/airbyte-integrations/connectors/source-google-search-console/unit_tests/conftest.py +++ b/airbyte-integrations/connectors/source-google-search-console/unit_tests/conftest.py @@ -2,23 +2,33 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # +from copy import deepcopy + from pytest import fixture @fixture(name="config") def config_fixture(requests_mock): - url = "https://oauth2.googleapis.com/token" - requests_mock.post(url, json={"access_token": "token", "expires_in": 10}) - config = { - "site_urls": ["https://example.com"], - "start_date": "start_date", - "end_date": "end_date", + return { + "site_urls": ["https://example.com/"], + "start_date": "2022-01-01", + "end_date": "2022-02-01", "authorization": { "auth_type": "Client", "client_id": "client_id", "client_secret": "client_secret", "refresh_token": "refresh_token", }, + "custom_reports": '[{"name": "custom_dimensions", "dimensions": ["date", "country", "device"]}]', } - return config + +@fixture +def config_gen(config): + def inner(**kwargs): + new_config = deepcopy(config) + # WARNING, no support deep dictionaries + new_config.update(kwargs) + return {k: v for k, v in new_config.items() if v is not ...} + + return inner diff --git a/airbyte-integrations/connectors/source-google-search-console/unit_tests/unit_test.py b/airbyte-integrations/connectors/source-google-search-console/unit_tests/unit_test.py index b11edf33aba6..f16c4834bc5e 100755 --- a/airbyte-integrations/connectors/source-google-search-console/unit_tests/unit_test.py +++ b/airbyte-integrations/connectors/source-google-search-console/unit_tests/unit_test.py @@ -7,9 +7,10 @@ from urllib.parse import quote_plus import pytest -from airbyte_cdk.models import SyncMode +from airbyte_cdk.models import AirbyteConnectionStatus, Status, SyncMode from source_google_search_console.source import SourceGoogleSearchConsole from source_google_search_console.streams import ROW_LIMIT, GoogleSearchConsole, SearchAnalyticsByCustomDimensions, SearchAnalyticsByDate +from utils import command_check logger = logging.getLogger("airbyte") @@ -135,34 +136,58 @@ def test_parse_response(stream_class, expected): assert record == expected -def test_check_connection_ok(config, mocker, requests_mock): - url = "https://www.googleapis.com/webmasters/v3/sites/https%3A%2F%2Fexample.com" - requests_mock.get(url, json={}) - requests_mock.get("https://www.googleapis.com/webmasters/v3/sites", json={"siteEntry": [{"siteUrl": "https://example.com"}]}) - ok, error_msg = SourceGoogleSearchConsole().check_connection(logger, config=config) +def test_check_connection(config_gen, mocker, requests_mock): + requests_mock.get("https://www.googleapis.com/webmasters/v3/sites/https%3A%2F%2Fexample.com%2F", json={}) + requests_mock.get("https://www.googleapis.com/webmasters/v3/sites", json={"siteEntry": [{"siteUrl": "https://example.com/"}]}) + requests_mock.post("https://oauth2.googleapis.com/token", json={"access_token": "token", "expires_in": 10}) - assert ok - assert not error_msg + source = SourceGoogleSearchConsole() + assert command_check(source, config_gen()) == AirbyteConnectionStatus(status=Status.SUCCEEDED) -def test_check_connection_invalid_config(config): - config.pop("start_date") - ok, error_msg = SourceGoogleSearchConsole().check_connection(logger, config=config) - - assert not ok - assert error_msg - + # test site_urls + assert command_check(source, config_gen(site_urls=["https://example.com"])) == AirbyteConnectionStatus(status=Status.SUCCEEDED) + assert command_check(source, config_gen(site_urls=["https://missed.com"])) == AirbyteConnectionStatus( + status=Status.FAILED, message="\"InvalidSiteURLValidationError('The following URLs are not permitted: https://missed.com/')\"" + ) -def test_check_connection_exception(config): - ok, error_msg = SourceGoogleSearchConsole().check_connection(logger, config=config) + # test start_date + with pytest.raises(Exception): + assert command_check(source, config_gen(start_date=...)) + with pytest.raises(Exception): + assert command_check(source, config_gen(start_date="")) + with pytest.raises(Exception): + assert command_check(source, config_gen(start_date="start_date")) + assert command_check(source, config_gen(start_date="2022-99-99")) == AirbyteConnectionStatus( + status=Status.FAILED, + message="\"Unable to connect to Google Search Console API with the provided credentials - ParserError('Unable to parse string [2022-99-99]')\"", + ) - assert not ok - assert error_msg + # test end_date + assert command_check(source, config_gen(end_date=...)) == AirbyteConnectionStatus(status=Status.SUCCEEDED) + assert command_check(source, config_gen(end_date="")) == AirbyteConnectionStatus(status=Status.SUCCEEDED) + with pytest.raises(Exception): + assert command_check(source, config_gen(end_date="end_date")) + assert command_check(source, config_gen(end_date="2022-99-99")) == AirbyteConnectionStatus( + status=Status.FAILED, + message="\"Unable to connect to Google Search Console API with the provided credentials - ParserError('Unable to parse string [2022-99-99]')\"", + ) + # test custom_reports + assert command_check(source, config_gen(custom_reports="")) == AirbyteConnectionStatus( + status=Status.FAILED, + message="\"Unable to connect to Google Search Console API with the provided credentials - Exception('custom_reports is not valid JSON')\"", + ) + assert command_check(source, config_gen(custom_reports="{}")) == AirbyteConnectionStatus( + status=Status.FAILED, message="''" + ) -def test_streams(config): - streams = SourceGoogleSearchConsole().streams(config) +def test_streams(config_gen): + source = SourceGoogleSearchConsole() + streams = source.streams(config_gen()) + assert len(streams) == 9 + streams = source.streams(config_gen(custom_reports=...)) assert len(streams) == 8 diff --git a/airbyte-integrations/connectors/source-google-search-console/unit_tests/utils.py b/airbyte-integrations/connectors/source-google-search-console/unit_tests/utils.py new file mode 100644 index 000000000000..776315e717e6 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-search-console/unit_tests/utils.py @@ -0,0 +1,19 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. +# + + +from unittest import mock + +from airbyte_cdk.models.airbyte_protocol import ConnectorSpecification +from airbyte_cdk.sources import Source +from airbyte_cdk.sources.utils.schema_helpers import check_config_against_spec_or_exit, split_config + + +def command_check(source: Source, config): + logger = mock.MagicMock() + connector_config, _ = split_config(config) + if source.check_config_against_spec: + source_spec: ConnectorSpecification = source.spec(logger) + check_config_against_spec_or_exit(connector_config, source_spec) + return source.check(logger, config) diff --git a/docs/integrations/sources/google-search-console.md b/docs/integrations/sources/google-search-console.md index 646d40c14d9c..ce01a8b19d9a 100644 --- a/docs/integrations/sources/google-search-console.md +++ b/docs/integrations/sources/google-search-console.md @@ -122,6 +122,7 @@ This connector attempts to back off gracefully when it hits Reports API's rate l | Version | Date | Pull Request | Subject | | :------- | :--------- | :------------------------------------------------------------------------------------------------------------ | :---------------------------------------------------------- | +| `0.1.17` | 2022-10-08 | [17751](https://github.com/airbytehq/airbyte/pull/17751) | Improved config validation: start_date, end_date, site_urls | | `0.1.16` | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream state. | | `0.1.15` | 2022-09-16 | [16819](https://github.com/airbytehq/airbyte/pull/16819) | Check available site urls to avoid 403 error on sync | | `0.1.14` | 2022-09-08 | [16433](https://github.com/airbytehq/airbyte/pull/16433) | Add custom analytics stream. |