Skip to content

Commit

Permalink
Feature/pods 1446 update the ingest to use dependabot for sdk updates (#78)
Browse files Browse the repository at this point in the history

This Pull Request: 
---
- Adds a Dependabot configuration and migrates the ingest from the cpr-data-access package to cpr-sdk (dependency and imports).

---------

Co-authored-by: Mark <[email protected]>
  • Loading branch information
THOR300 and Mark authored Jul 17, 2024
1 parent 6959d8b commit f7f6849
Show file tree
Hide file tree
Showing 11 changed files with 1,283 additions and 505 deletions.
28 changes: 28 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
version: 2
updates:
  - package-ecosystem: pip
    directory: /
    schedule:
      interval: monthly
    ignore:
      - dependency-name: cpr_sdk
    reviewers:
      - climatepolicyradar/deng
  - package-ecosystem: github-actions
    directory: /
    schedule:
      interval: monthly
    ignore:
      - dependency-name: cpr_sdk
    reviewers:
      - climatepolicyradar/deng
  - package-ecosystem: pip
    directory: /
    schedule:
      interval: daily
    allow:
      - dependency-name: cpr_sdk
    target-branch: main
    reviewers:
      - climatepolicyradar/deng

1,673 changes: 1,212 additions & 461 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ python-dotenv = "^0.19.2"
requests = "^2.28.1"
tenacity = "^8.1.0"
json-logging = "^1.3.0"
cpr-data-access = {git = "https://github.com/climatepolicyradar/data-access.git", tag = "0.4.6"}
pypdf = "^4.2.0"
cpr-sdk = "^1.1.6"

[tool.poetry.group.dev-dependencies.dependencies]
black = "^22.1.0"
Expand Down
8 changes: 4 additions & 4 deletions src/navigator_data_ingest/base/api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,22 @@

import requests
from cloudpathlib import CloudPath, S3Path
from cpr_data_access.parser_models import ParserInput
from cpr_sdk.parser_models import ParserInput
from pypdf import PdfReader
from pypdf.errors import PyPdfError
from tenacity import retry
from tenacity.stop import stop_after_attempt
from tenacity.wait import wait_random_exponential

from navigator_data_ingest.base.utils import determine_content_type
from navigator_data_ingest.base.types import (
CONTENT_TYPE_PDF,
FILE_EXTENSION_MAPPING,
MULTI_FILE_CONTENT_TYPES,
SUPPORTED_CONTENT_TYPES,
FILE_EXTENSION_MAPPING,
UploadResult,
UnsupportedContentTypeError,
UploadResult,
)
from navigator_data_ingest.base.utils import determine_content_type

_LOGGER = logging.getLogger(__file__)

Expand Down
10 changes: 5 additions & 5 deletions src/navigator_data_ingest/base/new_document_actions.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
import logging
import traceback
from concurrent.futures import as_completed, Executor
from concurrent.futures import Executor, as_completed
from typing import Generator, Iterable

import pydantic
import requests
from cpr_sdk.parser_models import ParserInput
from cpr_sdk.pipeline_general_models import BackendDocument
from slugify import slugify
import pydantic

from navigator_data_ingest.base.api_client import upload_document
from navigator_data_ingest.base.types import (
UploadResult,
HandleResult,
UploadResult,
)
from cpr_data_access.pipeline_general_models import BackendDocument
from cpr_data_access.parser_models import ParserInput

_LOGGER = logging.getLogger(__file__)

Expand Down
15 changes: 7 additions & 8 deletions src/navigator_data_ingest/base/types.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,24 @@
"""Base definitions for data ingest"""
from abc import abstractmethod, ABC
from abc import ABC, abstractmethod
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from typing import (
Callable,
Generator,
Optional,
Callable,
)

from cpr_data_access.parser_models import ParserInput
from pydantic import BaseModel

from cpr_data_access.pipeline_general_models import (
from cpr_sdk.parser_models import ParserInput
from cpr_sdk.pipeline_general_models import (
CONTENT_TYPE_DOCX,
CONTENT_TYPE_HTML,
CONTENT_TYPE_PDF,
CONTENT_TYPE_DOCX,
UpdateTypes,
BackendDocument,
Update,
UpdateTypes,
)
from pydantic import BaseModel

SINGLE_FILE_CONTENT_TYPES = {
CONTENT_TYPE_PDF,
Expand Down
10 changes: 5 additions & 5 deletions src/navigator_data_ingest/base/updated_document_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,19 @@
import logging
import os
import traceback
from concurrent.futures import as_completed, Executor
from concurrent.futures import Executor, as_completed
from datetime import datetime
from typing import Generator, List, Union, Tuple
from typing import Generator, List, Tuple, Union

from cloudpathlib import S3Path
from cpr_sdk.pipeline_general_models import Update, UpdateTypes

from navigator_data_ingest.base.types import (
UpdateConfig,
UpdateResult,
Action,
PipelineFieldMapping,
UpdateConfig,
UpdateResult,
)
from cpr_data_access.pipeline_general_models import Update, UpdateTypes

_LOGGER = logging.getLogger(__file__)

Expand Down
18 changes: 9 additions & 9 deletions src/navigator_data_ingest/base/utils.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
import json
import logging
from typing import Generator, List, Tuple
from typing import cast
from typing import Generator, List, Tuple, cast

from cloudpathlib import CloudPath, S3Path
from requests import Response

from navigator_data_ingest.base.types import DocumentGenerator, CONTENT_TYPE_MAPPING
from cpr_data_access.pipeline_general_models import (
Update,
PipelineUpdates,
from cpr_sdk.pipeline_general_models import (
BackendDocument,
PipelineUpdates,
Update,
)
from requests import Response

from navigator_data_ingest.base.types import CONTENT_TYPE_MAPPING, DocumentGenerator

_LOGGER = logging.getLogger(__file__)

Expand Down Expand Up @@ -80,7 +79,8 @@ def parser_input_already_exists(


def determine_content_type(response: Response, source_url: str) -> str:
"""Use the response headers and file extension to determine content type
"""
Use the response headers and file extension to determine content type
Args:
response (Response): the request object from the file download
Expand Down
2 changes: 1 addition & 1 deletion src/navigator_data_ingest/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
import click
import json_logging
from cloudpathlib import S3Path
from cpr_sdk.pipeline_general_models import ExecutionData

from navigator_data_ingest.base.api_client import (
write_error_file,
write_parser_input,
)
from navigator_data_ingest.base.new_document_actions import handle_new_documents
from navigator_data_ingest.base.types import UpdateConfig
from cpr_data_access.pipeline_general_models import ExecutionData
from navigator_data_ingest.base.updated_document_actions import handle_document_updates
from navigator_data_ingest.base.utils import LawPolicyGenerator

Expand Down
13 changes: 6 additions & 7 deletions src/navigator_data_ingest/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import re
import json
import os
import re

import boto3
import botocore.client
import pytest
from cpr_sdk.pipeline_general_models import Update, UpdateTypes
from moto import mock_s3
import json

from navigator_data_ingest.base.types import UpdateConfig
from cpr_data_access.pipeline_general_models import UpdateTypes, Update


class S3Client:
Expand Down Expand Up @@ -292,9 +293,7 @@ def test_s3_client__cdn(mock_cdn_config):

s3_client.client.create_bucket(
Bucket=mock_cdn_config["bucket"],
CreateBucketConfiguration={
"LocationConstraint": mock_cdn_config["region"]
},
CreateBucketConfiguration={"LocationConstraint": mock_cdn_config["region"]},
)

yield s3_client
Expand All @@ -303,7 +302,7 @@ def test_s3_client__cdn(mock_cdn_config):
@pytest.fixture
def pdf_bytes():
"""Bytes from a valid pdf"""
fixture_dir = os.path.join(os.path.dirname(__file__), "fixtures")
fixture_dir = os.path.join(os.path.dirname(__file__), "fixtures")
pdf_data = os.path.join(fixture_dir, "sample.pdf")
with open(pdf_data, "rb") as b:
contents = b.read()
Expand Down
9 changes: 5 additions & 4 deletions src/navigator_data_ingest/tests/test_update_actions.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
from cloudpathlib import S3Path
import json

import pytest
from cloudpathlib import S3Path
from cpr_sdk.pipeline_general_models import UpdateTypes

from navigator_data_ingest.base.types import Action, PipelineFieldMapping
from cpr_data_access.pipeline_general_models import UpdateTypes
from navigator_data_ingest.base.updated_document_actions import (
update_dont_parse,
order_actions,
parse,
update_file_field,
rename,
update_dont_parse,
update_file_field,
update_type_actions,
)

Expand Down

0 comments on commit f7f6849

Please sign in to comment.