Commit

Merge pull request #66 from yuwtennis/refactor/security-hardening-for-credentials

feat: improved security
yuwtennis authored Dec 9, 2023
2 parents 0e7694d + d951f91 commit 36d8298
Showing 15 changed files with 494 additions and 115 deletions.
27 changes: 27 additions & 0 deletions .github/workflows/drive_to_es.yml
@@ -0,0 +1,27 @@
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: CI/CD for drive_to_es

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.11"]
        poetry-version: ["1.5.1"]
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Test
        run: |
          apt install pipx && \
          pipx install poetry && \
          poetry install && \
          cd household_expenses/publish/drive_to_es/
          make test
          make lint
          make static-type-check
37 changes: 0 additions & 37 deletions .github/workflows/pythonapp.yml

This file was deleted.

6 changes: 4 additions & 2 deletions household_expenses/publish/drive_to_es/Dockerfile
@@ -1,4 +1,4 @@
FROM python:3.10-alpine
FROM python:3.11-alpine

ENV POETRY_HOME="/opt/poetry"
ENV POETRY_VERSION=1.5.1
@@ -9,4 +9,6 @@ RUN python3 -m venv $POETRY_HOME \

WORKDIR /app
ADD . /app
RUN $POETRY_HOME/bin/poetry install
RUN $POETRY_HOME/bin/poetry install --without dev --sync

ENTRYPOINT python3 __main__.py
12 changes: 11 additions & 1 deletion household_expenses/publish/drive_to_es/Makefile
@@ -1,2 +1,12 @@

test:
	pytest -s tests/

lint:
	pylint --extension-pkg-whitelist=pydantic drive_to_es/

static-type-check:
	mypy drive_to_es

build:
	sudo docker build -t sheets-to-es:latest .
	sudo docker build -t drive-to-es:latest .
15 changes: 15 additions & 0 deletions household_expenses/publish/drive_to_es/README.md
@@ -0,0 +1,15 @@

## Build

```shell
make build
```

## Run

```shell
docker run \
  -e ES_HOST=YOUR_ES_HOST \
  -e SERVICE_ACCOUNT_INFO=YOUR_SVC_ACCOUNT_INFO \
  drive-to-es:latest python3 __main__.py
```
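
The run command above expects `SERVICE_ACCOUNT_INFO` to carry the service-account credentials directly, rather than a path to a key file. One way to produce that value, not prescribed by this commit, is to flatten a downloaded key file into a single JSON string; the file name below is hypothetical.

```python
# Hypothetical helper (not part of the repository): turn a downloaded
# service-account key file into the single-line JSON string expected in
# SERVICE_ACCOUNT_INFO. The file name is an assumption.
import json

with open("service-account-key.json", encoding="utf-8") as fh:
    service_account_info = json.dumps(json.load(fh))

# Pass the string to the container, e.g.
#   docker run -e SERVICE_ACCOUNT_INFO="$service_account_info" ...
print(service_account_info)
```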
@@ -3,4 +3,4 @@
import logging

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.setLevel(logging.INFO)
62 changes: 31 additions & 31 deletions household_expenses/publish/drive_to_es/drive_to_es/client.py
@@ -1,67 +1,61 @@
""" Client module """
from typing import List, Dict, Any, Generator

from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from io import BytesIO, StringIO
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
import csv
import logging
import os
from hashlib import md5
from datetime import datetime
from .entities import IncomeByDateEntity
from .entities import IncomeByItemEntity
from .values import LabelValue
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from drive_to_es.entities import IncomeByDateEntity, IncomeByItemEntity, Env
from drive_to_es.values import LabelValue

SCOPES = ['https://www.googleapis.com/auth/drive']
LOGGER = logging.getLogger(__name__)


def run():

es_host = os.getenv('ES_HOST', ['localhost:9200'])
service_account_file = os.getenv('SERVICE_ACCOUNT_FILE')
upload_file_name = os.getenv(
'UPLOAD_FILE_NAME',
f'income-{str(datetime.now().year)}.csv')

""" Client operations """
# Prepare credential
LOGGER.info('Prepare credential.')
credentials = service_account.Credentials.from_service_account_file(
service_account_file, scopes=SCOPES)
env: Env = Env()
credentials = service_account.Credentials.from_service_account_info(
env.service_account_info, scopes=SCOPES)

service = build('drive', 'v3', credentials=credentials)
service: Any = build('drive', 'v3', credentials=credentials)

# Access drive
LOGGER.info('Get file ids from drive.')
results = service.files().list(\
orderBy='modifiedTime desc',\
q=f"name='{upload_file_name}'",\
fields="nextPageToken, files(id, name, modifiedTime)").execute()
results = service.files().list( # pylint: disable=maybe-no-member
orderBy='modifiedTime desc',
q=f"name='{env.upload_file_name}'",
fields="nextPageToken, files(id, name, modifiedTime)")\
.execute()

LOGGER.info(dir(results))

items = results.get('files', [])
LOGGER.info(items)

request = service.files().get_media(fileId=items[0]['id'])
request = service.files().get_media(fileId=items[0]['id']) # pylint: disable=maybe-no-member

fh = BytesIO()
downloader = MediaIoBaseDownload(fh, request)
done = False

while done is False:
status, done = downloader.next_chunk()
LOGGER.info("Download %d%%." % int(status.progress() * 100))
LOGGER.info("Download %d%%.", int(status.progress() * 100))

# Write to elasticsearch
LOGGER.info('Send to elasticsearch.')
fh = StringIO(fh.getvalue().decode(), newline='')

data = list(csv.DictReader(fh))
es_inst = Elasticsearch(hosts=es_host)
es_inst = Elasticsearch(hosts=env.es_host)

bulk(
es_inst,
@@ -76,15 +70,21 @@ def run():
)


def construct_esdoc_by_date(msgs: List[Dict[str, Any]], index: str) -> Generator[Dict[str, Any], None, None]:
def construct_esdoc_by_date(
msgs: List[Dict[str, Any]],
index: str) -> Generator[Dict[str, Any], None, None]:
""" Return Elasticsearch document """
for m in msgs:
doc_id = md5(m['report_date'].encode('utf-8')).hexdigest()
body = IncomeByDateEntity(updated_on=datetime.utcnow(), **m).dict()

yield dict(_id=doc_id, _op_type='index', _index=index, **body)
yield {"_id": doc_id, "_op_type": "index", "_index": index, **body}


def construct_esdoc_by_item(msgs: List[Dict[str, Any]], index: str) -> Generator[Dict[str, Any], None, None]:
def construct_esdoc_by_item(
msgs: List[Dict[str, Any]],
index: str) -> Generator[Dict[str, Any], None, None]:
""" Return Elasticsearch document """
for m in msgs:
keys = filter(lambda x: x != 'report_date', m.keys())

@@ -100,4 +100,4 @@ def construct_esdoc_by_item(msgs: List[Dict[str, Any]], index: str) -> Generator
item_value=m[k],
item_labels=item_label).dict()

yield dict(_id=doc_id, _op_type='index', _index=index, **body)
yield {"_id": doc_id, "_op_type": "index", "_index": index, **body}
21 changes: 20 additions & 1 deletion household_expenses/publish/drive_to_es/drive_to_es/entities.py
@@ -1,8 +1,26 @@
""" Entity module """
from datetime import datetime
from pydantic import BaseModel
from typing import Any
from pydantic import BaseModel, AnyHttpUrl, Json, BaseSettings

# pylint: disable=too-few-public-methods


class Env(BaseSettings):
"""
Attributes
----------
es_host: http/https url for elasticsearch host
service_account_info: A valid API key for the Service Account, as a JSON string
upload_file_name: The income statement csv file name
"""
es_host: AnyHttpUrl
service_account_info: Json[Any] # pylint: disable=unsubscriptable-object
upload_file_name: str = f'income-{str(datetime.now().year)}.csv'


class IncomeByDateEntity(BaseModel):
""" Entity which holds income info by date """
report_date: str
updated_on: datetime
income_tax: int = 0
@@ -39,6 +57,7 @@ class IncomeByDateEntity(BaseModel):


class IncomeByItemEntity(BaseModel):
""" Entity which holds income info by income attribute """
report_date: str
updated_on: datetime
item_key: str
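
The `Env` settings class above is the heart of the hardening: pydantic v1 `BaseSettings` pulls each field from an environment variable of the same name, so the credentials arrive as a JSON string in `SERVICE_ACCOUNT_INFO` instead of a key file on disk. A rough sketch with dummy values:

```python
# Sketch (not part of the diff) of how Env reads its configuration from the
# environment via pydantic v1 BaseSettings; all values below are dummies.
import os

from drive_to_es.entities import Env

os.environ["ES_HOST"] = "http://localhost:9200"
os.environ["SERVICE_ACCOUNT_INFO"] = '{"type": "service_account", "project_id": "dummy"}'

env = Env()
print(env.es_host)               # validated as AnyHttpUrl
print(env.service_account_info)  # dict parsed from the JSON string (Json[Any])
print(env.upload_file_name)      # defaults to income-<current year>.csv
```

This is what lets `run()` switch from `Credentials.from_service_account_file` to `Credentials.from_service_account_info`, the change the PR title refers to.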
5 changes: 4 additions & 1 deletion household_expenses/publish/drive_to_es/drive_to_es/values.py
@@ -1,8 +1,12 @@
""" Values """
from pydantic import BaseModel
from pydantic import Field

# pylint: disable=too-few-public-methods


class LabelValue(BaseModel):
""" Labels for each key """
report_date: list = Field([''], const=True)
updated_on: list = Field([''], const=True)
income_tax: list = Field(['withholding_tax'], const=True)
@@ -36,4 +40,3 @@ class LabelValue(BaseModel):
creditcard_view: list = Field(['cashout','creditcard'], const=True)
creditcard_mc: list = Field(['cashout','creditcard'], const=True)
basic_life: list = Field(['cashout'], const=True)
