Merge pull request #34 from factly/feat/update-tags-from-gsheets
feat: Update dictionary values from sheets
100mi authored Jan 20, 2023
2 parents cfefeb7 + 6e26a8e commit 811ed1a
Showing 7 changed files with 116 additions and 54 deletions.
68 changes: 68 additions & 0 deletions app/api/api_v1/routers/dictionary.py
@@ -0,0 +1,68 @@
import pandas as pd
from fastapi import APIRouter, HTTPException, status
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse

from app.core.config import CORE_FOLDER, Settings
from app.models.gsheets import GsheetSaveRequest
from app.utils.gsheets import get_records_from_gsheets

settings = Settings()

dictionary_router = router = APIRouter()


@router.get("/", summary="Get the names of all saved entity CSV files")
async def get_entity_names():
    # List all the CSV files present in the config folder
return [
csv_file.name.replace(".csv", "")
for csv_file in CORE_FOLDER.glob("**/*.csv")
]


@router.get(
"/{entity}",
    summary="Get the data from a saved entity CSV file",
response_class=JSONResponse,
)
async def get_entity_data(entity: str):
entity_df = pd.read_csv(CORE_FOLDER / f"{entity}.csv")
    # replace NaN values to avoid JSON conversion errors
entity_df = entity_df.fillna("")

# convert to json
json_compatible_item_data = jsonable_encoder(
entity_df.to_dict(orient="records")
)
return JSONResponse(content=json_compatible_item_data)


@router.put(
"/",
    summary="Update an entity CSV file and save it to the config folder",
)
async def update_entity(request: GsheetSaveRequest):
    # locate the destination file that needs to be updated
destination_file = CORE_FOLDER / f"{request.entity}.csv"

    # only proceed if the file already exists, as we do not want to create new files
if not destination_file.is_file():
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Provided entity does not exist",
)

# get dataset/tags from the data dictionary google sheet
dataset_meta_data = get_records_from_gsheets(
sheet_id=request.sheet_id,
worksheet=request.worksheet,
)

    # load the fetched records into a pandas DataFrame
entity_df = pd.DataFrame(dataset_meta_data)

    # save the DataFrame as a CSV inside the config folder
entity_df.to_csv(CORE_FOLDER / f"{request.entity}.csv", index=False)

return {"entity": request.entity, "action": "UPDATE", "status": "SUCCEED"}
4 changes: 3 additions & 1 deletion app/core/config.py
@@ -4,7 +4,9 @@

from pydantic import BaseSettings

APP_DIR = Path(__file__).resolve().parents[1]
FILE_PATH = Path(__file__).resolve()
APP_DIR = FILE_PATH.parents[1]
CORE_FOLDER = FILE_PATH.parents[0]


class Settings(BaseSettings):
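For reference (not part of the diff), a sketch of where the new constants point, assuming the repository checkout lives at /repo so that this file is /repo/app/core/config.py:

from pathlib import Path

FILE_PATH = Path("/repo/app/core/config.py").resolve()
APP_DIR = FILE_PATH.parents[1]      # /repo/app
CORE_FOLDER = FILE_PATH.parents[0]  # /repo/app/core, where sector.csv and the other entity CSVs live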
34 changes: 17 additions & 17 deletions app/core/sector.csv
@@ -1,53 +1,53 @@
sector
Administration & Governance
Administration and Governance
Agriculture and Allied
Art and Culture
Automobile
Banking
Aviation
Biotechnology
Census and Surveys
Aviation
Chemicals and Fertilizers
Commerce
Corporate
Defence
Demographics
Education
Entertainment
Electoral Statistics
Energy
Environment and Forest
Finance
Food
Foreign Affairs
Governance and Administration
Health and Family Welfare
Home Affairs and Enforcement
Horticulture
Hospitality
Housing
Industries
Urban Development
Industries and Factories
Information and Broadcasting
Information Technology
Infrastructure
Insurance
International
Irrigation
Judiciary
Labour and Employment
Livestock
Law and Justice
Media
Mining
Parliament of India
Ports and Shipping
Postal
Power and Energy
Prices
Railway
Railways
Rural Development
Science and Technology
Shipping
Social Development
State Functions
Telecommunications
Trade
Tourism and Hospitality
Transport
Tourism
Urban Development
Water and Sanitation
Youth and Sports
Banking
Trade
Water Resources
Youth and Sports
Youth and Sports
36 changes: 4 additions & 32 deletions app/main.py
@@ -3,19 +3,11 @@
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates

# from app.api.api_v1.routers.column_mapping import column_mapper_router
from app.api.api_v1.routers.dataset import dataset_router
from app.api.api_v1.routers.dictionary import dictionary_router
from app.api.api_v1.routers.docs import docs_router
from app.api.api_v1.routers.metadata import metadata_router

# from app.api.api_v1.routers.datetime import datetime_router
# from app.api.api_v1.routers.documentation import documentation_router
# from app.api.api_v1.routers.general import general_router
# from app.api.api_v1.routers.geography import geographic_router
# from app.api.api_v1.routers.note import note_router
from app.api.api_v1.routers.s3_checks import s3_router

# from app.api.api_v1.routers.unit import unit_router
from app.core.config import Settings

settings = Settings()
@@ -40,29 +32,9 @@ async def home(request: Request):


app.include_router(dataset_router, prefix="", tags=["Compare Datasets"])

# app.include_router(
# column_mapper_router, prefix="/columns", tags=["Columns Mapped"]
# )

# app.include_router(
# datetime_router, prefix="/columns/datetime", tags=["Date & Time Column"]
# )

# app.include_router(
# geographic_router, prefix="/columns/geography", tags=["Geography Column"]
# )

# app.include_router(unit_router, prefix="/columns/unit", tags=["Unit Column"])

# app.include_router(note_router, prefix="/columns/note", tags=["Note Column"])

# app.include_router(general_router, prefix="/table", tags=["Table"])

# app.include_router(documentation_router, prefix="", tags=["Documentation"])

app.include_router(s3_router, prefix="/s3", tags=["S3 Checks"])

app.include_router(metadata_router, prefix="", tags=["Metadata"])
app.include_router(docs_router, prefix="/docs", tags=["Documentation"])
# app.include_router(column_router, prefix="/column", tags=["Column"])
app.include_router(
dictionary_router, prefix="/dictionary", tags=["Dictionary"]
)
10 changes: 10 additions & 0 deletions app/models/gsheets.py
@@ -0,0 +1,10 @@
from pydantic import BaseModel


class GsheetRequest(BaseModel):
sheet_id: str
worksheet: str


class GsheetSaveRequest(GsheetRequest):
entity: str
6 changes: 3 additions & 3 deletions app/utils/column_mapping.py
@@ -1,6 +1,6 @@
import re
from itertools import chain
from typing import Dict
from typing import Dict, Set

from app.core.config import (
AirlineSettings,
@@ -22,9 +22,9 @@


def extract_pattern_from_columns(
columns: set[str],
columns: Set[str],
pattern,
) -> Dict[str, set[str]]:
) -> Dict[str, Set[str]]:
"""Match regex pattern against columns to extract column names from
_extended_summary_
12 changes: 11 additions & 1 deletion app/utils/gsheets.py
@@ -1,6 +1,7 @@
from typing import Dict, List, Union

import gspread
from fastapi import HTTPException, status
from google.oauth2 import service_account

from app.core.config import Settings
@@ -28,7 +29,16 @@ def get_records_from_gsheets(
client = gspread.authorize(credentials)

# get the instance of the Spreadsheet
sheet = client.open_by_key(sheet_id)
try:
sheet = client.open_by_key(sheet_id)
except Exception as e:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail={
"error": f"{e}",
"message": "This caller does not have permission for the sheet",
},
)

# get the very first worksheet present if no tab is mentioned
    # a tab is a worksheet window inside the Google Sheet
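Not part of the diff: a short sketch of the new error path, assuming the Google Sheet has not been shared with the gspread service-account email; sheet_id and worksheet are placeholders.

from fastapi import HTTPException

from app.utils.gsheets import get_records_from_gsheets

try:
    records = get_records_from_gsheets(
        sheet_id="<google-sheet-id>",  # placeholder
        worksheet="tags",              # placeholder tab name
    )
except HTTPException as exc:
    # the underlying gspread error is re-raised as a 403 with an explanatory detail payload
    print(exc.status_code, exc.detail)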
