Merge pull request #34 from factly/feat/update-tags-from-gsheets
feat: Update dictionary values from sheets
100mi authored Jan 20, 2023
2 parents cfefeb7 + 6e26a8e commit 811ed1a
Showing 7 changed files with 116 additions and 54 deletions.
68 changes: 68 additions & 0 deletions app/api/api_v1/routers/dictionary.py
@@ -0,0 +1,68 @@
import pandas as pd
from fastapi import APIRouter, HTTPException, status
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse

from app.core.config import CORE_FOLDER, Settings
from app.models.gsheets import GsheetSaveRequest
from app.utils.gsheets import get_records_from_gsheets

settings = Settings()

dictionary_router = router = APIRouter()


@router.get("/", summary="Get the names of all saved entity CSV files")
async def get_entity_names():
    # List all the CSV files present in the config folder
return [
csv_file.name.replace(".csv", "")
for csv_file in CORE_FOLDER.glob("**/*.csv")
]


@router.get(
"/{entity}",
    summary="Get the data from a saved entity CSV file",
response_class=JSONResponse,
)
async def get_entity_data(entity: str):
entity_df = pd.read_csv(CORE_FOLDER / f"{entity}.csv")
    # replace NaN values to avoid JSON conversion errors
entity_df = entity_df.fillna("")

# convert to json
json_compatible_item_data = jsonable_encoder(
entity_df.to_dict(orient="records")
)
return JSONResponse(content=json_compatible_item_data)


@router.put(
"/",
    summary="Update an entity CSV file and save it to the config folder",
)
async def update_entity(request: GsheetSaveRequest):
    # locate the destination file that needs to be updated
destination_file = CORE_FOLDER / f"{request.entity}.csv"

    # only proceed if the file already exists, as we do not want to create new files
if not destination_file.is_file():
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Provided entity does not exist",
)

# get dataset/tags from the data dictionary google sheet
dataset_meta_data = get_records_from_gsheets(
sheet_id=request.sheet_id,
worksheet=request.worksheet,
)

    # load the fetched records into a pandas DataFrame
entity_df = pd.DataFrame(dataset_meta_data)

    # save the DataFrame as a CSV inside the config folder
entity_df.to_csv(CORE_FOLDER / f"{request.entity}.csv", index=False)

return {"entity": request.entity, "action": "UPDATE", "status": "SUCCEED"}
4 changes: 3 additions & 1 deletion app/core/config.py
@@ -4,7 +4,9 @@

from pydantic import BaseSettings

APP_DIR = Path(__file__).resolve().parents[1]
FILE_PATH = Path(__file__).resolve()
APP_DIR = FILE_PATH.parents[1]
CORE_FOLDER = FILE_PATH.parents[0]


class Settings(BaseSettings):
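For reference (not part of the diff), a sketch of where the new constants point, assuming the repository checkout lives at /repo so that this file is /repo/app/core/config.py:

from pathlib import Path

FILE_PATH = Path("/repo/app/core/config.py").resolve()
APP_DIR = FILE_PATH.parents[1]      # /repo/app
CORE_FOLDER = FILE_PATH.parents[0]  # /repo/app/core, where sector.csv and the other entity CSVs live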
34 changes: 17 additions & 17 deletions app/core/sector.csv
@@ -1,53 +1,53 @@
sector
Administration & Governance
Administration and Governance
Agriculture and Allied
Art and Culture
Automobile
Banking
Aviation
Biotechnology
Census and Surveys
Aviation
Chemicals and Fertilizers
Commerce
Corporate
Defence
Demographics
Education
Entertainment
Electoral Statistics
Energy
Environment and Forest
Finance
Food
Foreign Affairs
Governance and Administration
Health and Family Welfare
Home Affairs and Enforcement
Horticulture
Hospitality
Housing
Industries
Urban Development
Industries and Factories
Information and Broadcasting
Information Technology
Infrastructure
Insurance
International
Irrigation
Judiciary
Labour and Employment
Livestock
Law and Justice
Media
Mining
Parliament of India
Ports and Shipping
Postal
Power and Energy
Prices
Railway
Railways
Rural Development
Science and Technology
Shipping
Social Development
State Functions
Telecommunications
Trade
Tourism and Hospitality
Transport
Tourism
Urban Development
Water and Sanitation
Youth and Sports
Banking
Trade
Water Resources
Youth and Sports
Youth and Sports
36 changes: 4 additions & 32 deletions app/main.py
@@ -3,19 +3,11 @@
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates

# from app.api.api_v1.routers.column_mapping import column_mapper_router
from app.api.api_v1.routers.dataset import dataset_router
from app.api.api_v1.routers.dictionary import dictionary_router
from app.api.api_v1.routers.docs import docs_router
from app.api.api_v1.routers.metadata import metadata_router

# from app.api.api_v1.routers.datetime import datetime_router
# from app.api.api_v1.routers.documentation import documentation_router
# from app.api.api_v1.routers.general import general_router
# from app.api.api_v1.routers.geography import geographic_router
# from app.api.api_v1.routers.note import note_router
from app.api.api_v1.routers.s3_checks import s3_router

# from app.api.api_v1.routers.unit import unit_router
from app.core.config import Settings

settings = Settings()
@@ -40,29 +32,9 @@ async def home(request: Request):


app.include_router(dataset_router, prefix="", tags=["Compare Datasets"])

# app.include_router(
# column_mapper_router, prefix="/columns", tags=["Columns Mapped"]
# )

# app.include_router(
# datetime_router, prefix="/columns/datetime", tags=["Date & Time Column"]
# )

# app.include_router(
# geographic_router, prefix="/columns/geography", tags=["Geography Column"]
# )

# app.include_router(unit_router, prefix="/columns/unit", tags=["Unit Column"])

# app.include_router(note_router, prefix="/columns/note", tags=["Note Column"])

# app.include_router(general_router, prefix="/table", tags=["Table"])

# app.include_router(documentation_router, prefix="", tags=["Documentation"])

app.include_router(s3_router, prefix="/s3", tags=["S3 Checks"])

app.include_router(metadata_router, prefix="", tags=["Metadata"])
app.include_router(docs_router, prefix="/docs", tags=["Documentation"])
# app.include_router(column_router, prefix="/column", tags=["Column"])
app.include_router(
dictionary_router, prefix="/dictionary", tags=["Dictionary"]
)
10 changes: 10 additions & 0 deletions app/models/gsheets.py
@@ -0,0 +1,10 @@
from pydantic import BaseModel


class GsheetRequest(BaseModel):
sheet_id: str
worksheet: str


class GsheetSaveRequest(GsheetRequest):
entity: str
6 changes: 3 additions & 3 deletions app/utils/column_mapping.py
@@ -1,6 +1,6 @@
import re
from itertools import chain
from typing import Dict
from typing import Dict, Set

from app.core.config import (
AirlineSettings,
@@ -22,9 +22,9 @@


def extract_pattern_from_columns(
columns: set[str],
columns: Set[str],
pattern,
) -> Dict[str, set[str]]:
) -> Dict[str, Set[str]]:
"""Match regex pattern against columns to extract column names from
_extended_summary_
12 changes: 11 additions & 1 deletion app/utils/gsheets.py
@@ -1,6 +1,7 @@
from typing import Dict, List, Union

import gspread
from fastapi import HTTPException, status
from google.oauth2 import service_account

from app.core.config import Settings
@@ -28,7 +29,16 @@ def get_records_from_gsheets(
client = gspread.authorize(credentials)

# get the instance of the Spreadsheet
sheet = client.open_by_key(sheet_id)
try:
sheet = client.open_by_key(sheet_id)
except Exception as e:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail={
"error": f"{e}",
"message": "This caller does not have permission for the sheet",
},
)

# get the very first worksheet present if no tab is mentioned
    # a tab is a worksheet window inside the Google Sheet
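Not part of the diff: a short sketch of the new error path, assuming the Google Sheet has not been shared with the gspread service-account email; sheet_id and worksheet are placeholders.

from fastapi import HTTPException

from app.utils.gsheets import get_records_from_gsheets

try:
    records = get_records_from_gsheets(
        sheet_id="<google-sheet-id>",  # placeholder
        worksheet="tags",              # placeholder tab name
    )
except HTTPException as exc:
    # the underlying gspread error is re-raised as a 403 with an explanatory detail payload
    print(exc.status_code, exc.detail)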
