diff --git a/poetry.lock b/poetry.lock index 506c43f..50e62e1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -3172,6 +3172,20 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "python-dotenv" +version = "1.0.1" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.8" +files = [ + {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, + {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + [[package]] name = "python-json-logger" version = "2.0.7" @@ -4353,4 +4367,4 @@ propcache = ">=0.2.0" [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "b221f6df6159ddf1a012515162d9ee5d217cf8e4902cdce7e7277b378a4b8f5d" +content-hash = "236f13732306ac65d5f1fba74680b4968888a19340b7335c560a723b36e3b724" diff --git a/pyproject.toml b/pyproject.toml index 3da851f..aa28e79 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ seaborn = "^0.13.2" geopandas = "^1.0.1" geodatasets = "^2024.8.0" streamlit = "^1.39.0" +python-dotenv = "^1.0.1" [tool.poetry.group.dev.dependencies] pre-commit = "^3.8.0" diff --git a/src/config.py b/src/config.py index f46d9c7..e5ece3e 100644 --- a/src/config.py +++ b/src/config.py @@ -1,7 +1,15 @@ +"""Override any of these settings by adding environment variables with the same name.""" + from pathlib import Path +import os +from dotenv import load_dotenv, find_dotenv + +load_dotenv(find_dotenv()) -REPO_NAME = "ClimatePolicyRadar/all-document-text-data" +REPO_NAME = os.getenv("REPO_NAME", 
"ClimatePolicyRadar/all-document-text-data") REPO_URL = f"https://huggingface.co/datasets/{REPO_NAME}" -CACHE_DIR = Path(__file__).parent / "../cache" +CACHE_DIR = Path(os.getenv("CACHE_DIR", Path(__file__).parent / "../cache"))  # env override arrives as str; keep CACHE_DIR a Path either way -REVISION = "main" # Use this to set a commit hash. Recommended! +DATA_REVISION = os.getenv( + "DATA_REVISION", "main" +) # Use this to set a commit hash. Recommended! diff --git a/src/notebooks/1-loading-data.ipynb b/src/notebooks/1-loading-data.ipynb index 4ea902c..cb114f6 100644 --- a/src/notebooks/1-loading-data.ipynb +++ b/src/notebooks/1-loading-data.ipynb @@ -65,14 +65,14 @@ "source": [ "REPO_NAME = \"ClimatePolicyRadar/all-document-text-data\"\n", "REPO_URL = f\"https://huggingface.co/datasets/{REPO_NAME}\"\n", - "CACHE_DIR = \"../../cache\"\n", + "DATA_CACHE_DIR = \"../../cache\"\n", "\n", "REVISION = \"main\" # Use this to set a commit hash. Recommended!\n", "\n", "snapshot_download(\n", " repo_id=REPO_NAME,\n", " repo_type=\"dataset\",\n", - " local_dir=CACHE_DIR,\n", + " local_dir=DATA_CACHE_DIR,\n", " revision=REVISION,\n", " allow_patterns=[\"*.parquet\"],\n", ")" @@ -109,7 +109,7 @@ "# Create a view called 'open_data', and count the number of rows and distinct documents\n", "# in the view\n", "db.execute(\n", - " f\"CREATE VIEW open_data AS SELECT * FROM read_parquet('{CACHE_DIR}/*.parquet')\"\n", + " f\"CREATE VIEW open_data AS SELECT * FROM read_parquet('{DATA_CACHE_DIR}/*.parquet')\"\n", ")\n", "db.sql(\"SELECT COUNT(*), COUNT(DISTINCT document_id) FROM open_data\")" ] diff --git a/src/streamlit_apps/searchable_world_map.py b/src/streamlit_apps/searchable_world_map.py index c4d91e4..c0c06a8 100644 --- a/src/streamlit_apps/searchable_world_map.py +++ b/src/streamlit_apps/searchable_world_map.py @@ -10,14 +10,14 @@ from src.data_helpers import download_data -from src.config import CACHE_DIR, REVISION +from src.config import CACHE_DIR, DATA_REVISION @st.cache_resource def load_data(): download_data( cache_dir=str(CACHE_DIR), 
 revision=REVISION, + revision=DATA_REVISION, ) db = duckdb.connect()