Merge pull request #17 from climatepolicyradar/usability

Usability
climatepolicyradar · Nov 3, 2024 · 21ba05c · 21ba05c
2 parents ad6d7c1 + 276b36b
commit 21ba05c
Show file tree

Hide file tree

Showing 15 changed files with 38 additions and 23 deletions.
diff --git a/README.md b/README.md
@@ -1,3 +1,9 @@
-# Open Data Helpers
+# Climate Policy Radar Open Data
 
-This repo contains helpers for our open data: https://huggingface.co/datasets/ClimatePolicyRadar/all-document-text-data
+This repo contains helpers for our open data, which you can find on [Hugging Face](https://huggingface.co/datasets/ClimatePolicyRadar/all-document-text-data).
+
+## Getting started
+
+The best place to get started is the loading-data notebook at `src/notebooks/1-loading-data.ipynb`.
+
+Other example notebooks and Streamlit demos that use this open data are in the `src` directory. Feel free to raise an issue if you'd like to add or propose an interesting use case for this data.
diff --git a/src/config.py b/src/config.py
@@ -0,0 +1,7 @@
+from pathlib import Path
+
+REPO_NAME = "ClimatePolicyRadar/all-document-text-data"
+REPO_URL = f"https://huggingface.co/datasets/{REPO_NAME}"
+CACHE_DIR = Path(__file__).parent / "../cache"
+
+REVISION = "main"  # Use this to set a commit hash. Recommended!
diff --git a/...ries/ne_50m_admin_0_countries.README.html → ...tors/ne_50m_admin_0_countries.README.html b/...ries/ne_50m_admin_0_countries.README.html → ...tors/ne_50m_admin_0_countries.README.html
diff --git a/...ries/ne_50m_admin_0_countries.VERSION.txt → ...tors/ne_50m_admin_0_countries.VERSION.txt b/...ries/ne_50m_admin_0_countries.VERSION.txt → ...tors/ne_50m_admin_0_countries.VERSION.txt
diff --git a/..._0_countries/ne_50m_admin_0_countries.cpg → ...arth_vectors/ne_50m_admin_0_countries.cpg b/..._0_countries/ne_50m_admin_0_countries.cpg → ...arth_vectors/ne_50m_admin_0_countries.cpg
diff --git a/..._0_countries/ne_50m_admin_0_countries.dbf → ...arth_vectors/ne_50m_admin_0_countries.dbf b/..._0_countries/ne_50m_admin_0_countries.dbf → ...arth_vectors/ne_50m_admin_0_countries.dbf
diff --git a/..._0_countries/ne_50m_admin_0_countries.prj → ...arth_vectors/ne_50m_admin_0_countries.prj b/..._0_countries/ne_50m_admin_0_countries.prj → ...arth_vectors/ne_50m_admin_0_countries.prj
diff --git a/..._0_countries/ne_50m_admin_0_countries.shp → ...arth_vectors/ne_50m_admin_0_countries.shp b/..._0_countries/ne_50m_admin_0_countries.shp → ...arth_vectors/ne_50m_admin_0_countries.shp
diff --git a/..._0_countries/ne_50m_admin_0_countries.shx → ...arth_vectors/ne_50m_admin_0_countries.shx b/..._0_countries/ne_50m_admin_0_countries.shx → ...arth_vectors/ne_50m_admin_0_countries.shx
diff --git a/streamlit_apps/data_helpers.py → src/data_helpers.py b/streamlit_apps/data_helpers.py → src/data_helpers.py
@@ -1,11 +1,7 @@
 from typing import Optional
 from huggingface_hub import snapshot_download
 
-REPO_NAME = "ClimatePolicyRadar/all-document-text-data"
-REPO_URL = f"https://huggingface.co/datasets/{REPO_NAME}"
-CACHE_DIR = "../cache"
-
-REVISION = "main"  # Use this to set a commit hash. Recommended!
+from src.config import REPO_NAME
 
 
 def download_data(cache_dir: str, revision: Optional[str] = None) -> None:

diff --git a/experimental/bigram-search.ipynb → src/experimental/bigram-search.ipynb b/experimental/bigram-search.ipynb → src/experimental/bigram-search.ipynb
diff --git a/notebooks/1-loading-data.ipynb → src/notebooks/1-loading-data.ipynb b/notebooks/1-loading-data.ipynb → src/notebooks/1-loading-data.ipynb
diff --git a/src/streamlit_apps/README.md b/src/streamlit_apps/README.md
@@ -0,0 +1,7 @@
+# Streamlit apps
+
+## Searchable World Map
+
+A demonstration of plotting maps using Climate Policy Radar data. This app allows you to compare the relative use of different keywords in this data across the world.
+
+![A screenshot of the world map demo](world_map_screenshot.png)
diff --git a/streamlit_apps/searchable_world_map.py → src/streamlit_apps/searchable_world_map.py b/streamlit_apps/searchable_world_map.py → src/streamlit_apps/searchable_world_map.py
@@ -9,16 +9,15 @@
 import streamlit as st
 
 
-from data_helpers import download_data
-
-CACHE_DIR = Path(__file__).parent / "../cache"
+from src.data_helpers import download_data
+from src.config import CACHE_DIR, REVISION
 
 
 @st.cache_resource
 def load_data():
     download_data(
         cache_dir=str(CACHE_DIR),
-        revision="bd0abf24ae34d3150bdd8ac66f36a28e47f3ee93",
+        revision=REVISION,
     )
 
     db = duckdb.connect()
@@ -84,8 +83,7 @@ def load_world_geometries():
     Drop Antarctica and Seven seas (open ocean) geometries to make the map look nicer.
     """
     world = gpd.read_file(
-        Path(__file__).parent
-        / "./ne_50m_admin_0_countries/ne_50m_admin_0_countries.shp"
+        Path(__file__).parent / "../data/earth_vectors/ne_50m_admin_0_countries.shp"
     )
     world = world.to_crs(
         "+proj=eck4 +lon_0=0 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs"
@@ -145,8 +143,9 @@ def plot_country_map(
     divider = make_axes_locatable(axis)
     cax = divider.append_axes("bottom", size="5%", pad=0.05)
     fig.colorbar(
-        mpl.cm.ScalarMappable(
-            norm=mpl.colors.Normalize(vmin=min_count, vmax=max_count), cmap="viridis_r"
+        mpl.cm.ScalarMappable(  # type: ignore
+            norm=mpl.colors.Normalize(vmin=min_count, vmax=max_count),  # type: ignore
+            cmap="viridis_r",
         ),
         cax=cax,
         orientation="horizontal",
@@ -168,7 +167,7 @@ def plot_country_map(
 
 def plot_normalised_unnormalised_subplots(
     kwds,
-) -> tuple[plt.Figure, pd.DataFrame, pd.DataFrame]:
+) -> tuple[plt.Figure, pd.DataFrame, pd.DataFrame]:  # type: ignore
     fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 9), dpi=300)
 
     df_unnorm = plot_country_map(
@@ -199,7 +198,7 @@ def plot_normalised_unnormalised_subplots(
         "Search for keywords in the dataset and see where they appear on a world map."
     )
     with st.expander("You can use regex! Open for examples"):
-        st.markdown("""
+        st.markdown(r"""
         - `natural(-|\s)resource`: match "natural-resource" and "natural resource"
         - `fish(es)?`: match "fish" and "fishes"
         - `elephants?`: match "elephant" and "elephants"

diff --git a/src/streamlit_apps/world_map_screenshot.png b/src/streamlit_apps/world_map_screenshot.png