Skip to content

Commit

Permalink
Merge pull request #34 from Ingenjorsarbete-For-Klimatet/feature/extr…
Browse files Browse the repository at this point in the history
…action-script

Feature/extraction script
  • Loading branch information
mgcth authored Jun 6, 2024
2 parents 9cb11e8 + 7edc9fd commit c509ac4
Show file tree
Hide file tree
Showing 28 changed files with 823 additions and 1,239 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/github-action-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python-version: ["3.10", "3.11", "3.12"]
python-version: ["3.10", "3.11"]

steps:
- uses: actions/checkout@v2
Expand Down Expand Up @@ -38,4 +38,4 @@ jobs:
message: ${{ env.total }}%
minColorRange: 50
maxColorRange: 90
valColorRange: ${{ env.total }}
valColorRange: ${{ env.total }}
2 changes: 1 addition & 1 deletion .github/workflows/github-action-type.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ jobs:
pip install -e ".[type]"
- name: Type checking with mypy
run: >
MYPYPATH=src
mypy
--namespace-packages
--explicit-package-bases
--allow-redefinition
--ignore-missing-imports
src
11 changes: 11 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,14 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Other
*.laz
*.trj
.DS_Store
*.geojson
*.json
*.mbtiles
*.pmtiles
*.gpkg
*.tif
*.pdf
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

<p align="center">
<a href="https://www.python.org">
<img src="https://img.shields.io/badge/Python-3.10%20|%203.11%20|%203.12-blue" alt="Python: 3.10 - 3.12" style="max-width: 100%;">
<img src="https://img.shields.io/badge/Python-3.10%20|%203.11-blue" alt="Python: 3.10 - 3.11" style="max-width: 100%;">
</a>
<a href="https://pytest.org">
<img src="https://img.shields.io/badge/Testing_framework-pytest-a04000" alt="Testing framework: pytest" style="max-width: 100%;">
Expand All @@ -36,4 +36,3 @@
## Description

Welcome to ifk-lantmateriet. This repo contains code to parse data from Lantmäteriet.

15 changes: 13 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,16 @@ readme = "README.md"
authors = [
{ name = "Mladen Gibanica", email = "[email protected]" },
]
requires-python = ">=3.9"
dependencies = ["geopandas ~= 0.14", "pyogrio ~= 0.7.0", "pyarrow ~= 14.0"]
requires-python = ">=3.10,<3.12"
dependencies = [
"geopandas ~= 0.14",
"pyogrio ~= 0.7",
"pyarrow ~= 16.0",
"unidecode ~= 1.3",
"tqdm ~= 4.66",
"typer ~= 0.12",
"ray ~= 2.20"
]

[project.optional-dependencies]
lint = [
Expand All @@ -35,6 +43,9 @@ dev = [
"ipykernel ~= 6.26",
]

[project.scripts]
ifk-lantmateriet = "lantmateriet.cli:app"

[tool.setuptools.packages.find]
where = ["src"]
exclude = ["material"]
Expand Down
129 changes: 129 additions & 0 deletions src/lantmateriet/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
"""API module."""

import io
import json
import logging
import os
import zipfile
from pathlib import Path

import requests
from tqdm import tqdm

# HTTP status code that get_request treats as success.
STATUS_OK = 200
# Number of bytes requested per read while a download is streamed into memory.
BLOCK_SIZE = 1024
# Timeout handed to requests.get (seconds, per the requests documentation).
REQUEST_TIMEOUT = 200
# Base URL used to build the order-information endpoint.
ORDER_URL = "https://api.lantmateriet.se"
# Base URL used to build the file-listing endpoint of an order.
DOWNLOAD_URL = "https://download-geotorget.lantmateriet.se"
# Bearer token, read from the environment at import time: importing this module
# raises KeyError when LANTMATERIET_API_TOKEN is not set.
TOKEN = os.environ["LANTMATERIET_API_TOKEN"]

logger = logging.getLogger(__name__)


def get_request(url: str) -> requests.Response:
    """Send an authenticated, streaming GET request to a url.

    The module-level ``TOKEN`` is sent as a bearer token in the
    ``Authorization`` header. The request is made with ``stream=True``, so the
    body is not read here; the caller consumes (or closes) the response.

    Args:
        url: url to request from

    Returns:
        the open response, only when its status code equals ``STATUS_OK``

    Raises:
        requests.exceptions.HTTPError: if the status code is not ``STATUS_OK``.
            Exceptions raised by ``requests.get`` itself are not caught here
            and propagate unchanged.
    """
    logger.debug(f"Fetching from {url}.")

    headers = {"Authorization": f"Bearer {TOKEN}"}
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT, stream=True)

    if response.status_code != STATUS_OK:
        status_code = response.status_code
        # A streamed response holds on to its connection until it is consumed
        # or closed; nobody will read a failed response, so release it now.
        response.close()
        raise requests.exceptions.HTTPError(
            f"Could not request from {url}. Status code: {status_code}."
        )

    logger.debug(f"Successful request from {url}.")

    return response


class Lantmateriet:
    """Lantmäteriet class.

    Client for a single order: on construction it fetches the order
    information and the list of downloadable files, and it can then download
    a file by title and unpack it into the save path.
    """

    def __init__(self, order_id: str, save_path: str):
        """Initialise Lantmäteriet.

        Creates the save directory and performs two requests: one for the
        order information and one for the list of available files.

        Args:
            order_id: order id to fetch data from
            save_path: path to save downloaded files to
        """
        order_url = ORDER_URL + f"/geotorget/orderhanterare/v2/{order_id}"
        download_url = DOWNLOAD_URL + f"/download/{order_id}/files"
        self._save_path = save_path

        # parents=True so that a nested, not yet existing save path also works.
        Path(save_path).mkdir(parents=True, exist_ok=True)
        self._order_enpoint = json.loads(get_request(order_url).content)
        available_files = json.loads(get_request(download_url).content)
        # Index the file entries by title so that download() can look them up.
        self._available_files_enpoint = {
            item["title"]: item for item in available_files
        }

    @property
    def order(self) -> dict[str, str]:
        """Get order information."""
        return self._order_enpoint

    @property
    def available_files(self) -> list[str]:
        """Get available files."""
        return list(self._available_files_enpoint)

    def download(self, title: str) -> None:
        """Download file by title.

        The file is downloaded into memory. If it is a zip archive its
        members are extracted to the save path; otherwise nothing is written
        to disk and a warning is logged.

        Args:
            title: title of file to download

        Raises:
            KeyError: if title is not among the available files
        """
        logger.info(f"Started downloading {title}")

        url = self._available_files_enpoint[title]["href"]
        response = get_request(url)
        try:
            buffer = self._download(response)
        finally:
            # The response is streamed; close it so the connection is released
            # even when reading the body fails part-way.
            response.close()

        if not zipfile.is_zipfile(buffer):
            # Previously this case was reported as "downloaded and unpacked"
            # although the payload was dropped without being saved.
            logger.warning(f"{title} is not a zip file, nothing was unpacked")
            return

        self._unzip(buffer)

        logger.info(f"Downloaded and unpacked {title} to {self._save_path}")

    def _download(self, response: requests.Response) -> io.BytesIO:
        """Read a streamed response into memory with a progress bar.

        Args:
            response: requests response object

        Returns:
            bytesio buffer holding everything read from the response
        """
        # Content-Length only sizes the progress bar; 0 means unknown total.
        file_size = int(response.headers.get("Content-Length", 0))
        buffer = io.BytesIO()
        with tqdm.wrapattr(
            response.raw, "read", total=file_size, desc="Downloading"
        ) as r_raw:
            # An empty read signals the end of the stream.
            while chunk := r_raw.read(BLOCK_SIZE):
                buffer.write(chunk)

        return buffer

    def _unzip(self, buffer: io.BytesIO) -> None:
        """Extract zip and save to disk.

        Members that fail with a zip error are logged and skipped so that the
        remaining members are still extracted.

        Args:
            buffer: buffer of downloaded content
        """
        with zipfile.ZipFile(buffer) as archive:
            for member in tqdm(archive.infolist(), desc="Extracting"):
                try:
                    archive.extract(member, self._save_path)
                except zipfile.BadZipFile:
                    logger.error(f"Can't unzip {member.filename}.")
38 changes: 38 additions & 0 deletions src/lantmateriet/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""CLI module."""

import typer
from lantmateriet.api import Lantmateriet
from lantmateriet.extract import extract
from tqdm import tqdm

# Typer application object that the callback and commands below register on.
app = typer.Typer()


# Registered as the app-level callback. Per Typer's documentation the
# docstring below is shown as the application's help text, so it is
# user-facing and deliberately kept short.
@app.callback()
def callback() -> None:
    """Lantmäteriet CLI client."""


@app.command()
def download_all(order_id: str, save_path: str):
    """Download files.
    Args:
        order_id: lantmäteriet order id
        save_path: path to save files to
    """
    # NOTE: the docstring above doubles as the command's help text in Typer,
    # so its wording is left untouched.
    lantmateriet = Lantmateriet(order_id, save_path)
    # Fetch every file listed for the order, one at a time, with a progress bar.
    for title in tqdm(lantmateriet.available_files):
        lantmateriet.download(title)


@app.command()
def extract_all(source_path: str, target_path: str) -> None:
    """Extract geojson from gpkg files.
    Args:
        source_path: path to search for files
        target_path: path to save extracted files to
    """
    # target_path is now annotated like every other CLI parameter. Typer
    # already treats an unannotated parameter as a string, so the command line
    # is unchanged. The docstring above is the command's help text and is
    # therefore left as is.
    extract(source_path, target_path)
59 changes: 0 additions & 59 deletions src/lantmateriet/communication.py

This file was deleted.

Loading

0 comments on commit c509ac4

Please sign in to comment.