474 As a visitor who is not logged in, I can retrieve a data catalog in CSV format #480

63 changes: 63 additions & 0 deletions server/api/catalogs/rendering.py
@@ -0,0 +1,63 @@
import csv
import io

from server.application.catalogs.views import CatalogExportView


def to_csv(export: CatalogExportView) -> str:
    fieldnames = [
        "titre",
        "description",
        "service",
        "couv_geo",
        "format",
        "si",
        "contact_service",
        "contact_personne",
        "freq_maj",
        "date_maj",
        "url",
        "licence",
        "mots_cles",
    ]

    fieldnames.extend(extra_field.name for extra_field in export.catalog.extra_fields)

    f = io.StringIO()
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()

    for dataset in export.datasets:
        row = {
            "titre": dataset.title,
            "description": dataset.description,
            "service": dataset.service,
            "couv_geo": dataset.geographical_coverage,
            "format": ", ".join(fmt.value for fmt in dataset.formats),
            "si": dataset.technical_source or "",
            "contact_service": dataset.producer_email or "",
            "contact_personne": ", ".join(dataset.contact_emails),
            "freq_maj": (
                freq.value if (freq := dataset.update_frequency) is not None else ""
            ),
            "date_maj": (
                d.strftime("%d/%m/%Y")
                if (d := dataset.last_updated_at) is not None
                else ""
            ),
            "url": dataset.url or "",
            "licence": dataset.license or "",
            "mots_cles": ", ".join(tag.name for tag in dataset.tags),
        }

        extra_field_value_by_id = {
            extra_field_value.extra_field_id: extra_field_value.value
            for extra_field_value in dataset.extra_field_values
        }

        for extra_field in export.catalog.extra_fields:
            row[extra_field.name] = extra_field_value_by_id.get(extra_field.id, "")

        writer.writerow(row)

    return f.getvalue()
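
A detail worth noting here: csv.DictWriter fills keys missing from a row with restval (an empty string by default) and raises ValueError on unexpected keys, so the explicit extra_field_value_by_id.get(extra_field.id, "") keeps extra-field cells aligned with the header even when a dataset carries no value. A standalone sketch of that stdlib behaviour:

import csv
import io

f = io.StringIO()
writer = csv.DictWriter(f, fieldnames=["titre", "si"])
writer.writeheader()
writer.writerow({"titre": "Ma donnée"})  # "si" missing: restval "" is written
print(f.getvalue())
# titre,si
# Ma donnée,
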
35 changes: 33 additions & 2 deletions server/api/catalogs/routes.py
@@ -1,17 +1,19 @@
from fastapi import APIRouter, Depends, HTTPException
from fastapi.encoders import jsonable_encoder
-from fastapi.responses import JSONResponse
+from fastapi.responses import JSONResponse, Response

from server.application.catalogs.commands import CreateCatalog
-from server.application.catalogs.queries import GetCatalogBySiret
+from server.application.catalogs.queries import GetCatalogBySiret, GetCatalogExport
from server.application.catalogs.views import CatalogView
from server.config.di import resolve
from server.domain.catalogs.exceptions import CatalogAlreadyExists, CatalogDoesNotExist
from server.domain.organizations.exceptions import OrganizationDoesNotExist
from server.domain.organizations.types import Siret
from server.infrastructure.catalogs.caching import ExportCache
from server.seedwork.application.messages import MessageBus

from ..auth.permissions import HasAPIKey, IsAuthenticated
from .rendering import to_csv
from .schemas import CatalogCreate

router = APIRouter(prefix="/catalogs", tags=["catalogs"])
@@ -54,3 +56,32 @@ async def get_catalog(siret: Siret) -> CatalogView:
        return await bus.execute(GetCatalogBySiret(siret=siret))
    except CatalogDoesNotExist as exc:
        raise HTTPException(404, detail=str(exc))


@router.get("/{siret}/export.csv")
async def export_catalog(siret: Siret) -> Response:
    export_cache = resolve(ExportCache)

    content = export_cache.get(siret)

    if content is not None:
        return Response(
            content,
            headers={"content-type": "text/csv", **export_cache.hit_headers},
        )

    bus = resolve(MessageBus)

    try:
        export = await bus.execute(GetCatalogExport(siret=siret))
    except CatalogDoesNotExist as exc:
        raise HTTPException(404, detail=str(exc))

    content = to_csv(export)

    export_cache.set(siret, content)

    return Response(
        content,
        headers={"content-type": "text/csv", **export_cache.miss_headers},
    )
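
A quick manual check of the endpoint and both cache layers, as a sketch (the base URL and SIRET are hypothetical; assumes a locally running server):

import httpx

url = "http://localhost:8000/catalogs/55204944776279/export.csv"

# First request: a cache miss; the export is rendered, stored, and returned.
first = httpx.get(url)
assert first.headers["content-type"].startswith("text/csv")
assert first.headers["cache-control"] == "max-age=86400"  # 1 day, per di.py
assert "x-cache" not in first.headers

# Second request within max_age: served from ExportCache, with an extra header.
second = httpx.get(url)
assert second.headers.get("x-cache") == "HIT"
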
26 changes: 24 additions & 2 deletions server/application/catalogs/handlers.py
@@ -5,13 +5,15 @@
from server.domain.catalogs.exceptions import CatalogAlreadyExists, CatalogDoesNotExist
from server.domain.catalogs.repositories import CatalogRepository
from server.domain.common.types import ID
from server.domain.datasets.repositories import DatasetRepository
from server.domain.datasets.specifications import DatasetSpec
from server.domain.organizations.exceptions import OrganizationDoesNotExist
from server.domain.organizations.repositories import OrganizationRepository
from server.domain.organizations.types import Siret

from .commands import CreateCatalog
-from .queries import GetAllCatalogs, GetCatalogBySiret
-from .views import CatalogView
+from .queries import GetAllCatalogs, GetCatalogBySiret, GetCatalogExport
+from .views import CatalogExportView, CatalogView, DatasetExportView


async def get_catalog_by_siret(query: GetCatalogBySiret) -> CatalogView:
@@ -64,3 +66,23 @@ async def create_catalog(
    )

    return await repository.insert(catalog)


async def get_catalog_export(query: GetCatalogExport) -> CatalogExportView:
    repository = resolve(CatalogRepository)
    dataset_repository = resolve(DatasetRepository)

    siret = query.siret
    catalog = await repository.get_by_siret(siret)

    if catalog is None:
        raise CatalogDoesNotExist(siret)

    datasets, _ = await dataset_repository.get_all(
        page=None, spec=DatasetSpec(organization_siret=siret)
    )

    return CatalogExportView(
        catalog=CatalogView(**catalog.dict()),
        datasets=[DatasetExportView(**dataset.dict()) for (dataset, _) in datasets],
    )
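
The new query can be exercised end-to-end through the message bus; a sketch meant for an async context such as a pytest-asyncio test, with a hypothetical SIRET:

from server.application.catalogs.queries import GetCatalogExport
from server.config.di import resolve
from server.seedwork.application.messages import MessageBus

bus = resolve(MessageBus)

# Raises CatalogDoesNotExist if no catalog is registered for this SIRET.
export = await bus.execute(GetCatalogExport(siret="55204944776279"))
print(f"{len(export.datasets)} dataset(s) in the export")
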
6 changes: 5 additions & 1 deletion server/application/catalogs/queries.py
@@ -3,7 +3,7 @@
from server.domain.organizations.types import Siret
from server.seedwork.application.queries import Query

-from .views import CatalogView
+from .views import CatalogExportView, CatalogView


class GetCatalogBySiret(Query[CatalogView]):
@@ -12,3 +12,7 @@ class GetCatalogBySiret(Query[CatalogView]):

class GetAllCatalogs(Query[List[CatalogView]]):
    pass


class GetCatalogExport(Query[CatalogExportView]):
    siret: Siret
28 changes: 27 additions & 1 deletion server/application/catalogs/views.py
@@ -1,9 +1,13 @@
-from typing import List
+import datetime as dt
+from typing import List, Optional

from pydantic import BaseModel

from server.application.datasets.views import ExtraFieldValueView
from server.application.tags.views import TagView
from server.domain.catalogs.entities import ExtraFieldType
from server.domain.common.types import ID
from server.domain.datasets.entities import DataFormat, UpdateFrequency

from ..organizations.views import OrganizationView

@@ -20,3 +24,25 @@ class ExtraFieldView(BaseModel):
class CatalogView(BaseModel):
    organization: OrganizationView
    extra_fields: List[ExtraFieldView]


class DatasetExportView(BaseModel):
    title: str
    description: str
    service: str
    geographical_coverage: str
    formats: List[DataFormat]
    technical_source: Optional[str]
    producer_email: Optional[str]
    contact_emails: List[str]
    update_frequency: Optional[UpdateFrequency]
    last_updated_at: Optional[dt.datetime]
    url: Optional[str]
    license: Optional[str]
    tags: List[TagView]
    extra_field_values: List[ExtraFieldValueView]


class CatalogExportView(BaseModel):
    catalog: CatalogView
    datasets: List[DatasetExportView]
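
A minimal sketch of the payload shape to_csv() consumes, with hypothetical values; None optionals and empty lists render as empty CSV cells, and last_updated_at below renders as 01/01/2023:

import datetime as dt

from server.application.catalogs.views import DatasetExportView

dataset = DatasetExportView(
    title="Ma donnée",
    description="Un exemple.",
    service="Direction X",
    geographical_coverage="France métropolitaine",
    formats=[],
    technical_source=None,
    producer_email=None,
    contact_emails=["contact@example.org"],
    update_frequency=None,
    last_updated_at=dt.datetime(2023, 1, 1),
    url=None,
    license="Licence Ouverte",
    tags=[],
    extra_field_values=[],
)
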
5 changes: 5 additions & 0 deletions server/config/di.py
@@ -58,6 +58,7 @@ async def create_todo(...):

Or in any custom scripts as seems fit.
"""
import datetime as dt
from typing import Type, TypeVar

from server.application.auth.passwords import PasswordEncoder, Signer
@@ -88,6 +89,7 @@ async def create_todo(...):
from server.infrastructure.catalog_records.repositories import (
    SqlCatalogRecordRepository,
)
from server.infrastructure.catalogs.caching import ExportCache
from server.infrastructure.catalogs.repositories import SqlCatalogRepository
from server.infrastructure.database import Database
from server.infrastructure.datasets.repositories import SqlDatasetRepository
@@ -165,6 +167,9 @@ def configure(container: "Container") -> None:
    container.register_instance(OrganizationRepository, SqlOrganizationRepository(db))
    container.register_instance(CatalogRepository, SqlCatalogRepository(db))

    # Caching
    container.register_instance(ExportCache, ExportCache(max_age=dt.timedelta(days=1)))


_CONTAINER = Container(configure)

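
Because a single ExportCache instance is registered, every request handler that resolves it shares one in-memory store for the whole process; a sketch of that property:

from server.config.di import resolve
from server.infrastructure.catalogs.caching import ExportCache

# One process-wide instance: entries cached by one request are visible to all.
assert resolve(ExportCache) is resolve(ExportCache)
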
2 changes: 1 addition & 1 deletion server/domain/datasets/repositories.py
@@ -24,7 +24,7 @@ def make_id(self) -> ID:
        return id_factory()

    async def get_all(
-        self, *, page: Page = Page(), spec: DatasetSpec = DatasetSpec()
+        self, *, page: Optional[Page] = Page(), spec: DatasetSpec = DatasetSpec()
    ) -> Tuple[List[Tuple[Dataset, DatasetGetAllExtras]], int]:
        raise NotImplementedError  # pragma: no cover

50 changes: 50 additions & 0 deletions server/infrastructure/catalogs/caching.py
@@ -0,0 +1,50 @@
import datetime as dt
from typing import Callable, Dict, Optional, Tuple

from server.domain.common.datetime import now
from server.domain.organizations.types import Siret


class ExportCache:
"""
Implement two types of cache to reduce the load associated to exporting catalogs:

* Client-side caching, by adding 'Cache-Control' headers.
Individual clients will only make new requests when their cache entry has expired.

* Server-side caching, by storing exports in memory and reusing them for new
clients until we consider them as stale (configurable).
"""

def __init__(
self, max_age: dt.timedelta, nowfunc: Callable[[], dt.datetime] = now
) -> None:
self._exports: Dict[str, Tuple[dt.datetime, str]] = {}
self._max_age = max_age
self._cache_control = f"max-age={int(self._max_age.total_seconds())}"
self._now = nowfunc

def get(self, siret: Siret) -> Optional[str]:
try:
expiry_date, content = self._exports[siret]
except KeyError:
return None

is_stale = self._now() > expiry_date

if is_stale:
del self._exports[siret]
return None

return content

def set(self, siret: Siret, content: str) -> None:
self._exports[siret] = (self._now() + self._max_age, content)

@property
def hit_headers(self) -> dict:
return {"Cache-Control": self._cache_control, "X-Cache": "HIT"}

@property
def miss_headers(self) -> dict:
return {"Cache-Control": self._cache_control}
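

The injectable nowfunc makes staleness testable without sleeping; a sketch with a hand-rolled fake clock and a hypothetical SIRET:

import datetime as dt

clock = {"now": dt.datetime(2023, 1, 1, tzinfo=dt.timezone.utc)}
cache = ExportCache(max_age=dt.timedelta(days=1), nowfunc=lambda: clock["now"])

cache.set("55204944776279", "titre,description\r\n")
assert cache.get("55204944776279") is not None  # fresh entry, cache hit

clock["now"] += dt.timedelta(days=2)  # jump past max_age
assert cache.get("55204944776279") is None  # stale entry evicted on access
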
8 changes: 7 additions & 1 deletion server/infrastructure/catalogs/module.py
@@ -3,8 +3,13 @@
    create_catalog,
    get_all_catalogs,
    get_catalog_by_siret,
    get_catalog_export,
)
+from server.application.catalogs.queries import (
+    GetAllCatalogs,
+    GetCatalogBySiret,
+    GetCatalogExport,
+)
-from server.application.catalogs.queries import GetAllCatalogs, GetCatalogBySiret
from server.seedwork.application.modules import Module


@@ -16,4 +21,5 @@ class CatalogsModule(Module):
    query_handlers = {
        GetCatalogBySiret: get_catalog_by_siret,
        GetAllCatalogs: get_all_catalogs,
        GetCatalogExport: get_catalog_export,
    }
12 changes: 9 additions & 3 deletions server/infrastructure/datasets/repositories.py
@@ -29,16 +29,22 @@ def __init__(self, db: Database) -> None:
    async def get_all(
        self,
        *,
-        page: Page = Page(),
+        page: Optional[Page] = Page(),
        spec: DatasetSpec = DatasetSpec(),
    ) -> Tuple[List[Tuple[Dataset, DatasetGetAllExtras]], int]:
-        limit, offset = to_limit_offset(page)

        async with self._db.session() as session:
            query = GetAllQuery(spec)
            stmt = query.statement

            count = await get_count_from(stmt, session)
-            result = await session.stream(stmt.limit(limit).offset(offset))

+            if page is not None:
+                limit, offset = to_limit_offset(page)
+                stmt = stmt.limit(limit).offset(offset)

+            result = await session.stream(stmt)

            items = [
                (make_entity(query.instance(row)), query.extras(row))
                async for row in result
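
With page=None the statement is executed without LIMIT/OFFSET, which the export handler relies on to fetch the full catalog in one call; a sketch (hypothetical SIRET, repository resolved from the container, run in an async context):

from server.config.di import resolve
from server.domain.datasets.repositories import DatasetRepository
from server.domain.datasets.specifications import DatasetSpec

repository = resolve(DatasetRepository)

# Unpaginated: every (dataset, extras) pair matching the spec is returned,
# so the item count equals the total count computed before pagination.
items, count = await repository.get_all(
    page=None, spec=DatasetSpec(organization_siret="55204944776279")
)
assert len(items) == count
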