From 9136d01dec4245e16a2c292c659f1a0c6836ee6c Mon Sep 17 00:00:00 2001 From: Steffen Kleinle Date: Thu, 24 Feb 2022 14:24:18 +0100 Subject: [PATCH 1/2] Add documentation --- .../stores/importer/steps/FilterDuplicates.kt | 5 +++++ .../backend/stores/importer/steps/FilterLbe.kt | 4 ++++ .../backend/stores/importer/steps/MapFromLbe.kt | 5 +++++ .../stores/importer/steps/PostSanitizeFilter.kt | 4 ++++ .../stores/importer/steps/SanitizeAddress.kt | 15 +++++++++++++++ .../backend/stores/importer/steps/Store.kt | 10 +++++----- 6 files changed, 38 insertions(+), 5 deletions(-) diff --git a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/FilterDuplicates.kt b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/FilterDuplicates.kt index c9ac3732f..0716b8190 100644 --- a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/FilterDuplicates.kt +++ b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/FilterDuplicates.kt @@ -7,6 +7,11 @@ import org.slf4j.Logger class FilterDuplicates(private val logger: Logger) : PipelineStep, List>() { + /** + * Filters the [input] and removes duplicates. + * For duplicates to be detected an exact match of name, postal code and street is necessary. + * The properties of the last accepting store are used if there are multiple valid properties. 
+ */ override fun execute(input: List): List { // Group by name + postal code + street to detect duplicates val groups = input.groupBy { diff --git a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/FilterLbe.kt b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/FilterLbe.kt index 15f5e95b8..cd8c4839d 100644 --- a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/FilterLbe.kt +++ b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/FilterLbe.kt @@ -10,6 +10,10 @@ import org.slf4j.Logger class FilterLbe(private val logger: Logger): PipelineStep, List>() { private val invalidLocations = arrayOf("Musterhausen") + /** + * Filters the [input] and removes [LbeAcceptingStore] with invalid data. + * These are especially stores without name, location or an invalid category. + */ override fun execute(input: List): List = input.filter { filterLbe(it) } private fun filterLbe(store: LbeAcceptingStore) = try { diff --git a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/MapFromLbe.kt b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/MapFromLbe.kt index b95ecf0f1..6123cb42a 100644 --- a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/MapFromLbe.kt +++ b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/MapFromLbe.kt @@ -11,6 +11,11 @@ import org.apache.commons.text.StringEscapeUtils import org.slf4j.Logger class MapFromLbe(private val logger: Logger) : PipelineStep, List>() { + + /** + * Maps the [input] to [AcceptingStore]. + * Properties are cleaned, decoded and converted to the correct types. 
+ */ override fun execute(input: List) = input.mapNotNull { try { AcceptingStore( diff --git a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/PostSanitizeFilter.kt b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/PostSanitizeFilter.kt index 9475164ec..1794c52ef 100644 --- a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/PostSanitizeFilter.kt +++ b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/PostSanitizeFilter.kt @@ -12,6 +12,10 @@ import org.slf4j.Logger class PostSanitizeFilter(private val logger: Logger, httpClient: HttpClient): PipelineStep, List>() { private val featureFetcher = FeatureFetcher(httpClient) + /** + * Filters the [input] preparing storing to the database. + * Stores without longitude, latitude or postal code or outside the states bounding box are removed. + */ override fun execute(input: List): List = runBlocking { val stateBbox = featureFetcher.queryFeatures(listOf(Pair("state", STATE))).first().bbox diff --git a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/SanitizeAddress.kt b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/SanitizeAddress.kt index 3be42a6c9..e69db6446 100644 --- a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/SanitizeAddress.kt +++ b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/SanitizeAddress.kt @@ -11,6 +11,11 @@ class SanitizeAddress(private val logger: Logger) : PipelineStep) = input.mapNotNull { try { if (it.street?.contains(STREET_EXCLUDE_PATTERN) == true) return@mapNotNull it @@ -42,6 +47,12 @@ class SanitizeAddress(private val logger: Logger) : PipelineStep 'Untere Zell'|null, 'Am Römerbad 17'|'a' -> 'Am Römerbad'|'17 a', + * 'Rückermainstr. 2; 1.'|'OG' -> 'Rückermainstr.'|'2'|'1. 
OG' + */ private fun AcceptingStore.sanitizeStreetHouseNumber(): AcceptingStore { val isStreetPolluted = street?.find { it.isDigit() } != null val isHouseNumberPolluted = houseNumber != null && !houseNumberRegex.matches(houseNumber) @@ -73,6 +84,10 @@ class SanitizeAddress(private val logger: Logger) : PipelineStep '86150', 'Augsburg 86161 Rathausplatz' -> '86161', 'A-1234' -> null + */ private fun AcceptingStore.sanitizePostalCode(): AcceptingStore { val oldPostalCode = postalCode ?: return this diff --git a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/Store.kt b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/Store.kt index c54c867ff..3d76d6831 100644 --- a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/Store.kt +++ b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/Store.kt @@ -11,6 +11,10 @@ import org.slf4j.Logger class Store(private val logger: Logger, private val manualImport: Boolean) : PipelineStep, Unit>() { + /** + * Stores the given [input] to the database. + * Longitude, latitude and postal code of [AcceptingStore] must not be null. + */ override fun execute(input: List) { transaction { try { @@ -20,13 +24,9 @@ class Store(private val logger: Logger, private val manualImport: Boolean) : Pip Addresses.deleteAll() input.forEachIndexed { done, acceptingStore -> - if (acceptingStore.postalCode == null) { - logger.info("Skipping '${acceptingStore.name}' because its postal code is null.") - return@forEachIndexed - } val address = AddressEntity.new { street = acceptingStore.streetWithHouseNumber - postalCode = acceptingStore.postalCode + postalCode = acceptingStore.postalCode!! 
locaction = acceptingStore.location countryCode = acceptingStore.countryCode } From 686aeae7dc4141163ed1734d07c04b2e4428999d Mon Sep 17 00:00:00 2001 From: Steffen Kleinle Date: Mon, 28 Feb 2022 13:07:29 +0100 Subject: [PATCH 2/2] Move documentation to class level --- .../backend/stores/importer/steps/FilterDuplicates.kt | 10 +++++----- .../backend/stores/importer/steps/FilterLbe.kt | 8 ++++---- .../backend/stores/importer/steps/MapFromLbe.kt | 8 ++++---- .../stores/importer/steps/PostSanitizeFilter.kt | 8 ++++---- .../backend/stores/importer/steps/SanitizeAddress.kt | 10 +++++----- .../backend/stores/importer/steps/SanitizeGeocode.kt | 10 +++++----- .../backend/stores/importer/steps/Store.kt | 8 ++++---- 7 files changed, 31 insertions(+), 31 deletions(-) diff --git a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/FilterDuplicates.kt b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/FilterDuplicates.kt index 0716b8190..399023118 100644 --- a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/FilterDuplicates.kt +++ b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/FilterDuplicates.kt @@ -5,13 +5,13 @@ import app.ehrenamtskarte.backend.stores.importer.logRemoveDuplicates import app.ehrenamtskarte.backend.stores.importer.types.AcceptingStore import org.slf4j.Logger +/** + * Filters and removes duplicates. + * For duplicates to be detected an exact match of name, postal code and street is necessary. + * The properties of the last accepting store are used if there are multiple valid properties. + */ class FilterDuplicates(private val logger: Logger) : PipelineStep, List>() { - /** - * Filters the [input] and removes duplicates. - * For duplicates to be detected an exact match of name, postal code and street is necessary. - * The properties of the last accepting store are used if there are multiple valid properties. 
- */ override fun execute(input: List): List { // Group by name + postal code + street to detect duplicates val groups = input.groupBy { diff --git a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/FilterLbe.kt b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/FilterLbe.kt index cd8c4839d..a4b8cd165 100644 --- a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/FilterLbe.kt +++ b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/FilterLbe.kt @@ -7,13 +7,13 @@ import app.ehrenamtskarte.backend.stores.importer.matchesNa import app.ehrenamtskarte.backend.stores.importer.types.LbeAcceptingStore import org.slf4j.Logger +/** + * Filters and removes [LbeAcceptingStore] with invalid data. + * These are especially stores without name, location or an invalid category. + */ class FilterLbe(private val logger: Logger): PipelineStep, List>() { private val invalidLocations = arrayOf("Musterhausen") - /** - * Filters the [input] and removes [LbeAcceptingStore] with invalid data. - * These are especially stores without name, location or an invalid category. - */ override fun execute(input: List): List = input.filter { filterLbe(it) } private fun filterLbe(store: LbeAcceptingStore) = try { diff --git a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/MapFromLbe.kt b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/MapFromLbe.kt index 6123cb42a..7a93ca58a 100644 --- a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/MapFromLbe.kt +++ b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/MapFromLbe.kt @@ -10,12 +10,12 @@ import app.ehrenamtskarte.backend.stores.importer.types.LbeAcceptingStore import org.apache.commons.text.StringEscapeUtils import org.slf4j.Logger +/** + * Maps [LbeAcceptingStore] to [AcceptingStore]. + * Properties are cleaned, decoded and converted to the correct types. 
+ */ class MapFromLbe(private val logger: Logger) : PipelineStep, List>() { - /** - * Maps the [input] to [AcceptingStore]. - * Properties are cleaned, decoded and converted to the correct types. - */ override fun execute(input: List) = input.mapNotNull { try { AcceptingStore( diff --git a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/PostSanitizeFilter.kt b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/PostSanitizeFilter.kt index 1794c52ef..4f15e7425 100644 --- a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/PostSanitizeFilter.kt +++ b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/PostSanitizeFilter.kt @@ -9,13 +9,13 @@ import io.ktor.client.* import kotlinx.coroutines.runBlocking import org.slf4j.Logger +/** + * Filters [AcceptingStore] to prepare storing to the database. + * Stores without longitude, latitude or postal code or outside the state's bounding box are removed. + */ class PostSanitizeFilter(private val logger: Logger, httpClient: HttpClient): PipelineStep, List>() { private val featureFetcher = FeatureFetcher(httpClient) - /** - * Filters the [input] preparing storing to the database. - * Stores without longitude, latitude or postal code or outside the states bounding box are removed. 
- */ override fun execute(input: List): List = runBlocking { val stateBbox = featureFetcher.queryFeatures(listOf(Pair("state", STATE))).first().bbox diff --git a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/SanitizeAddress.kt b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/SanitizeAddress.kt index e69db6446..624720d74 100644 --- a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/SanitizeAddress.kt +++ b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/SanitizeAddress.kt @@ -7,15 +7,15 @@ import app.ehrenamtskarte.backend.stores.importer.types.AcceptingStore import org.intellij.lang.annotations.Language import org.slf4j.Logger +/** + * Sanitizes the addresses of the [AcceptingStore]. + * Postal codes are mapped to either the first five digits (German postcode format) or null. + * Street and house numbers are correctly separated. + */ class SanitizeAddress(private val logger: Logger) : PipelineStep, List>() { private val houseNumberRegex = houseNumberRegex() private val postalCodeRegex = Regex("""[0-9]{5}""") - /** - * Sanitizes the addresses of the [input]. - * Postal codes are mapped to either the first five digits (german postcode format) or null. - * Street and house numbers are correctly separated. 
- */ override fun execute(input: List) = input.mapNotNull { try { if (it.street?.contains(STREET_EXCLUDE_PATTERN) == true) return@mapNotNull it diff --git a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/SanitizeGeocode.kt b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/SanitizeGeocode.kt index cd79003ba..a2bdabed9 100644 --- a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/SanitizeGeocode.kt +++ b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/SanitizeGeocode.kt @@ -13,6 +13,11 @@ import org.geojson.Feature import org.geojson.Point import org.slf4j.Logger +/** + * Sanitize the postal code and the coordinates of the [AcceptingStore] using forward geocoding. + * If the coordinates are not inside the bounding box of the postal code, one of those is wrong. + * Then query by the address and use the coordinates OR postal code of the first match to sanitize the store data. + */ class SanitizeGeocode(private val logger: Logger, httpClient: HttpClient) : PipelineStep, List>() { private val featureFetcher = FeatureFetcher(httpClient) @@ -20,11 +25,6 @@ class SanitizeGeocode(private val logger: Logger, httpClient: HttpClient) : Pipe input.map { it.sanitize() } } - /** - * Sanitize the postal code and the coordinates of the [AcceptingStore] using forward geocoding. - * If the coordinates are not inside the bounding box of the postal code, one of those is wrong. - * Then query by the address and use the coordinates OR postal code of the first match to sanitize the store data. 
- */ private suspend fun AcceptingStore.sanitize(): AcceptingStore { if (street?.contains(STREET_EXCLUDE_PATTERN) == true) return this diff --git a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/Store.kt b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/Store.kt index 3d76d6831..cba850c99 100644 --- a/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/Store.kt +++ b/backend/src/main/kotlin/app/ehrenamtskarte/backend/stores/importer/steps/Store.kt @@ -9,12 +9,12 @@ import org.jetbrains.exposed.sql.transactions.transaction import org.postgis.Point import org.slf4j.Logger +/** + * Stores the given [AcceptingStore] to the database. + * Longitude, latitude and postal code of [AcceptingStore] must not be null. + */ class Store(private val logger: Logger, private val manualImport: Boolean) : PipelineStep, Unit>() { - /** - * Stores the given [input] to the database. - * Longitude, latitude and postal code of [AcceptingStore] must not be null. - */ override fun execute(input: List) { transaction { try {