From e8fed18b25dab3b921a904c59ae1f3c86fb625d9 Mon Sep 17 00:00:00 2001 From: almog8k <60139576+almog8k@users.noreply.github.com> Date: Mon, 25 Nov 2024 18:11:57 +0200 Subject: [PATCH] fix: prevent duplicate tiles in task creation (#29) * fix: unite parts before process * fix: prevant duplicate tiles in task creation * fix: changing variables naming --- package-lock.json | 17 ++++ package.json | 1 + src/common/interfaces.ts | 10 ++- src/task/models/tileMergeTaskManager.ts | 54 +++++++++--- tests/unit/mocks/partsMockData.ts | 38 +++++++++ .../tileMergeTaskManager.spec.ts | 85 +++++++++++-------- 6 files changed, 153 insertions(+), 52 deletions(-) diff --git a/package-lock.json b/package-lock.json index db0f272..223d6f1 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,6 +10,7 @@ "hasInstallScript": true, "license": "ISC", "dependencies": { + "@faker-js/faker": "^9.2.0", "@godaddy/terminus": "^4.12.1", "@map-colonies/error-express-handler": "^2.1.0", "@map-colonies/error-types": "^1.2.0", @@ -2603,6 +2604,22 @@ "node": "^12.22.0 || ^14.17.0 || >=16.0.0" } }, + "node_modules/@faker-js/faker": { + "version": "9.2.0", + "resolved": "https://registry.npmjs.org/@faker-js/faker/-/faker-9.2.0.tgz", + "integrity": "sha512-ulqQu4KMr1/sTFIYvqSdegHT8NIkt66tFAkugGnHA+1WAfEn6hMzNR+svjXGFRVLnapxvej67Z/LwchFrnLBUg==", + "funding": [ + { + "type": "opencollective", + "url": "https://opencollective.com/fakerjs" + } + ], + "license": "MIT", + "engines": { + "node": ">=18.0.0", + "npm": ">=9.0.0" + } + }, "node_modules/@godaddy/terminus": { "version": "4.12.1", "resolved": "https://registry.npmjs.org/@godaddy/terminus/-/terminus-4.12.1.tgz", diff --git a/package.json b/package.json index d81f2f1..d3c1299 100644 --- a/package.json +++ b/package.json @@ -39,6 +39,7 @@ } }, "dependencies": { + "@faker-js/faker": "^9.2.0", "@godaddy/terminus": "^4.12.1", "@map-colonies/error-express-handler": "^2.1.0", "@map-colonies/error-types": "^1.2.0", diff --git a/src/common/interfaces.ts b/src/common/interfaces.ts index 52ed36e..178d733 100644 --- a/src/common/interfaces.ts +++ b/src/common/interfaces.ts @@ -12,7 +12,7 @@ import { IngestionUpdateJobParams, } from '@map-colonies/mc-model-types'; import { TilesMimeFormat } from '@map-colonies/types'; -import { BBox, Polygon } from 'geojson'; +import { BBox, Feature, MultiPolygon, Polygon } from 'geojson'; import { Footprint, ITileRange } from '@map-colonies/mc-utils'; import { LayerCacheType, SeedMode } from './constants'; @@ -122,6 +122,14 @@ export interface PartSourceContext { maxZoom: number; } +export interface UnifiedPart { + fileName: string; + tilesPath: string; + footprint: Feature; + extent: BBox; + maxZoom: number; +} + export interface MergeParameters { parts: PartSourceContext[]; destPath: string; diff --git a/src/task/models/tileMergeTaskManager.ts b/src/task/models/tileMergeTaskManager.ts index 6953fd4..98eb96e 100644 --- a/src/task/models/tileMergeTaskManager.ts +++ b/src/task/models/tileMergeTaskManager.ts @@ -1,10 +1,10 @@ import { join } from 'path'; -import { BBox } from 'geojson'; +import { BBox, Feature, MultiPolygon, Polygon } from 'geojson'; import { Logger } from '@map-colonies/js-logger'; import { InputFiles, PolygonPart, TileOutputFormat } from '@map-colonies/mc-model-types'; import { ICreateTaskBody, TaskHandler as QueueClient } from '@map-colonies/mc-priority-queue'; import { degreesPerPixelToZoomLevel, Footprint, multiIntersect, subGroupsGen, tileBatchGenerator, TileRanger } from '@map-colonies/mc-utils'; -import { bbox, featureCollection, union } from '@turf/turf'; +import { bbox, featureCollection, intersect, polygon, union } from '@turf/turf'; import { difference } from '@turf/difference'; import { inject, injectable } from 'tsyringe'; import { SERVICES, TilesStorageProvider } from '../../common/constants'; @@ -19,6 +19,7 @@ import { PartSourceContext, IntersectionState, PartsSourceWithMaxZoom, + UnifiedPart, } from '../../common/interfaces'; import { convertToFeature } from '../../utils/geoUtils'; import { fileExtensionExtractor } from '../../utils/fileutils'; @@ -166,23 +167,49 @@ export class TileMergeTaskManager { private async *createZoomLevelTasks(params: MergeParameters): AsyncGenerator { const { parts, destPath, targetFormat, isNewTarget, grid, maxZoom } = params; - this.logger.debug({ msg: 'Creating batched tasks', destPath, targetFormat }); for (let zoom = maxZoom; zoom >= 0; zoom--) { - // when we used the old implementation with the parts intersection calculation with large numbers of parts - //we encountered performance issues, as long as we are not performing ingestion from more then one file we can omit this. - // const partsIntersections = this.findPartsIntersections(parts); - for await (const part of parts) { - if (part.maxZoom < zoom) { - // checking if the layer is relevant for the current zoom level (allowing different parts with different resolutions) - continue; - } + const filteredParts = parts.filter((part) => part.maxZoom >= zoom); + const processedParts = this.unifyParts(filteredParts); + for await (const part of processedParts) { yield* this.createTasksForPart(part, zoom, { destPath, grid, isNewTarget, targetFormat }); } } } + private unifyParts(parts: PartSourceContext[]): UnifiedPart[] { + const mergedParts: UnifiedPart[] = []; + // Merge parts by union and avoid duplicate overlaps. + for (const part of parts) { + let merged = false; + const currentPart = polygon(part.footprint.coordinates); + for (let i = 0; i < mergedParts.length; i++) { + const mergedPart = mergedParts[i].footprint; + if (this.doIntersect(currentPart, mergedPart)) { + const unionResult = union(featureCollection([currentPart, mergedPart])); + if (unionResult === null) { + continue; + } + mergedParts[i].footprint = unionResult; + merged = true; + break; + } + } + if (!merged) { + const processedPart: UnifiedPart = { ...part, footprint: currentPart }; + mergedParts.push(processedPart); + } + } + this.logger.debug({ msg: 'Preprocessed parts', numberOfParts: mergedParts.length }); + return mergedParts; + } + + private doIntersect(footprint1: Feature, footprint2: Feature): boolean { + const intersection = intersect(featureCollection([footprint1, footprint2])); + return intersection !== null; + } + private async *createTasksForPart( - part: PartSourceContext, + part: UnifiedPart, zoom: number, params: { destPath: string; targetFormat: TileOutputFormat; isNewTarget: boolean; grid: Grid } ): AsyncGenerator { @@ -205,7 +232,7 @@ export class TileMergeTaskManager { } } - private createPartSources(part: PartSourceContext, grid: Grid, destPath: string): MergeSources[] { + private createPartSources(part: UnifiedPart, grid: Grid, destPath: string): MergeSources[] { this.logger.debug({ msg: 'Creating source layers' }); const sourceEntry: MergeSources = { type: this.tilesStorageProvider, path: destPath }; @@ -237,7 +264,6 @@ export class TileMergeTaskManager { let state: IntersectionState = { currentIntersection: null, accumulatedIntersection: null }; const subGroups = subGroupsGen(parts, parts.length, false); - for (const subGroup of subGroups) { const subGroupFootprints = subGroup.map((layer) => layer.footprint as Footprint); this.logger.debug({ msg: 'Processing sub group' }); diff --git a/tests/unit/mocks/partsMockData.ts b/tests/unit/mocks/partsMockData.ts index 2b9d9f8..497c29b 100644 --- a/tests/unit/mocks/partsMockData.ts +++ b/tests/unit/mocks/partsMockData.ts @@ -1,5 +1,33 @@ /* eslint-disable @typescript-eslint/no-magic-numbers */ +import { faker } from '@faker-js/faker'; +import { BBox, Polygon } from 'geojson'; import { PolygonPart } from '@map-colonies/mc-model-types'; +import { PartSourceContext } from '../../../src/common/interfaces'; + +function createFakeBBox(): BBox { + return [ + faker.location.longitude({ min: -180, max: 180 }), + faker.location.latitude({ min: -90, max: 90 }), + faker.location.longitude({ min: -180, max: 180 }), + faker.location.latitude({ min: -90, max: 90 }), + ]; +} + +function createFakePolygon(): Polygon { + const firstAndLastPoint = [faker.location.longitude({ min: -180, max: 180 }), faker.location.latitude({ min: -90, max: 90 })]; + return { + type: 'Polygon', + coordinates: [ + [ + firstAndLastPoint, + [faker.location.longitude({ min: -180, max: 180 }), faker.location.latitude({ min: -90, max: 90 })], + [faker.location.longitude({ min: -180, max: 180 }), faker.location.latitude({ min: -90, max: 90 })], + [faker.location.longitude({ min: -180, max: 180 }), faker.location.latitude({ min: -90, max: 90 })], + firstAndLastPoint, + ], + ], + }; +} export const partsData: PolygonPart[] = [ { @@ -114,3 +142,13 @@ export const multiPartDataWithDifferentResolution: PolygonPart[] = [ }, }, ]; + +export function createFakePartSource(): PartSourceContext { + return { + tilesPath: `${faker.string.uuid()}/${faker.string.uuid()}`, + fileName: `${faker.string.alpha({ length: 8 })}.gpkg`, + maxZoom: faker.number.int({ min: 0, max: 21 }), + extent: createFakeBBox(), + footprint: createFakePolygon(), + }; +} diff --git a/tests/unit/task/tileMergeTaskManager/tileMergeTaskManager.spec.ts b/tests/unit/task/tileMergeTaskManager/tileMergeTaskManager.spec.ts index 8d9e84d..e5dc0f3 100644 --- a/tests/unit/task/tileMergeTaskManager/tileMergeTaskManager.spec.ts +++ b/tests/unit/task/tileMergeTaskManager/tileMergeTaskManager.spec.ts @@ -1,9 +1,10 @@ import { randomUUID } from 'crypto'; import nock from 'nock'; import { booleanEqual } from '@turf/turf'; +import { faker } from '@faker-js/faker'; import { Feature } from 'geojson'; import { TileOutputFormat } from '@map-colonies/mc-model-types'; -import { partsData } from '../../mocks/partsMockData'; +import { createFakePartSource, partsData } from '../../mocks/partsMockData'; import { configMock, registerDefaultConfig } from '../../mocks/configMock'; import { Grid, MergeTaskParameters, MergeTilesTaskParams } from '../../../../src/common/interfaces'; import { testData } from '../../mocks/tileMergeTaskManagerMockData'; @@ -70,6 +71,16 @@ describe('tileMergeTaskManager', () => { }); }); + describe('unifyParts', () => { + it('should unify parts correctly', () => { + const { tileMergeTaskManager } = testContext; + const partsData = faker.helpers.multiple(createFakePartSource, { count: 10 }); + const result = tileMergeTaskManager['unifyParts'](partsData); + + expect(result.length).toBeGreaterThan(0); + }); + }); + describe('calculateIntersectionState', () => { // Test case 1: No intersection found it('should return null intersection when no overlap is found', () => { @@ -130,54 +141,54 @@ describe('tileMergeTaskManager', () => { expect(result.currentIntersection).toBeNull(); expect(result.accumulatedIntersection).not.toBeNull(); }); + }); - describe('pushTasks', () => { - it('should push tasks in batches correctly', async () => { - const { tileMergeTaskManager } = testContext; - const buildTasksParams: MergeTilesTaskParams = { - taskMetadata: { layerRelativePath: 'layerRelativePath', tileOutputFormat: TileOutputFormat.PNG, isNewTarget: true, grid: Grid.TWO_ON_ONE }, - partsData, - inputFiles: { originDirectory: 'originDirectory', fileNames: ['fileNames'] }, - }; + describe('pushTasks', () => { + it('should push tasks in batches correctly', async () => { + const { tileMergeTaskManager } = testContext; + const buildTasksParams: MergeTilesTaskParams = { + taskMetadata: { layerRelativePath: 'layerRelativePath', tileOutputFormat: TileOutputFormat.PNG, isNewTarget: true, grid: Grid.TWO_ON_ONE }, + partsData, + inputFiles: { originDirectory: 'originDirectory', fileNames: ['fileNames'] }, + }; - const jobId = randomUUID(); + const jobId = randomUUID(); - const jobManagerBaseUrl = configMock.get('jobManagement.config.jobManagerBaseUrl'); - const path = `/jobs/${jobId}/tasks`; - nock(jobManagerBaseUrl).post(path).reply(200).persist(); + const jobManagerBaseUrl = configMock.get('jobManagement.config.jobManagerBaseUrl'); + const path = `/jobs/${jobId}/tasks`; + nock(jobManagerBaseUrl).post(path).reply(200).persist(); - const tasks = tileMergeTaskManager.buildTasks(buildTasksParams); + const tasks = tileMergeTaskManager.buildTasks(buildTasksParams); - let error: Error | null = null; - try { - await tileMergeTaskManager.pushTasks(jobId, tasks); - } catch (err) { - error = err as Error; - } + let error: Error | null = null; + try { + await tileMergeTaskManager.pushTasks(jobId, tasks); + } catch (err) { + error = err as Error; + } - expect(error).toBeNull(); - }); + expect(error).toBeNull(); + }); - it('should handle errors in pushTasks correctly', async () => { - const { tileMergeTaskManager } = testContext; - const buildTasksParams: MergeTilesTaskParams = { - taskMetadata: { layerRelativePath: 'layerRelativePath', tileOutputFormat: TileOutputFormat.PNG, isNewTarget: true, grid: Grid.TWO_ON_ONE }, - partsData, - inputFiles: { originDirectory: 'originDirectory', fileNames: ['fileNames'] }, - }; + it('should handle errors in pushTasks correctly', async () => { + const { tileMergeTaskManager } = testContext; + const buildTasksParams: MergeTilesTaskParams = { + taskMetadata: { layerRelativePath: 'layerRelativePath', tileOutputFormat: TileOutputFormat.PNG, isNewTarget: true, grid: Grid.TWO_ON_ONE }, + partsData, + inputFiles: { originDirectory: 'originDirectory', fileNames: ['fileNames'] }, + }; - const jobId = randomUUID(); + const jobId = randomUUID(); - const jobManagerBaseUrl = configMock.get('jobManagement.config.jobManagerBaseUrl'); - const path = `/jobs/${jobId}/tasks`; - nock(jobManagerBaseUrl).post(path).reply(500).persist(); + const jobManagerBaseUrl = configMock.get('jobManagement.config.jobManagerBaseUrl'); + const path = `/jobs/${jobId}/tasks`; + nock(jobManagerBaseUrl).post(path).reply(500).persist(); - const tasks = tileMergeTaskManager.buildTasks(buildTasksParams); + const tasks = tileMergeTaskManager.buildTasks(buildTasksParams); - const action = async () => tileMergeTaskManager.pushTasks(jobId, tasks); + const action = async () => tileMergeTaskManager.pushTasks(jobId, tasks); - await expect(action).rejects.toThrow(); - }); + await expect(action).rejects.toThrow(); }); }); });