-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merge least feature count polygons with neighbouring polygons #58
Merged
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,6 @@ DROP TABLE IF EXISTS polygonsnocount; | |
DO $$ | ||
DECLARE | ||
lines_count INTEGER; | ||
num_buildings INTEGER; | ||
BEGIN | ||
-- Check if ways_line has any data | ||
SELECT COUNT(*) INTO lines_count | ||
|
@@ -135,7 +134,6 @@ USING gist (geom); | |
-- Clean up the table which may have gaps and stuff from spatial indexing | ||
-- VACUUM ANALYZE buildings; | ||
|
||
|
||
DROP TABLE IF EXISTS splitpolygons; | ||
CREATE TABLE splitpolygons AS ( | ||
WITH polygonsfeaturecount AS ( | ||
|
@@ -163,49 +161,49 @@ USING gist (geom); | |
DROP TABLE polygonsnocount; | ||
|
||
|
||
DROP TABLE IF EXISTS lowfeaturecountpolygons; | ||
CREATE TABLE lowfeaturecountpolygons AS ( | ||
-- Grab the polygons with fewer than the requisite number of features | ||
WITH lowfeaturecountpolys AS ( | ||
SELECT * | ||
FROM splitpolygons AS p | ||
-- TODO: feature count should not be hard-coded | ||
WHERE p.numfeatures < %(num_buildings)s | ||
), | ||
|
||
-- Find the neighbors of the low-feature-count polygons | ||
-- Store their ids as n_polyid, numfeatures as n_numfeatures, etc | ||
allneighborlist AS ( | ||
SELECT | ||
p.*, | ||
pf.polyid AS n_polyid, | ||
pf.area AS n_area, | ||
p.numfeatures AS n_numfeatures, | ||
-- length of shared boundary to make nice merge decisions | ||
ST_LENGTH2D(ST_INTERSECTION(p.geom, pf.geom)) AS sharedbound | ||
FROM lowfeaturecountpolys AS p | ||
INNER JOIN splitpolygons AS pf | ||
-- Anything that touches | ||
ON ST_TOUCHES(p.geom, pf.geom) | ||
-- But eliminate those whose intersection is a point, because | ||
-- polygons that only touch at a corner shouldn't be merged | ||
AND ST_GEOMETRYTYPE(ST_INTERSECTION(p.geom, pf.geom)) != 'ST_Point' | ||
-- Sort first by polyid of the low-feature-count polygons | ||
-- Then by descending featurecount and area of the | ||
-- high-feature-count neighbors (area is in case of equal | ||
-- featurecounts, we'll just pick the biggest to add to) | ||
ORDER BY p.polyid ASC, p.numfeatures DESC, pf.area DESC | ||
-- OR, maybe for more aesthetic merges: | ||
-- order by p.polyid, sharedbound desc | ||
) | ||
|
||
SELECT DISTINCT ON (a.polyid) * FROM allneighborlist AS a | ||
); | ||
ALTER TABLE lowfeaturecountpolygons ADD PRIMARY KEY (polyid); | ||
SELECT POPULATE_GEOMETRY_COLUMNS('public.lowfeaturecountpolygons'::regclass); | ||
CREATE INDEX lowfeaturecountpolygons_idx | ||
ON lowfeaturecountpolygons | ||
USING gist (geom); | ||
-- DROP TABLE IF EXISTS lowfeaturecountpolygons; | ||
-- CREATE TABLE lowfeaturecountpolygons AS ( | ||
-- -- Grab the polygons with fewer than the requisite number of features | ||
-- WITH lowfeaturecountpolys AS ( | ||
-- SELECT * | ||
-- FROM splitpolygons AS p | ||
-- -- TODO: feature count should not be hard-coded | ||
-- WHERE p.numfeatures < %(num_buildings)s | ||
-- ), | ||
|
||
-- -- Find the neighbors of the low-feature-count polygons | ||
-- -- Store their ids as n_polyid, numfeatures as n_numfeatures, etc | ||
-- allneighborlist AS ( | ||
-- SELECT | ||
-- p.*, | ||
-- pf.polyid AS n_polyid, | ||
-- pf.area AS n_area, | ||
-- p.numfeatures AS n_numfeatures, | ||
-- -- length of shared boundary to make nice merge decisions | ||
-- ST_LENGTH2D(ST_INTERSECTION(p.geom, pf.geom)) AS sharedbound | ||
-- FROM lowfeaturecountpolys AS p | ||
-- INNER JOIN splitpolygons AS pf | ||
-- -- Anything that touches | ||
-- ON ST_TOUCHES(p.geom, pf.geom) | ||
-- -- But eliminate those whose intersection is a point, because | ||
-- -- polygons that only touch at a corner shouldn't be merged | ||
-- AND ST_GEOMETRYTYPE(ST_INTERSECTION(p.geom, pf.geom)) != 'ST_Point' | ||
-- -- Sort first by polyid of the low-feature-count polygons | ||
-- -- Then by descending featurecount and area of the | ||
-- -- high-feature-count neighbors (area is in case of equal | ||
-- -- featurecounts, we'll just pick the biggest to add to) | ||
-- ORDER BY p.polyid ASC, p.numfeatures DESC, pf.area DESC | ||
-- -- OR, maybe for more aesthetic merges: | ||
-- -- order by p.polyid, sharedbound desc | ||
-- ) | ||
|
||
-- SELECT DISTINCT ON (a.polyid) * FROM allneighborlist AS a | ||
-- ); | ||
-- ALTER TABLE lowfeaturecountpolygons ADD PRIMARY KEY (polyid); | ||
-- SELECT POPULATE_GEOMETRY_COLUMNS('public.lowfeaturecountpolygons'::regclass); | ||
-- CREATE INDEX lowfeaturecountpolygons_idx | ||
-- ON lowfeaturecountpolygons | ||
-- USING gist (geom); | ||
-- VACUUM ANALYZE lowfeaturecountpolygons; | ||
|
||
|
||
|
@@ -245,7 +243,7 @@ CREATE TABLE clusteredbuildings AS ( | |
*, | ||
ST_CLUSTERKMEANS( | ||
b.geom, | ||
CAST((b.numfeatures / %(num_buildings)s) + 1 AS integer) | ||
CAST((b.numfeatures / b.%(num_buildings)s) + 1 AS integer) | ||
) | ||
OVER (PARTITION BY b.polyid) | ||
AS cid | ||
|
@@ -335,7 +333,6 @@ USING gist (geom); | |
|
||
--VACUUM ANALYZE unsimplifiedtaskpolygons; | ||
|
||
|
||
--*****************************Simplify******************************* | ||
-- Extract unique line segments | ||
DROP TABLE IF EXISTS taskpolygons; | ||
|
@@ -380,78 +377,70 @@ CREATE TABLE taskpolygons AS ( | |
FROM taskpolygonsnoindex AS tpni | ||
); | ||
|
||
-- ALTER TABLE taskpolygons ADD PRIMARY KEY(taskid); | ||
|
||
-- Step 1: Identify polygons without any buildings | ||
DROP TABLE IF EXISTS no_building_polygons; | ||
CREATE TABLE no_building_polygons AS ( | ||
SELECT | ||
tp.*, | ||
tp.geom AS no_building_geom | ||
FROM taskpolygons AS tp | ||
LEFT JOIN buildings AS b ON ST_INTERSECTS(tp.geom, b.geom) | ||
WHERE b.geom IS NULL | ||
); | ||
|
||
-- Step 2: Identify neighboring polygons | ||
DROP TABLE IF EXISTS neighboring_polygons; | ||
CREATE TABLE neighboring_polygons AS ( | ||
SELECT | ||
nb.*, | ||
nb.geom AS neighbor_geom, | ||
nb_building_count, | ||
nbp.no_building_geom | ||
FROM taskpolygons AS nb | ||
INNER JOIN no_building_polygons AS nbp | ||
-- Finds polygons that touch each other | ||
ON ST_TOUCHES(nbp.no_building_geom, nb.geom) | ||
LEFT JOIN ( | ||
-- Step 3: Count buildings in the neighboring polygons | ||
SELECT | ||
nb.geom, | ||
COUNT(b.geom) AS nb_building_count | ||
FROM taskpolygons AS nb | ||
LEFT JOIN buildings AS b ON ST_INTERSECTS(nb.geom, b.geom) | ||
GROUP BY nb.geom | ||
) AS building_counts | ||
ON nb.geom = building_counts.geom | ||
); | ||
ALTER TABLE taskpolygons ADD PRIMARY KEY (taskid); | ||
SELECT POPULATE_GEOMETRY_COLUMNS('public.taskpolygons'::regclass); | ||
CREATE INDEX taskpolygons_idx | ||
ON taskpolygons | ||
USING gist (geom); | ||
|
||
-- Step 4: Find the optimal neighboring polygon to avoid, | ||
-- same polygons with the smallest number of buildings merging into | ||
-- multiple neighboring polygons | ||
DROP TABLE IF EXISTS optimal_neighbors; | ||
CREATE TABLE optimal_neighbors AS ( | ||
SELECT | ||
nbp.no_building_geom, | ||
nbp.neighbor_geom | ||
FROM neighboring_polygons AS nbp | ||
WHERE nbp.nb_building_count = ( | ||
SELECT MIN(nb_building_count) | ||
FROM neighboring_polygons AS np | ||
WHERE np.no_building_geom = nbp.no_building_geom | ||
) | ||
); | ||
-- Merge least feature polygons with neighbouring polygons | ||
DO $$ | ||
DECLARE | ||
num_buildings INTEGER := %(num_buildings)s; | ||
min_area NUMERIC; -- Set the minimum area threshold | ||
mean_area NUMERIC; | ||
stddev_area NUMERIC; -- Set the standard deviation | ||
min_buildings INTEGER; -- Set the minimum number of buildings threshold | ||
small_polygon RECORD; -- set small_polygon and nearest_neighbor as record | ||
nearest_neighbor RECORD; -- in order to use them in the loop | ||
BEGIN | ||
min_buildings := num_buildings * 0.5; | ||
|
||
-- Step 5: Merge the small polygons with their optimal neighboring polygons | ||
UPDATE taskpolygons tp | ||
SET geom = ST_UNION(tp.geom, nbp.no_building_geom) | ||
FROM optimal_neighbors AS nbp | ||
WHERE tp.geom = nbp.neighbor_geom; | ||
DELETE FROM taskpolygons | ||
WHERE geom IN ( | ||
SELECT no_building_geom | ||
FROM no_building_polygons | ||
); | ||
-- Find the mean and standard deviation of the area | ||
SELECT | ||
AVG(ST_Area(geom)), | ||
STDDEV_POP(ST_Area(geom)) | ||
INTO mean_area, stddev_area | ||
FROM taskpolygons; | ||
|
||
-- Set the threshold as mean - standard deviation | ||
min_area := mean_area - stddev_area; | ||
|
||
DROP TABLE IF EXISTS no_building_polygons; | ||
DROP TABLE IF EXISTS neighboring_polygons; | ||
DROP TABLE IF EXISTS leastfeaturepolygons; | ||
CREATE TABLE leastfeaturepolygons AS | ||
SELECT taskid, geom | ||
FROM taskpolygons | ||
WHERE ST_Area(geom) < min_area OR ( | ||
SELECT COUNT(b.id) FROM buildings b | ||
WHERE ST_Contains(taskpolygons.geom, b.geom) | ||
) < min_buildings; -- find least feature polygons based on the area and feature | ||
|
||
FOR small_polygon IN | ||
SELECT * FROM leastfeaturepolygons | ||
LOOP | ||
-- Find the nearest neighbor to merge the small polygon with | ||
FOR nearest_neighbor IN | ||
SELECT taskid, geom, ST_LENGTH2D(ST_Intersection(small_polygon.geom, geom)) as shared_bound | ||
FROM taskpolygons | ||
WHERE taskid NOT IN (SELECT taskid FROM leastfeaturepolygons) | ||
AND ST_Touches(small_polygon.geom, geom) | ||
AND ST_GEOMETRYTYPE(ST_INTERSECTION(small_polygon.geom, geom)) != 'ST_Point' | ||
ORDER BY shared_bound DESC -- Find neighbor polygon based on shared boundary distance | ||
LIMIT 1 | ||
LOOP | ||
-- Merge the small polygon into the neighboring polygon | ||
UPDATE taskpolygons | ||
SET geom = ST_Union(geom, small_polygon.geom) | ||
WHERE taskid = nearest_neighbor.taskid; | ||
|
||
DELETE FROM taskpolygons WHERE taskid = small_polygon.taskid; | ||
-- Exit the neighboring polygon loop after one successful merge | ||
EXIT; | ||
END LOOP; | ||
END LOOP; | ||
END $$; | ||
|
||
SELECT POPULATE_GEOMETRY_COLUMNS('public.taskpolygons'::regclass); | ||
CREATE INDEX taskpolygons_idx | ||
ON taskpolygons | ||
USING gist (geom); | ||
DROP TABLE IF EXISTS leastfeaturepolygons; | ||
-- VACUUM ANALYZE taskpolygons; | ||
|
||
-- Generate GeoJSON output | ||
|
@@ -464,8 +453,14 @@ FROM ( | |
SELECT | ||
JSONB_BUILD_OBJECT( | ||
'type', 'Feature', | ||
'geometry', ST_ASGEOJSON(geom)::jsonb, | ||
'properties', JSONB_BUILD_OBJECT() | ||
'geometry', ST_ASGEOJSON(t.geom)::jsonb, | ||
'properties', JSONB_BUILD_OBJECT( | ||
'building_count', ( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 💫 |
||
SELECT COUNT(b.id) | ||
FROM buildings AS b | ||
WHERE ST_CONTAINS(t.geom, b.geom) | ||
) | ||
) | ||
) AS feature | ||
FROM taskpolygons | ||
FROM taskpolygons AS t | ||
) AS features; |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Have you tried modifying this number a bit to see the result?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
0.5 is ok, but as a user I would imagine if they requested 15 buildings per task, but only got 8 they might be a bit disappointed 😅
Does 0.7 change things by a lot?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This discrepancy will obviously scale linearly: say they request 50 buildings average and get 25 minimum.
But personally, I think if users are requesting 50 geoms per area there is something wrong with their methodology! I would imagine 10 buildings per task is more realistic for an individual or team 👍
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
oh crap i didn't see these comments , yeah i had tried with params such as 0.7 but got so big tasks in result. Yeah we can change this in future based on the practical use case scenarios