Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added subcommand to clean deleted relations for issue # 2444 #3224

Merged
merged 5 commits into from
Oct 23, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 6 additions & 8 deletions docs/admin/Maintenance.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,16 +60,14 @@ to finish the recomputation.

## Removing large deleted objects

Command: `nominatim admin --clean-deleted --age <PostgreSQL Time Interval>`

Nominatim refuses to delete very large areas because often these deletions are
accidental and are reverted within hours. Instead the deletions are logged in
the `import_polygon_delete` table and left to the administrator to clean up.

There is currently no command to do that. You can use the following SQL
query to force a deletion on all objects that have been deleted more than
a certain timespan ago (here: 1 month):
To run this command you will need to pass a PostgreSQL time interval to the age
parameter. For example to delete any objects that have been deleted more than a
month ago you would run:
`nominatim admin --clean-deleted --age '1 month'`

```sql
SELECT place_force_delete(p.place_id) FROM import_polygon_delete d, placex p
WHERE p.osm_type = d.osm_type and p.osm_id = d.osm_id
and age(p.indexed_date) > '1 month'::interval
```
54 changes: 0 additions & 54 deletions lib-sql/functions/place_triggers.sql
Original file line number Diff line number Diff line change
Expand Up @@ -363,57 +363,3 @@ BEGIN
RETURN NULL;
END;
$$ LANGUAGE plpgsql;

CREATE OR REPLACE FUNCTION flush_deleted_places()
RETURNS INTEGER
AS $$
BEGIN
-- deleting large polygons can have a massive effect on the system - require manual intervention to let them through
INSERT INTO import_polygon_delete (osm_type, osm_id, class, type)
SELECT osm_type, osm_id, class, type FROM place_to_be_deleted WHERE deferred;

-- delete from place table
ALTER TABLE place DISABLE TRIGGER place_before_delete;
DELETE FROM place USING place_to_be_deleted
WHERE place.osm_type = place_to_be_deleted.osm_type
and place.osm_id = place_to_be_deleted.osm_id
and place.class = place_to_be_deleted.class
and place.type = place_to_be_deleted.type
and not deferred;
ALTER TABLE place ENABLE TRIGGER place_before_delete;

-- Mark for delete in the placex table
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
WHERE placex.osm_type = 'N' and place_to_be_deleted.osm_type = 'N'
and placex.osm_id = place_to_be_deleted.osm_id
and placex.class = place_to_be_deleted.class
and placex.type = place_to_be_deleted.type
and not deferred;
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
WHERE placex.osm_type = 'W' and place_to_be_deleted.osm_type = 'W'
and placex.osm_id = place_to_be_deleted.osm_id
and placex.class = place_to_be_deleted.class
and placex.type = place_to_be_deleted.type
and not deferred;
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
WHERE placex.osm_type = 'R' and place_to_be_deleted.osm_type = 'R'
and placex.osm_id = place_to_be_deleted.osm_id
and placex.class = place_to_be_deleted.class
and placex.type = place_to_be_deleted.type
and not deferred;

-- Mark for delete in interpolations
UPDATE location_property_osmline SET indexed_status = 100 FROM place_to_be_deleted
WHERE place_to_be_deleted.osm_type = 'W'
and place_to_be_deleted.class = 'place'
and place_to_be_deleted.type = 'houses'
and location_property_osmline.osm_id = place_to_be_deleted.osm_id
and not deferred;

-- Clear todo list.
TRUNCATE TABLE place_to_be_deleted;

RETURN NULL;
END;
$$ LANGUAGE plpgsql;

53 changes: 53 additions & 0 deletions lib-sql/functions/utils.sql
Original file line number Diff line number Diff line change
Expand Up @@ -487,3 +487,56 @@ BEGIN
END;
$$
LANGUAGE plpgsql;

CREATE OR REPLACE FUNCTION flush_deleted_places()
RETURNS INTEGER
AS $$
BEGIN
-- deleting large polygons can have a massive effect on the system - require manual intervention to let them through
INSERT INTO import_polygon_delete (osm_type, osm_id, class, type)
SELECT osm_type, osm_id, class, type FROM place_to_be_deleted WHERE deferred;

-- delete from place table
ALTER TABLE place DISABLE TRIGGER place_before_delete;
DELETE FROM place USING place_to_be_deleted
WHERE place.osm_type = place_to_be_deleted.osm_type
and place.osm_id = place_to_be_deleted.osm_id
and place.class = place_to_be_deleted.class
and place.type = place_to_be_deleted.type
and not deferred;
ALTER TABLE place ENABLE TRIGGER place_before_delete;

-- Mark for delete in the placex table
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
WHERE placex.osm_type = 'N' and place_to_be_deleted.osm_type = 'N'
and placex.osm_id = place_to_be_deleted.osm_id
and placex.class = place_to_be_deleted.class
and placex.type = place_to_be_deleted.type
and not deferred;
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
WHERE placex.osm_type = 'W' and place_to_be_deleted.osm_type = 'W'
and placex.osm_id = place_to_be_deleted.osm_id
and placex.class = place_to_be_deleted.class
and placex.type = place_to_be_deleted.type
and not deferred;
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
WHERE placex.osm_type = 'R' and place_to_be_deleted.osm_type = 'R'
and placex.osm_id = place_to_be_deleted.osm_id
and placex.class = place_to_be_deleted.class
and placex.type = place_to_be_deleted.type
and not deferred;

-- Mark for delete in interpolations
UPDATE location_property_osmline SET indexed_status = 100 FROM place_to_be_deleted
WHERE place_to_be_deleted.osm_type = 'W'
and place_to_be_deleted.class = 'place'
and place_to_be_deleted.type = 'houses'
and location_property_osmline.osm_id = place_to_be_deleted.osm_id
and not deferred;

-- Clear todo list.
TRUNCATE TABLE place_to_be_deleted;

RETURN NULL;
END;
$$ LANGUAGE plpgsql;
16 changes: 16 additions & 0 deletions nominatim/clicmd/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class AdminFuncs:
"""

def add_args(self, parser: argparse.ArgumentParser) -> None:
self.parser = parser
group = parser.add_argument_group('Admin tasks')
objs = group.add_mutually_exclusive_group(required=True)
objs.add_argument('--warm', action='store_true',
Expand All @@ -41,6 +42,8 @@ def add_args(self, parser: argparse.ArgumentParser) -> None:
help='Print performance analysis of the indexing process')
objs.add_argument('--collect-os-info', action="store_true",
help="Generate a report about the host system information")
objs.add_argument('--clean-deleted', action='store_true',
help='Clean up deleted relations')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you want you can simply make the age a required argument to --clean-deleted. Then argparse ensures that the the argument is present and the code can be a lot simpler.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for your feedback @lonvia! I wasn't able to make the age required directly because it was causing errors with the other commands that didn't include age. Instead I decided to raise a parser error right in the clicmd/admin.py file:
if not args.age: self.parser.error('Age is required for --clean-deleted command')
I moved the test for that into the cli/test_cmd_admin.py file. I think the solution is a bit cleaner, but if you know a way to make the argument required directly I'd be happy to switch to that instead.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I meant to give clean-deleted an argument:

objs.add_argument('--clean-deleted', action='store', metavar='AGE', help='...')

...

age = args.clean_deleted

...

But the current solution works as well. Just one tiny nitpick: simply do raise UsageError() instead of using self.parser.error(). Just for consistency with the rest of the code.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for clarifying lonvia. I misunderstood where you were going with that. I like your solution better. I've made the required changes for your comments.

group = parser.add_argument_group('Arguments for cache warming')
group.add_argument('--search-only', action='store_const', dest='target',
const='search',
Expand All @@ -54,8 +57,13 @@ def add_args(self, parser: argparse.ArgumentParser) -> None:
help='Analyse indexing of the given OSM object')
mgroup.add_argument('--place-id', type=int,
help='Analyse indexing of the given Nominatim object')
group = parser.add_argument_group('Arguments for cleaning deleted')
group.add_argument('--age', type=str,
help='Delete relations older than the given PostgreSQL time interval')


def run(self, args: NominatimArgs) -> int:
# pylint: disable=too-many-return-statements
if args.warm:
return self._warm(args)

Expand All @@ -81,6 +89,14 @@ def run(self, args: NominatimArgs) -> int:
collect_os_info.report_system_information(args.config)
return 0

if args.clean_deleted:
if not args.age:
self.parser.error('Age is required for --clean-deleted command')
LOG.warning('Cleaning up deleted relations')
from ..tools import admin
admin.clean_deleted_relations(args.config, age=args.age)
return 0

return 1


Expand Down
2 changes: 2 additions & 0 deletions nominatim/clicmd/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,12 @@ class NominatimArgs:
check_database: bool
migrate: bool
collect_os_info: bool
clean_deleted: bool
analyse_indexing: bool
target: Optional[str]
osm_id: Optional[str]
place_id: Optional[int]
age: str

# Arguments to 'import'
osm_file: List[str]
Expand Down
18 changes: 18 additions & 0 deletions nominatim/tools/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import logging

from psycopg2.extras import Json, register_hstore
from psycopg2 import DataError

from nominatim.config import Configuration
from nominatim.db.connection import connect, Cursor
Expand Down Expand Up @@ -87,3 +88,20 @@ def analyse_indexing(config: Configuration, osm_id: Optional[str] = None,

for msg in conn.notices:
print(msg)


def clean_deleted_relations(config: Configuration, age: str) -> None:
""" Clean deleted relations older than a given age
"""
with connect(config.get_libpq_dsn()) as conn:
with conn.cursor() as cur:
try:
cur.execute("""SELECT place_force_delete(p.place_id)
FROM import_polygon_delete d, placex p
WHERE p.osm_type = d.osm_type AND p.osm_id = d.osm_id
AND age(p.indexed_date) > %s::interval""",
(age, ))
except DataError as exc:
raise UsageError('Invalid PostgreSQL time interval format') from exc
conn.commit()

12 changes: 12 additions & 0 deletions test/python/cli/test_cmd_admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,18 @@ def test_admin_migrate(cli_call, mock_func_factory):
assert mock.called == 1


def test_admin_clean_deleted_relations(cli_call, mock_func_factory):
mock = mock_func_factory(nominatim.tools.admin, 'clean_deleted_relations')

assert cli_call('admin', '--clean-deleted', '--age', '1 month') == 0
assert mock.called == 1

def test_admin_clean_deleted_relations_no_age(cli_call, mock_func_factory):
mock = mock_func_factory(nominatim.tools.admin, 'clean_deleted_relations')

with pytest.raises(SystemExit):
cli_call('admin', '--clean-deleted')

class TestCliAdminWithDb:

@pytest.fixture(autouse=True)
Expand Down
76 changes: 76 additions & 0 deletions test/python/tools/test_admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from nominatim.errors import UsageError
from nominatim.tools import admin
from nominatim.tokenizer import factory
from nominatim.db.sql_preprocessor import SQLPreprocessor

@pytest.fixture(autouse=True)
def create_placex_table(project_env, tokenizer_mock, temp_db_cursor, placex_table):
Expand Down Expand Up @@ -70,3 +71,78 @@ def test_analyse_indexing_with_osm_id(project_env, temp_db_cursor):
VALUES(9988, 'N', 10000)""")

admin.analyse_indexing(project_env, osm_id='N10000')


class TestAdminCleanDeleted:

@pytest.fixture(autouse=True)
def setup_polygon_delete(self, project_env, table_factory, place_table, osmline_table, temp_db_cursor, temp_db_conn, def_config, src_dir):
""" Set up place_force_delete function and related tables
"""
self.project_env = project_env
self.temp_db_cursor = temp_db_cursor
table_factory('import_polygon_delete',
"""osm_id BIGINT,
osm_type CHAR(1),
class TEXT NOT NULL,
type TEXT NOT NULL""",
((100, 'N', 'boundary', 'administrative'),
(145, 'N', 'boundary', 'administrative'),
(175, 'R', 'landcover', 'grass')))
temp_db_cursor.execute("""INSERT INTO placex (place_id, osm_id, osm_type, class, type, indexed_date, indexed_status)
VALUES(1, 100, 'N', 'boundary', 'administrative', current_date - INTERVAL '1 month', 1),
(2, 145, 'N', 'boundary', 'administrative', current_date - INTERVAL '3 month', 1),
(3, 175, 'R', 'landcover', 'grass', current_date - INTERVAL '3 months', 1)""")
# set up tables and triggers for utils function
table_factory('place_to_be_deleted',
"""osm_id BIGINT,
osm_type CHAR(1),
class TEXT NOT NULL,
type TEXT NOT NULL,
deferred BOOLEAN""")
table_factory('country_name', 'partition INT')
table_factory('import_polygon_error', """osm_id BIGINT,
osm_type CHAR(1),
class TEXT NOT NULL,
type TEXT NOT NULL""")
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION place_delete()
RETURNS TRIGGER AS $$
BEGIN RETURN NULL; END;
$$ LANGUAGE plpgsql;""")
temp_db_cursor.execute("""CREATE TRIGGER place_before_delete BEFORE DELETE ON place
FOR EACH ROW EXECUTE PROCEDURE place_delete();""")
orig_sql = def_config.lib_dir.sql
def_config.lib_dir.sql = src_dir / 'lib-sql'
sqlproc = SQLPreprocessor(temp_db_conn, def_config)
sqlproc.run_sql_file(temp_db_conn, 'functions/utils.sql')
def_config.lib_dir.sql = orig_sql


def test_admin_clean_deleted_no_records(self):
admin.clean_deleted_relations(self.project_env, age='1 year')
assert self.temp_db_cursor.row_set('SELECT osm_id, osm_type, class, type, indexed_status FROM placex') == {(100, 'N', 'boundary', 'administrative', 1),
(145, 'N', 'boundary', 'administrative', 1),
(175, 'R', 'landcover', 'grass', 1)}
assert self.temp_db_cursor.table_rows('import_polygon_delete') == 3


@pytest.mark.parametrize('test_age', ['T week', '1 welk', 'P1E'])
def test_admin_clean_deleted_bad_age(self, test_age):
with pytest.raises(UsageError):
admin.clean_deleted_relations(self.project_env, age = test_age)


def test_admin_clean_deleted_partial(self):
admin.clean_deleted_relations(self.project_env, age = '2 months')
assert self.temp_db_cursor.row_set('SELECT osm_id, osm_type, class, type, indexed_status FROM placex') == {(100, 'N', 'boundary', 'administrative', 1),
(145, 'N', 'boundary', 'administrative', 100),
(175, 'R', 'landcover', 'grass', 100)}
assert self.temp_db_cursor.table_rows('import_polygon_delete') == 1

@pytest.mark.parametrize('test_age', ['1 week', 'P3D', '5 hours'])
def test_admin_clean_deleted(self, test_age):
admin.clean_deleted_relations(self.project_env, age = test_age)
assert self.temp_db_cursor.row_set('SELECT osm_id, osm_type, class, type, indexed_status FROM placex') == {(100, 'N', 'boundary', 'administrative', 100),
(145, 'N', 'boundary', 'administrative', 100),
(175, 'R', 'landcover', 'grass', 100)}
assert self.temp_db_cursor.table_rows('import_polygon_delete') == 0