port freeze function to python

This commit is contained in:
Sarah Hoffmann
2021-02-17 21:43:15 +01:00
parent 42ecd535b3
commit 101a1f895d
8 changed files with 182 additions and 99 deletions

View File

@@ -657,50 +657,7 @@ class SetupFunctions
public function drop() public function drop()
{ {
info('Drop tables only required for updates'); (clone($this->oNominatimCmd))->addParams('freeze')->run();
// The implementation is potentially a bit dangerous because it uses
// a positive selection of tables to keep, and deletes everything else.
// Including any tables that the unsuspecting user might have manually
// created. USE AT YOUR OWN PERIL.
// tables we want to keep. everything else goes.
$aKeepTables = array(
'*columns',
'import_polygon_*',
'import_status',
'place_addressline',
'location_postcode',
'location_property*',
'placex',
'search_name',
'seq_*',
'word',
'query_log',
'new_query_log',
'spatial_ref_sys',
'country_name',
'place_classtype_*',
'country_osm_grid'
);
$aDropTables = array();
$aHaveTables = $this->db()->getListOfTables();
foreach ($aHaveTables as $sTable) {
$bFound = false;
foreach ($aKeepTables as $sKeep) {
if (fnmatch($sKeep, $sTable)) {
$bFound = true;
break;
}
}
if (!$bFound) array_push($aDropTables, $sTable);
}
foreach ($aDropTables as $sDrop) {
$this->dropTable($sDrop);
}
$this->removeFlatnodeFile();
} }
/** /**

View File

@@ -173,27 +173,6 @@ class SetupAll:
return run_legacy_script(*params, nominatim_env=args) return run_legacy_script(*params, nominatim_env=args)
class SetupFreeze:
"""\
Make database read-only.
About half of data in the Nominatim database is kept only to be able to
keep the data up-to-date with new changes made in OpenStreetMap. This
command drops all this data and only keeps the part needed for geocoding
itself.
This command has the same effect as the `--no-updates` option for imports.
"""
@staticmethod
def add_args(parser):
pass # No options
@staticmethod
def run(args):
return run_legacy_script('setup.php', '--drop', nominatim_env=args)
class SetupSpecialPhrases: class SetupSpecialPhrases:
"""\ """\
Maintain special phrases. Maintain special phrases.
@@ -352,7 +331,7 @@ def nominatim(**kwargs):
parser = CommandlineParser('nominatim', nominatim.__doc__) parser = CommandlineParser('nominatim', nominatim.__doc__)
parser.add_subcommand('import', SetupAll) parser.add_subcommand('import', SetupAll)
parser.add_subcommand('freeze', SetupFreeze) parser.add_subcommand('freeze', clicmd.SetupFreeze)
parser.add_subcommand('replication', clicmd.UpdateReplication) parser.add_subcommand('replication', clicmd.UpdateReplication)
parser.add_subcommand('special-phrases', SetupSpecialPhrases) parser.add_subcommand('special-phrases', SetupSpecialPhrases)

View File

@@ -7,3 +7,4 @@ from .api import APISearch, APIReverse, APILookup, APIDetails, APIStatus
from .index import UpdateIndex from .index import UpdateIndex
from .refresh import UpdateRefresh from .refresh import UpdateRefresh
from .admin import AdminFuncs from .admin import AdminFuncs
from .freeze import SetupFreeze

View File

@@ -0,0 +1,37 @@
"""
Implementation of the 'freeze' subcommand.
"""
from ..db.connection import connect
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid importing modules that may end up unused.
# pylint: disable=E0012,C0415
class SetupFreeze:
    """\
    Make database read-only.
    About half of data in the Nominatim database is kept only to be able to
    keep the data up-to-date with new changes made in OpenStreetMap. This
    command drops all this data and only keeps the part needed for geocoding
    itself.
    This command has the same effect as the `--no-updates` option for imports.
    """

    @staticmethod
    def add_args(parser):
        pass # No options

    @staticmethod
    def run(args):
        # Imported here rather than at module level (see the pylint note
        # at the top of the file).
        from ..tools import freeze

        conn = connect(args.config.get_libpq_dsn())
        try:
            freeze.drop_update_tables(conn)
            freeze.drop_flatnode_file(args.config.FLATNODE_FILE)
        finally:
            # Release the connection even when one of the steps above
            # raises, so a failed freeze does not leak it.
            conn.close()

        # Exit code of the subcommand: 0 signals success.
        return 0

43
nominatim/tools/freeze.py Normal file
View File

@@ -0,0 +1,43 @@
"""
Functions for removing unnecessary data from the database.
"""
from pathlib import Path
# Names of the tables that drop_update_tables() removes. Each entry is
# inserted into a SQL `tablename LIKE '...'` condition, so '%' matches any
# sequence of characters.
# NOTE(review): in LIKE, '_' is a single-character wildcard as well, so the
# patterns match slightly more than their literal spelling suggests.
UPDATE_TABLES = [
'address_levels',
'gb_postcode',
'import_osmosis_log',
'import_polygon_%',
'location_area%',
'location_road%',
'place',
'planet_osm_%',
'search_name_%',
'us_postcode',
'wikipedia_%'
]
def drop_update_tables(conn):
    """ Drop all tables only necessary for updating the database from
        OSM replication data.

        `conn` is an open database connection. Every table listed in
        `pg_tables` whose name matches one of the LIKE patterns in
        UPDATE_TABLES is dropped with CASCADE. The changes are committed
        before the function returns.
    """
    where = ' or '.join("(tablename LIKE '{}')".format(t) for t in UPDATE_TABLES)

    with conn.cursor() as cur:
        cur.execute("SELECT tablename FROM pg_tables WHERE " + where)
        # Read all names before issuing further statements: the same
        # cursor is reused for the DROP commands below.
        tables = [r[0] for r in cur]

        for table in tables:
            # Inside a quoted identifier a double quote must be doubled,
            # otherwise an unusual table name would break the statement.
            quoted = table.replace('"', '""')
            cur.execute('DROP TABLE IF EXISTS "{}" CASCADE'.format(quoted))

    conn.commit()
def drop_flatnode_file(fname):
    """ Remove the flatnode file if it exists.

        `fname` is the path of the file. An empty or unset name means
        that no flatnode file is configured and nothing is done. A file
        that is already gone is silently ignored.
    """
    if not fname:
        return

    # Try the removal directly instead of checking for existence first:
    # the file may vanish between the check and the unlink.
    try:
        Path(fname).unlink()
    except FileNotFoundError:
        pass

View File

@@ -36,6 +36,14 @@ class _TestingCursor(psycopg2.extras.DictCursor):
return set((tuple(row) for row in self)) return set((tuple(row) for row in self))
def table_exists(self, table):
    """ Return True when pg_tables lists exactly one table with the
        given name.
    """
    query = ("SELECT count(*) FROM pg_tables\n"
             "WHERE tablename = %s")
    matches = self.scalar(query, (table, ))

    return matches == 1
@pytest.fixture @pytest.fixture
def temp_db(monkeypatch): def temp_db(monkeypatch):
""" Create an empty database for the test. The database name is also """ Create an empty database for the test. The database name is also

View File

@@ -17,6 +17,7 @@ import nominatim.clicmd.admin
import nominatim.indexer.indexer import nominatim.indexer.indexer
import nominatim.tools.refresh import nominatim.tools.refresh
import nominatim.tools.replication import nominatim.tools.replication
import nominatim.tools.freeze
from nominatim.errors import UsageError from nominatim.errors import UsageError
from nominatim.db import status from nominatim.db import status
@@ -50,6 +51,14 @@ def mock_run_legacy(monkeypatch):
monkeypatch.setattr(nominatim.cli, 'run_legacy_script', mock) monkeypatch.setattr(nominatim.cli, 'run_legacy_script', mock)
return mock return mock
@pytest.fixture
def mock_func_factory(monkeypatch):
    """ Fixture returning a factory function. Calling the factory with a
        module (or class) and an attribute name replaces that attribute
        with a fresh MockParamCapture and returns the capture object.
    """
    def _install(module, func):
        capture = MockParamCapture()
        monkeypatch.setattr(module, func, capture)
        return capture

    return _install
def test_cli_help(capsys): def test_cli_help(capsys):
""" Running nominatim tool without arguments prints help. """ Running nominatim tool without arguments prints help.
@@ -62,7 +71,6 @@ def test_cli_help(capsys):
@pytest.mark.parametrize("command,script", [ @pytest.mark.parametrize("command,script", [
(('import', '--continue', 'load-data'), 'setup'), (('import', '--continue', 'load-data'), 'setup'),
(('freeze',), 'setup'),
(('special-phrases',), 'specialphrases'), (('special-phrases',), 'specialphrases'),
(('add-data', '--tiger-data', 'tiger'), 'setup'), (('add-data', '--tiger-data', 'tiger'), 'setup'),
(('add-data', '--file', 'foo.osm'), 'update'), (('add-data', '--file', 'foo.osm'), 'update'),
@@ -75,22 +83,30 @@ def test_legacy_commands_simple(mock_run_legacy, command, script):
assert mock_run_legacy.last_args[0] == script + '.php' assert mock_run_legacy.last_args[0] == script + '.php'
def test_freeze_command(mock_func_factory, temp_db):
    """ The 'freeze' subcommand succeeds and calls each of the two freeze
        functions exactly once.
    """
    patched = [mock_func_factory(nominatim.tools.freeze, name)
               for name in ('drop_update_tables', 'drop_flatnode_file')]

    assert 0 == call_nominatim('freeze')

    for mock in patched:
        assert mock.called == 1
@pytest.mark.parametrize("params", [('--warm', ), @pytest.mark.parametrize("params", [('--warm', ),
('--warm', '--reverse-only'), ('--warm', '--reverse-only'),
('--warm', '--search-only'), ('--warm', '--search-only'),
('--check-database', )]) ('--check-database', )])
def test_admin_command_legacy(monkeypatch, params): def test_admin_command_legacy(mock_func_factory, params):
mock_run_legacy = MockParamCapture() mock_run_legacy = mock_func_factory(nominatim.clicmd.admin, 'run_legacy_script')
monkeypatch.setattr(nominatim.clicmd.admin, 'run_legacy_script', mock_run_legacy)
assert 0 == call_nominatim('admin', *params) assert 0 == call_nominatim('admin', *params)
assert mock_run_legacy.called == 1 assert mock_run_legacy.called == 1
@pytest.mark.parametrize("func, params", [('analyse_indexing', ('--analyse-indexing', ))]) @pytest.mark.parametrize("func, params", [('analyse_indexing', ('--analyse-indexing', ))])
def test_admin_command_tool(temp_db, monkeypatch, func, params): def test_admin_command_tool(temp_db, mock_func_factory, func, params):
mock = MockParamCapture() mock = mock_func_factory(nominatim.tools.admin, func)
monkeypatch.setattr(nominatim.tools.admin, func, mock)
assert 0 == call_nominatim('admin', *params) assert 0 == call_nominatim('admin', *params)
assert mock.called == 1 assert mock.called == 1
@@ -109,12 +125,10 @@ def test_add_data_command(mock_run_legacy, name, oid):
(['--boundaries-only'], 1, 0), (['--boundaries-only'], 1, 0),
(['--no-boundaries'], 0, 1), (['--no-boundaries'], 0, 1),
(['--boundaries-only', '--no-boundaries'], 0, 0)]) (['--boundaries-only', '--no-boundaries'], 0, 0)])
def test_index_command(monkeypatch, temp_db_cursor, params, do_bnds, do_ranks): def test_index_command(mock_func_factory, temp_db_cursor, params, do_bnds, do_ranks):
temp_db_cursor.execute("CREATE TABLE import_status (indexed bool)") temp_db_cursor.execute("CREATE TABLE import_status (indexed bool)")
bnd_mock = MockParamCapture() bnd_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_boundaries')
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', bnd_mock) rank_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_by_rank')
rank_mock = MockParamCapture()
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', rank_mock)
assert 0 == call_nominatim('index', *params) assert 0 == call_nominatim('index', *params)
@@ -127,9 +141,8 @@ def test_index_command(monkeypatch, temp_db_cursor, params, do_bnds, do_ranks):
('importance', ('update.php', '--recompute-importance')), ('importance', ('update.php', '--recompute-importance')),
('website', ('setup.php', '--setup-website')), ('website', ('setup.php', '--setup-website')),
]) ])
def test_refresh_legacy_command(monkeypatch, temp_db, command, params): def test_refresh_legacy_command(mock_func_factory, temp_db, command, params):
mock_run_legacy = MockParamCapture() mock_run_legacy = mock_func_factory(nominatim.clicmd.refresh, 'run_legacy_script')
monkeypatch.setattr(nominatim.clicmd.refresh, 'run_legacy_script', mock_run_legacy)
assert 0 == call_nominatim('refresh', '--' + command) assert 0 == call_nominatim('refresh', '--' + command)
@@ -143,17 +156,15 @@ def test_refresh_legacy_command(monkeypatch, temp_db, command, params):
('address-levels', 'load_address_levels_from_file'), ('address-levels', 'load_address_levels_from_file'),
('functions', 'create_functions'), ('functions', 'create_functions'),
]) ])
def test_refresh_command(monkeypatch, temp_db, command, func): def test_refresh_command(mock_func_factory, temp_db, command, func):
func_mock = MockParamCapture() func_mock = mock_func_factory(nominatim.tools.refresh, func)
monkeypatch.setattr(nominatim.tools.refresh, func, func_mock)
assert 0 == call_nominatim('refresh', '--' + command) assert 0 == call_nominatim('refresh', '--' + command)
assert func_mock.called == 1 assert func_mock.called == 1
def test_refresh_importance_computed_after_wiki_import(monkeypatch, temp_db): def test_refresh_importance_computed_after_wiki_import(mock_func_factory, temp_db):
mock_run_legacy = MockParamCapture() mock_run_legacy = mock_func_factory(nominatim.clicmd.refresh, 'run_legacy_script')
monkeypatch.setattr(nominatim.clicmd.refresh, 'run_legacy_script', mock_run_legacy)
assert 0 == call_nominatim('refresh', '--importance', '--wiki-data') assert 0 == call_nominatim('refresh', '--importance', '--wiki-data')
@@ -165,9 +176,8 @@ def test_refresh_importance_computed_after_wiki_import(monkeypatch, temp_db):
(('--init', '--no-update-functions'), 'init_replication'), (('--init', '--no-update-functions'), 'init_replication'),
(('--check-for-updates',), 'check_for_updates') (('--check-for-updates',), 'check_for_updates')
]) ])
def test_replication_command(monkeypatch, temp_db, params, func): def test_replication_command(mock_func_factory, temp_db, params, func):
func_mock = MockParamCapture() func_mock = mock_func_factory(nominatim.tools.replication, func)
monkeypatch.setattr(nominatim.tools.replication, func, func_mock)
assert 0 == call_nominatim('replication', *params) assert 0 == call_nominatim('replication', *params)
assert func_mock.called == 1 assert func_mock.called == 1
@@ -188,11 +198,10 @@ def test_replication_update_bad_interval_for_geofabrik(monkeypatch, temp_db):
@pytest.mark.parametrize("state", [nominatim.tools.replication.UpdateState.UP_TO_DATE, @pytest.mark.parametrize("state", [nominatim.tools.replication.UpdateState.UP_TO_DATE,
nominatim.tools.replication.UpdateState.NO_CHANGES]) nominatim.tools.replication.UpdateState.NO_CHANGES])
def test_replication_update_once_no_index(monkeypatch, temp_db, temp_db_conn, def test_replication_update_once_no_index(mock_func_factory, temp_db, temp_db_conn,
status_table, state): status_table, state):
status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1) status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
func_mock = MockParamCapture(retval=state) func_mock = mock_func_factory(nominatim.tools.replication, 'update')
monkeypatch.setattr(nominatim.tools.replication, 'update', func_mock)
assert 0 == call_nominatim('replication', '--once', '--no-index') assert 0 == call_nominatim('replication', '--once', '--no-index')
@@ -236,9 +245,8 @@ def test_replication_update_continuous_no_change(monkeypatch, temp_db_conn, stat
assert sleep_mock.last_args[0] == 60 assert sleep_mock.last_args[0] == 60
def test_serve_command(monkeypatch): def test_serve_command(mock_func_factory):
func = MockParamCapture() func = mock_func_factory(nominatim.cli, 'run_php_server')
monkeypatch.setattr(nominatim.cli, 'run_php_server', func)
call_nominatim('serve') call_nominatim('serve')
@@ -254,9 +262,8 @@ def test_serve_command(monkeypatch):
('details', '--place_id', '10001'), ('details', '--place_id', '10001'),
('status',) ('status',)
]) ])
def test_api_commands_simple(monkeypatch, params): def test_api_commands_simple(mock_func_factory, params):
mock_run_api = MockParamCapture() mock_run_api = mock_func_factory(nominatim.clicmd.api, 'run_api_script')
monkeypatch.setattr(nominatim.clicmd.api, 'run_api_script', mock_run_api)
assert 0 == call_nominatim(*params) assert 0 == call_nominatim(*params)

View File

@@ -0,0 +1,51 @@
"""
Tests for freeze functions (removing unused database parts).
"""
import pytest
from nominatim.tools import freeze
# Tables that must survive drop_update_tables().
NOMINATIM_RUNTIME_TABLES = [
    'country_name', 'country_osm_grid',
    'location_postcode', 'location_property_osmline', 'location_property_tiger',
    'placex', 'place_addressline',
    'search_name',
    'word'
]
# Tables that drop_update_tables() is expected to remove. There is at least
# one table for every entry of freeze.UPDATE_TABLES.
NOMINATIM_DROP_TABLES = [
    'address_levels',
    'gb_postcode',
    'import_osmosis_log',
    'import_polygon_error',
    'location_area', 'location_area_country', 'location_area_large_100',
    'location_road_1',
    'place', 'planet_osm_nodes', 'planet_osm_rels', 'planet_osm_ways',
    'search_name_111',
    'us_postcode',
    'wikipedia_article', 'wikipedia_redirect'
]
def test_drop_tables(temp_db_conn, temp_db_cursor):
    """ drop_update_tables() removes the update-only tables and leaves the
        runtime tables in place.
    """
    every_table = NOMINATIM_RUNTIME_TABLES + NOMINATIM_DROP_TABLES
    for name in every_table:
        temp_db_cursor.execute('CREATE TABLE {} (id int)'.format(name))

    freeze.drop_update_tables(temp_db_conn)

    for kept in NOMINATIM_RUNTIME_TABLES:
        assert temp_db_cursor.table_exists(kept)

    for dropped in NOMINATIM_DROP_TABLES:
        assert not temp_db_cursor.table_exists(dropped)
def test_drop_flatnode_file_no_file():
    """ An empty file name is accepted without raising.
    """
    no_file = ''
    freeze.drop_flatnode_file(no_file)
def test_drop_flatnode_file_file_already_gone(tmp_path):
    """ A configured file that does not exist on disk is accepted without
        raising.
    """
    missing = tmp_path / 'something.store'
    freeze.drop_flatnode_file(str(missing))
def test_drop_flatnode_file_delte(tmp_path):
    """ An existing flatnode file is deleted.
    """
    target = tmp_path / 'flatnode.store'
    target.write_text('Some content')

    freeze.drop_flatnode_file(str(target))

    assert not target.exists()