port freeze function to python

This commit is contained in:
Sarah Hoffmann
2021-02-17 21:43:15 +01:00
parent 42ecd535b3
commit 101a1f895d
8 changed files with 182 additions and 99 deletions

View File

@@ -657,50 +657,7 @@ class SetupFunctions
public function drop()
{
info('Drop tables only required for updates');
// The implementation is potentially a bit dangerous because it uses
// a positive selection of tables to keep, and deletes everything else.
// Including any tables that the unsuspecting user might have manually
// created. USE AT YOUR OWN PERIL.
// tables we want to keep. everything else goes.
$aKeepTables = array(
'*columns',
'import_polygon_*',
'import_status',
'place_addressline',
'location_postcode',
'location_property*',
'placex',
'search_name',
'seq_*',
'word',
'query_log',
'new_query_log',
'spatial_ref_sys',
'country_name',
'place_classtype_*',
'country_osm_grid'
);
$aDropTables = array();
$aHaveTables = $this->db()->getListOfTables();
foreach ($aHaveTables as $sTable) {
$bFound = false;
foreach ($aKeepTables as $sKeep) {
if (fnmatch($sKeep, $sTable)) {
$bFound = true;
break;
}
}
if (!$bFound) array_push($aDropTables, $sTable);
}
foreach ($aDropTables as $sDrop) {
$this->dropTable($sDrop);
}
$this->removeFlatnodeFile();
(clone($this->oNominatimCmd))->addParams('freeze')->run();
}
/**

View File

@@ -173,27 +173,6 @@ class SetupAll:
return run_legacy_script(*params, nominatim_env=args)
class SetupFreeze:
    """\
    Make database read-only.
    About half of data in the Nominatim database is kept only to be able to
    keep the data up-to-date with new changes made in OpenStreetMap. This
    command drops all this data and only keeps the part needed for geocoding
    itself.
    This command has the same effect as the `--no-updates` option for imports.
    """

    @staticmethod
    def add_args(parser):
        """ Hook for registering command-line options; there are none. """

    @staticmethod
    def run(args):
        """ Invoke the legacy script 'setup.php' with '--drop', handing
            `args` over as its environment, and return its result.
        """
        script_params = ('setup.php', '--drop')
        return run_legacy_script(*script_params, nominatim_env=args)
class SetupSpecialPhrases:
"""\
Maintain special phrases.
@@ -352,7 +331,7 @@ def nominatim(**kwargs):
parser = CommandlineParser('nominatim', nominatim.__doc__)
parser.add_subcommand('import', SetupAll)
parser.add_subcommand('freeze', SetupFreeze)
parser.add_subcommand('freeze', clicmd.SetupFreeze)
parser.add_subcommand('replication', clicmd.UpdateReplication)
parser.add_subcommand('special-phrases', SetupSpecialPhrases)

View File

@@ -7,3 +7,4 @@ from .api import APISearch, APIReverse, APILookup, APIDetails, APIStatus
from .index import UpdateIndex
from .refresh import UpdateRefresh
from .admin import AdminFuncs
from .freeze import SetupFreeze

View File

@@ -0,0 +1,37 @@
"""
Implementation of the 'freeze' subcommand.
"""
from ..db.connection import connect
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid importing modules that may end up unused.
# pylint: disable=E0012,C0415
class SetupFreeze:
    """\
    Make database read-only.
    About half of data in the Nominatim database is kept only to be able to
    keep the data up-to-date with new changes made in OpenStreetMap. This
    command drops all this data and only keeps the part needed for geocoding
    itself.
    This command has the same effect as the `--no-updates` option for imports.
    """

    @staticmethod
    def add_args(parser):
        pass # No options

    @staticmethod
    def run(args):
        """ Drop the update-only tables and the flatnode file.

            Reads the database DSN and the FLATNODE_FILE setting from
            `args.config`. Returns 0 on success; errors raised by the
            freeze functions propagate to the caller.
        """
        # Imported here so the tool module is only loaded when the
        # subcommand is actually run.
        from ..tools import freeze

        conn = connect(args.config.get_libpq_dsn())
        try:
            freeze.drop_update_tables(conn)
            freeze.drop_flatnode_file(args.config.FLATNODE_FILE)
        finally:
            # Always release the connection, also when a step above fails.
            conn.close()

        return 0

43
nominatim/tools/freeze.py Normal file
View File

@@ -0,0 +1,43 @@
"""
Functions for removing unnecessary data from the database.
"""
from pathlib import Path
# Tables that are only needed for applying updates. The entries are SQL
# LIKE patterns matched against pg_tables.tablename, so '%' stands for any
# sequence of characters and '_' for any single character.
UPDATE_TABLES = [
    'address_levels',
    'gb_postcode',
    'import_osmosis_log',
    'import_polygon_%',
    'location_area%',
    'location_road%',
    'place',
    'planet_osm_%',
    'search_name_%',
    'us_postcode',
    'wikipedia_%'
]


def drop_update_tables(conn):
    """ Drop all tables only necessary for updating the database from
        OSM replication data.

        `conn` is an open database connection. All tables listed in
        pg_tables whose name matches one of the UPDATE_TABLES patterns are
        dropped (with CASCADE) and the change is committed.
    """
    where = ' or '.join("(tablename LIKE '{}')".format(t) for t in UPDATE_TABLES)

    with conn.cursor() as cur:
        cur.execute("SELECT tablename FROM pg_tables WHERE " + where)
        # Collect the names first because the cursor is reused for the drops.
        tables = [r[0] for r in cur]

        for table in tables:
            # Double embedded quotes so an unusual table name cannot
            # terminate the quoted identifier early.
            quoted = table.replace('"', '""')
            cur.execute('DROP TABLE IF EXISTS "{}" CASCADE'.format(quoted))

    conn.commit()
def drop_flatnode_file(fname):
    """ Remove the flatnode file if it exists.

        Nothing happens when `fname` is empty or None, or when there is
        no file at that path.
    """
    if not fname:
        return

    # Try the removal directly instead of checking for existence first;
    # this avoids failing when the file vanishes between check and unlink.
    try:
        Path(fname).unlink()
    except (FileNotFoundError, NotADirectoryError):
        pass

View File

@@ -36,6 +36,14 @@ class _TestingCursor(psycopg2.extras.DictCursor):
return set((tuple(row) for row in self))
def table_exists(self, table):
    """ Tell whether pg_tables lists exactly one table called `table`.
    """
    query = """SELECT count(*) FROM pg_tables
               WHERE tablename = %s"""
    return self.scalar(query, (table, )) == 1
@pytest.fixture
def temp_db(monkeypatch):
""" Create an empty database for the test. The database name is also

View File

@@ -17,6 +17,7 @@ import nominatim.clicmd.admin
import nominatim.indexer.indexer
import nominatim.tools.refresh
import nominatim.tools.replication
import nominatim.tools.freeze
from nominatim.errors import UsageError
from nominatim.db import status
@@ -50,6 +51,14 @@ def mock_run_legacy(monkeypatch):
monkeypatch.setattr(nominatim.cli, 'run_legacy_script', mock)
return mock
@pytest.fixture
def mock_func_factory(monkeypatch):
    """ Provide a function that replaces `func` in `module` with a fresh
        MockParamCapture (via monkeypatch) and returns that mock.
    """
    def _install(module, func):
        capture = MockParamCapture()
        monkeypatch.setattr(module, func, capture)
        return capture

    return _install
def test_cli_help(capsys):
""" Running nominatim tool without arguments prints help.
@@ -62,7 +71,6 @@ def test_cli_help(capsys):
@pytest.mark.parametrize("command,script", [
(('import', '--continue', 'load-data'), 'setup'),
(('freeze',), 'setup'),
(('special-phrases',), 'specialphrases'),
(('add-data', '--tiger-data', 'tiger'), 'setup'),
(('add-data', '--file', 'foo.osm'), 'update'),
@@ -75,22 +83,30 @@ def test_legacy_commands_simple(mock_run_legacy, command, script):
assert mock_run_legacy.last_args[0] == script + '.php'
def test_freeze_command(mock_func_factory, temp_db):
    """ Running 'freeze' returns 0 and records one call for each of the
        two patched freeze tool functions.
    """
    tools = nominatim.tools.freeze
    drop_tables = mock_func_factory(tools, 'drop_update_tables')
    drop_flatnode = mock_func_factory(tools, 'drop_flatnode_file')

    exit_code = call_nominatim('freeze')

    assert exit_code == 0
    assert drop_tables.called == 1
    assert drop_flatnode.called == 1
@pytest.mark.parametrize("params", [('--warm', ),
('--warm', '--reverse-only'),
('--warm', '--search-only'),
('--check-database', )])
def test_admin_command_legacy(monkeypatch, params):
mock_run_legacy = MockParamCapture()
monkeypatch.setattr(nominatim.clicmd.admin, 'run_legacy_script', mock_run_legacy)
def test_admin_command_legacy(mock_func_factory, params):
mock_run_legacy = mock_func_factory(nominatim.clicmd.admin, 'run_legacy_script')
assert 0 == call_nominatim('admin', *params)
assert mock_run_legacy.called == 1
@pytest.mark.parametrize("func, params", [('analyse_indexing', ('--analyse-indexing', ))])
def test_admin_command_tool(temp_db, monkeypatch, func, params):
mock = MockParamCapture()
monkeypatch.setattr(nominatim.tools.admin, func, mock)
def test_admin_command_tool(temp_db, mock_func_factory, func, params):
mock = mock_func_factory(nominatim.tools.admin, func)
assert 0 == call_nominatim('admin', *params)
assert mock.called == 1
@@ -109,12 +125,10 @@ def test_add_data_command(mock_run_legacy, name, oid):
(['--boundaries-only'], 1, 0),
(['--no-boundaries'], 0, 1),
(['--boundaries-only', '--no-boundaries'], 0, 0)])
def test_index_command(monkeypatch, temp_db_cursor, params, do_bnds, do_ranks):
def test_index_command(mock_func_factory, temp_db_cursor, params, do_bnds, do_ranks):
temp_db_cursor.execute("CREATE TABLE import_status (indexed bool)")
bnd_mock = MockParamCapture()
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', bnd_mock)
rank_mock = MockParamCapture()
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', rank_mock)
bnd_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_boundaries')
rank_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_by_rank')
assert 0 == call_nominatim('index', *params)
@@ -127,9 +141,8 @@ def test_index_command(monkeypatch, temp_db_cursor, params, do_bnds, do_ranks):
('importance', ('update.php', '--recompute-importance')),
('website', ('setup.php', '--setup-website')),
])
def test_refresh_legacy_command(monkeypatch, temp_db, command, params):
mock_run_legacy = MockParamCapture()
monkeypatch.setattr(nominatim.clicmd.refresh, 'run_legacy_script', mock_run_legacy)
def test_refresh_legacy_command(mock_func_factory, temp_db, command, params):
mock_run_legacy = mock_func_factory(nominatim.clicmd.refresh, 'run_legacy_script')
assert 0 == call_nominatim('refresh', '--' + command)
@@ -143,17 +156,15 @@ def test_refresh_legacy_command(monkeypatch, temp_db, command, params):
('address-levels', 'load_address_levels_from_file'),
('functions', 'create_functions'),
])
def test_refresh_command(monkeypatch, temp_db, command, func):
func_mock = MockParamCapture()
monkeypatch.setattr(nominatim.tools.refresh, func, func_mock)
def test_refresh_command(mock_func_factory, temp_db, command, func):
func_mock = mock_func_factory(nominatim.tools.refresh, func)
assert 0 == call_nominatim('refresh', '--' + command)
assert func_mock.called == 1
def test_refresh_importance_computed_after_wiki_import(monkeypatch, temp_db):
mock_run_legacy = MockParamCapture()
monkeypatch.setattr(nominatim.clicmd.refresh, 'run_legacy_script', mock_run_legacy)
def test_refresh_importance_computed_after_wiki_import(mock_func_factory, temp_db):
mock_run_legacy = mock_func_factory(nominatim.clicmd.refresh, 'run_legacy_script')
assert 0 == call_nominatim('refresh', '--importance', '--wiki-data')
@@ -165,9 +176,8 @@ def test_refresh_importance_computed_after_wiki_import(monkeypatch, temp_db):
(('--init', '--no-update-functions'), 'init_replication'),
(('--check-for-updates',), 'check_for_updates')
])
def test_replication_command(monkeypatch, temp_db, params, func):
func_mock = MockParamCapture()
monkeypatch.setattr(nominatim.tools.replication, func, func_mock)
def test_replication_command(mock_func_factory, temp_db, params, func):
func_mock = mock_func_factory(nominatim.tools.replication, func)
assert 0 == call_nominatim('replication', *params)
assert func_mock.called == 1
@@ -188,11 +198,10 @@ def test_replication_update_bad_interval_for_geofabrik(monkeypatch, temp_db):
@pytest.mark.parametrize("state", [nominatim.tools.replication.UpdateState.UP_TO_DATE,
nominatim.tools.replication.UpdateState.NO_CHANGES])
def test_replication_update_once_no_index(monkeypatch, temp_db, temp_db_conn,
def test_replication_update_once_no_index(mock_func_factory, temp_db, temp_db_conn,
status_table, state):
status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1)
func_mock = MockParamCapture(retval=state)
monkeypatch.setattr(nominatim.tools.replication, 'update', func_mock)
func_mock = mock_func_factory(nominatim.tools.replication, 'update')
assert 0 == call_nominatim('replication', '--once', '--no-index')
@@ -236,9 +245,8 @@ def test_replication_update_continuous_no_change(monkeypatch, temp_db_conn, stat
assert sleep_mock.last_args[0] == 60
def test_serve_command(monkeypatch):
func = MockParamCapture()
monkeypatch.setattr(nominatim.cli, 'run_php_server', func)
def test_serve_command(mock_func_factory):
func = mock_func_factory(nominatim.cli, 'run_php_server')
call_nominatim('serve')
@@ -254,9 +262,8 @@ def test_serve_command(monkeypatch):
('details', '--place_id', '10001'),
('status',)
])
def test_api_commands_simple(monkeypatch, params):
mock_run_api = MockParamCapture()
monkeypatch.setattr(nominatim.clicmd.api, 'run_api_script', mock_run_api)
def test_api_commands_simple(mock_func_factory, params):
mock_run_api = mock_func_factory(nominatim.clicmd.api, 'run_api_script')
assert 0 == call_nominatim(*params)

View File

@@ -0,0 +1,51 @@
"""
Tests for freeze functions (removing unused database parts).
"""
import pytest
from nominatim.tools import freeze
# Tables that have to survive a freeze.
NOMINATIM_RUNTIME_TABLES = [
    'country_name', 'country_osm_grid',
    'location_postcode', 'location_property_osmline', 'location_property_tiger',
    'placex', 'place_addressline',
    'search_name',
    'word'
]

# Tables that a freeze is expected to remove.
NOMINATIM_DROP_TABLES = [
    'address_levels',
    'location_area', 'location_area_country', 'location_area_large_100',
    'location_road_1',
    'place', 'planet_osm_nodes', 'planet_osm_rels', 'planet_osm_ways',
    'search_name_111',
    'wikipedia_article', 'wikipedia_redirect'
]
def test_drop_tables(temp_db_conn, temp_db_cursor):
    """ After drop_update_tables() every table from NOMINATIM_RUNTIME_TABLES
        still exists and every table from NOMINATIM_DROP_TABLES is gone.
    """
    all_tables = NOMINATIM_RUNTIME_TABLES + NOMINATIM_DROP_TABLES
    for name in all_tables:
        temp_db_cursor.execute('CREATE TABLE {} (id int)'.format(name))

    freeze.drop_update_tables(temp_db_conn)

    for kept in NOMINATIM_RUNTIME_TABLES:
        assert temp_db_cursor.table_exists(kept)

    for dropped in NOMINATIM_DROP_TABLES:
        assert not temp_db_cursor.table_exists(dropped)
def test_drop_flatnode_file_no_file():
    """ An empty file name is accepted without raising. """
    no_file = ''
    freeze.drop_flatnode_file(no_file)
def test_drop_flatnode_file_file_already_gone(tmp_path):
    """ A path to a file that was never created is accepted without raising. """
    missing = tmp_path / 'something.store'
    freeze.drop_flatnode_file(str(missing))
def test_drop_flatnode_file_delte(tmp_path):
    """ An existing flatnode file is removed from disk. """
    store = tmp_path / 'flatnode.store'
    store.write_text('Some content')

    freeze.drop_flatnode_file(str(store))

    assert not store.exists()