move setup function to python

There are still back-calls to PHP for some of the sub-steps.
These need some larger refactoring to be moved to Python.
This commit is contained in:
Sarah Hoffmann
2021-02-26 15:02:39 +01:00
parent 3ee8d9fa75
commit 15b5906790
10 changed files with 342 additions and 102 deletions

View File

@@ -43,6 +43,11 @@ class _TestingCursor(psycopg2.extras.DictCursor):
WHERE tablename = %s""", (table, ))
return num == 1
def table_rows(self, table):
    """ Count the rows currently stored in `table`.

        The table name is appended to the query text verbatim.
    """
    query = 'SELECT count(*) FROM ' + table
    return self.scalar(query)
@pytest.fixture
def temp_db(monkeypatch):
@@ -109,8 +114,12 @@ def temp_db_cursor(temp_db):
@pytest.fixture
def table_factory(temp_db_cursor):
    """ Provide a function that creates a table through the test cursor
        and optionally fills it with rows.
    """
    def mk_table(name, definition='id INT', content=None):
        """ Create table `name` with the column `definition`.

            `content` may be a string, which is placed as-is between the
            parentheses of the VALUES clause, or an iterable whose
            elements each become one single-value row.
        """
        temp_db_cursor.execute('CREATE TABLE {} ({})'.format(name, definition))
        if content is not None:
            if not isinstance(content, str):
                # (1, 2, 3) turns into "1),(2),(3", which the surrounding
                # parentheses of the statement complete to "(1),(2),(3)".
                content = '),('.join([str(x) for x in content])
            temp_db_cursor.execute("INSERT INTO {} VALUES ({})".format(name, content))

    return mk_table
@@ -174,7 +183,7 @@ def place_row(place_table, temp_db_cursor):
temp_db_cursor.execute("INSERT INTO place VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)",
(osm_id or next(idseq), osm_type, cls, typ, names,
admin_level, address, extratags,
geom or 'SRID=4326;POINT(0 0 )'))
geom or 'SRID=4326;POINT(0 0)'))
return _insert
@@ -184,7 +193,7 @@ def placex_table(temp_db_with_extensions, temp_db_conn):
"""
with temp_db_conn.cursor() as cur:
cur.execute("""CREATE TABLE placex (
place_id BIGINT NOT NULL,
place_id BIGINT,
parent_place_id BIGINT,
linked_place_id BIGINT,
importance FLOAT,
@@ -207,8 +216,43 @@ def placex_table(temp_db_with_extensions, temp_db_conn):
country_code varchar(2),
housenumber TEXT,
postcode TEXT,
centroid GEOMETRY(Geometry, 4326))
""")
centroid GEOMETRY(Geometry, 4326))""")
temp_db_conn.commit()
@pytest.fixture
def osmline_table(temp_db_with_extensions, temp_db_conn):
    """ Create an empty location_property_osmline table in the test
        database and commit it.
    """
    create_sql = """CREATE TABLE location_property_osmline (
                        place_id BIGINT,
                        osm_id BIGINT,
                        parent_place_id BIGINT,
                        geometry_sector INTEGER,
                        indexed_date TIMESTAMP,
                        startnumber INTEGER,
                        endnumber INTEGER,
                        partition SMALLINT,
                        indexed_status SMALLINT,
                        linegeo GEOMETRY,
                        interpolationtype TEXT,
                        address HSTORE,
                        postcode TEXT,
                        country_code VARCHAR(2))"""

    with temp_db_conn.cursor() as cursor:
        cursor.execute(create_sql)

    temp_db_conn.commit()
@pytest.fixture
def word_table(temp_db, temp_db_conn):
    """ Create an empty word table in the test database and commit it.
    """
    create_sql = """CREATE TABLE word (
                        word_id INTEGER,
                        word_token text,
                        word text,
                        class text,
                        type text,
                        country_code varchar(2),
                        search_name_count INTEGER,
                        operator TEXT)"""

    with temp_db_conn.cursor() as cursor:
        cursor.execute(create_sql)

    temp_db_conn.commit()

View File

@@ -13,9 +13,11 @@ import nominatim.cli
import nominatim.clicmd.api
import nominatim.clicmd.refresh
import nominatim.clicmd.admin
import nominatim.clicmd.setup
import nominatim.indexer.indexer
import nominatim.tools.admin
import nominatim.tools.check_database
import nominatim.tools.database_import
import nominatim.tools.freeze
import nominatim.tools.refresh
@@ -61,7 +63,6 @@ def test_cli_help(capsys):
@pytest.mark.parametrize("command,script", [
(('import', '--continue', 'load-data'), 'setup'),
(('special-phrases',), 'specialphrases'),
(('add-data', '--tiger-data', 'tiger'), 'setup'),
(('add-data', '--file', 'foo.osm'), 'update'),
@@ -74,6 +75,36 @@ def test_legacy_commands_simple(mock_run_legacy, command, script):
assert mock_run_legacy.last_args[0] == script + '.php'
def test_import_missing_file(temp_db):
    """ Importing from a path that does not exist exits with code 1.
    """
    exit_code = call_nominatim('import', '--osm-file', 'sfsafegweweggdgw.reh.erh')

    assert exit_code == 1
def test_import_bad_file(temp_db):
    """ Passing '.' as the OSM file exits with code 1.
    """
    exit_code = call_nominatim('import', '--osm-file', '.')

    assert exit_code == 1
def test_import_full(temp_db, mock_func_factory):
    """ A full import succeeds, calls each mocked import step exactly
        once and calls create_functions more than once.
    """
    # Steps that are expected to be called exactly once.
    single_call_steps = (
        (nominatim.tools.database_import, 'setup_database_skeleton'),
        (nominatim.tools.database_import, 'install_module'),
        (nominatim.tools.database_import, 'import_osm_data'),
        (nominatim.tools.refresh, 'import_wikipedia_articles'),
        (nominatim.tools.database_import, 'truncate_data_tables'),
        (nominatim.tools.database_import, 'load_data'),
        (nominatim.indexer.indexer.Indexer, 'index_full'),
        (nominatim.tools.refresh, 'setup_website'),
    )
    mocks = [mock_func_factory(owner, func_name)
             for owner, func_name in single_call_steps]

    cf_mock = mock_func_factory(nominatim.tools.refresh, 'create_functions')
    # Replaced with a mock as well; its call count is not checked.
    mock_func_factory(nominatim.clicmd.setup, 'run_legacy_script')

    exit_code = call_nominatim('import', '--osm-file', __file__)
    assert exit_code == 0

    assert cf_mock.called > 1

    for step_mock in mocks:
        assert step_mock.called == 1
def test_freeze_command(mock_func_factory, temp_db):
mock_drop = mock_func_factory(nominatim.tools.freeze, 'drop_update_tables')
mock_flatnode = mock_func_factory(nominatim.tools.freeze, 'drop_flatnode_file')

View File

@@ -63,6 +63,10 @@ def test_check_database_indexes_bad(temp_db_conn, def_config):
assert chkdb.check_database_indexes(temp_db_conn, def_config) == chkdb.CheckState.FAIL
def test_check_database_indexes_valid(temp_db_conn, def_config):
    """ The index validity check returns OK for the test database.
    """
    state = chkdb.check_database_index_valid(temp_db_conn, def_config)

    assert state == chkdb.CheckState.OK
def test_check_tiger_table_disabled(temp_db_conn, def_config, monkeypatch):
monkeypatch.setenv('NOMINATIM_USE_US_TIGER_DATA' , 'no')
assert chkdb.check_tiger_table(temp_db_conn, def_config) == chkdb.CheckState.NOT_APPLICABLE

View File

@@ -24,6 +24,24 @@ def nonexistant_db():
with conn.cursor() as cur:
cur.execute('DROP DATABASE IF EXISTS {}'.format(dbname))
@pytest.mark.parametrize("no_partitions", (True, False))
def test_setup_skeleton(src_dir, nonexistant_db, no_partitions):
    """ Set up the database skeleton and check which partitions
        appear in the country_name table.
    """
    database_import.setup_database_skeleton('dbname=' + nonexistant_db,
                                            src_dir / 'data', no_partitions)

    conn = psycopg2.connect(database=nonexistant_db)
    try:
        with conn.cursor() as cur:
            cur.execute("SELECT distinct partition FROM country_name")
            partitions = {row[0] for row in cur}
    finally:
        conn.close()

    if no_partitions:
        assert partitions == {0}
    else:
        assert len(partitions) > 10
def test_create_db_success(nonexistant_db):
database_import.create_db('dbname=' + nonexistant_db, rouser='www-data')
@@ -79,6 +97,22 @@ def test_install_module(tmp_path):
assert outfile.stat().st_mode == 33261
def test_install_module_custom(tmp_path):
    """ With a non-empty third argument, install_module does not
        create a 'module' directory under tmp_path.
    """
    module_file = tmp_path / 'nominatim.so'
    module_file.write_text('TEST nomiantim.so')

    custom_path = str(tmp_path.resolve())
    database_import.install_module(tmp_path, tmp_path, custom_path)

    module_dir = tmp_path / 'module'
    assert not module_dir.exists()
def test_install_module_fail_access(temp_db_conn, tmp_path):
    """ install_module raises a UsageError when called with a database
        connection and a dummy text file in place of the module.
    """
    module_file = tmp_path / 'nominatim.so'
    module_file.write_text('TEST nomiantim.so')

    with pytest.raises(UsageError, match='.*module cannot be accessed.*'):
        database_import.install_module(tmp_path, tmp_path, '', conn=temp_db_conn)
def test_import_base_data(src_dir, temp_db, temp_db_cursor):
temp_db_cursor.execute('CREATE EXTENSION hstore')
temp_db_cursor.execute('CREATE EXTENSION postgis')
@@ -134,3 +168,35 @@ def test_import_osm_data_default_cache(temp_db_cursor,osm2pgsql_options):
osm2pgsql_options['osm2pgsql_cache'] = 0
database_import.import_osm_data(Path(__file__), osm2pgsql_options)
def test_truncate_database_tables(temp_db_conn, temp_db_cursor, table_factory):
    """ After truncate_data_tables every listed table is empty.
    """
    table_names = ('word', 'placex', 'place_addressline', 'location_area',
                   'location_area_country', 'location_property',
                   'location_property_tiger', 'location_property_osmline',
                   'location_postcode', 'search_name', 'location_road_23')

    # Give each table three rows so that truncation has a visible effect.
    for name in table_names:
        table_factory(name, content=(1, 2, 3))

    database_import.truncate_data_tables(temp_db_conn, max_word_frequency=23)

    for name in table_names:
        assert temp_db_cursor.table_rows(name) == 0
@pytest.mark.parametrize("threads", (1, 5))
def test_load_data(dsn, src_dir, place_row, placex_table, osmline_table, word_table,
                   temp_db_cursor, threads):
    """ Load 30 default places and one 'houses' way and check the row
        counts in placex and location_property_osmline.
    """
    # Stand-in SQL functions that always return 'a'.
    stub_function_sql = """CREATE FUNCTION {} (src TEXT)
                           RETURNS TEXT AS $$ SELECT 'a' $$ LANGUAGE SQL
                        """
    for func_name in ('make_keywords', 'getorcreate_housenumber_id', 'make_standard_name'):
        temp_db_cursor.execute(stub_function_sql.format(func_name))

    for osm_id in range(100, 130):
        place_row(osm_id=osm_id)
    place_row(osm_type='W', osm_id=342, cls='place', typ='houses',
              geom='SRID=4326;LINESTRING(0 0, 10 10)')

    database_import.load_data(dsn, src_dir / 'data', threads)

    placex_count = temp_db_cursor.table_rows('placex')
    assert placex_count == 30
    osmline_count = temp_db_cursor.table_rows('location_property_osmline')
    assert osmline_count == 1