Merge pull request #2963 from lonvia/add-sqlalchemy-schema

Add table definitions for SQLAlchemy
This commit is contained in:
Sarah Hoffmann
2023-01-30 11:17:22 +01:00
committed by GitHub
12 changed files with 377 additions and 58 deletions

View File

@@ -27,10 +27,10 @@ runs:
run: |
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION}
if [ "$FLAVOUR" == "oldstuff" ]; then
pip3 install MarkupSafe==2.0.1 python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4 datrie asyncpg
pip3 install MarkupSafe==2.0.1 python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4 GeoAlchemy2==0.10.0 datrie asyncpg
else
sudo apt-get install -y -qq python3-icu python3-datrie python3-pyosmium python3-jinja2 python3-psutil python3-psycopg2 python3-dotenv python3-yaml python3-asyncpg
pip3 install sqlalchemy
pip3 install sqlalchemy GeoAlchemy2
fi
shell: bash
env:

View File

@@ -99,18 +99,22 @@ jobs:
if: (matrix.flavour == 'oldstuff') || (matrix.flavour == 'ubuntu-22')
- name: Install test prerequsites (from apt for Ununtu 2x)
run: sudo apt-get install -y -qq python3-pytest uvicorn
run: sudo apt-get install -y -qq python3-pytest python3-pytest-asyncio uvicorn
if: matrix.flavour != 'oldstuff'
- name: Install newer pytest-asyncio
run: pip3 install -U pytest-asyncio
if: matrix.flavour == 'ubuntu-20'
- name: Install test prerequsites (from pip for Ubuntu 18)
run: pip3 install pytest uvicorn
run: pip3 install pytest pytest-asyncio uvicorn
if: matrix.flavour == 'oldstuff'
- name: Install Python webservers
run: pip3 install falcon sanic sanic-testing sanic-cors starlette
- name: Install latest pylint/mypy
run: pip3 install -U pylint mypy types-PyYAML types-jinja2 types-psycopg2 types-psutil types-requests types-ujson typing-extensions asgi_lifespan sqlalchemy2-stubs
- name: Install latest pylint
run: pip3 install -U pylint asgi_lifespan
- name: PHP linting
run: phpcs --report-width=120 .
@@ -123,7 +127,6 @@ jobs:
- name: PHP unit tests
run: phpunit ./
working-directory: Nominatim/test/php
if: ${{ (matrix.ubuntu == 20) || (matrix.ubuntu == 22) }}
- name: Python unit tests
run: python3 -m pytest test/python
@@ -134,8 +137,8 @@ jobs:
python3 -m behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3
working-directory: Nominatim/test/bdd
- name: Install newer Python packages (for typechecking info)
run: pip3 install -U osmium uvicorn
- name: Install mypy and typechecking info
run: pip3 install -U mypy osmium uvicorn types-PyYAML types-jinja2 types-psycopg2 types-psutil types-requests types-ujson typing-extensions
if: matrix.flavour != 'oldstuff'
- name: Python static typechecking

View File

@@ -18,3 +18,6 @@ ignore_missing_imports = True
[mypy-falcon.*]
ignore_missing_imports = True
[mypy-geoalchemy2.*]
ignore_missing_imports = True

View File

@@ -49,6 +49,7 @@ For running Nominatim:
* [psutil](https://github.com/giampaolo/psutil)
* [Jinja2](https://palletsprojects.com/p/jinja/)
* [SQLAlchemy](https://www.sqlalchemy.org/) (1.4+ with greenlet support)
* [GeoAlchemy2](https://geoalchemy-2.readthedocs.io/) (0.10+)
* [asyncpg](https://magicstack.github.io/asyncpg) (0.8+)
* [PyICU](https://pypi.org/project/PyICU/)
* [PyYaml](https://pyyaml.org/) (5.1+)

View File

@@ -36,6 +36,7 @@ It has the following additional requirements:
* [mypy](http://mypy-lang.org/) (plus typing information for external libs)
* [Python Typing Extensions](https://github.com/python/typing_extensions) (for Python < 3.9)
* [pytest](https://pytest.org)
* [pytest-asyncio](https://pytest-asyncio.readthedocs.io)
For testing the Python search frontend, you need to install extra dependencies
depending on your choice of webserver framework:
@@ -62,9 +63,9 @@ To install all necessary packages run:
sudo apt install php-cgi phpunit php-codesniffer \
python3-pip python3-setuptools python3-dev
pip3 install --user behave mkdocs mkdocstrings pytest pylint \
pip3 install --user behave mkdocs mkdocstrings pytest pytest-asyncio pylint \
mypy types-PyYAML types-jinja2 types-psycopg2 types-psutil \
types-ujosn \
types-ujosn types-requests typing-extensions\
sanic-testing httpx asgi-lifespan
```

View File

@@ -0,0 +1,86 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Extended SQLAlchemy connection class that also includes access to the schema.
"""
from typing import Any, Mapping, Sequence, Union, Dict, cast
import sqlalchemy as sa
from sqlalchemy.ext.asyncio import AsyncConnection
from nominatim.db.sqlalchemy_schema import SearchTables
class SearchConnection:
""" An extended SQLAlchemy connection class, that also contains
then table definitions. The underlying asynchronous SQLAlchemy
connection can be accessed with the 'connection' property.
The 't' property is the collection of Nominatim tables.
"""
def __init__(self, conn: AsyncConnection,
tables: SearchTables,
properties: Dict[str, Any]) -> None:
self.connection = conn
self.t = tables # pylint: disable=invalid-name
self._property_cache = properties
async def scalar(self, sql: sa.sql.base.Executable,
params: Union[Mapping[str, Any], None] = None
) -> Any:
""" Execute a 'scalar()' query on the connection.
"""
return await self.connection.scalar(sql, params)
async def execute(self, sql: sa.sql.base.Executable,
params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None] = None
) -> 'sa.engine.Result[Any]':
""" Execute a 'execute()' query on the connection.
"""
return await self.connection.execute(sql, params)
async def get_property(self, name: str, cached: bool = True) -> str:
""" Get a property from Nominatim's property table.
Property values are normally cached so that they are only
retrieved from the database when they are queried for the
first time with this function. Set 'cached' to False to force
reading the property from the database.
Raises a ValueError if the property does not exist.
"""
if name.startswith('DB:'):
raise ValueError(f"Illegal property value '{name}'.")
if cached and name in self._property_cache:
return cast(str, self._property_cache[name])
sql = sa.select(self.t.properties.c.value)\
.where(self.t.properties.c.property == name)
value = await self.connection.scalar(sql)
if value is None:
raise ValueError(f"Property '{name}' not found in database.")
self._property_cache[name] = cast(str, value)
return cast(str, value)
async def get_db_property(self, name: str) -> Any:
""" Get a setting from the database. At the moment, only
'server_version', the version of the database software, can
be retrieved with this function.
Raises a ValueError if the property does not exist.
"""
if name != 'server_version':
raise ValueError(f"DB setting '{name}' not found in database.")
return self._property_cache['DB:server_version']

View File

@@ -7,7 +7,7 @@
"""
Implementation of classes for API access via libraries.
"""
from typing import Mapping, Optional, Any, AsyncIterator
from typing import Mapping, Optional, Any, AsyncIterator, Dict
import asyncio
import contextlib
from pathlib import Path
@@ -16,8 +16,10 @@ import sqlalchemy as sa
import sqlalchemy.ext.asyncio as sa_asyncio
import asyncpg
from nominatim.db.sqlalchemy_schema import SearchTables
from nominatim.config import Configuration
from nominatim.api.status import get_status, StatusResult
from nominatim.api.connection import SearchConnection
class NominatimAPIAsync:
""" API loader asynchornous version.
@@ -29,6 +31,8 @@ class NominatimAPIAsync:
self._engine_lock = asyncio.Lock()
self._engine: Optional[sa_asyncio.AsyncEngine] = None
self._tables: Optional[SearchTables] = None
self._property_cache: Dict[str, Any] = {'DB:server_version': 0}
async def setup_database(self) -> None:
@@ -61,18 +65,21 @@ class NominatimAPIAsync:
try:
async with engine.begin() as conn:
result = await conn.scalar(sa.text('SHOW server_version_num'))
self.server_version = int(result)
server_version = int(result)
except asyncpg.PostgresError:
self.server_version = 0
server_version = 0
if self.server_version >= 110000:
@sa.event.listens_for(engine.sync_engine, "connect") # type: ignore[misc]
if server_version >= 110000:
@sa.event.listens_for(engine.sync_engine, "connect")
def _on_connect(dbapi_con: Any, _: Any) -> None:
cursor = dbapi_con.cursor()
cursor.execute("SET jit_above_cost TO '-1'")
# Make sure that all connections get the new settings
await self.close()
self._property_cache['DB:server_version'] = server_version
self._tables = SearchTables(sa.MetaData(), engine.name) # pylint: disable=no-member
self._engine = engine
@@ -86,7 +93,7 @@ class NominatimAPIAsync:
@contextlib.asynccontextmanager
async def begin(self) -> AsyncIterator[sa_asyncio.AsyncConnection]:
async def begin(self) -> AsyncIterator[SearchConnection]:
""" Create a new connection with automatic transaction handling.
This function may be used to get low-level access to the database.
@@ -97,9 +104,10 @@ class NominatimAPIAsync:
await self.setup_database()
assert self._engine is not None
assert self._tables is not None
async with self._engine.begin() as conn:
yield conn
yield SearchConnection(conn, self._tables, self._property_cache)
async def status(self) -> StatusResult:

View File

@@ -7,58 +7,40 @@
"""
Classes and function releated to status call.
"""
from typing import Optional, cast
from typing import Optional
import datetime as dt
import dataclasses
import sqlalchemy as sa
from sqlalchemy.ext.asyncio.engine import AsyncConnection
import asyncpg
from nominatim.api.connection import SearchConnection
from nominatim import version
@dataclasses.dataclass
class StatusResult:
""" Result of a call to the status API.
"""
def __init__(self, status: int, msg: str):
self.status = status
self.message = msg
self.software_version = version.NOMINATIM_VERSION
self.data_updated: Optional[dt.datetime] = None
self.database_version: Optional[version.NominatimVersion] = None
status: int
message: str
software_version = version.NOMINATIM_VERSION
data_updated: Optional[dt.datetime] = None
database_version: Optional[version.NominatimVersion] = None
async def _get_database_date(conn: AsyncConnection) -> Optional[dt.datetime]:
""" Query the database date.
"""
sql = sa.text('SELECT lastimportdate FROM import_status LIMIT 1')
result = await conn.execute(sql)
for row in result:
return cast(dt.datetime, row[0])
return None
async def _get_database_version(conn: AsyncConnection) -> Optional[version.NominatimVersion]:
sql = sa.text("""SELECT value FROM nominatim_properties
WHERE property = 'database_version'""")
result = await conn.execute(sql)
for row in result:
return version.parse_version(cast(str, row[0]))
return None
async def get_status(conn: AsyncConnection) -> StatusResult:
async def get_status(conn: SearchConnection) -> StatusResult:
""" Execute a status API call.
"""
status = StatusResult(0, 'OK')
# Last update date
sql = sa.select(conn.t.import_status.c.lastimportdate).limit(1)
status.data_updated = await conn.scalar(sql)
# Database version
try:
status.data_updated = await _get_database_date(conn)
status.database_version = await _get_database_version(conn)
except asyncpg.PostgresError:
return StatusResult(700, 'Database connection failed')
verstr = await conn.get_property('database_version')
status.database_version = version.parse_version(verstr)
except ValueError:
pass
return status

View File

@@ -0,0 +1,142 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
SQLAlchemy definitions for all tables used by the frontend.
"""
from typing import Any
import sqlalchemy as sa
from geoalchemy2 import Geometry
from sqlalchemy.dialects.postgresql import HSTORE, ARRAY, JSONB
from sqlalchemy.dialects.sqlite import JSON as sqlite_json
#pylint: disable=too-many-instance-attributes
class SearchTables:
""" Data class that holds the tables of the Nominatim database.
"""
def __init__(self, meta: sa.MetaData, engine_name: str) -> None:
if engine_name == 'postgresql':
Composite: Any = HSTORE
Json: Any = JSONB
IntArray: Any = ARRAY(sa.Integer()) #pylint: disable=invalid-name
elif engine_name == 'sqlite':
Composite = sqlite_json
Json = sqlite_json
IntArray = sqlite_json
else:
raise ValueError("Only 'postgresql' and 'sqlite' engines are supported.")
self.meta = meta
self.import_status = sa.Table('import_status', meta,
sa.Column('lastimportdate', sa.DateTime(True), nullable=False),
sa.Column('sequence_id', sa.Integer),
sa.Column('indexed', sa.Boolean))
self.properties = sa.Table('nominatim_properties', meta,
sa.Column('property', sa.Text, nullable=False),
sa.Column('value', sa.Text))
self.placex = sa.Table('placex', meta,
sa.Column('place_id', sa.BigInteger, nullable=False, unique=True),
sa.Column('parent_place_id', sa.BigInteger),
sa.Column('linked_place_id', sa.BigInteger),
sa.Column('importance', sa.Float),
sa.Column('indexed_date', sa.DateTime),
sa.Column('rank_address', sa.SmallInteger),
sa.Column('rank_search', sa.SmallInteger),
sa.Column('partition', sa.SmallInteger),
sa.Column('indexed_status', sa.SmallInteger),
sa.Column('osm_type', sa.String(1), nullable=False),
sa.Column('osm_id', sa.BigInteger, nullable=False),
sa.Column('class', sa.Text, nullable=False, key='class_'),
sa.Column('type', sa.Text, nullable=False),
sa.Column('admin_level', sa.SmallInteger),
sa.Column('name', Composite),
sa.Column('address', Composite),
sa.Column('extratags', Composite),
sa.Column('geometry', Geometry(srid=4326), nullable=False),
sa.Column('wikipedia', sa.Text),
sa.Column('country_code', sa.String(2)),
sa.Column('housenumber', sa.Text),
sa.Column('postcode', sa.Text),
sa.Column('centroid', Geometry(srid=4326, spatial_index=False)))
self.addressline = sa.Table('place_addressline', meta,
sa.Column('place_id', sa.BigInteger, index=True),
sa.Column('address_place_id', sa.BigInteger, index=True),
sa.Column('distance', sa.Float),
sa.Column('cached_rank_address', sa.SmallInteger),
sa.Column('fromarea', sa.Boolean),
sa.Column('isaddress', sa.Boolean))
self.postcode = sa.Table('location_postcode', meta,
sa.Column('place_id', sa.BigInteger, unique=True),
sa.Column('parent_place_id', sa.BigInteger),
sa.Column('rank_search', sa.SmallInteger),
sa.Column('rank_address', sa.SmallInteger),
sa.Column('indexed_status', sa.SmallInteger),
sa.Column('indexed_date', sa.DateTime),
sa.Column('country_code', sa.String(2)),
sa.Column('postcode', sa.Text, index=True),
sa.Column('geometry', Geometry(srid=4326)))
self.osmline = sa.Table('location_property_osmline', meta,
sa.Column('place_id', sa.BigInteger, nullable=False, unique=True),
sa.Column('osm_id', sa.BigInteger),
sa.Column('parent_place_id', sa.BigInteger),
sa.Column('indexed_date', sa.DateTime),
sa.Column('startnumber', sa.Integer),
sa.Column('endnumber', sa.Integer),
sa.Column('step', sa.SmallInteger),
sa.Column('partition', sa.SmallInteger),
sa.Column('indexed_status', sa.SmallInteger),
sa.Column('linegeo', Geometry(srid=4326)),
sa.Column('address', Composite),
sa.Column('postcode', sa.Text),
sa.Column('country_code', sa.String(2)))
self.word = sa.Table('word', meta,
sa.Column('word_id', sa.Integer),
sa.Column('word_token', sa.Text, nullable=False),
sa.Column('type', sa.Text, nullable=False),
sa.Column('word', sa.Text),
sa.Column('info', Json))
self.country_name = sa.Table('country_name', meta,
sa.Column('country_code', sa.String(2)),
sa.Column('name', Composite),
sa.Column('derived_name', Composite),
sa.Column('country_default_language_code', sa.Text),
sa.Column('partition', sa.Integer))
self.country_grid = sa.Table('country_osm_grid', meta,
sa.Column('country_code', sa.String(2)),
sa.Column('area', sa.Float),
sa.Column('geometry', Geometry(srid=4326)))
# The following tables are not necessarily present.
self.search_name = sa.Table('search_name', meta,
sa.Column('place_id', sa.BigInteger, index=True),
sa.Column('importance', sa.Float),
sa.Column('search_rank', sa.SmallInteger),
sa.Column('address_rank', sa.SmallInteger),
sa.Column('name_vector', IntArray, index=True),
sa.Column('nameaddress_vector', IntArray, index=True),
sa.Column('country_code', sa.String(2)),
sa.Column('centroid', Geometry(srid=4326)))
self.tiger = sa.Table('location_property_tiger', meta,
sa.Column('place_id', sa.BigInteger),
sa.Column('parent_place_id', sa.BigInteger),
sa.Column('startnumber', sa.Integer),
sa.Column('endnumber', sa.Integer),
sa.Column('step', sa.SmallInteger),
sa.Column('partition', sa.SmallInteger),
sa.Column('linegeo', Geometry(srid=4326, spatial_index=False)),
sa.Column('postcode', sa.Text))

View File

@@ -0,0 +1,93 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for enhanced connection class for API functions.
"""
from pathlib import Path
import pytest
import pytest_asyncio
import sqlalchemy as sa
from nominatim.api import NominatimAPIAsync
@pytest_asyncio.fixture
async def apiobj(temp_db):
""" Create an asynchronous SQLAlchemy engine for the test DB.
"""
api = NominatimAPIAsync(Path('/invalid'), {})
yield api
await api.close()
@pytest.mark.asyncio
async def test_run_scalar(apiobj, table_factory):
table_factory('foo', definition='that TEXT', content=(('a', ),))
async with apiobj.begin() as conn:
assert await conn.scalar(sa.text('SELECT * FROM foo')) == 'a'
@pytest.mark.asyncio
async def test_run_execute(apiobj, table_factory):
table_factory('foo', definition='that TEXT', content=(('a', ),))
async with apiobj.begin() as conn:
result = await conn.execute(sa.text('SELECT * FROM foo'))
assert result.fetchone()[0] == 'a'
@pytest.mark.asyncio
async def test_get_property_existing_cached(apiobj, table_factory):
table_factory('nominatim_properties',
definition='property TEXT, value TEXT',
content=(('dbv', '96723'), ))
async with apiobj.begin() as conn:
assert await conn.get_property('dbv') == '96723'
await conn.execute(sa.text('TRUNCATE nominatim_properties'))
assert await conn.get_property('dbv') == '96723'
@pytest.mark.asyncio
async def test_get_property_existing_uncached(apiobj, table_factory):
table_factory('nominatim_properties',
definition='property TEXT, value TEXT',
content=(('dbv', '96723'), ))
async with apiobj.begin() as conn:
assert await conn.get_property('dbv') == '96723'
await conn.execute(sa.text("UPDATE nominatim_properties SET value = '1'"))
assert await conn.get_property('dbv', cached=False) == '1'
@pytest.mark.asyncio
@pytest.mark.parametrize('param', ['foo', 'DB:server_version'])
async def test_get_property_missing(apiobj, table_factory, param):
table_factory('nominatim_properties',
definition='property TEXT, value TEXT')
async with apiobj.begin() as conn:
with pytest.raises(ValueError):
await conn.get_property(param)
@pytest.mark.asyncio
async def test_get_db_property_existing(apiobj):
async with apiobj.begin() as conn:
assert await conn.get_db_property('server_version') > 0
@pytest.mark.asyncio
async def test_get_db_property_existing(apiobj):
async with apiobj.begin() as conn:
with pytest.raises(ValueError):
await conn.get_db_property('dfkgjd.rijg')

View File

@@ -33,7 +33,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
# Some of the Python packages that come with Ubuntu 20.04 are too old, so
# install the latest version from pip:
pip3 install --user sqlalchemy asyncpg
pip3 install --user sqlalchemy GeoAlchemy2 asyncpg
#

View File

@@ -29,7 +29,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
php-cli php-pgsql php-intl libicu-dev python3-dotenv \
python3-psycopg2 python3-psutil python3-jinja2 \
python3-icu python3-datrie python3-sqlalchemy \
python3-asyncpg git
python3-geoalchemy2 python3-asyncpg git
#
# System Configuration