mirror of https://github.com/osm-search/Nominatim.git
synced 2026-02-15 19:07:58 +00:00

Compare commits — 1 commit: 8a96e4f802 (branch docs-5.0.x) … 4e79533f1f
.flake8 (3 changes)

@@ -6,6 +6,3 @@ extend-ignore =
    E711
per-file-ignores =
    __init__.py: F401
    test/python/utils/test_json_writer.py: E131
    **/conftest.py: E402
    test/bdd/*: F821
.github/actions/setup-postgresql/action.yml (vendored, 4 changes)

@@ -11,8 +11,10 @@ runs:
  steps:
    - name: Remove existing PostgreSQL
      run: |
        sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh -y
        sudo apt-get purge -yq postgresql*
        sudo apt install curl ca-certificates gnupg
        curl https://www.postgresql.org/media/keys/ACCC4CF8.asc | gpg --dearmor | sudo tee /etc/apt/trusted.gpg.d/apt.postgresql.org.gpg >/dev/null
        sudo sh -c 'echo "deb https://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
        sudo apt-get update -qq
      shell: bash
.github/workflows/ci-tests.yml (vendored, 53 changes)

@@ -37,10 +37,10 @@ jobs:
    needs: create-archive
    strategy:
      matrix:
        flavour: ["ubuntu-22", "ubuntu-24"]
        flavour: ["ubuntu-20", "ubuntu-24"]
        include:
          - flavour: ubuntu-22
            ubuntu: 22
          - flavour: ubuntu-20
            ubuntu: 20
            postgresql: 12
            lua: '5.1'
            dependencies: pip

@@ -68,34 +68,26 @@ jobs:
        with:
          dependencies: ${{ matrix.dependencies }}

      - uses: actions/cache@v4
        with:
          path: |
            /usr/local/bin/osm2pgsql
          key: osm2pgsql-bin-22-1
        if: matrix.ubuntu == '22'

      - name: Compile osm2pgsql
        run: |
          if [ ! -f /usr/local/bin/osm2pgsql ]; then
            sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua-dkjson nlohmann-json3-dev
            mkdir osm2pgsql-build
            cd osm2pgsql-build
            git clone https://github.com/osm2pgsql-dev/osm2pgsql
            mkdir build
            cd build
            cmake ../osm2pgsql
            make
            sudo make install
            cd ../..
            rm -rf osm2pgsql-build
          else
            sudo apt-get install -y -qq libexpat1 liblua${LUA_VERSION}
          fi
        if: matrix.ubuntu == '22'
          sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua-dkjson nlohmann-json3-dev
          mkdir osm2pgsql-build
          cd osm2pgsql-build
          git clone https://github.com/osm2pgsql-dev/osm2pgsql
          mkdir build
          cd build
          cmake ../osm2pgsql
          make
          sudo make install
          cd ../..
          rm -rf osm2pgsql-build
        if: matrix.ubuntu == '20'
        env:
          LUA_VERSION: ${{ matrix.lua }}

      - name: Install test prerequisites
        run: ./venv/bin/pip install behave==1.2.6

      - name: Install test prerequisites (apt)
        run: sudo apt-get install -y -qq python3-pytest python3-pytest-asyncio uvicorn python3-falcon python3-aiosqlite python3-pyosmium
        if: matrix.dependencies == 'apt'

@@ -104,14 +96,11 @@ jobs:
        run: ./venv/bin/pip install pytest-asyncio falcon starlette asgi_lifespan aiosqlite osmium uvicorn
        if: matrix.dependencies == 'pip'

      - name: Install test prerequisites
        run: ./venv/bin/pip install pytest-bdd

      - name: Install latest flake8
        run: ./venv/bin/pip install -U flake8

      - name: Python linting
        run: ../venv/bin/python -m flake8 src test/python test/bdd
        run: ../venv/bin/python -m flake8 src
        working-directory: Nominatim

      - name: Install mypy and typechecking info

@@ -129,8 +118,8 @@ jobs:
      - name: BDD tests
        run: |
          ../venv/bin/python -m pytest test/bdd --nominatim-purge
        working-directory: Nominatim
          ../../../venv/bin/python -m behave -DREMOVE_TEMPLATE=1 --format=progress3
        working-directory: Nominatim/test/bdd

  install:
    runs-on: ubuntu-latest
@@ -113,5 +113,3 @@ Checklist for releases:
* run `nominatim --version` to confirm correct version
* [ ] tag new release and add a release on github.com
* [ ] build pip packages and upload to pypi
  * `make build`
  * `twine upload dist/*`
ChangeLog (19 changes)

@@ -1,22 +1,3 @@
5.1.0
 * replace datrie with simple internal trie implementation
 * add pattern-based postcode parser for queries,
   postcodes no longer need to be present in OSM to be found
 * take variants into account when computing token similarity
 * add extratags output to geocodejson format
 * fix default layer setting used for structured queries
 * update abbreviation lists for Russian and English
   (thanks @shoorick, @IvanShift, @mhsrn21)
 * fix variant generation for Norwegian
 * fix normalization around space-like characters
 * improve postcode search and handling of postcodes in queries
 * reorganise internal query structure and get rid of slow enums
 * enable code linting for tests
 * various code modernisations in test code (thanks @eumiro)
 * remove setting osm2pgsql location via config.lib_dir
 * make SQL functions parallel safe as far as possible (thanks @otbutz)
 * various fixes and improvements to documentation (thanks @TuringVerified)

5.0.0
 * increase required versions for PostgreSQL (12+), PostGIS (3.0+)
 * remove installation via cmake and debundle osm2pgsql
Makefile (4 changes)

@@ -24,10 +24,10 @@ pytest:
	pytest test/python

lint:
	flake8 src test/python test/bdd
	flake8 src

bdd:
	pytest test/bdd --nominatim-purge
	cd test/bdd; behave -DREMOVE_TEMPLATE=1

# Documentation
README.md (15 changes)

@@ -27,25 +27,18 @@ can be found at nominatim.org as well.

A quick summary of the necessary steps:

1. Clone this git repository and download the country grid

       git clone https://github.com/osm-search/Nominatim.git
       wget -O Nominatim/data/country_osm_grid.sql.gz https://nominatim.org/data/country_grid.sql.gz

2. Create a Python virtualenv and install the packages:
1. Create a Python virtualenv and install the packages:

       python3 -m venv nominatim-venv
       ./nominatim-venv/bin/pip install packaging/nominatim-{api,db}

3. Create a project directory, get OSM data and import:
2. Create a project directory, get OSM data and import:

       mkdir nominatim-project
       cd nominatim-project
       ../nominatim-venv/bin/nominatim import --osm-file <your planet file> 2>&1 | tee setup.log
       ../nominatim-venv/bin/nominatim import --osm-file <your planet file>

4. Start the webserver:
3. Start the webserver:

       ./nominatim-venv/bin/pip install uvicorn falcon
       ../nominatim-venv/bin/nominatim serve
@@ -9,8 +9,7 @@ versions.

| Version | End of support for security updates |
| ------- | ----------------------------------- |
| 5.1.x   | 2027-04-01 |
| 5.0.x   | 2027-02-06 |
| 4.5.x   | 2026-09-12 |
| 4.4.x   | 2026-03-07 |
| 4.3.x   | 2025-09-07 |
@@ -37,6 +37,7 @@ Furthermore the following Python libraries are required:

* [Jinja2](https://palletsprojects.com/p/jinja/)
* [PyICU](https://pypi.org/project/PyICU/)
* [PyYaml](https://pyyaml.org/) (5.1+)
* [datrie](https://github.com/pytries/datrie)

These will be installed automatically when using pip installation.

@@ -110,14 +111,17 @@ Then you can install Nominatim with:

    pip install nominatim-db nominatim-api

## Downloading and building Nominatim
## Downloading and building Nominatim from source

### Downloading the latest release
The following instructions are only relevant if you want to build and
install Nominatim **from source**.

### Downloading the source for the latest release

You can download the [latest release from nominatim.org](https://nominatim.org/downloads/).
The release contains all necessary files. Just unpack it.

### Downloading the latest development version
### Downloading the source for the latest development version

If you want to install the latest development version from github:

@@ -131,7 +135,7 @@ The development version does not include the country grid. Download it separately:

    wget -O Nominatim/data/country_osm_grid.sql.gz https://nominatim.org/data/country_grid.sql.gz
```

### Building Nominatim
### Building Nominatim from source

Nominatim is easiest to run from its own virtual environment. To create one, run:
@@ -36,11 +36,11 @@ The website is now available at `http://localhost:8765`.

## Forwarding searches to nominatim-ui

Nominatim used to provide the search interface directly by itself when
`format=html` was requested. For the `/search` endpoint this even used
to be the default.
`format=html` was requested. For all endpoints except for `/reverse` and
`/lookup` this even used to be the default.

The following section describes how to set up Apache or nginx, so that your
users are forwarded to nominatim-ui when they go to a URL that formerly presented
the UI.

### Setting up forwarding in Nginx

@@ -73,28 +73,41 @@ map $args $format {

# Determine from the URI and the format parameter above if forwarding is needed.
map $uri/$format $forward_to_ui {
    default 0;              # no forwarding by default
    ~/search.*/default 1;   # Use this line only, if search should go to UI by default.
    ~/reverse.*/html 1;     # Forward API calls that UI supports, when
    ~/status.*/html 1;      # format=html is explicitly requested.
    ~/search.*/html 1;
    ~/details.*/html 1;
    default 1;              # The default is to forward.
    ~^/ui 0;                # If the URI points to the UI already, we are done.
    ~/other$ 0;             # An explicit non-html format parameter. No forwarding.
    ~/reverse.*/default 0;  # Reverse and lookup assume xml format when
    ~/lookup.*/default 0;   # no format parameter is given. No forwarding.
}
```

The `$forward_to_ui` parameter can now be used to conditionally forward the
calls:

``` nginx
location / {
    if ($forward_to_ui) {
        rewrite ^(/[^/.]*) https://$http_host/ui$1.html redirect;
    }
}

# When no endpoint is given, default to search.
# Need to add a rewrite so that the rewrite rules below catch it correctly.
rewrite ^/$ /search;

# proxy_pass commands
location @php {
    # fastcgi stuff..
    if ($forward_to_ui) {
        rewrite ^(/[^/]*) https://yourserver.com/ui$1.html redirect;
    }
}

location ~ [^/]\.php(/|$) {
    # fastcgi stuff..
    if ($forward_to_ui) {
        rewrite (.*).php https://yourserver.com/ui$1.html redirect;
    }
}
```

!!! warning
    Be aware that the rewrite commands are slightly different for URIs with and
    without the .php suffix.

Reload nginx and the UI should be available.

### Setting up forwarding in Apache

@@ -146,16 +159,18 @@ directory like this:

RewriteBase "/nominatim/"

# If no endpoint is given, then use search.
RewriteRule ^(/|$) "search"
RewriteRule ^(/|$) "search.php"

# If format-html is explicitly requested, forward to the UI.
RewriteCond %{QUERY_STRING} "format=html"
RewriteRule ^([^/.]+) ui/$1.html [R,END]
RewriteRule ^([^/]+)(.php)? ui/$1.html [R,END]

# Optionally: if no format parameter is there then forward /search.
# If no format parameter is there then forward anything
# but /reverse and /lookup to the UI.
RewriteCond %{QUERY_STRING} "!format="
RewriteCond %{REQUEST_URI} "/search"
RewriteRule ^([^/.]+) ui/$1.html [R,END]
RewriteCond %{REQUEST_URI} "!/lookup"
RewriteCond %{REQUEST_URI} "!/reverse"
RewriteRule ^([^/]+)(.php)? ui/$1.html [R,END]
</Directory>
```
@@ -106,11 +106,8 @@ The following feature attributes are implemented:

* `name` - localised name of the place
* `housenumber`, `street`, `locality`, `district`, `postcode`, `city`,
  `county`, `state`, `country` -
  provided when it can be determined from the address (only with `addressdetails=1`)
  provided when it can be determined from the address
* `admin` - list of localised names of administrative boundaries (only with `addressdetails=1`)
* `extra` - dictionary with additional useful tags like `website` or `maxspeed`
  (only with `extratags=1`)

Use `polygon_geojson` to output the full geometry of the object instead
of the centroid.
@@ -212,7 +212,7 @@ other layers.

The featureType allows a more fine-grained selection for places
from the address layer. Results can be restricted to places that make up
the 'state', 'country' or 'city' part of an address. A featureType of
`settlement` selects any human inhabited feature from 'state' down to
settlement selects any human inhabited feature from 'state' down to
'neighbourhood'.

When featureType is set, then results are automatically restricted
@@ -556,6 +556,16 @@ the Nominatim topic.

```
Discarding country-level boundaries when running under themepark.

## osm2pgsql gazetteer output

Nominatim still allows you to configure the gazetteer output to remain
backwards compatible with older imports. It will be automatically used
when the style file name ends in `.style`. For documentation of the
old import style, please refer to the documentation of older releases
of Nominatim. Do not use the gazetteer output for new imports. There is no
guarantee that new versions of Nominatim are fully compatible with the
gazetteer output.

## Changing the style of existing databases

There is usually no issue changing the style of a database that is already
@@ -602,44 +602,25 @@ results gathered so far.

Note that under high load you may observe that users receive different results
than usual without seeing an error. This may cause some confusion.

#### NOMINATIM_OUTPUT_NAMES

| Summary          |                                            |
| ---------------- | ------------------------------------------ |
| **Description:** | Specifies order of name tags |
| **Format:**      | string: comma-separated list of tag names |
| **Default:**     | name:XX,name,brand,official_name:XX,short_name:XX,official_name,short_name,ref |

Specifies the order in which different name tags are used.
The values in this list determine the preferred order of name variants,
including language-specific names (in OSM: the name tag with and without
any language suffix).

Comma-separated list, where :XX stands for a language suffix
(e.g. name:en) and no :XX stands for general tags (e.g. name).

See also [NOMINATIM_DEFAULT_LANGUAGE](#nominatim_default_language).

!!! note
    If NOMINATIM_OUTPUT_NAMES = `name:XX,name,short_name:XX,short_name` the search follows

    ```
    'name', 'short_name'
    ```

    if we have no preferred language order for showing search results.

    For languages ['en', 'es'] the search follows

    ```
    'name:en', 'name:es',
    'name',
    'short_name:en', 'short_name:es',
    'short_name'
    ```

    For those familiar with the internal implementation, the `_place_*`
    expansion is added, but to simplify, it is not included in this example.

### Logging Settings

#### NOMINATIM_LOG_DB

| Summary            |                                    |
| ------------------ | ---------------------------------- |
| **Description:**   | Log requests into the database |
| **Format:**        | boolean |
| **Default:**       | no |
| **After Changes:** | run `nominatim refresh --website` |

Enable logging requests into a database table with this setting. The logs
can be found in the table `new_query_log`.

When using this logging method, it is advisable to set up a job that
regularly clears out old logging information. Nominatim will not do that
on its own.
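As an illustration only, such a cleanup job can be a single scheduled SQL
statement. This sketch assumes the log table's timestamp column is named
`starttime` and a retention period of one week; adjust both to your setup:

``` sql
-- prune query log entries older than a week (hypothetical retention period)
DELETE FROM new_query_log WHERE starttime < now() - interval '7 days';
```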
Can be used at the same time as NOMINATIM_LOG_FILE.

#### NOMINATIM_LOG_FILE
@@ -664,6 +645,8 @@ given in seconds and includes the entire time the query was queued and executed

in the frontend.
type contains the name of the endpoint used.

Can be used at the same time as NOMINATIM_LOG_DB.

#### NOMINATIM_DEBUG_SQL

| Summary | |
@@ -67,13 +67,7 @@ Here is an example configuration file:

``` yaml
query-preprocessing:
    - step: split_japanese_phrases
    - step: regex_replace
      replacements:
        - pattern: https?://[^\s]* # Filter URLs starting with http or https
          replace: ''
    - step: normalize
    - normalize
normalization:
    - ":: lower ()"
    - "ß > 'ss'" # German eszett is unambiguously equal to double ss

@@ -94,8 +88,8 @@ token-analysis:
        replacements: ['ä', 'ae']
```

The configuration file contains five sections:
`query-preprocessing`, `normalization`, `transliteration`, `sanitizers` and `token-analysis`.
The configuration file contains four sections:
`normalization`, `transliteration`, `sanitizers` and `token-analysis`.

#### Query preprocessing

@@ -112,19 +106,6 @@ The following is a list of preprocessors that are shipped with Nominatim.

    heading_level: 6
    docstring_section_style: spacy

##### regex-replace

::: nominatim_api.query_preprocessing.regex_replace
    options:
        members: False
        heading_level: 6
        docstring_section_style: spacy
        description:
            This option runs any given regex pattern on the input and replaces values accordingly.
        replacements:
            - pattern: regex pattern
              replace: string to replace with

#### Normalization and Transliteration
@@ -3,7 +3,8 @@

### Import tables

OSM data is initially imported using [osm2pgsql](https://osm2pgsql.org).
Nominatim uses a custom flex style to create the initial import tables.
Nominatim uses its own data output style 'gazetteer', which differs from the
output style created for map rendering.

The import process creates the following tables:

@@ -13,7 +14,7 @@ The `planet_osm_*` tables are the usual backing tables for OSM data. Note
that Nominatim uses them to look up special relations and to find nodes on
ways.

The osm2pgsql import produces a single table `place` as output with the following
The gazetteer style produces a single table `place` as output with the following
columns:

* `osm_type` - kind of OSM object (**N** - node, **W** - way, **R** - relation)
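For orientation, the output table can be inspected directly after an import.
A minimal sketch, assuming a finished import and the column names listed
above:

``` sql
-- sample a few rows from the osm2pgsql output table
SELECT osm_type, osm_id, class, type FROM place LIMIT 10;
```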
@@ -25,15 +25,15 @@ following packages should get you started:

## Prerequisites for testing and documentation

The Nominatim test suite consists of behavioural tests (using pytest-bdd) and
The Nominatim test suite consists of behavioural tests (using behave) and
unit tests (using pytest). It has the following additional requirements:

* [behave test framework](https://behave.readthedocs.io) >= 1.2.6
* [flake8](https://flake8.pycqa.org/en/stable/) (CI always runs the latest version from pip)
* [mypy](http://mypy-lang.org/) (plus typing information for external libs)
* [Python Typing Extensions](https://github.com/python/typing_extensions) (for Python < 3.9)
* [pytest](https://pytest.org)
* [pytest-asyncio](https://pytest-asyncio.readthedocs.io)
* [pytest-bdd](https://pytest-bdd.readthedocs.io)

For testing the Python search frontend, you need to install extra dependencies
depending on your choice of webserver framework:

@@ -48,6 +48,9 @@ The documentation is built with mkdocs:

* [mkdocs-material](https://squidfunk.github.io/mkdocs-material/)
* [mkdocs-gen-files](https://oprypin.github.io/mkdocs-gen-files/)

Please be aware that tests always run against the globally installed
osm2pgsql, so you need to have this set up. If you want to test against
the vendored version of osm2pgsql, you need to set the PATH accordingly.

### Installing prerequisites on Ubuntu/Debian

@@ -66,10 +69,9 @@ To set up the virtual environment with all necessary packages run:

```sh
virtualenv ~/nominatim-dev-venv
~/nominatim-dev-venv/bin/pip install \
    psutil 'psycopg[binary]' PyICU SQLAlchemy \
    python-dotenv jinja2 pyYAML \
    mkdocs 'mkdocstrings[python]' mkdocs-gen-files \
    pytest pytest-asyncio pytest-bdd flake8 \
    psutil psycopg[binary] PyICU SQLAlchemy \
    python-dotenv jinja2 pyYAML datrie behave \
    mkdocs mkdocstrings mkdocs-gen-files pytest pytest-asyncio flake8 \
    types-jinja2 types-markupsafe types-psutil types-psycopg2 \
    types-pygments types-pyyaml types-requests types-ujson \
    types-urllib3 typing-extensions uvicorn falcon starlette \
@@ -60,19 +60,13 @@ The order of phrases matters to Nominatim when doing further processing.

Thus, while you may split or join phrases, you should not reorder them
unless you really know what you are doing.

Phrase types can further help narrowing down how the tokens in the phrase
are interpreted. The following phrase types are known:
Phrase types (`nominatim_api.search.PhraseType`) can further help narrowing
down how the tokens in the phrase are interpreted. The following phrase types
are known:

| Name           | Description |
|----------------|-------------|
| PHRASE_ANY     | No specific designation (i.e. source is free-form query) |
| PHRASE_AMENITY | Contains name or type of a POI |
| PHRASE_STREET  | Contains a street name optionally with a housenumber |
| PHRASE_CITY    | Contains the postal city |
| PHRASE_COUNTY  | Contains the equivalent of a county |
| PHRASE_STATE   | Contains a state or province |
| PHRASE_POSTCODE| Contains a postal code |
| PHRASE_COUNTRY | Contains the country name or code |

::: nominatim_api.search.PhraseType
    options:
        heading_level: 6

## Custom sanitizer modules
@@ -43,53 +43,53 @@ The name of the pytest binary depends on your installation.

## BDD Functional Tests (`test/bdd`)

Functional tests are written as BDD instructions. For more information on
the philosophy of BDD testing, read the Wikipedia article on
[Behaviour-driven development](https://en.wikipedia.org/wiki/Behavior-driven_development).
the philosophy of BDD testing, see the
[Behave manual](http://pythonhosted.org/behave/philosophy.html).

The following explanation assumes that the reader is familiar with the BDD
notations of features, scenarios and steps.

All possible steps can be found in the `steps` directory and should ideally
be documented.

### General Usage

To run the functional tests, do

    pytest test/bdd
    cd test/bdd
    behave

The BDD tests create databases for the tests. You can set the name of the
databases through configuration variables in your `pytest.ini`:
The tests can be configured with a set of environment variables (`behave -D key=val`):

* `nominatim_test_db` defines the name of the temporary database created for
  a single test (default: `test_nominatim`)
* `nominatim_api_test_db` defines the name of the database containing
  the API test data, see also below (default: `test_api_nominatim`)
* `nominatim_template_db` defines the name of the template database used
  for creating the temporary test databases. It contains some static setup
  which usually doesn't change between imports of OSM data
  (default: `test_template_nominatim`)

To change other connection parameters for the PostgreSQL database, use
the [libpq environment variables](https://www.postgresql.org/docs/current/libpq-envars.html).
Never set a password through these variables. Use a
[password file](https://www.postgresql.org/docs/current/libpq-pgpass.html) instead.

The API test database and the template database are only created once and then
left untouched. This is usually what you want because it speeds up subsequent
runs of BDD tests. If you do change code that has an influence on the content
of these databases, you can run pytest with the `--nominatim-purge` parameter
and the databases will be dropped and recreated from scratch.

When running the BDD tests with make (using `make tests` or `make bdd`), then
the databases will always be purged.

The temporary test database is usually dropped directly after the test, so
it does not take up unnecessary space. If you want to keep the database around,
for example while debugging a specific BDD test, use the parameter
`--nominatim-keep-db`.

* `TEMPLATE_DB` - name of template database used as a skeleton for
  the test databases (db tests)
* `TEST_DB` - name of test database (db tests)
* `API_TEST_DB` - name of the database containing the API test data (api tests)
* `API_TEST_FILE` - OSM file to be imported into the API test database (api tests)
* `API_ENGINE` - webframe to use for running search queries, same values as
  `nominatim serve --engine` parameter
* `DB_HOST` - (optional) hostname of database host
* `DB_PORT` - (optional) port of database on host
* `DB_USER` - (optional) username of database login
* `DB_PASS` - (optional) password for database login
* `REMOVE_TEMPLATE` - if true, the template and API database will not be reused
  during the next run. Reusing the base templates speeds
  up tests considerably but might lead to outdated errors
  for some changes in the database layout.
* `KEEP_TEST_DB` - if true, the test database will not be dropped after a test
  is finished. Should only be used if one single scenario is
  run, otherwise the result is undefined.

Logging can be defined through command line parameters of behave itself. Check
out `behave --help` for details. Also have a look at the 'work-in-progress'
feature of behave which comes in handy when writing new tests.

### API Tests (`test/bdd/api`)

These tests are meant to test the different API endpoints and their parameters.
They require importing several datasets into a test database. This is normally
done automatically during setup of the test. The API test database is then
kept around and reused in subsequent runs of behave. Use `--nominatim-purge`
kept around and reused in subsequent runs of behave. Use `behave -DREMOVE_TEMPLATE`
to force a reimport of the database.

The official test dataset is saved in the file `test/testdb/apidb-test-data.pbf`

@@ -109,12 +109,12 @@ test the correctness of osm2pgsql. Each test will write some data into the `place`
table (and optionally the `planet_osm_*` tables if required) and then run
Nominatim's processing functions on that.

These tests use the template database and create temporary test databases for
each test.
These tests need to create their own test databases. By default they will be
called `test_template_nominatim` and `test_nominatim`. Names can be changed with
the environment variables `TEMPLATE_DB` and `TEST_DB`. The user running the tests
needs superuser rights for postgres.

### Import Tests (`test/bdd/osm2pgsql`)

These tests check that data is imported correctly into the place table.

These tests also use the template database and create temporary test databases
for each test.
These tests check that data is imported correctly into the place table. They
use the same template database as the DB Creation tests, so the same remarks apply.
@@ -9,7 +9,7 @@ the address computation and the search frontend.

The __data import__ stage reads the raw OSM data and extracts all information
that is useful for geocoding. This part is done by osm2pgsql, the same tool
that can also be used to import a rendering database. It uses the special
flex output style defined in the directory `/lib-lua`. The result of
gazetteer output plugin in `osm2pgsql/src/output-gazetteer.[ch]pp`. The result of
the import can be found in the database table `place`.

The __address computation__ or __indexing__ stage takes the data from `place`
@@ -187,7 +187,7 @@ module.MAIN_TAGS_POIS = function (group)

                     passing_place = group,
                     street_lamp = 'named',
                     traffic_signals = 'named'},
        historic = {'fallback',
        historic = {'always',
                    yes = group,
                    no = group},
        information = {include_when_tag_present('tourism', 'information'),

@@ -196,7 +196,6 @@ module.MAIN_TAGS_POIS = function (group)
                       trail_blaze = 'never'},
        junction = {'fallback',
                    no = group},
        landuse = {cemetery = 'always'},
        leisure = {'always',
                   nature_reserve = 'fallback',
                   swimming_pool = 'named',

@@ -230,7 +229,6 @@ module.MAIN_TAGS_POIS = function (group)
        shop = {'always',
                no = group},
        tourism = {'always',
                   attraction = 'fallback',
                   no = group,
                   yes = group,
                   information = exclude_when_key_present('information')},

@@ -332,7 +330,7 @@ module.NAME_TAGS.core = {main = {'name', 'name:*',
}
module.NAME_TAGS.address = {house = {'addr:housename'}}
module.NAME_TAGS.poi = group_merge({main = {'brand'},
                                    extra = {'iata', 'icao', 'faa'}},
                                    extra = {'iata', 'icao'}},
                                   module.NAME_TAGS.core)

-- Address tagging
@@ -8,6 +8,7 @@

{% include('functions/utils.sql') %}
{% include('functions/ranking.sql') %}
{% include('functions/importance.sql') %}
{% include('functions/address_lookup.sql') %}
{% include('functions/interpolation.sql') %}

{% if 'place' in db.tables %}
lib-sql/functions/address_lookup.sql (new file, 334 lines)

@@ -0,0 +1,334 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2022 by the Nominatim developer community.
-- For a full list of authors see the git log.

-- Functions for returning address information for a place.

DROP TYPE IF EXISTS addressline CASCADE;
CREATE TYPE addressline as (
  place_id BIGINT,
  osm_type CHAR(1),
  osm_id BIGINT,
  name HSTORE,
  class TEXT,
  type TEXT,
  place_type TEXT,
  admin_level INTEGER,
  fromarea BOOLEAN,
  isaddress BOOLEAN,
  rank_address INTEGER,
  distance FLOAT
);


CREATE OR REPLACE FUNCTION get_name_by_language(name hstore, languagepref TEXT[])
  RETURNS TEXT
  AS $$
DECLARE
  result TEXT;
BEGIN
  IF name is null THEN
    RETURN null;
  END IF;

  FOR j IN 1..array_upper(languagepref,1) LOOP
    IF name ? languagepref[j] THEN
      result := trim(name->languagepref[j]);
      IF result != '' THEN
        return result;
      END IF;
    END IF;
  END LOOP;

  -- as a fallback - take the last element since it is the default name
  RETURN trim((avals(name))[array_length(avals(name), 1)]);
END;
$$
LANGUAGE plpgsql IMMUTABLE;
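A brief usage sketch for the function above (editor's illustration; the
hstore value is made up): it prefers the first matching entry in the
preference list and otherwise falls back to the default name.

``` sql
-- prefers 'name:en', falls back along the list, then to the default name
SELECT get_name_by_language('"name"=>"München", "name:en"=>"Munich"'::hstore,
                            ARRAY['name:en', 'name']);   -- returns 'Munich'
```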
--housenumber only needed for tiger data
CREATE OR REPLACE FUNCTION get_address_by_language(for_place_id BIGINT,
                                                   housenumber INTEGER,
                                                   languagepref TEXT[])
  RETURNS TEXT
  AS $$
DECLARE
  result TEXT[];
  currresult TEXT;
  prevresult TEXT;
  location RECORD;
BEGIN
  result := '{}';
  prevresult := '';

  FOR location IN
    SELECT name,
           CASE WHEN place_id = for_place_id THEN 99 ELSE rank_address END as rank_address
    FROM get_addressdata(for_place_id, housenumber)
    WHERE isaddress order by rank_address desc
  LOOP
    currresult := trim(get_name_by_language(location.name, languagepref));
    IF currresult != prevresult AND currresult IS NOT NULL
       AND result[(100 - location.rank_address)] IS NULL
    THEN
      result[(100 - location.rank_address)] := currresult;
      prevresult := currresult;
    END IF;
  END LOOP;

  RETURN array_to_string(result,', ');
END;
$$
LANGUAGE plpgsql STABLE;

DROP TYPE IF EXISTS addressdata_place;
CREATE TYPE addressdata_place AS (
  place_id BIGINT,
  country_code VARCHAR(2),
  housenumber TEXT,
  postcode TEXT,
  class TEXT,
  type TEXT,
  name HSTORE,
  address HSTORE,
  centroid GEOMETRY
);

-- Compute the list of address parts for the given place.
--
-- If in_housenumber is greater or equal 0, look for an interpolation.
CREATE OR REPLACE FUNCTION get_addressdata(in_place_id BIGINT, in_housenumber INTEGER)
  RETURNS setof addressline
  AS $$
DECLARE
  place addressdata_place;
  location RECORD;
  country RECORD;
  current_rank_address INTEGER;
  location_isaddress BOOLEAN;
BEGIN
  -- The place in question might not have a direct entry in place_addressline.
  -- Look for the parent of such places then and save it in place.

  -- first query osmline (interpolation lines)
  IF in_housenumber >= 0 THEN
    SELECT parent_place_id as place_id, country_code,
           in_housenumber as housenumber, postcode,
           'place' as class, 'house' as type,
           null as name, null as address,
           ST_Centroid(linegeo) as centroid
      INTO place
      FROM location_property_osmline
      WHERE place_id = in_place_id
            AND in_housenumber between startnumber and endnumber;
  END IF;

  --then query tiger data
  {% if config.get_bool('USE_US_TIGER_DATA') %}
  IF place IS NULL AND in_housenumber >= 0 THEN
    SELECT parent_place_id as place_id, 'us' as country_code,
           in_housenumber as housenumber, postcode,
           'place' as class, 'house' as type,
           null as name, null as address,
           ST_Centroid(linegeo) as centroid
      INTO place
      FROM location_property_tiger
      WHERE place_id = in_place_id
            AND in_housenumber between startnumber and endnumber;
  END IF;
  {% endif %}

  -- postcode table
  IF place IS NULL THEN
    SELECT parent_place_id as place_id, country_code,
           null::text as housenumber, postcode,
           'place' as class, 'postcode' as type,
           null as name, null as address,
           null as centroid
      INTO place
      FROM location_postcode
      WHERE place_id = in_place_id;
  END IF;

  -- POI objects in the placex table
  IF place IS NULL THEN
    SELECT parent_place_id as place_id, country_code,
           coalesce(address->'housenumber',
                    address->'streetnumber',
                    address->'conscriptionnumber')::text as housenumber,
           postcode,
           class, type,
           name, address,
           centroid
      INTO place
      FROM placex
      WHERE place_id = in_place_id and rank_search > 27;
  END IF;

  -- If place is still NULL at this point then the object has its own
  -- entry in place_addressline. However, still check if there is a linked
  -- place we should be using instead.
  IF place IS NULL THEN
    select coalesce(linked_place_id, place_id) as place_id, country_code,
           null::text as housenumber, postcode,
           class, type,
           null as name, address,
           null as centroid
      INTO place
      FROM placex where place_id = in_place_id;
  END IF;

--RAISE WARNING '% % % %',searchcountrycode, searchhousenumber, searchpostcode;

  -- --- Return the record for the base entry.

  current_rank_address := 1000;
  FOR location IN
    SELECT placex.place_id, osm_type, osm_id, name,
           coalesce(extratags->'linked_place', extratags->'place') as place_type,
           class, type, admin_level,
           CASE WHEN rank_address = 0 THEN 100
                WHEN rank_address = 11 THEN 5
                ELSE rank_address END as rank_address,
           country_code
      FROM placex
      WHERE place_id = place.place_id
  LOOP
    --RAISE WARNING '%',location;
    -- mix in default names for countries
    IF location.rank_address = 4 and place.country_code is not NULL THEN
      FOR country IN
        SELECT coalesce(name, ''::hstore) as name FROM country_name
          WHERE country_code = place.country_code LIMIT 1
      LOOP
        place.name := country.name || place.name;
      END LOOP;
    END IF;

    IF location.rank_address < 4 THEN
      -- no country locations for ranks higher than country
      place.country_code := NULL::varchar(2);
    ELSEIF place.country_code IS NULL AND location.country_code IS NOT NULL THEN
      place.country_code := location.country_code;
    END IF;

    RETURN NEXT ROW(location.place_id, location.osm_type, location.osm_id,
                    location.name, location.class, location.type,
                    location.place_type,
                    location.admin_level, true,
                    location.type not in ('postcode', 'postal_code'),
                    location.rank_address, 0)::addressline;

    current_rank_address := location.rank_address;
  END LOOP;

  -- --- Return records for address parts.

  FOR location IN
    SELECT placex.place_id, osm_type, osm_id, name, class, type,
           coalesce(extratags->'linked_place', extratags->'place') as place_type,
           admin_level, fromarea, isaddress,
           CASE WHEN rank_address = 11 THEN 5 ELSE rank_address END as rank_address,
           distance, country_code, postcode
      FROM place_addressline join placex on (address_place_id = placex.place_id)
      WHERE place_addressline.place_id IN (place.place_id, in_place_id)
            AND linked_place_id is null
            AND (placex.country_code IS NULL OR place.country_code IS NULL
                 OR placex.country_code = place.country_code)
      ORDER BY rank_address desc,
               (place_addressline.place_id = in_place_id) desc,
               (CASE WHEN coalesce((avals(name) && avals(place.address)), False) THEN 2
                     WHEN isaddress THEN 0
                     WHEN fromarea
                          and place.centroid is not null
                          and ST_Contains(geometry, place.centroid) THEN 1
                     ELSE -1 END) desc,
               fromarea desc, distance asc, rank_search desc
  LOOP
    -- RAISE WARNING '%',location;
    location_isaddress := location.rank_address != current_rank_address;

    IF place.country_code IS NULL AND location.country_code IS NOT NULL THEN
      place.country_code := location.country_code;
    END IF;
    IF location.type in ('postcode', 'postal_code')
       AND place.postcode is not null
    THEN
      -- If the place had a postcode assigned, take this one only
      -- into consideration when it is an area and the place does not have
      -- a postcode itself.
      IF location.fromarea AND location_isaddress
         AND (place.address is null or not place.address ? 'postcode')
      THEN
        place.postcode := null; -- remove the less exact postcode
      ELSE
        location_isaddress := false;
      END IF;
    END IF;
    RETURN NEXT ROW(location.place_id, location.osm_type, location.osm_id,
                    location.name, location.class, location.type,
                    location.place_type,
                    location.admin_level, location.fromarea,
                    location_isaddress,
                    location.rank_address,
                    location.distance)::addressline;

    current_rank_address := location.rank_address;
  END LOOP;

  -- If no country was included yet, add the name information from country_name.
  IF current_rank_address > 4 THEN
    FOR location IN
      SELECT name || coalesce(derived_name, ''::hstore) as name FROM country_name
        WHERE country_code = place.country_code LIMIT 1
    LOOP
      --RAISE WARNING '% % %',current_rank_address,searchcountrycode,countryname;
      RETURN NEXT ROW(null, null, null, location.name, 'place', 'country', NULL,
                      null, true, true, 4, 0)::addressline;
    END LOOP;
  END IF;

  -- Finally add some artificial rows.
  IF place.country_code IS NOT NULL THEN
    location := ROW(null, null, null, hstore('ref', place.country_code),
                    'place', 'country_code', null, null, true, false, 4, 0)::addressline;
    RETURN NEXT location;
  END IF;

  IF place.name IS NOT NULL THEN
    location := ROW(in_place_id, null, null, place.name, place.class,
                    place.type, null, null, true, true, 29, 0)::addressline;
    RETURN NEXT location;
  END IF;

  IF place.housenumber IS NOT NULL THEN
    location := ROW(null, null, null, hstore('ref', place.housenumber),
                    'place', 'house_number', null, null, true, true, 28, 0)::addressline;
    RETURN NEXT location;
  END IF;

  IF place.address is not null and place.address ? '_unlisted_place' THEN
    RETURN NEXT ROW(null, null, null, hstore('name', place.address->'_unlisted_place'),
                    'place', 'locality', null, null, true, true, 25, 0)::addressline;
  END IF;

  IF place.postcode is not null THEN
    location := ROW(null, null, null, hstore('ref', place.postcode), 'place',
                    'postcode', null, null, false, true, 5, 0)::addressline;
    RETURN NEXT location;
  ELSEIF place.address is not null and place.address ? 'postcode'
         and not place.address->'postcode' SIMILAR TO '%(,|;)%' THEN
    location := ROW(null, null, null, hstore('ref', place.address->'postcode'), 'place',
                    'postcode', null, null, false, true, 5, 0)::addressline;
    RETURN NEXT location;
  END IF;

  RETURN;
END;
$$
LANGUAGE plpgsql STABLE;
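A usage sketch for the two lookup functions defined in this file (the place
id is hypothetical; -1 disables the interpolation house number lookup, since
only values >= 0 trigger it):

``` sql
-- full address breakdown for one place
SELECT place_id, osm_type, osm_id, class, type, rank_address, isaddress
  FROM get_addressdata(12345, -1);

-- the same address as a single localised string, English names preferred
SELECT get_address_by_language(12345, -1, ARRAY['name:en', 'name']);
```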
@@ -65,7 +65,7 @@ BEGIN
  RETURN NULL;
END;
$$
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
LANGUAGE plpgsql IMMUTABLE;

{% else %}

@@ -78,7 +78,7 @@ SELECT convert_from(CAST(E'\\x' || array_to_string(ARRAY(
    FROM regexp_matches($1, '%[0-9a-f][0-9a-f]|.', 'gi') AS r(m)
  ), '') AS bytea), 'UTF8');
$$
LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE;
LANGUAGE SQL IMMUTABLE STRICT;

CREATE OR REPLACE FUNCTION catch_decode_url_part(p varchar)

@@ -91,7 +91,7 @@ EXCEPTION
  WHEN others THEN return null;
END;
$$
LANGUAGE plpgsql IMMUTABLE STRICT PARALLEL SAFE;
LANGUAGE plpgsql IMMUTABLE STRICT;

CREATE OR REPLACE FUNCTION get_wikipedia_match(extratags HSTORE, country_code varchar(2))

@@ -139,7 +139,7 @@ BEGIN
  RETURN NULL;
END;
$$
LANGUAGE plpgsql STABLE PARALLEL SAFE;
LANGUAGE plpgsql STABLE;

{% endif %}

@@ -203,5 +203,5 @@ BEGIN
  RETURN result;
END;
$$
LANGUAGE plpgsql PARALLEL SAFE;
LANGUAGE plpgsql;

@@ -34,7 +34,7 @@ BEGIN
  RETURN in_address;
END;
$$
LANGUAGE plpgsql STABLE PARALLEL SAFE;
LANGUAGE plpgsql STABLE;

@@ -70,7 +70,7 @@ BEGIN
  RETURN parent_place_id;
END;
$$
LANGUAGE plpgsql STABLE PARALLEL SAFE;
LANGUAGE plpgsql STABLE;

CREATE OR REPLACE FUNCTION reinsert_interpolation(way_id BIGINT, addr HSTORE,

@@ -58,7 +58,7 @@ BEGIN
  RAISE EXCEPTION 'Unknown partition %', in_partition;
END
$$
LANGUAGE plpgsql STABLE PARALLEL SAFE;
LANGUAGE plpgsql STABLE;

CREATE OR REPLACE FUNCTION get_address_place(in_partition SMALLINT, feature GEOMETRY,

@@ -87,7 +87,7 @@ BEGIN
  RAISE EXCEPTION 'Unknown partition %', in_partition;
END;
$$
LANGUAGE plpgsql STABLE PARALLEL SAFE;
LANGUAGE plpgsql STABLE;

create or replace function deleteLocationArea(in_partition INTEGER, in_place_id BIGINT, in_rank_search INTEGER) RETURNS BOOLEAN AS $$

@@ -172,7 +172,7 @@ BEGIN
  RAISE EXCEPTION 'Unknown partition %', in_partition;
END
$$
LANGUAGE plpgsql STABLE PARALLEL SAFE;
LANGUAGE plpgsql STABLE;

CREATE OR REPLACE FUNCTION getNearestNamedPlacePlaceId(in_partition INTEGER,
                                                       point GEOMETRY,

@@ -202,7 +202,7 @@ BEGIN
  RAISE EXCEPTION 'Unknown partition %', in_partition;
END
$$
LANGUAGE plpgsql STABLE PARALLEL SAFE;
LANGUAGE plpgsql STABLE;

create or replace function insertSearchName(
  in_partition INTEGER, in_place_id BIGINT, in_name_vector INTEGER[],

@@ -310,7 +310,7 @@ BEGIN
  RAISE EXCEPTION 'Unknown partition %', in_partition;
END
$$
LANGUAGE plpgsql STABLE PARALLEL SAFE;
LANGUAGE plpgsql STABLE;

CREATE OR REPLACE FUNCTION getNearestParallelRoadFeature(in_partition INTEGER,
                                                         line GEOMETRY)

@@ -354,4 +354,4 @@ BEGIN
  RAISE EXCEPTION 'Unknown partition %', in_partition;
END
$$
LANGUAGE plpgsql STABLE PARALLEL SAFE;
LANGUAGE plpgsql STABLE;

@@ -109,7 +109,7 @@ BEGIN
  RETURN result;
END;
$$
LANGUAGE plpgsql STABLE PARALLEL SAFE;
LANGUAGE plpgsql STABLE;

CREATE OR REPLACE FUNCTION find_associated_street(poi_osm_type CHAR(1),

@@ -200,7 +200,7 @@ BEGIN
  RETURN result;
END;
$$
LANGUAGE plpgsql STABLE PARALLEL SAFE;
LANGUAGE plpgsql STABLE;

-- Find the parent road of a POI.

@@ -286,7 +286,7 @@ BEGIN
  RETURN parent_place_id;
END;
$$
LANGUAGE plpgsql STABLE PARALLEL SAFE;
LANGUAGE plpgsql STABLE;

-- Try to find a linked place for the given object.
CREATE OR REPLACE FUNCTION find_linked_place(bnd placex)

@@ -404,7 +404,7 @@ BEGIN
  RETURN NULL;
END;
$$
LANGUAGE plpgsql STABLE PARALLEL SAFE;
LANGUAGE plpgsql STABLE;

CREATE OR REPLACE FUNCTION create_poi_search_terms(obj_place_id BIGINT,

@@ -638,10 +638,8 @@ BEGIN

      -- Add it to the list of search terms
      {% if not db.reverse_only %}
      IF location.rank_address != 11 AND location.rank_address != 5 THEN
        nameaddress_vector := array_merge(nameaddress_vector,
                                          location.keywords::integer[]);
      END IF;
      nameaddress_vector := array_merge(nameaddress_vector,
                                        location.keywords::integer[]);
      {% endif %}

      INSERT INTO place_addressline (place_id, address_place_id, fromarea,

@@ -29,7 +29,7 @@ BEGIN
  RETURN 0.02;
END;
$$
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
LANGUAGE plpgsql IMMUTABLE;

-- Return an approximate update radius according to the search rank.

@@ -60,7 +60,7 @@ BEGIN
  RETURN 0;
END;
$$
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
LANGUAGE plpgsql IMMUTABLE;

-- Compute a base address rank from the extent of the given geometry.
--

@@ -88,10 +88,6 @@ BEGIN
    area := area / 3;
  ELSIF country_code IN ('bo', 'ar', 'sd', 'mn', 'in', 'et', 'cd', 'mz', 'ly', 'cl', 'zm') THEN
    area := area / 2;
  ELSIF country_code IN ('sg', 'ws', 'st', 'kn') THEN
    area := area * 5;
  ELSIF country_code IN ('dm', 'mt', 'lc', 'gg', 'sc', 'nr') THEN
    area := area * 20;
  END IF;

  IF area > 1 THEN

@@ -111,7 +107,7 @@ BEGIN
  RETURN 23;
END;
$$
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
LANGUAGE plpgsql IMMUTABLE;

-- Guess a ranking for postcodes from country and postcode format.

@@ -171,7 +167,7 @@ BEGIN

END;
$$
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
LANGUAGE plpgsql IMMUTABLE;

-- Get standard search and address rank for an object.

@@ -240,7 +236,7 @@ BEGIN
  END IF;
END;
$$
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
LANGUAGE plpgsql IMMUTABLE;

CREATE OR REPLACE FUNCTION get_addr_tag_rank(key TEXT, country TEXT,
                                             OUT from_rank SMALLINT,

@@ -287,7 +283,7 @@ BEGIN
  END LOOP;
END;
$$
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
LANGUAGE plpgsql IMMUTABLE;

CREATE OR REPLACE FUNCTION weigh_search(search_vector INT[],

@@ -308,4 +304,4 @@ BEGIN
  RETURN def_weight;
END;
$$
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
LANGUAGE plpgsql IMMUTABLE;
@@ -24,7 +24,7 @@ BEGIN
  RETURN ST_PointOnSurface(place);
END;
$$
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
LANGUAGE plpgsql IMMUTABLE;

CREATE OR REPLACE FUNCTION geometry_sector(partition INTEGER, place GEOMETRY)

@@ -34,7 +34,7 @@ BEGIN
  RETURN (partition*1000000) + (500-ST_X(place)::INTEGER)*1000 + (500-ST_Y(place)::INTEGER);
END;
$$
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
LANGUAGE plpgsql IMMUTABLE;

@@ -60,7 +60,7 @@ BEGIN
  RETURN r;
END;
$$
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
LANGUAGE plpgsql IMMUTABLE;

-- Return the node members with a given label from a relation member list
-- as a set.

@@ -88,7 +88,7 @@ BEGIN
  RETURN;
END;
$$
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
LANGUAGE plpgsql IMMUTABLE;

CREATE OR REPLACE FUNCTION get_rel_node_members(members JSONB, memberLabels TEXT[])

@@ -107,7 +107,7 @@ BEGIN
  RETURN;
END;
$$
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
LANGUAGE plpgsql IMMUTABLE;

-- Copy 'name' to or from the default language.

@@ -136,7 +136,7 @@ BEGIN
  END IF;
END;
$$
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
LANGUAGE plpgsql IMMUTABLE;

-- Find the nearest artificial postcode for the given geometry.

@@ -172,7 +172,7 @@ BEGIN
  RETURN outcode;
END;
$$
LANGUAGE plpgsql STABLE PARALLEL SAFE;
LANGUAGE plpgsql STABLE;

CREATE OR REPLACE FUNCTION get_country_code(place geometry)

@@ -233,7 +233,7 @@ BEGIN
  RETURN NULL;
END;
$$
LANGUAGE plpgsql STABLE PARALLEL SAFE;
LANGUAGE plpgsql STABLE;

CREATE OR REPLACE FUNCTION get_country_language_code(search_country_code VARCHAR(2))

@@ -251,7 +251,7 @@ BEGIN
  RETURN NULL;
END;
$$
LANGUAGE plpgsql STABLE PARALLEL SAFE;
LANGUAGE plpgsql STABLE;

CREATE OR REPLACE FUNCTION get_partition(in_country_code VARCHAR(10))

@@ -268,7 +268,7 @@ BEGIN
  RETURN 0;
END;
$$
LANGUAGE plpgsql STABLE PARALLEL SAFE;
LANGUAGE plpgsql STABLE;

-- Find the parent of an address with addr:street/addr:place tag.

@@ -299,7 +299,7 @@ BEGIN
  RETURN parent_place_id;
END;
$$
LANGUAGE plpgsql STABLE PARALLEL SAFE;
LANGUAGE plpgsql STABLE;

CREATE OR REPLACE FUNCTION delete_location(OLD_place_id BIGINT)

@@ -337,7 +337,7 @@ BEGIN
  ST_Project(geom::geography, radius, 3.9269908)::geometry));
END;
$$
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
LANGUAGE plpgsql IMMUTABLE;

CREATE OR REPLACE FUNCTION add_location(place_id BIGINT, country_code varchar(2),

@@ -455,7 +455,7 @@ BEGIN
  RETURN;
END;
$$
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
LANGUAGE plpgsql IMMUTABLE;

CREATE OR REPLACE FUNCTION split_geometry(geometry GEOMETRY)

@@ -483,7 +483,7 @@ BEGIN
  RETURN;
END;
$$
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
LANGUAGE plpgsql IMMUTABLE;

CREATE OR REPLACE FUNCTION simplify_large_polygons(geometry GEOMETRY)
  RETURNS GEOMETRY

@@ -497,7 +497,7 @@ BEGIN
  RETURN geometry;
END;
$$
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
LANGUAGE plpgsql IMMUTABLE;

CREATE OR REPLACE FUNCTION place_force_delete(placeid BIGINT)
@@ -12,7 +12,7 @@ CREATE OR REPLACE FUNCTION token_get_name_search_tokens(info JSONB)
  RETURNS INTEGER[]
  AS $$
  SELECT (info->>'names')::INTEGER[]
$$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE;
$$ LANGUAGE SQL IMMUTABLE STRICT;
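These accessor functions simply unpack the token info JSONB written by the
tokenizer. A minimal sketch of what that looks like (the token ids are made
up):

``` sql
-- 'names' holds a PostgreSQL array literal that is cast to INTEGER[]
SELECT token_get_name_search_tokens('{"names": "{372,1244}"}'::jsonb);
-- returns {372,1244}
```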
|
||||
|
||||
-- Get tokens for matching the place name against others.
|
||||
@@ -22,7 +22,7 @@ CREATE OR REPLACE FUNCTION token_get_name_match_tokens(info JSONB)
|
||||
RETURNS INTEGER[]
|
||||
AS $$
|
||||
SELECT (info->>'names')::INTEGER[]
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE;
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
-- Return the housenumber tokens applicable for the place.
|
||||
@@ -30,7 +30,7 @@ CREATE OR REPLACE FUNCTION token_get_housenumber_search_tokens(info JSONB)
|
||||
RETURNS INTEGER[]
|
||||
AS $$
|
||||
SELECT (info->>'hnr_tokens')::INTEGER[]
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE;
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
-- Return the housenumber in the form that it can be matched during search.
|
||||
@@ -38,77 +38,77 @@ CREATE OR REPLACE FUNCTION token_normalized_housenumber(info JSONB)
|
||||
RETURNS TEXT
|
||||
AS $$
|
||||
SELECT info->>'hnr';
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE;
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_is_street_address(info JSONB)
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT info->>'street' is not null or info->>'place' is null;
|
||||
$$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE;
|
||||
$$ LANGUAGE SQL IMMUTABLE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_has_addr_street(info JSONB)
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT info->>'street' is not null and info->>'street' != '{}';
|
||||
$$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE;
|
||||
$$ LANGUAGE SQL IMMUTABLE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_has_addr_place(info JSONB)
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT info->>'place' is not null;
|
||||
$$ LANGUAGE SQL IMMUTABLE PARALLEL SAFE;
|
||||
$$ LANGUAGE SQL IMMUTABLE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_matches_street(info JSONB, street_tokens INTEGER[])
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT (info->>'street')::INTEGER[] && street_tokens
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE;
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_matches_place(info JSONB, place_tokens INTEGER[])
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT (info->>'place')::INTEGER[] <@ place_tokens
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE;
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_addr_place_search_tokens(info JSONB)
|
||||
RETURNS INTEGER[]
|
||||
AS $$
|
||||
SELECT (info->>'place')::INTEGER[]
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE;
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_get_address_keys(info JSONB)
|
||||
RETURNS SETOF TEXT
|
||||
AS $$
|
||||
SELECT * FROM jsonb_object_keys(info->'addr');
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE;
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_get_address_search_tokens(info JSONB, key TEXT)
|
||||
RETURNS INTEGER[]
|
||||
AS $$
|
||||
SELECT (info->'addr'->>key)::INTEGER[];
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE;
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_matches_address(info JSONB, key TEXT, tokens INTEGER[])
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT (info->'addr'->>key)::INTEGER[] <@ tokens;
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE;
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_get_postcode(info JSONB)
|
||||
RETURNS TEXT
|
||||
AS $$
|
||||
SELECT info->>'postcode';
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE;
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
-- Return token info that should be saved permanently in the database.
|
||||
@@ -116,7 +116,7 @@ CREATE OR REPLACE FUNCTION token_strip_info(info JSONB)
|
||||
RETURNS JSONB
|
||||
AS $$
|
||||
SELECT NULL::JSONB;
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE;
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
--------------- private functions ----------------------------------------------
|
||||
|
||||
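In token_matches_street and token_matches_place above, the PostgreSQL array operator && tests whether the two integer arrays share any element, while <@ tests whether the left array is fully contained in the right one. A minimal sketch of the same semantics with Python sets standing in for the token arrays (the token values are made up for illustration):

    street_info = {101, 102}        # tokens stored under info->>'street'
    street_tokens = {102, 340}      # tokens of a candidate street

    # SQL: (info->>'street')::INTEGER[] && street_tokens  -- overlap
    print(bool(street_info & street_tokens))   # True, 102 is shared

    place_info = {55, 77}           # tokens stored under info->>'place'
    place_tokens = {55, 77, 900}    # tokens of a candidate place

    # SQL: (info->>'place')::INTEGER[] <@ place_tokens  -- containment
    print(place_info <= place_tokens)          # True, all tokens contained
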
@@ -128,14 +128,16 @@ DECLARE
partial_terms TEXT[] = '{}'::TEXT[];
term TEXT;
term_id INTEGER;
term_count INTEGER;
BEGIN
SELECT min(word_id) INTO full_token
FROM word WHERE word = norm_term and type = 'W';

IF full_token IS NULL THEN
full_token := nextval('seq_word');
INSERT INTO word (word_id, word_token, type, word)
SELECT full_token, lookup_term, 'W', norm_term
INSERT INTO word (word_id, word_token, type, word, info)
SELECT full_token, lookup_term, 'W', norm_term,
json_build_object('count', 0)
FROM unnest(lookup_terms) as lookup_term;
END IF;

@@ -148,67 +150,14 @@ BEGIN

partial_tokens := '{}'::INT[];
FOR term IN SELECT unnest(partial_terms) LOOP
SELECT min(word_id) INTO term_id
SELECT min(word_id), max(info->>'count') INTO term_id, term_count
FROM word WHERE word_token = term and type = 'w';

IF term_id IS NULL THEN
term_id := nextval('seq_word');
INSERT INTO word (word_id, word_token, type)
VALUES (term_id, term, 'w');
END IF;

partial_tokens := array_merge(partial_tokens, ARRAY[term_id]);
END LOOP;
END;
$$
LANGUAGE plpgsql;


CREATE OR REPLACE FUNCTION getorcreate_full_word(norm_term TEXT,
lookup_terms TEXT[],
lookup_norm_terms TEXT[],
OUT full_token INT,
OUT partial_tokens INT[])
AS $$
DECLARE
partial_terms TEXT[] = '{}'::TEXT[];
term TEXT;
term_id INTEGER;
BEGIN
SELECT min(word_id) INTO full_token
FROM word WHERE word = norm_term and type = 'W';

IF full_token IS NULL THEN
full_token := nextval('seq_word');
IF lookup_norm_terms IS NULL THEN
INSERT INTO word (word_id, word_token, type, word)
SELECT full_token, lookup_term, 'W', norm_term
FROM unnest(lookup_terms) as lookup_term;
ELSE
INSERT INTO word (word_id, word_token, type, word, info)
SELECT full_token, t.lookup, 'W', norm_term,
CASE WHEN norm_term = t.norm THEN null
ELSE json_build_object('lookup', t.norm) END
FROM unnest(lookup_terms, lookup_norm_terms) as t(lookup, norm);
END IF;
END IF;

FOR term IN SELECT unnest(string_to_array(unnest(lookup_terms), ' ')) LOOP
term := trim(term);
IF NOT (ARRAY[term] <@ partial_terms) THEN
partial_terms := partial_terms || term;
END IF;
END LOOP;

partial_tokens := '{}'::INT[];
FOR term IN SELECT unnest(partial_terms) LOOP
SELECT min(word_id) INTO term_id
FROM word WHERE word_token = term and type = 'w';

IF term_id IS NULL THEN
term_id := nextval('seq_word');
INSERT INTO word (word_id, word_token, type)
VALUES (term_id, term, 'w');
term_count := 0;
INSERT INTO word (word_id, word_token, type, info)
VALUES (term_id, term, 'w', json_build_object('count', term_count));
END IF;

partial_tokens := array_merge(partial_tokens, ARRAY[term_id]);

@@ -1,4 +1,4 @@
site_name: Nominatim Manual
site_name: Nominatim 5.0.0 Manual
theme:
font: false
name: material

@@ -3,7 +3,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Helper script for development to run nominatim from the source directory.
@@ -15,4 +15,4 @@ sys.path.insert(1, str((Path(__file__) / '..' / 'src').resolve()))

from nominatim_db import cli

exit(cli.nominatim())
exit(cli.nominatim(module_dir=None, osm2pgsql_path=None))

@@ -19,6 +19,7 @@ dependencies = [
"python-dotenv",
"jinja2",
"pyYAML>=5.1",
"datrie",
"psutil",
"PyICU"
]

@@ -2,4 +2,4 @@

from nominatim_db import cli

exit(cli.nominatim())
exit(cli.nominatim(osm2pgsql_path=None))

@@ -216,14 +216,6 @@
}
}
},
{ "countries" : ["sa"],
"tags" : {
"place" : {
"province" : 12,
"municipality" : 18
}
}
},
{ "countries" : ["sk"],
"tags" : {
"boundary" : {

@@ -944,7 +944,7 @@ kp:
# South Korea (대한민국)
kr:
partition: 49
languages: ko
languages: ko, en
names: !include country-names/kr.yaml
postcode:
pattern: "ddddd"
@@ -1809,8 +1809,7 @@ us:
languages: en
names: !include country-names/us.yaml
postcode:
pattern: "(ddddd)(?:-dddd)?"
output: \1
pattern: "ddddd"


# Uruguay (Uruguay)

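In the us: hunk above, one side accepts ZIP+4 codes with the pattern "(ddddd)(?:-dddd)?" and reduces them to the five-digit form via output: \1, while the other side only matches plain five-digit codes. A minimal sketch of the extended behaviour in Python, assuming d in these config patterns stands for a single digit:

    import re

    # group 1 keeps the five-digit part; the -dddd suffix is optional
    zip_plus4 = re.compile(r'(\d{5})(?:-\d{4})?')

    for candidate in ('12345', '12345-6789'):
        m = zip_plus4.fullmatch(candidate)
        if m:
            print(candidate, '->', m.group(1))   # both print '12345'
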
@@ -5,6 +5,7 @@
# Database connection string.
# Add host, port, user etc through additional semicolon-separated attributes.
# e.g. ;host=...;port=...;user=...;password=...
# Changing this variable requires to run 'nominatim refresh --website'.
NOMINATIM_DATABASE_DSN="pgsql:dbname=nominatim"

# Database web user.
@@ -35,11 +36,11 @@ NOMINATIM_TOKENIZER_CONFIG=

# Search in the Tiger house number data for the US.
# Note: The tables must already exist or queries will throw errors.
# Changing this value requires to run ./utils/setup --create-functions.
# Changing this value requires to run ./utils/setup --create-functions --setup-website.
NOMINATIM_USE_US_TIGER_DATA=no

# Search in the auxiliary housenumber table.
# Changing this value requires to run ./utils/setup --create-functions.
# Changing this value requires to run ./utils/setup --create-functions --setup-website.
NOMINATIM_USE_AUX_LOCATION_DATA=no

# Proxy settings
@@ -142,7 +143,8 @@ NOMINATIM_REPLICATION_RECHECK_INTERVAL=60

### API settings
#
# The following settings configure the API responses.
# The following settings configure the API responses. You must rerun
# 'nominatim refresh --website' after changing any of them.

# Send permissive CORS access headers.
# When enabled, send CORS headers to allow access to everybody.
@@ -190,17 +192,16 @@ NOMINATIM_REQUEST_TIMEOUT=60
# to geocode" instead.
NOMINATIM_SEARCH_WITHIN_COUNTRIES=False

# Specifies the order in which different name tags are used.
# The values in this list determine the preferred order of name variants,
# including language-specific names.
# Comma-separated list, where :XX stands for language-specific tags
# (e.g. name:en) and no :XX stands for general tags (e.g. name).
NOMINATIM_OUTPUT_NAMES=name:XX,name,brand,official_name:XX,short_name:XX,official_name,short_name,ref

### Log settings
#
# The following options allow to enable logging of API requests.
# You must rerun 'nominatim refresh --website' after changing any of them.
#
# Enable logging of requests into the DB.
# The request will be logged into the new_query_log table.
# You should set up a cron job that regularly clears out this table.
NOMINATIM_LOG_DB=no

# Enable logging of requests into a file.
# To enable logging set this setting to the file to log to.
NOMINATIM_LOG_FILE=

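The NOMINATIM_OUTPUT_NAMES comment above defines a comma-separated priority list in which a trailing :XX expands to one language-specific tag per requested language. A minimal sketch of that expansion in Python (expand_output_names is an illustrative helper, not part of Nominatim):

    def expand_output_names(setting, languages):
        # ':XX' entries expand to '<tag>:<lang>' per language; plain
        # entries are kept as they are, preserving the configured order.
        tags = []
        for part in (p.strip() for p in setting.split(',')):
            if part.endswith(':XX'):
                tags.extend(part[:-3] + ':' + lang for lang in languages)
            else:
                tags.append(part)
        return tags

    print(expand_output_names('name:XX,name,brand', ['en', 'de']))
    # ['name:en', 'name:de', 'name', 'brand']
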
@@ -4,7 +4,7 @@
- aparcament -> aparc
- apartament -> apmt
- apartat -> apt
- àtic -> àt
- àtic -> àt
- autopista -> auto
- autopista -> autop
- autovia -> autov
@@ -19,6 +19,7 @@
- biblioteca -> bibl
- bloc -> bl
- carrer -> c
- carrer -> c/
- carreró -> cró
- carretera -> ctra
- cantonada -> cant
@@ -57,6 +58,7 @@
- número -> n
- sense número -> s/n
- parada -> par
- parcel·la -> parc
- passadís -> pdís
- passatge -> ptge
- passeig -> pg

@@ -1,393 +1,438 @@
# Source: https://wiki.openstreetmap.org/wiki/Name_finder:Abbreviations#English
# Source: https://pe.usps.com/text/pub28/28apc_002.htm
- lang: en
words:
- Access -> Accs
- Air Force Base -> AFB
- Air National Guard Base -> ANGB
- Airport -> Aprt
- Alley -> Al,All,Ally,Aly
- Alley -> Al
- Alley -> All
- Alley -> Ally
- Alley -> Aly
- Alleyway -> Alwy
- Amble -> Ambl
- Anex -> Anx
- Apartments -> Apts
- Approach -> Apch,App
- Approach -> Apch
- Approach -> App
- Arcade -> Arc
- Arterial -> Artl
- Artery -> Arty
- Avenue -> Av,Ave
- Avenue -> Av
- Avenue -> Ave
- Back -> Bk
- Banan -> Ba
- Basin -> Basn,Bsn
- Bayou -> Byu
- Basin -> Basn
- Basin -> Bsn
- Beach -> Bch
- Bend -> Bend
- Bend -> Bnd
- Block -> Blk
- Bluff -> Blf
- Bluffs -> Blfs
- Boardwalk -> Bwlk
- Bottom -> Btm
- Boulevard -> Blvd,Bvd
- Boulevard -> Blvd
- Boulevard -> Bvd
- Boundary -> Bdy
- Bowl -> Bl
- Brace -> Br
- Brae -> Br
- Branch -> Br
- Brae -> Brae
- Break -> Brk
- Bridge$ -> Bdge,Br,Brdg,Brg,Bri
- Broadway -> Bdwy,Bway,Bwy
- Bridge -> Bdge
- Bridge -> Br
- Bridge -> Brdg
- Bridge -> Bri
- Broadway -> Bdwy
- Broadway -> Bway
- Broadway -> Bwy
- Brook -> Brk
- Brooks -> Brks
- Brow -> Brw
- Buildings -> Bldgs,Bldngs
- Brow -> Brow
- Buildings -> Bldgs
- Buildings -> Bldngs
- Business -> Bus
- Burg -> Bg
- Burgs -> Bgs
- Bypass -> Bps,Byp,Bypa
- Bypass -> Bps
- Bypass -> Byp
- Bypass -> Bypa
- Byway -> Bywy
- Camp -> Cp
- Canyon -> Cyn
- Cape -> Cpe
- Caravan -> Cvn
- Causeway -> Caus,Cswy,Cway
- Center,Centre -> Cen,Ctr
- Centers -> Ctrs
- Causeway -> Caus
- Causeway -> Cswy
- Causeway -> Cway
- Center -> Cen
- Center -> Ctr
- Central -> Ctrl
- Centre -> Cen
- Centre -> Ctr
- Centreway -> Cnwy
- Chase -> Ch
- Church -> Ch
- Circle -> Cir
- Circles -> Cirs
- Circuit -> Cct,Ci
- Circus -> Crc,Crcs
- Circuit -> Cct
- Circuit -> Ci
- Circus -> Crc
- Circus -> Crcs
- City -> Cty
- Cliff -> Clf
- Cliffs -> Clfs
- Close -> Cl
- Club -> Clb
- Common -> Cmn,Comm
- Commons -> Cmns
- Common -> Cmn
- Common -> Comm
- Community -> Comm
- Concourse -> Cnc
- Concourse -> Con
- Copse -> Cps
- Corner -> Cor,Cnr,Crn
- Corners -> Cors
- Corner -> Cnr
- Corner -> Crn
- Corso -> Cso
- Cottages -> Cotts
- County -> Co
- County Road -> CR
- County Route -> CR
- Course -> Crse
- Court -> Crt,Ct
- Courts -> Cts
- Court -> Crt
- Court -> Ct
- Courtyard -> Cyd
- Courtyard -> Ctyd
- Cove$ -> Ce,Cov,Cv
- Coves -> Cvs
- Creek$ -> Ck,Cr,Crk
- Cove -> Ce
- Cove -> Cov
- Cove -> Cove
- Cove -> Cv
- Creek -> Ck
- Creek -> Cr
- Creek -> Crk
- Crescent -> Cr
- Crescent -> Cres
- Crest -> Crst,Cst
- Crest -> Crst
- Crest -> Cst
- Croft -> Cft
- Cross -> Cs,Crss
- Crossing -> Crsg,Csg,Xing
- Crossroad -> Crd,Xrd
- Crossroads -> Xrds
- Cross -> Cs
- Cross -> Crss
- Crossing -> Crsg
- Crossing -> Csg
- Crossing -> Xing
- Crossroad -> Crd
- Crossway -> Cowy
- Cul-de-sac -> Cds,Csac
- Curve -> Cve,Curv
- Cul-de-sac -> Cds
- Cul-de-sac -> Csac
- Curve -> Cve
- Cutting -> Cutt
- Dale -> Dle
- Dam -> Dm
- Dale -> Dale
- Deviation -> Devn
- Dip -> Dip
- Distributor -> Dstr
- Divide -> Dv
- Down -> Dn
- Downs -> Dn
- Drive -> Dr,Drv,Dv
- Drives -> Drs
- Drive -> Dr
- Drive -> Drv
- Drive -> Dv
- Drive-In => Drive-In # prevent abbreviation here
- Driveway -> Drwy,Dvwy,Dwy
- Driveway -> Drwy
- Driveway -> Dvwy
- Driveway -> Dwy
- East -> E
- Edge -> Edg
- Edge -> Edge
- Elbow -> Elb
- End -> End
- Entrance -> Ent
- Esplanade -> Esp
- Estate -> Est
- Estates -> Ests
- Expressway -> Exp,Expy,Expwy,Xway
- Expressway -> Exp
- Expressway -> Expy
- Expressway -> Expwy
- Expressway -> Xway
- Extension -> Ex
- Extensions -> Exts
- Fairway -> Fawy,Fy
- Falls -> Fls
- Fairway -> Fawy
- Fairway -> Fy
- Father -> Fr
- Ferry -> Fy,Fry
- Field -> Fd,Fld
- Fields -> Flds
- Ferry -> Fy
- Field -> Fd
- Fire Track -> Ftrk
- Firetrail -> Fit
- Flat -> Fl,Flt
- Flats -> Flts
- Flat -> Fl
- Flat -> Flat
- Follow -> Folw
- Footway -> Ftwy
- Ford -> Frd
- Fords -> Frds
- Foreshore -> Fshr
- Forest -> Frst
- Forest Service Road -> FSR
- Forge -> Frg
- Forges -> Frgs
- Formation -> Form
- Fork -> Frk
- Forks -> Frks
- Fort -> Ft
- Freeway -> Frwy,Fwy
- Freeway -> Frwy
- Freeway -> Fwy
- Front -> Frnt
- Frontage -> Fr,Frtg
- Frontage -> Fr
- Frontage -> Frtg
- Gap -> Gap
- Garden -> Gdn
- Gardens -> Gdn,Gdns
- Gate,Gates -> Ga,Gte
- Gateway -> Gwy,Gtwy
- Gardens -> Gdn
- Gardens -> Gdns
- Gate -> Ga
- Gate -> Gte
- Gates -> Ga
- Gates -> Gte
- Gateway -> Gwy
- George -> Geo
- Glade$ -> Gl,Gld,Glde
- Glade -> Gl
- Glade -> Gld
- Glade -> Glde
- Glen -> Gln
- Glens -> Glns
- Glen -> Glen
- Grange -> Gra
- Green -> Gn,Grn
- Greens -> Grns
- Green -> Gn
- Green -> Grn
- Ground -> Grnd
- Grove$ -> Gr,Gro,Grv
- Groves -> Grvs
- Grove -> Gr
- Grove -> Gro
- Grovet -> Gr
- Gully -> Gly
- Harbor -> Hbr,Harbour
- Harbors -> Hbrs
- Harbour -> Hbr,Harbor
- Harbor -> Hbr
- Harbour -> Hbr
- Haven -> Hvn
- Head -> Hd
- Heads -> Hd
- Heights -> Hgts,Ht,Hts
- Heights -> Hgts
- Heights -> Ht
- Heights -> Hts
- High School -> HS
- Highroad -> Hird,Hrd
- Highroad -> Hird
- Highroad -> Hrd
- Highway -> Hwy
- Hill -> Hill
- Hill -> Hl
- Hills -> Hl,Hls
- Hollow -> Holw
- Hills -> Hl
- Hills -> Hls
- Hospital -> Hosp
- House -> Ho,Hse
- House -> Ho
- House -> Hse
- Industrial -> Ind
- Inlet -> Inlt
- Interchange -> Intg
- International -> Intl
- Island -> I,Is
- Islands -> Iss
- Junction -> Jct,Jctn,Jnc
- Junctions -> Jcts
- Island -> I
- Island -> Is
- Junction -> Jctn
- Junction -> Jnc
- Junior -> Jr
- Key -> Ky
- Keys -> Kys
- Knoll -> Knl
- Knolls -> Knls
- Key -> Key
- Lagoon -> Lgn
- Lake -> Lk
- Lakes -> L,Lks
- Landing -> Ldg,Lndg
- Lane -> La,Ln
- Lakes -> L
- Landing -> Ldg
- Lane -> La
- Lane -> Lane
- Lane -> Ln
- Laneway -> Lnwy
- Light -> Lgt
- Lights -> Lgts
- Line -> Line
- Line -> Ln
- Link -> Link
- Link -> Lk
- Little -> Lit,Lt
- Loaf -> Lf
- Lock -> Lck
- Locks -> Lcks
- Little -> Lit
- Little -> Lt
- Lodge -> Ldg
- Lookout -> Lkt
- Loop -> Loop
- Loop -> Lp
- Lower -> Low,Lr,Lwr
- Lower -> Low
- Lower -> Lr
- Lower -> Lwr
- Mall -> Mall
- Mall -> Ml
- Manor -> Mnr
- Manors -> Mnrs
- Mansions -> Mans
- Market -> Mkt
- Meadow -> Mdw
- Meadows -> Mdw,Mdws
- Meadows -> Mdw
- Meadows -> Mdws
- Mead -> Md
- Meander -> Mdr,Mndr,Mr
- Meander -> Mdr
- Meander -> Mndr
- Meander -> Mr
- Medical -> Med
- Memorial -> Mem
- Mews -> Mews
- Mews -> Mw
- Middle -> Mid
- Middle School -> MS
- Mile -> Mi
- Military -> Mil
- Mill -> Ml
- Mills -> Mls
- Mission -> Msn
- Motorway -> Mtwy,Mwy
- Motorway -> Mtwy
- Motorway -> Mwy
- Mount -> Mt
- Mountain -> Mtn
- Mountains$ -> Mtn,Mtns
- Mountains -> Mtn
- Municipal -> Mun
- Museum -> Mus
- National Park -> NP
- National Recreation Area -> NRA
- National Wildlife Refuge Area -> NWRA
- Neck -> Nck
- Nook -> Nk
- Nook -> Nook
- North -> N
- Northeast -> NE
- Northwest -> NW
- Orchard -> Orch
- Outlook -> Out,Otlk
- Overpass -> Opas
- Outlook -> Out
- Outlook -> Otlk
- Parade -> Pde
- Paradise -> Pdse
- Park -> Park
- Park -> Pk
- Parklands -> Pkld
- Parkway -> Pkwy,Pky,Pwy
- Parkways -> Pkwy
- Parkway -> Pkwy
- Parkway -> Pky
- Parkway -> Pwy
- Pass -> Pass
- Pass -> Ps
- Passage -> Psge
- Pathway -> Phwy,Pway,Pwy
- Path -> Path
- Pathway -> Phwy
- Pathway -> Pway
- Pathway -> Pwy
- Piazza -> Piaz
- Pike -> Pk
- Pine -> Pne
- Pines -> Pnes
- Place -> Pl
- Plain -> Pl,Pln
- Plains -> Pl,Plns
- Plain -> Pl
- Plains -> Pl
- Plateau -> Plat
- Plaza -> Pl,Plz,Plza
- Plaza -> Pl
- Plaza -> Plz
- Plaza -> Plza
- Pocket -> Pkt
- Point -> Pnt,Pt
- Points -> Pts
- Port -> Prt,Pt
- Ports -> Prts
- Point -> Pnt
- Point -> Pt
- Port -> Port
- Port -> Pt
- Post Office -> PO
- Prairie -> Pr
- Precinct -> Pct
- Promenade -> Prm,Prom
- Promenade -> Prm
- Promenade -> Prom
- Quad -> Quad
- Quadrangle -> Qdgl
- Quadrant -> Qdrt,Qd
- Quadrant -> Qdrt
- Quadrant -> Qd
- Quay -> Qy
- Quays -> Qy
- Quays -> Qys
- Radial -> Radl
- Ramble -> Ra
- Ramble -> Rmbl
- Ranch -> Rnch
- Range -> Rge,Rnge
- Rapid -> Rpd
- Rapids -> Rpds
- Range -> Rge
- Range -> Rnge
- Reach -> Rch
- Reservation -> Res
- Reserve -> Res
- Reservoir -> Res
- Rest -> Rest
- Rest -> Rst
- Retreat -> Rt,Rtt
- Retreat -> Rt
- Retreat -> Rtt
- Return -> Rtn
- Ridge -> Rdg,Rdge
- Ridges -> Rdgs
- Ridge -> Rdg
- Ridge -> Rdge
- Ridgeway -> Rgwy
- Right of Way -> Rowy
- Rise -> Ri
- ^River -> R,Riv,Rvr
- River$ -> R,Riv,Rvr
- Rise -> Rise
- River -> R
- River -> Riv
- River -> Rvr
- Riverway -> Rvwy
- Riviera -> Rvra
- Road -> Rd
- Roads -> Rds
- Roadside -> Rdsd
- Roadway -> Rdwy,Rdy
- Roadway -> Rdwy
- Roadway -> Rdy
- Robert -> Robt
- Rocks -> Rks
- Ronde -> Rnde
- Rosebowl -> Rsbl
- Rotary -> Rty
- Round -> Rnd
- Route -> Rt,Rte
- Route -> Rt
- Route -> Rte
- Row -> Row
- Rue -> Rue
- Run -> Run
- Saint -> St
- Saints -> SS
- Senior -> Sr
- Serviceway -> Swy,Svwy
- Shoal -> Shl
- Shore -> Shr
- Shores -> Shrs
- Serviceway -> Swy
- Serviceway -> Svwy
- Shunt -> Shun
- Siding -> Sdng
- Sister -> Sr
- Skyway -> Skwy
- Slope -> Slpe
- Sound -> Snd
- South -> S,Sth
- South -> S
- South -> Sth
- Southeast -> SE
- Southwest -> SW
- Spring -> Spg
- Springs -> Spgs
- Spurs -> Spur
- Spur -> Spur
- Square -> Sq
- Squares -> Sqs
- Stairway -> Strwy
- State Highway -> SH,SHwy
- State Highway -> SH
- State Highway -> SHwy
- State Route -> SR
- Station -> Sta,Stn
- Strand -> Sd,Stra
- Stravenue -> Stra
- Stream -> Strm
- Station -> Sta
- Station -> Stn
- Strand -> Sd
- Strand -> Stra
- Street -> St
- Streets -> Sts
- Strip -> Strp
- Subway -> Sbwy
- Summit -> Smt
- Tarn -> Tn
- Tarn -> Tarn
- Terminal -> Term
- Terrace -> Tce,Ter,Terr
- Thoroughfare -> Thfr,Thor
- Throughway -> Trwy
- Tollway -> Tlwy,Twy
- Terrace -> Tce
- Terrace -> Ter
- Terrace -> Terr
- Thoroughfare -> Thfr
- Thoroughfare -> Thor
- Tollway -> Tlwy
- Tollway -> Twy
- Top -> Top
- Tor -> Tor
- Towers -> Twrs
- Township -> Twp
- Trace -> Trce
- Track -> Tr,Trak,Trk
- Trafficway -> Trfy
- Track -> Tr
- Track -> Trk
- Trail -> Trl
- Trailer -> Trlr
- Triangle -> Tri
- Trunkway -> Tkwy
- Tunnel -> Tun,Tunl
- Turn -> Tn,Trn
- Turnpike -> Tpk,Tpke
- Underpass -> Upas,Ups
- Union -> Un
- Unions -> Uns
- University -> Uni,Univ
- Tunnel -> Tun
- Turn -> Tn
- Turn -> Trn
- Turn -> Turn
- Turnpike -> Tpk
- Turnpike -> Tpke
- Underpass -> Upas
- Underpass -> Ups
- University -> Uni
- University -> Univ
- Upper -> Up
- Upper -> Upr
- Vale -> Va
- Valley -> Vly
- Vale -> Vale
- Valley -> Vy
- Valleys -> Vlys
- Viaduct$ -> Vdct,Via,Viad
- Viaduct -> Vdct
- Viaduct -> Via
- Viaduct -> Viad
- View -> Vw
- Views -> Vws
- Village -> Vill,Vlg
- Villages -> Vlgs
- View -> View
- Village -> Vill
- Villas -> Vlls
- Ville -> Vl
- Vista -> Vis,Vst,Vsta
- Walk -> Wk,Wlk
- Walks -> Walk
- Walkway -> Wkwy,Wky
- Vista -> Vst
- Vista -> Vsta
- Walk -> Walk
- Walk -> Wk
- Walk -> Wlk
- Walkway -> Wkwy
- Walkway -> Wky
- Waters -> Wtr
- Way -> Way
- Way -> Wy
- Well -> Wl
- Wells -> Wls
- West -> W
- Wharf -> Whrf
- William -> Wm
- Wynd -> Wyn
- Wynd -> Wynd
- Yard -> Yard
- Yard -> Yd
- lang: en
country: ca

@@ -30,6 +30,7 @@
- Bloque -> Blq
- Bulevar -> Blvr
- Boulevard -> Blvd
- Calle -> C/
- Calle -> C
- Calle -> Cl
- Calleja -> Cllja

@@ -3,16 +3,20 @@
words:
- Abbaye -> ABE
- Agglomération -> AGL
- Aire -> AIRE
- Aires -> AIRE
- Allée -> ALL
- Allée -> All
- Allées -> ALL
- Ancien chemin -> ACH
- Ancienne route -> ART
- Anciennes routes -> ART
- Anse -> ANSE
- Arcade -> ARC
- Arcades -> ARC
- Autoroute -> AUT
- Avenue -> AV
- Avenue -> Av
- Barrière -> BRE
- Barrières -> BRE
- Bas chemin -> BCH
@@ -24,11 +28,16 @@
- Berges -> BER
- Bois -> BOIS
- Boucle -> BCLE
- Boulevard -> Bd
- Boulevard -> BD
- Bourg -> BRG
- Butte -> BUT
- Cité -> CITE
- Cités -> CITE
- Côte -> COTE
- Côteau -> COTE
- Cale -> CALE
- Camp -> CAMP
- Campagne -> CGNE
- Camping -> CPG
- Carreau -> CAU
@@ -47,13 +56,17 @@
- Chaussées -> CHS
- Chemin -> Ch
- Chemin -> CHE
- Chemin -> Che
- Chemin vicinal -> CHV
- Cheminement -> CHEM
- Cheminements -> CHEM
- Chemins -> CHE
- Chemins vicinaux -> CHV
- Chez -> CHEZ
- Château -> CHT
- Cloître -> CLOI
- Clos -> CLOS
- Col -> COL
- Colline -> COLI
- Collines -> COLI
- Contour -> CTR
@@ -61,7 +74,9 @@
- Corniches -> COR
- Cottage -> COTT
- Cottages -> COTT
- Cour -> COUR
- Cours -> CRS
- Cours -> Crs
- Darse -> DARS
- Degré -> DEG
- Degrés -> DEG
@@ -72,8 +87,11 @@
- Domaine -> DOM
- Domaines -> DOM
- Écluse -> ECL
- Écluse -> ÉCL
- Écluses -> ECL
- Écluses -> ÉCL
- Église -> EGL
- Église -> ÉGL
- Enceinte -> EN
- Enclave -> ENV
- Enclos -> ENC
@@ -82,16 +100,21 @@
- Espace -> ESPA
- Esplanade -> ESP
- Esplanades -> ESP
- Étang -> ETANG
- Étang -> ÉTANG
- Faubourg -> FG
- Faubourg -> Fg
- Ferme -> FRM
- Fermes -> FRM
- Fontaine -> FON
- Fort -> FORT
- Forum -> FORM
- Fosse -> FOS
- Fosses -> FOS
- Foyer -> FOYR
- Galerie -> GAL
- Galeries -> GAL
- Gare -> GARE
- Garenne -> GARN
- Grand boulevard -> GBD
- Grand ensemble -> GDEN
@@ -111,9 +134,13 @@
- Haut chemin -> HCH
- Hauts chemins -> HCH
- Hippodrome -> HIP
- HLM -> HLM
- Île -> ILE
- Île -> ÎLE
- Immeuble -> IMM
- Immeubles -> IMM
- Impasse -> IMP
- Impasse -> Imp
- Impasses -> IMP
- Jardin -> JARD
- Jardins -> JARD
@@ -123,11 +150,13 @@
- Lieu-dit -> LD
- Lotissement -> LOT
- Lotissements -> LOT
- Mail -> MAIL
- Maison forestière -> MF
- Manoir -> MAN
- Marche -> MAR
- Marches -> MAR
- Maréchal -> MAL
- Mas -> MAS
- Monseigneur -> Mgr
- Mont -> Mt
- Montée -> MTE
@@ -139,9 +168,13 @@
- Métro -> MÉT
- Nouvelle route -> NTE
- Palais -> PAL
- Parc -> PARC
- Parcs -> PARC
- Parking -> PKG
- Parvis -> PRV
- Passage -> PAS
- Passage -> Pas
- Passage -> Pass
- Passage à niveau -> PN
- Passe -> PASS
- Passerelle -> PLE
@@ -158,14 +191,19 @@
- Petite rue -> PTR
- Petites allées -> PTA
- Place -> PL
- Place -> Pl
- Placis -> PLCI
- Plage -> PLAG
- Plages -> PLAG
- Plaine -> PLN
- Plan -> PLAN
- Plateau -> PLT
- Plateaux -> PLT
- Pointe -> PNT
- Pont -> PONT
- Ponts -> PONT
- Porche -> PCH
- Port -> PORT
- Porte -> PTE
- Portique -> PORQ
- Portiques -> PORQ
@@ -173,19 +211,25 @@
- Pourtour -> POUR
- Presqu’île -> PRQ
- Promenade -> PROM
- Promenade -> Prom
- Pré -> PRE
- Pré -> PRÉ
- Périphérique -> PERI
- Péristyle -> PSTY
- Quai -> QU
- Quai -> Qu
- Quartier -> QUA
- Raccourci -> RAC
- Raidillon -> RAID
- Rampe -> RPE
- Rempart -> REM
- Roc -> ROC
- Rocade -> ROC
- Rond point -> RPT
- Roquet -> ROQT
- Rotonde -> RTD
- Route -> RTE
- Route -> Rte
- Routes -> RTE
- Rue -> R
- Rue -> R
@@ -201,6 +245,7 @@
- Sentier -> SEN
- Sentiers -> SEN
- Square -> SQ
- Square -> Sq
- Stade -> STDE
- Station -> STA
- Terrain -> TRN
@@ -209,11 +254,13 @@
- Terre plein -> TPL
- Tertre -> TRT
- Tertres -> TRT
- Tour -> TOUR
- Traverse -> TRA
- Vallon -> VAL
- Vallée -> VAL
- Venelle -> VEN
- Venelles -> VEN
- Via -> VIA
- Vieille route -> VTE
- Vieux chemin -> VCHE
- Villa -> VLA
@@ -222,6 +269,7 @@
- Villas -> VLA
- Voie -> VOI
- Voies -> VOI
- Zone -> ZONE
- Zone artisanale -> ZA
- Zone d'aménagement concerté -> ZAC
- Zone d'aménagement différé -> ZAD
@@ -241,6 +289,7 @@
- Esplanade -> ESPL
- Passage -> PASS
- Plateau -> PLAT
- Rang -> RANG
- Rond-point -> RDPT
- Sentier -> SENT
- Subdivision -> SUBDIV

@@ -29,6 +29,7 @@
- Prima -> I
- Primo -> I
- Primo -> 1
- Primo -> 1°
- Quarta -> IV
- Quarto -> IV
- Quattro -> IV

@@ -1,10 +1,11 @@
# Source: https://wiki.openstreetmap.org/wiki/Name_finder:Abbreviations#Norsk_-_Norwegian
- lang: "no"
- lang: no
words:
# convert between Nynorsk and Bookmal here
- ~vei, ~veg -> v,vei,veg
- ~veien, ~vegen -> vn,veien,vegen
- vei, veg => v,vn,vei,veg
- veien, vegen -> v,vn,veien,vegen
- gate -> g,gt
# convert between the two female forms
- gate, gaten, gata -> g,gt
- gaten, gata => g,gt,gaten,gata
- plass, plassen -> pl
- sving, svingen -> sv

@@ -1,128 +1,14 @@
# Source: https://wiki.openstreetmap.org/wiki/Name_finder:Abbreviations#.D0.A0.D1.83.D1.81.D1.81.D0.BA.D0.B8.D0.B9_-_Russian
# Source: https://www.plantarium.ru/page/help/topic/abbreviations.html
# Source: https://dic.academic.ru/dic.nsf/ruwiki/1871310
- lang: ru
words:
- Академик, Академика -> Ак
- акционерное общество -> АО
- аллея -> ал
- архипелаг -> арх
- атомная электростанция -> АЭС
- аэродром -> аэрд
- аэропорт -> аэрп
- Башкирский, Башкирская, Башкирское, Башкирские -> Баш, Башк, Башкир
- Белый, Белая, Белое. Белые -> Бел
- болото -> бол
- больница -> больн
- Большой, Большая, Большое, Большие -> Б, Бол
- брод -> бр
- бульвар -> бул
- бухта -> бух
- бывший, бывшая, бывшее, бывшие -> бывш
- Великий, Великая, Великое, Великие -> Вел
- Верхний, Верхняя, Верхнее, Верхние -> В, Верх
- водокачка -> вдкч
- водопад -> вдп
- водохранилище -> вдхр
- вокзал -> вкз, вокз
- Восточный, Восточная, Восточное, Восточные -> В, Вост
- вулкан -> влк
- гидроэлектростанция -> ГЭС
- гора -> г
- город -> г
- дворец культуры, дом культуры -> ДК
- дворец спорта -> ДС
- деревня -> д, дер
- детский оздоровительный лагерь -> ДОЛ
- дом -> д
- дом отдыха -> Д О
- железная дорога -> ж д
- железнодорожный, железнодорожная, железнодорожное -> ж-д
- железобетонных изделий -> ЖБИ
- жилой комплекс -> ЖК
- завод -> з-д
- закрытое административно-территориальное образование -> ЗАТО
- залив -> зал
- Западный, Западная, Западное, Западные -> З, Зап, Запад
- заповедник -> запов
- имени -> им
- институт -> инст
- исправительная колония -> ИК
- километр -> км
- Красный, Красная, Красное, Красные -> Кр, Крас
- лагерь -> лаг
- Левый, Левая,Левое, Левые -> Л, Лев
- ледник -> ледн
- лесничество -> леснич
- лесной, лесная, лесное -> лес
- линия электропередачи -> ЛЭП
- Малый, Малая, Малое, Малые -> М, Мал
- Мордовский, Мордовская, Мордовское, Мордовские -> Мордов
- морской, морская, морское -> мор
- Московский, Московская, Московское, Московские -> Мос, Моск
- мыс -> м
- набережная -> наб
- Нижний, Нижняя, Нижнее, Нижние -> Ниж, Н
- Новый, Новая, Новое, Новые -> Нов, Н
- обгонный пункт -> обг п
- область -> обл
- озеро -> оз
- особо охраняемая природная территория -> ООПТ
- остановочный пункт -> о п
- остров -> о
- острова -> о-ва
- парк культуры и отдыха -> ПКиО
- перевал -> пер
- переулок -> пер
- пещера -> пещ
- пионерский лагерь -> пионерлаг
- платформа -> пл, платф
- площадь -> пл
- подсобное хозяйство -> подсоб хоз
- полуостров -> п-ов
- посёлок -> пос, п
- посёлок городского типа -> п г т, пгт
- Правый, Правая, Правое, Правые -> П, Пр, Прав
- проезд -> пр
- проспект -> просп
- пруд -> пр
- пустыня -> пуст
- разъезд -> рзд
- район -> р-н
- резинотехнических изделий -> РТИ
- река -> р
- речной, речная, речное -> реч, речн
- Российский, Российская, Российское, Российские -> Рос
- Русский, Русская, Русское, Русские -> Рус, Русск
- ручей -> руч
- садовое некоммерческое товарищество -> СНТ
- садовые участки -> сад уч
- санаторий -> сан
- сарай -> сар
- Северный, Северная, Северное, Северные -> С, Сев
- село -> с
- Сибирский, Сибирская, Сибирское, Сибирские -> Сиб
- Советский, Советская, Советское, Советские -> Сов
- совхоз -> свх
- Сортировочный, Сортировочная, Сортировочное, Сортировочные -> Сорт
- станция -> ст
- Старый, Старая, Среднее, Средние -> Ср
- Татарский, Татарская, Татарское, Татарские -> Тат, Татар
- теплоэлекстростанция -> ТЭС
- теплоэлектроцентраль -> ТЭЦ
- техникум -> техн
- тоннель, туннель -> тун
- шоссе -> ш
- тупик -> туп
- улица -> ул
- Уральский, Уральская, Уральское, Уральские -> Ур, Урал
- урочище -> ур
- хозяйство -> хоз, хоз-во
- хребет -> хр
- хутор -> хут
- Чёрный, Чёрная, Чёрное, Чёрные -> Черн
- Чувашский, Чувашская, Чувашское, Чувашские -> Чуваш
- шахта -> шах
- школа -> шк
- шоссе -> ш
- элеватор -> элев
- Южный, Южная, Южное, Южные -> Ю, Юж, Южн
- область -> обл

@@ -46,7 +46,7 @@ sanitizers:
- step: strip-brace-terms
- step: tag-analyzer-by-language
filter-kind: [".*name.*"]
whitelist: [bg,ca,cs,da,de,el,en,es,et,eu,fi,fr,gl,hu,it,ja,mg,ms,nl,"no",pl,pt,ro,ru,sk,sl,sv,tr,uk,vi]
whitelist: [bg,ca,cs,da,de,el,en,es,et,eu,fi,fr,gl,hu,it,ja,mg,ms,nl,no,pl,pt,ro,ru,sk,sl,sv,tr,uk,vi]
use-defaults: all
mode: append
- step: tag-japanese
@@ -158,7 +158,7 @@ token-analysis:
mode: variant-only
variants:
- !include icu-rules/variants-nl.yaml
- id: "no"
- id: no
analyzer: generic
mode: variant-only
variants:

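The two hunks above toggle the quoting of no. Quoting matters here because PyYAML and other YAML 1.1 loaders parse a bare no as the boolean false rather than as the language code. A minimal illustration, assuming PyYAML:

    import yaml

    print(yaml.safe_load('id: no'))     # {'id': False} - boolean, not a string
    print(yaml.safe_load('id: "no"'))   # {'id': 'no'}  - the language code
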
@@ -26,7 +26,7 @@ from .connection import SearchConnection
from .status import get_status, StatusResult
from .lookup import get_places, get_detailed_place
from .reverse import ReverseGeocoder
from . import search as nsearch
from .search import ForwardGeocoder, Phrase, PhraseType, make_query_analyzer
from . import types as ntyp
from .results import DetailedResult, ReverseResult, SearchResults

@@ -207,7 +207,7 @@ class NominatimAPIAsync:
async with self.begin() as conn:
conn.set_query_timeout(self.query_timeout)
if details.keywords:
await nsearch.make_query_analyzer(conn)
await make_query_analyzer(conn)
return await get_detailed_place(conn, place, details)

async def lookup(self, places: Sequence[ntyp.PlaceRef], **params: Any) -> SearchResults:
@@ -219,7 +219,7 @@ class NominatimAPIAsync:
async with self.begin() as conn:
conn.set_query_timeout(self.query_timeout)
if details.keywords:
await nsearch.make_query_analyzer(conn)
await make_query_analyzer(conn)
return await get_places(conn, places, details)

async def reverse(self, coord: ntyp.AnyPoint, **params: Any) -> Optional[ReverseResult]:
@@ -237,7 +237,7 @@ class NominatimAPIAsync:
async with self.begin() as conn:
conn.set_query_timeout(self.query_timeout)
if details.keywords:
await nsearch.make_query_analyzer(conn)
await make_query_analyzer(conn)
geocoder = ReverseGeocoder(conn, details,
self.reverse_restrict_to_country_area)
return await geocoder.lookup(coord)
@@ -251,10 +251,10 @@ class NominatimAPIAsync:

async with self.begin() as conn:
conn.set_query_timeout(self.query_timeout)
geocoder = nsearch.ForwardGeocoder(conn, ntyp.SearchDetails.from_kwargs(params),
self.config.get_int('REQUEST_TIMEOUT')
if self.config.REQUEST_TIMEOUT else None)
phrases = [nsearch.Phrase(nsearch.PHRASE_ANY, p.strip()) for p in query.split(',')]
geocoder = ForwardGeocoder(conn, ntyp.SearchDetails.from_kwargs(params),
self.config.get_int('REQUEST_TIMEOUT')
if self.config.REQUEST_TIMEOUT else None)
phrases = [Phrase(PhraseType.NONE, p.strip()) for p in query.split(',')]
return await geocoder.lookup(phrases)

async def search_address(self, amenity: Optional[str] = None,
@@ -271,22 +271,22 @@ class NominatimAPIAsync:
conn.set_query_timeout(self.query_timeout)
details = ntyp.SearchDetails.from_kwargs(params)

phrases: List[nsearch.Phrase] = []
phrases: List[Phrase] = []

if amenity:
phrases.append(nsearch.Phrase(nsearch.PHRASE_AMENITY, amenity))
phrases.append(Phrase(PhraseType.AMENITY, amenity))
if street:
phrases.append(nsearch.Phrase(nsearch.PHRASE_STREET, street))
phrases.append(Phrase(PhraseType.STREET, street))
if city:
phrases.append(nsearch.Phrase(nsearch.PHRASE_CITY, city))
phrases.append(Phrase(PhraseType.CITY, city))
if county:
phrases.append(nsearch.Phrase(nsearch.PHRASE_COUNTY, county))
phrases.append(Phrase(PhraseType.COUNTY, county))
if state:
phrases.append(nsearch.Phrase(nsearch.PHRASE_STATE, state))
phrases.append(Phrase(PhraseType.STATE, state))
if postalcode:
phrases.append(nsearch.Phrase(nsearch.PHRASE_POSTCODE, postalcode))
phrases.append(Phrase(PhraseType.POSTCODE, postalcode))
if country:
phrases.append(nsearch.Phrase(nsearch.PHRASE_COUNTRY, country))
phrases.append(Phrase(PhraseType.COUNTRY, country))

if not phrases:
raise UsageError('Nothing to search for.')
@@ -304,14 +304,14 @@ class NominatimAPIAsync:
else:
details.restrict_min_max_rank(4, 4)

if details.layers is None:
if 'layers' not in params:
details.layers = ntyp.DataLayer.ADDRESS
if amenity:
details.layers |= ntyp.DataLayer.POI

geocoder = nsearch.ForwardGeocoder(conn, details,
self.config.get_int('REQUEST_TIMEOUT')
if self.config.REQUEST_TIMEOUT else None)
geocoder = ForwardGeocoder(conn, details,
self.config.get_int('REQUEST_TIMEOUT')
if self.config.REQUEST_TIMEOUT else None)
return await geocoder.lookup(phrases)

async def search_category(self, categories: List[Tuple[str, str]],
@@ -328,15 +328,15 @@ class NominatimAPIAsync:
async with self.begin() as conn:
conn.set_query_timeout(self.query_timeout)
if near_query:
phrases = [nsearch.Phrase(nsearch.PHRASE_ANY, p) for p in near_query.split(',')]
phrases = [Phrase(PhraseType.NONE, p) for p in near_query.split(',')]
else:
phrases = []
if details.keywords:
await nsearch.make_query_analyzer(conn)
await make_query_analyzer(conn)

geocoder = nsearch.ForwardGeocoder(conn, details,
self.config.get_int('REQUEST_TIMEOUT')
if self.config.REQUEST_TIMEOUT else None)
geocoder = ForwardGeocoder(conn, details,
self.config.get_int('REQUEST_TIMEOUT')
if self.config.REQUEST_TIMEOUT else None)
return await geocoder.lookup_pois(categories, phrases)


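The hunks above swap between two spellings of the same phrase API: one side imports the search module as nsearch and uses module-level constants (nsearch.PHRASE_ANY, nsearch.PHRASE_STREET, ...), the other imports Phrase and the PhraseType enum directly. A minimal sketch of building a free-form query in the constant-based style (the top-level import path is assumed here):

    from nominatim_api import search as nsearch

    query = 'ul. Mickiewicza 5, Gdynia'
    # one PHRASE_ANY phrase per comma-separated part, as in the hunk above
    phrases = [nsearch.Phrase(nsearch.PHRASE_ANY, p.strip())
               for p in query.split(',')]
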
@@ -8,7 +8,6 @@
Helper functions for localizing names of results.
"""
from typing import Mapping, List, Optional
from .config import Configuration

import re

@@ -21,18 +20,14 @@ class Locales:
"""

def __init__(self, langs: Optional[List[str]] = None):
self.config = Configuration(None)
self.languages = langs or []
self.name_tags: List[str] = []

parts = self.config.OUTPUT_NAMES.split(',')

for part in parts:
part = part.strip()
if part.endswith(":XX"):
self._add_lang_tags(part[:-3])
else:
self._add_tags(part)
# Build the list of supported tags. It is currently hard-coded.
self._add_lang_tags('name')
self._add_tags('name', 'brand')
self._add_lang_tags('official_name', 'short_name')
self._add_tags('official_name', 'short_name', 'ref')

def __bool__(self) -> bool:
return len(self.languages) > 0

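One side of the __init__ hunk above replaces the configurable OUTPUT_NAMES parsing with a hard-coded tag sequence. Assuming _add_lang_tags appends one '<tag>:<lang>' entry per configured language and _add_tags appends the plain tags, Locales(['en']) would end up with:

    # hypothetical reconstruction of the resulting tag order
    expected_name_tags = [
        'name:en',                              # _add_lang_tags('name')
        'name', 'brand',                        # _add_tags('name', 'brand')
        'official_name:en', 'short_name:en',    # _add_lang_tags(...)
        'official_name', 'short_name', 'ref',   # _add_tags(...)
    ]
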
@@ -342,8 +342,7 @@ HTML_HEADER: str = """<!DOCTYPE html>
<title>Nominatim - Debug</title>
<style>
""" + \
(HtmlFormatter(nobackground=True).get_style_defs('.highlight') # type: ignore[no-untyped-call]
if CODE_HIGHLIGHT else '') + \
(HtmlFormatter(nobackground=True).get_style_defs('.highlight') if CODE_HIGHLIGHT else '') + \
"""
h2 { font-size: x-large }


@@ -27,5 +27,5 @@ def create(config: QueryConfig) -> QueryProcessingFunc:

return lambda phrases: list(
filter(lambda p: p.text,
(Phrase(p.ptype, cast(str, normalizer.transliterate(p.text)).strip('-: '))
(Phrase(p.ptype, cast(str, normalizer.transliterate(p.text)))
for p in phrases)))

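One side of the hunk above additionally strips leading and trailing '-', ':' and blanks from the transliterated phrase text. Python's str.strip with a character-set argument only trims at the ends, as a one-line illustration (the sample string is made up):

    print('-: Baker Street :-'.strip('-: '))   # 'Baker Street'
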
@@ -1,52 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
This preprocessor replaces values in a given input based on pre-defined regex rules.

Arguments:
pattern: Regex pattern to be applied on the input
replace: The string that it is to be replaced with
"""
from typing import List
import re

from .config import QueryConfig
from .base import QueryProcessingFunc
from ..search.query import Phrase


class _GenericPreprocessing:
"""Perform replacements to input phrases using custom regex patterns."""

def __init__(self, config: QueryConfig) -> None:
"""Initialise the _GenericPreprocessing class with patterns from the ICU config file."""
self.config = config

match_patterns = self.config.get('replacements', 'Key not found')
self.compiled_patterns = [
(re.compile(item['pattern']), item['replace']) for item in match_patterns
]

def split_phrase(self, phrase: Phrase) -> Phrase:
"""This function performs replacements on the given text using regex patterns."""
for item in self.compiled_patterns:
phrase.text = item[0].sub(item[1], phrase.text)

return phrase

def __call__(self, phrases: List[Phrase]) -> List[Phrase]:
"""
Return the final Phrase list.
Returns an empty list if there is nothing left after split_phrase.
"""
result = [p for p in map(self.split_phrase, phrases) if p.text.strip()]
return result


def create(config: QueryConfig) -> QueryProcessingFunc:
""" Create a function for generic preprocessing."""
return _GenericPreprocessing(config)
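The preprocessor file above compiles each configured pattern/replace pair once and applies them in order to every phrase, dropping phrases whose text ends up empty. A minimal standalone sketch of the same loop (the replacement rule is made up):

    import re

    replacements = [
        {'pattern': r'\bgreat\s+britain\b', 'replace': 'gb'},  # made-up rule
    ]
    compiled = [(re.compile(item['pattern']), item['replace'])
                for item in replacements]

    def preprocess(text):
        for regex, repl in compiled:
            text = regex.sub(repl, text)
        return text

    print(preprocess('museums in great britain'))   # 'museums in gb'
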
@@ -9,12 +9,5 @@ Module for forward search.
"""
from .geocoder import (ForwardGeocoder as ForwardGeocoder)
from .query import (Phrase as Phrase,
PHRASE_ANY as PHRASE_ANY,
PHRASE_AMENITY as PHRASE_AMENITY,
PHRASE_STREET as PHRASE_STREET,
PHRASE_CITY as PHRASE_CITY,
PHRASE_COUNTY as PHRASE_COUNTY,
PHRASE_STATE as PHRASE_STATE,
PHRASE_POSTCODE as PHRASE_POSTCODE,
PHRASE_COUNTRY as PHRASE_COUNTRY)
PhraseType as PhraseType)
from .query_analyzer_factory import (make_query_analyzer as make_query_analyzer)

@@ -2,7 +2,7 @@
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2025 by the Nominatim developer community.
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Conversion from token assignment to an abstract DB search.
|
||||
@@ -11,7 +11,7 @@ from typing import Optional, List, Tuple, Iterator, Dict
|
||||
import heapq
|
||||
|
||||
from ..types import SearchDetails, DataLayer
|
||||
from . import query as qmod
|
||||
from .query import QueryStruct, Token, TokenType, TokenRange, BreakType
|
||||
from .token_assignment import TokenAssignment
|
||||
from . import db_search_fields as dbf
|
||||
from . import db_searches as dbs
|
||||
@@ -51,7 +51,7 @@ class SearchBuilder:
|
||||
""" Build the abstract search queries from token assignments.
|
||||
"""
|
||||
|
||||
def __init__(self, query: qmod.QueryStruct, details: SearchDetails) -> None:
|
||||
def __init__(self, query: QueryStruct, details: SearchDetails) -> None:
|
||||
self.query = query
|
||||
self.details = details
|
||||
|
||||
@@ -97,7 +97,7 @@ class SearchBuilder:
|
||||
builder = self.build_poi_search(sdata)
|
||||
elif assignment.housenumber:
|
||||
hnr_tokens = self.query.get_tokens(assignment.housenumber,
|
||||
qmod.TOKEN_HOUSENUMBER)
|
||||
TokenType.HOUSENUMBER)
|
||||
builder = self.build_housenumber_search(sdata, hnr_tokens, assignment.address)
|
||||
else:
|
||||
builder = self.build_special_search(sdata, assignment.address,
|
||||
@@ -128,7 +128,7 @@ class SearchBuilder:
|
||||
yield dbs.PoiSearch(sdata)
|
||||
|
||||
def build_special_search(self, sdata: dbf.SearchData,
|
||||
address: List[qmod.TokenRange],
|
||||
address: List[TokenRange],
|
||||
is_category: bool) -> Iterator[dbs.AbstractSearch]:
|
||||
""" Build abstract search queries for searches that do not involve
|
||||
a named place.
|
||||
@@ -146,51 +146,45 @@ class SearchBuilder:
|
||||
if address:
|
||||
sdata.lookups = [dbf.FieldLookup('nameaddress_vector',
|
||||
[t.token for r in address
|
||||
for t in self.query.iter_partials(r)],
|
||||
for t in self.query.get_partials_list(r)],
|
||||
lookups.Restrict)]
|
||||
penalty += 0.2
|
||||
yield dbs.PostcodeSearch(penalty, sdata)
|
||||
|
||||
def build_housenumber_search(self, sdata: dbf.SearchData, hnrs: List[qmod.Token],
|
||||
address: List[qmod.TokenRange]) -> Iterator[dbs.AbstractSearch]:
|
||||
def build_housenumber_search(self, sdata: dbf.SearchData, hnrs: List[Token],
|
||||
address: List[TokenRange]) -> Iterator[dbs.AbstractSearch]:
|
||||
""" Build a simple address search for special entries where the
|
||||
housenumber is the main name token.
|
||||
"""
|
||||
partials = dbf.CountedTokenIDs((t for trange in address
|
||||
for t in self.query.iter_partials(trange)),
|
||||
'addr_count')
|
||||
sdata.lookups = [dbf.FieldLookup('name_vector', [t.token for t in hnrs], lookups.LookupAny)]
|
||||
expected_count = sum(t.count for t in hnrs)
|
||||
|
||||
partials = {t.token: t.addr_count for trange in address
|
||||
for t in self.query.get_partials_list(trange)}
|
||||
|
||||
if not partials:
|
||||
# can happen when none of the partials is indexed
|
||||
return
|
||||
|
||||
expected_count = sum(t.count for t in hnrs)
|
||||
hnr_tokens = [t.token for t in hnrs]
|
||||
|
||||
if expected_count < 10000:
|
||||
sdata.lookups = [dbf.FieldLookup('name_vector', hnr_tokens, lookups.LookupAny),
|
||||
dbf.FieldLookup('nameaddress_vector',
|
||||
partials.get_tokens(),
|
||||
lookups.Restrict)]
|
||||
if expected_count < 8000:
|
||||
sdata.lookups.append(dbf.FieldLookup('nameaddress_vector',
|
||||
list(partials), lookups.Restrict))
|
||||
elif len(partials) != 1 or list(partials.values())[0] < 10000:
|
||||
sdata.lookups.append(dbf.FieldLookup('nameaddress_vector',
|
||||
list(partials), lookups.LookupAll))
|
||||
else:
|
||||
split = partials.get_num_lookup_tokens(20000, 5)
|
||||
if split > 0:
|
||||
sdata.lookups = partials.split_lookup(split, 'nameaddress_vector')
|
||||
sdata.lookups.append(
|
||||
dbf.FieldLookup('name_vector', hnr_tokens, lookups.Restrict))
|
||||
else:
|
||||
addr_fulls = [t.token for t in
|
||||
self.query.get_tokens(address[0], qmod.TOKEN_WORD)]
|
||||
if len(addr_fulls) > 5:
|
||||
return
|
||||
sdata.lookups = [
|
||||
dbf.FieldLookup('name_vector', hnr_tokens, lookups.LookupAny),
|
||||
dbf.FieldLookup('nameaddress_vector', addr_fulls, lookups.LookupAny)]
|
||||
addr_fulls = [t.token for t
|
||||
in self.query.get_tokens(address[0], TokenType.WORD)]
|
||||
if len(addr_fulls) > 5:
|
||||
return
|
||||
sdata.lookups.append(
|
||||
dbf.FieldLookup('nameaddress_vector', addr_fulls, lookups.LookupAny))
|
||||
|
||||
sdata.housenumbers = dbf.WeightedStrings([], [])
|
||||
yield dbs.PlaceSearch(0.05, sdata, expected_count, True)
|
||||
yield dbs.PlaceSearch(0.05, sdata, expected_count)
|
||||
|
||||
def build_name_search(self, sdata: dbf.SearchData,
|
||||
name: qmod.TokenRange, address: List[qmod.TokenRange],
|
||||
name: TokenRange, address: List[TokenRange],
|
||||
is_category: bool) -> Iterator[dbs.AbstractSearch]:
|
||||
""" Build abstract search queries for simple name or address searches.
|
||||
"""
|
||||
@@ -201,102 +195,48 @@ class SearchBuilder:
|
||||
sdata.rankings.append(ranking)
|
||||
for penalty, count, lookup in self.yield_lookups(name, address):
|
||||
sdata.lookups = lookup
|
||||
if sdata.housenumbers:
|
||||
yield dbs.AddressSearch(penalty + name_penalty, sdata, count, bool(address))
|
||||
else:
|
||||
yield dbs.PlaceSearch(penalty + name_penalty, sdata, count, bool(address))
|
||||
yield dbs.PlaceSearch(penalty + name_penalty, sdata, count)
|
||||
|
||||
def yield_lookups(self, name: qmod.TokenRange, address: List[qmod.TokenRange]
|
||||
def yield_lookups(self, name: TokenRange, address: List[TokenRange]
|
||||
) -> Iterator[Tuple[float, int, List[dbf.FieldLookup]]]:
|
||||
""" Yield all variants how the given name and address should best
|
||||
be searched for. This takes into account how frequent the terms
|
||||
are and tries to find a lookup that optimizes index use.
|
||||
"""
|
||||
name_partials = dbf.CountedTokenIDs(self.query.iter_partials(name))
|
||||
addr_partials = dbf.CountedTokenIDs((t for r in address
|
||||
for t in self.query.iter_partials(r)),
|
||||
'addr_count')
|
||||
|
||||
if not addr_partials:
|
||||
yield from self.yield_name_only_lookups(name_partials, name)
|
||||
else:
|
||||
yield from self.yield_address_lookups(name_partials, addr_partials, name)
|
||||
|
||||
def yield_name_only_lookups(self, partials: dbf.CountedTokenIDs, name: qmod.TokenRange
|
||||
) -> Iterator[Tuple[float, int, List[dbf.FieldLookup]]]:
|
||||
""" Yield the best lookup for a name-only search.
|
||||
"""
|
||||
split = partials.get_num_lookup_tokens(30000, 6)
|
||||
|
||||
if split > 0:
|
||||
yield 0.0, partials.expected_for_all_search(5), \
|
||||
partials.split_lookup(split, 'name_vector')
|
||||
else:
|
||||
# lots of results expected: try lookup by full names first
|
||||
name_fulls = list(filter(lambda t: t.count < 50000,
|
||||
self.query.get_tokens(name, qmod.TOKEN_WORD)))
|
||||
if name_fulls:
|
||||
yield 0.0, sum(t.count for t in name_fulls), \
|
||||
dbf.lookup_by_any_name([t.token for t in name_fulls], [], [])
|
||||
|
||||
# look the name up by its partials
|
||||
exp_count = partials.expected_for_all_search(5)
|
||||
if exp_count < 50000:
|
||||
yield 1.0, exp_count, \
|
||||
[dbf.FieldLookup('name_vector', partials.get_tokens(), lookups.LookupAll)]
|
||||
|
||||
    def yield_address_lookups(self, name_partials: dbf.CountedTokenIDs,
                              addr_partials: dbf.CountedTokenIDs, name: qmod.TokenRange,
                              ) -> Iterator[Tuple[float, int, List[dbf.FieldLookup]]]:
        penalty = 0.0  # extra penalty
        name_partials = {t.token: t for t in self.query.get_partials_list(name)}

        name_split = name_partials.get_num_lookup_tokens(20000, 6)
        addr_split = addr_partials.get_num_lookup_tokens(10000, 3)
        addr_partials = [t for r in address for t in self.query.get_partials_list(r)]
        addr_tokens = list({t.token for t in addr_partials})

        if name_split < 0 and addr_split < 0:
            # Partial term too frequent. Try looking up by rare full names first.
            name_fulls = self.query.get_tokens(name, qmod.TOKEN_WORD)
            if name_fulls:
                fulls_count = sum(t.count for t in name_fulls)
                exp_count = min(t.count for t in name_partials.values()) / (2**(len(name_partials) - 1))

                if fulls_count < 80000:
                    yield 0.0, fulls_count, \
                        dbf.lookup_by_any_name([t.token for t in name_fulls],
                                               addr_partials.get_tokens(),
                                               [])
                penalty += 0.2
            penalty += 0.4
            if (len(name_partials) > 3 or exp_count < 8000):
                yield penalty, exp_count, dbf.lookup_by_names(list(name_partials.keys()), addr_tokens)
            return

        name_split = name_partials.get_num_lookup_tokens(50000, 10)
        addr_split = addr_partials.get_num_lookup_tokens(30000, 5)
        addr_count = min(t.addr_count for t in addr_partials) if addr_partials else 30000
        # Partial term too frequent. Try looking up by rare full names first.
        name_fulls = self.query.get_tokens(name, TokenType.WORD)
        if name_fulls:
            fulls_count = sum(t.count for t in name_fulls)

        if name_split > 0 \
                and (addr_split < 0 or name_partials.min_count() <= addr_partials.min_count()):
            # lookup by name
            lookup = name_partials.split_lookup(name_split, 'name_vector')
            lookup.append(dbf.FieldLookup('nameaddress_vector',
                                          addr_partials.get_tokens(), lookups.Restrict))
            yield penalty, name_partials.expected_for_all_search(5), lookup
        elif addr_split > 0:
            # lookup by address
            lookup = addr_partials.split_lookup(addr_split, 'nameaddress_vector')
            lookup.append(dbf.FieldLookup('name_vector',
                                          name_partials.get_tokens(), lookups.Restrict))
            yield penalty, addr_partials.expected_for_all_search(3), lookup
        elif len(name_partials) > 1:
            penalty += 0.5
            # To catch remaining results, lookup by name and address
            # We only do this if there is a reasonable number of results expected.
            exp_count = min(name_partials.min_count(), addr_partials.min_count())
            exp_count = int(exp_count / (min(3, len(name_partials)) + min(3, len(addr_partials))))
            if exp_count < 50000:
                lookup = name_partials.split_lookup(3, 'name_vector')
                lookup.extend(addr_partials.split_lookup(3, 'nameaddress_vector'))
                if fulls_count < 50000 or addr_count < 30000:
                    yield penalty, fulls_count / (2**len(addr_tokens)), \
                        self.get_full_name_ranking(name_fulls, addr_partials,
                                                   fulls_count > 30000 / max(1, len(addr_tokens)))

                yield penalty, exp_count, lookup
        # To catch remaining results, lookup by name and address
        # We only do this if there is a reasonable number of results expected.
        exp_count = exp_count / (2**len(addr_tokens)) if addr_tokens else exp_count
        if exp_count < 10000 and addr_count < 20000:
            penalty += 0.35 * max(1 if name_fulls else 0.1,
                                  5 - len(name_partials) - len(addr_tokens))
            yield penalty, exp_count, \
                self.get_name_address_ranking(list(name_partials.keys()), addr_partials)

    def get_name_address_ranking(self, name_tokens: List[int],
                                 addr_partials: List[qmod.Token]) -> List[dbf.FieldLookup]:
                                 addr_partials: List[Token]) -> List[dbf.FieldLookup]:
        """ Create a ranking expression looking up by name and address.
        """
        lookup = [dbf.FieldLookup('name_vector', name_tokens, lookups.LookupAll)]
@@ -318,16 +258,23 @@ class SearchBuilder:

        return lookup

    def get_full_name_ranking(self, name_fulls: List[qmod.Token], addr_partials: List[qmod.Token],
    def get_full_name_ranking(self, name_fulls: List[Token], addr_partials: List[Token],
                              use_lookup: bool) -> List[dbf.FieldLookup]:
        """ Create a ranking expression with full name terms and
            additional address lookup. When 'use_lookup' is true,
            address lookups will use the index, provided the occurrences
            are not too numerous.
        """
        # At this point drop unindexed partials from the address.
        # This might yield wrong results; nothing we can do about that.
        if use_lookup:
            addr_restrict_tokens = []
            addr_lookup_tokens = [t.token for t in addr_partials]
            addr_lookup_tokens = []
            for t in addr_partials:
                if t.addr_count > 20000:
                    addr_restrict_tokens.append(t.token)
                else:
                    addr_lookup_tokens.append(t.token)
        else:
            addr_restrict_tokens = [t.token for t in addr_partials]
            addr_lookup_tokens = []

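The loop above partitions the address partials by their `addr_count`: frequent tokens are only rechecked against rows already found (Restrict), while rare ones may drive the `nameaddress_vector` index lookup. The same partition with made-up token names and counts (the 20000 threshold mirrors the code above):

    # Illustrative sketch only; token names and counts are invented.
    tokens = [('main', 150000), ('elm', 4000), ('harbour', 900)]

    addr_restrict = [name for name, count in tokens if count > 20000]
    addr_lookup = [name for name, count in tokens if count <= 20000]
    print(addr_restrict, addr_lookup)   # ['main'] ['elm', 'harbour']
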
@@ -335,22 +282,19 @@ class SearchBuilder:
        return dbf.lookup_by_any_name([t.token for t in name_fulls],
                                      addr_restrict_tokens, addr_lookup_tokens)

    def get_name_ranking(self, trange: qmod.TokenRange,
    def get_name_ranking(self, trange: TokenRange,
                         db_field: str = 'name_vector') -> dbf.FieldRanking:
        """ Create a ranking expression for a name term in the given range.
        """
        name_fulls = self.query.get_tokens(trange, qmod.TOKEN_WORD)
        full_word_penalty = self.query.get_in_word_penalty(trange)
        ranks = [dbf.RankedTokens(t.penalty + full_word_penalty, [t.token])
                 for t in name_fulls]
        name_fulls = self.query.get_tokens(trange, TokenType.WORD)
        ranks = [dbf.RankedTokens(t.penalty, [t.token]) for t in name_fulls]
        ranks.sort(key=lambda r: r.penalty)
        # Fallback, sum of penalty for partials
        default = sum(t.penalty for t in self.query.iter_partials(trange)) + 0.2
        default += sum(n.word_break_penalty
                       for n in self.query.nodes[trange.start + 1:trange.end])
        name_partials = self.query.get_partials_list(trange)
        default = sum(t.penalty for t in name_partials) + 0.2
        return dbf.FieldRanking(db_field, default, ranks)

    def get_addr_ranking(self, trange: qmod.TokenRange) -> dbf.FieldRanking:
    def get_addr_ranking(self, trange: TokenRange) -> dbf.FieldRanking:
        """ Create a list of ranking expressions for an address term
            for the given ranges.
        """
@@ -359,40 +303,36 @@ class SearchBuilder:
        ranks: List[dbf.RankedTokens] = []

        while todo:
            _, pos, rank = heapq.heappop(todo)
            # partial node
            partial = self.query.nodes[pos].partial
            if partial is not None:
                if pos + 1 < trange.end:
                    penalty = rank.penalty + partial.penalty \
                        + self.query.nodes[pos + 1].word_break_penalty
                    heapq.heappush(todo, (-(pos + 1), pos + 1,
                                          dbf.RankedTokens(penalty, rank.tokens)))
                else:
                    ranks.append(dbf.RankedTokens(rank.penalty + partial.penalty,
                                                  rank.tokens))
            # full words
            neglen, pos, rank = heapq.heappop(todo)
            for tlist in self.query.nodes[pos].starting:
                if tlist.ttype == qmod.TOKEN_WORD:
                if tlist.ttype in (TokenType.PARTIAL, TokenType.WORD):
                    if tlist.end < trange.end:
                        chgpenalty = self.query.nodes[tlist.end].word_break_penalty \
                            + self.query.get_in_word_penalty(
                                qmod.TokenRange(pos, tlist.end))
                        for t in tlist.tokens:
                            heapq.heappush(todo, (-tlist.end, tlist.end,
                                                  rank.with_token(t, chgpenalty)))
                        chgpenalty = PENALTY_WORDCHANGE[self.query.nodes[tlist.end].btype]
                        if tlist.ttype == TokenType.PARTIAL:
                            penalty = rank.penalty + chgpenalty \
                                + max(t.penalty for t in tlist.tokens)
                            heapq.heappush(todo, (neglen - 1, tlist.end,
                                                  dbf.RankedTokens(penalty, rank.tokens)))
                        else:
                            for t in tlist.tokens:
                                heapq.heappush(todo, (neglen - 1, tlist.end,
                                                      rank.with_token(t, chgpenalty)))
                    elif tlist.end == trange.end:
                        ranks.extend(rank.with_token(t, 0.0) for t in tlist.tokens)

                        if len(ranks) >= 10:
                            # Too many variants, bail out and only add the
                            # worst-case fallback: sum of penalties of the partials
                            default = sum(t.penalty for t in self.query.iter_partials(trange)) + 0.2
                            default += sum(n.word_break_penalty
                                           for n in self.query.nodes[trange.start + 1:trange.end])
                            ranks.append(dbf.RankedTokens(rank.penalty + default, []))
                            # Bail out of outer loop
                            break
                        if tlist.ttype == TokenType.PARTIAL:
                            ranks.append(dbf.RankedTokens(rank.penalty
                                                          + max(t.penalty for t in tlist.tokens),
                                                          rank.tokens))
                        else:
                            ranks.extend(rank.with_token(t, 0.0) for t in tlist.tokens)
                        if len(ranks) >= 10:
                            # Too many variants, bail out and only add the
                            # worst-case fallback: sum of penalties of the partials
                            name_partials = self.query.get_partials_list(trange)
                            default = sum(t.penalty for t in name_partials) + 0.2
                            ranks.append(dbf.RankedTokens(rank.penalty + default, []))
                            # Bail out of outer loop
                            todo.clear()
                            break

        ranks.sort(key=lambda r: len(r.tokens))
        default = ranks[0].penalty + 0.3
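`get_addr_ranking` explores the possible token decompositions of the range best-first with a heap and caps the variant list at 10. A self-contained sketch of the same pattern on plain strings (vocabulary and penalties invented for the example):

    # Illustrative sketch, not Nominatim code: best-first enumeration of
    # segmentations with heapq, capped like the variant list above.
    import heapq

    def cheapest_splits(word, vocab, max_variants=10):
        todo = [(0.0, 0, [])]            # (penalty, position, tokens so far)
        results = []
        while todo and len(results) < max_variants:
            penalty, pos, tokens = heapq.heappop(todo)
            if pos == len(word):
                results.append((penalty, tokens))
                continue
            for end in range(pos + 1, len(word) + 1):
                part = word[pos:end]
                if part in vocab:
                    heapq.heappush(todo, (penalty + vocab[part], end, tokens + [part]))
        return results

    print(cheapest_splits('mainst', {'main': 0.0, 'st': 0.1, 'mainst': 0.3}))
    # [(0.1, ['main', 'st']), (0.3, ['mainst'])]
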
@@ -412,54 +352,58 @@ class SearchBuilder:
            if not tokens:
                return None
            sdata.set_strings('countries', tokens)
            sdata.penalty += self.query.get_in_word_penalty(assignment.country)
        elif self.details.countries:
            sdata.countries = dbf.WeightedStrings(self.details.countries,
                                                  [0.0] * len(self.details.countries))
        if assignment.housenumber:
            sdata.set_strings('housenumbers',
                              self.query.get_tokens(assignment.housenumber,
                                                    qmod.TOKEN_HOUSENUMBER))
            sdata.penalty += self.query.get_in_word_penalty(assignment.housenumber)
                                                    TokenType.HOUSENUMBER))
        if assignment.postcode:
            sdata.set_strings('postcodes',
                              self.query.get_tokens(assignment.postcode,
                                                    qmod.TOKEN_POSTCODE))
            sdata.penalty += self.query.get_in_word_penalty(assignment.postcode)
                                                    TokenType.POSTCODE))
        if assignment.qualifier:
            tokens = self.get_qualifier_tokens(assignment.qualifier)
            if not tokens:
                return None
            sdata.set_qualifiers(tokens)
            sdata.penalty += self.query.get_in_word_penalty(assignment.qualifier)
        elif self.details.categories:
            sdata.qualifiers = dbf.WeightedCategories(self.details.categories,
                                                      [0.0] * len(self.details.categories))

        if assignment.address:
            sdata.set_ranking([self.get_addr_ranking(r) for r in assignment.address])
            if not assignment.name and assignment.housenumber:
                # housenumber search: the first item needs to be handled like
                # a name in the ranking, or penalties are not comparable with
                # normal searches.
                sdata.set_ranking([self.get_name_ranking(assignment.address[0],
                                                         db_field='nameaddress_vector')]
                                  + [self.get_addr_ranking(r) for r in assignment.address[1:]])
            else:
                sdata.set_ranking([self.get_addr_ranking(r) for r in assignment.address])
        else:
            sdata.rankings = []

        return sdata

    def get_country_tokens(self, trange: qmod.TokenRange) -> List[qmod.Token]:
    def get_country_tokens(self, trange: TokenRange) -> List[Token]:
        """ Return the list of country tokens for the given range,
            optionally filtered by the country list from the details
            parameters.
        """
        tokens = self.query.get_tokens(trange, qmod.TOKEN_COUNTRY)
        tokens = self.query.get_tokens(trange, TokenType.COUNTRY)
        if self.details.countries:
            tokens = [t for t in tokens if t.lookup_word in self.details.countries]

        return tokens

    def get_qualifier_tokens(self, trange: qmod.TokenRange) -> List[qmod.Token]:
    def get_qualifier_tokens(self, trange: TokenRange) -> List[Token]:
        """ Return the list of qualifier tokens for the given range,
            optionally filtered by the qualifier list from the details
            parameters.
        """
        tokens = self.query.get_tokens(trange, qmod.TOKEN_QUALIFIER)
        tokens = self.query.get_tokens(trange, TokenType.QUALIFIER)
        if self.details.categories:
            tokens = [t for t in tokens if t.get_category() in self.details.categories]

@@ -472,7 +416,7 @@ class SearchBuilder:
        """
        if assignment.near_item:
            tokens: Dict[Tuple[str, str], float] = {}
            for t in self.query.get_tokens(assignment.near_item, qmod.TOKEN_NEAR_ITEM):
            for t in self.query.get_tokens(assignment.near_item, TokenType.NEAR_ITEM):
                cat = t.get_category()
                # The category of a near search will be that of near_item.
                # Thus, if search is restricted to a category parameter,
@@ -483,3 +427,14 @@ class SearchBuilder:
        return dbf.WeightedCategories(list(tokens.keys()), list(tokens.values()))

        return None


PENALTY_WORDCHANGE = {
    BreakType.START: 0.0,
    BreakType.END: 0.0,
    BreakType.PHRASE: 0.0,
    BreakType.SOFT_PHRASE: 0.0,
    BreakType.WORD: 0.1,
    BreakType.PART: 0.2,
    BreakType.TOKEN: 0.4
}
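The table assigns the penalty for a token sequence that continues across a break of the given type: crossing phrase boundaries is free, while continuing through word, part or token breaks gets increasingly expensive. A small usage sketch with stand-in string keys instead of the `BreakType` enum:

    # Illustrative sketch only; stand-in strings instead of the BreakType enum.
    PENALTY = {'phrase': 0.0, 'word': 0.1, 'part': 0.2, 'token': 0.4}

    def word_change_penalty(break_types):
        # total extra penalty for a full word spanning these breaks
        return sum(PENALTY[b] for b in break_types)

    print(word_change_penalty(['word', 'part']))   # 0.30000000000000004
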

@@ -7,7 +7,7 @@
"""
Data structures for more complex fields in abstract search descriptions.
"""
from typing import List, Tuple, Iterator, Dict, Type, cast
from typing import List, Tuple, Iterator, Dict, Type
import dataclasses

import sqlalchemy as sa
@@ -18,66 +18,6 @@ from .query import Token
from . import db_search_lookups as lookups


class CountedTokenIDs:
    """ A list of token IDs with their respective counts, sorted
        from least frequent to most frequent.

        If a token count is one, then statistics are likely to be unavailable
        and a relatively high count is assumed instead.
    """

    def __init__(self, tokens: Iterator[Token], count_column: str = 'count'):
        self.tokens = list({(cast(int, getattr(t, count_column)), t.token) for t in tokens})
        self.tokens.sort(key=lambda t: t[0] if t[0] > 1 else 100000)

    def __len__(self) -> int:
        return len(self.tokens)

    def get_num_lookup_tokens(self, limit: int, fac: int) -> int:
        """ Suggest the number of tokens to be used for an index lookup.
            The idea here is to use as few items as possible while making
            sure the number of rows returned stays below 'limit'. Above that
            limit, rechecking the returned rows becomes more expensive than
            adding another item to the index lookup. 'fac' is the factor by
            which the limit is increased every time a lookup item is added.

            If the list of tokens doesn't seem suitable at all for index
            lookup, -1 is returned.
        """
        length = len(self.tokens)
        min_count = self.tokens[0][0]
        if min_count == 1:
            return min(length, 3)  # no statistics available, use index

        for i in range(min(length, 3)):
            if min_count < limit:
                return i + 1
            limit = limit * fac

        return -1
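In other words, the rarest token count is compared against a limit that grows by `fac` for every additional lookup token, because each extra AND term cuts the expected row count. A worked standalone re-implementation with the thresholds that `yield_name_only_lookups` passes in (for illustration only):

    # Worked example of the heuristic above; the function is a hypothetical
    # standalone copy, not the Nominatim method itself.
    def num_lookup_tokens(sorted_counts, limit, fac):
        min_count = sorted_counts[0]
        if min_count == 1:                    # no statistics available
            return min(len(sorted_counts), 3)
        for i in range(min(len(sorted_counts), 3)):
            if min_count < limit:             # acceptable row count with i+1 tokens
                return i + 1
            limit = limit * fac               # an extra AND term cuts rows by ~fac
        return -1

    print(num_lookup_tokens([12000, 90000], 30000, 6))    # 1 - rarest token suffices
    print(num_lookup_tokens([100000, 200000], 30000, 6))  # 2 - needs a second token
    print(num_lookup_tokens([2000000], 30000, 6))         # -1 - index lookup hopeless
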

    def min_count(self) -> int:
        return self.tokens[0][0]

    def expected_for_all_search(self, fac: int = 5) -> int:
        return int(self.tokens[0][0] / (fac**(len(self.tokens) - 1)))

    def get_tokens(self) -> List[int]:
        return [t[1] for t in self.tokens]

    def get_head_tokens(self, num_tokens: int) -> List[int]:
        return [t[1] for t in self.tokens[:num_tokens]]

    def get_tail_tokens(self, first: int) -> List[int]:
        return [t[1] for t in self.tokens[first:]]

    def split_lookup(self, split: int, column: str) -> 'List[FieldLookup]':
        lookup = [FieldLookup(column, self.get_head_tokens(split), lookups.LookupAll)]
        if split < len(self.tokens):
            lookup.append(FieldLookup(column, self.get_tail_tokens(split), lookups.Restrict))
        return lookup

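`split_lookup` therefore always drives the index with the rarest tokens and downgrades the rest to a recheck. The split with made-up (count, token_id) pairs, already sorted ascending as `CountedTokenIDs` keeps them:

    # Illustrative sketch of the partition produced by split_lookup above.
    tokens = [(40, 101), (9000, 102), (250000, 103)]   # (count, token_id)
    split = 2
    head = [tid for _, tid in tokens[:split]]   # LookupAll: drives the index scan
    tail = [tid for _, tid in tokens[split:]]   # Restrict: recheck on fetched rows
    print(head, tail)   # [101, 102] [103]
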

@dataclasses.dataclass
class WeightedStrings:
    """ A list of strings together with a penalty.

src/nominatim_api/search/db_searches.py (new file, 863 lines)
@@ -0,0 +1,863 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the actual database accesses for forward search.
"""
from typing import List, Tuple, AsyncIterator, Dict, Any, Callable, cast
import abc

import sqlalchemy as sa

from ..typing import SaFromClause, SaScalarSelect, SaColumn, \
                     SaExpression, SaSelect, SaLambdaSelect, SaRow, SaBind
from ..sql.sqlalchemy_types import Geometry, IntArray
from ..connection import SearchConnection
from ..types import SearchDetails, DataLayer, GeometryFormat, Bbox
from .. import results as nres
from .db_search_fields import SearchData, WeightedCategories


def no_index(expr: SaColumn) -> SaColumn:
    """ Wrap the given expression so that the query planner will
        refrain from using the expression for index lookup.
    """
    return sa.func.coalesce(sa.null(), expr)
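Wrapping a column in `coalesce(NULL, column)` leaves the value unchanged but stops PostgreSQL from matching the expression against an index on the bare column. A quick look at the rendered SQL, assuming only that SQLAlchemy is installed:

    # Minimal demonstration: the wrapped comparison compiles to a
    # COALESCE(NULL, ...) expression that no btree index on the column matches.
    import sqlalchemy as sa

    col = sa.column('rank_address')
    print(sa.func.coalesce(sa.null(), col).between(1, 30))
    # coalesce(NULL, rank_address) BETWEEN :coalesce_1 AND :coalesce_2
    # (bind parameter names may vary between SQLAlchemy versions)
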


def _details_to_bind_params(details: SearchDetails) -> Dict[str, Any]:
    """ Create a dictionary from search parameters that can be used
        as bind parameters for SQL execute.
    """
    return {'limit': details.max_results,
            'min_rank': details.min_rank,
            'max_rank': details.max_rank,
            'viewbox': details.viewbox,
            'viewbox2': details.viewbox_x2,
            'near': details.near,
            'near_radius': details.near_radius,
            'excluded': details.excluded,
            'countries': details.countries}


LIMIT_PARAM: SaBind = sa.bindparam('limit')
MIN_RANK_PARAM: SaBind = sa.bindparam('min_rank')
MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank')
VIEWBOX_PARAM: SaBind = sa.bindparam('viewbox', type_=Geometry)
VIEWBOX2_PARAM: SaBind = sa.bindparam('viewbox2', type_=Geometry)
NEAR_PARAM: SaBind = sa.bindparam('near', type_=Geometry)
NEAR_RADIUS_PARAM: SaBind = sa.bindparam('near_radius')
COUNTRIES_PARAM: SaBind = sa.bindparam('countries')


def filter_by_area(sql: SaSelect, t: SaFromClause,
                   details: SearchDetails, avoid_index: bool = False) -> SaSelect:
    """ Apply SQL statements for filtering by viewbox and near point,
        if applicable.
    """
    if details.near is not None and details.near_radius is not None:
        if details.near_radius < 0.1 and not avoid_index:
            sql = sql.where(t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM))
        else:
            sql = sql.where(t.c.geometry.ST_Distance(NEAR_PARAM) <= NEAR_RADIUS_PARAM)
    if details.viewbox is not None and details.bounded_viewbox:
        sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM,
                                                use_index=not avoid_index and
                                                details.viewbox.area < 0.2))

    return sql


def _exclude_places(t: SaFromClause) -> Callable[[], SaExpression]:
    return lambda: t.c.place_id.not_in(sa.bindparam('excluded'))


def _select_placex(t: SaFromClause) -> SaSelect:
    return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                     t.c.class_, t.c.type,
                     t.c.address, t.c.extratags,
                     t.c.housenumber, t.c.postcode, t.c.country_code,
                     t.c.wikipedia,
                     t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
                     t.c.linked_place_id, t.c.admin_level,
                     t.c.centroid,
                     t.c.geometry.ST_Expand(0).label('bbox'))


def _add_geometry_columns(sql: SaLambdaSelect, col: SaColumn, details: SearchDetails) -> SaSelect:
    out = []

    if details.geometry_simplification > 0.0:
        col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification)

    if details.geometry_output & GeometryFormat.GEOJSON:
        out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
    if details.geometry_output & GeometryFormat.TEXT:
        out.append(sa.func.ST_AsText(col).label('geometry_text'))
    if details.geometry_output & GeometryFormat.KML:
        out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
    if details.geometry_output & GeometryFormat.SVG:
        out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))

    return sql.add_columns(*out)


def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause,
                                 numerals: List[int], details: SearchDetails) -> SaScalarSelect:
    all_ids = sa.func.ArrayAgg(table.c.place_id)
    sql = sa.select(all_ids).where(table.c.parent_place_id == inner.c.place_id)

    if len(numerals) == 1:
        sql = sql.where(sa.between(numerals[0], table.c.startnumber, table.c.endnumber))\
                 .where((numerals[0] - table.c.startnumber) % table.c.step == 0)
    else:
        sql = sql.where(sa.or_(
            *(sa.and_(sa.between(n, table.c.startnumber, table.c.endnumber),
                      (n - table.c.startnumber) % table.c.step == 0)
              for n in numerals)))

    if details.excluded:
        sql = sql.where(_exclude_places(table))

    return sql.scalar_subquery()


def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
    orexpr: List[SaExpression] = []
    if layers & DataLayer.ADDRESS and layers & DataLayer.POI:
        orexpr.append(no_index(table.c.rank_address).between(1, 30))
    elif layers & DataLayer.ADDRESS:
        orexpr.append(no_index(table.c.rank_address).between(1, 29))
        orexpr.append(sa.func.IsAddressPoint(table))
    elif layers & DataLayer.POI:
        orexpr.append(sa.and_(no_index(table.c.rank_address) == 30,
                              table.c.class_.not_in(('place', 'building'))))

    if layers & DataLayer.MANMADE:
        exclude = []
        if not layers & DataLayer.RAILWAY:
            exclude.append('railway')
        if not layers & DataLayer.NATURAL:
            exclude.extend(('natural', 'water', 'waterway'))
        orexpr.append(sa.and_(table.c.class_.not_in(tuple(exclude)),
                              no_index(table.c.rank_address) == 0))
    else:
        include = []
        if layers & DataLayer.RAILWAY:
            include.append('railway')
        if layers & DataLayer.NATURAL:
            include.extend(('natural', 'water', 'waterway'))
        orexpr.append(sa.and_(table.c.class_.in_(tuple(include)),
                              no_index(table.c.rank_address) == 0))

    if len(orexpr) == 1:
        return orexpr[0]

    return sa.or_(*orexpr)
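The class filter derives include/exclude lists from the layer bitmask; MANMADE without RAILWAY or NATURAL, for instance, excludes those classes outright. A standalone sketch of that derivation with a stand-in Flag enum (not Nominatim's `DataLayer`):

    # Illustrative sketch only; Layer is a stand-in for DataLayer.
    from enum import Flag, auto

    class Layer(Flag):
        MANMADE = auto()
        RAILWAY = auto()
        NATURAL = auto()

    def class_filter(layers: Layer):
        if layers & Layer.MANMADE:
            exclude = []
            if not layers & Layer.RAILWAY:
                exclude.append('railway')
            if not layers & Layer.NATURAL:
                exclude.extend(('natural', 'water', 'waterway'))
            return ('not in', exclude)
        include = []
        if layers & Layer.RAILWAY:
            include.append('railway')
        if layers & Layer.NATURAL:
            include.extend(('natural', 'water', 'waterway'))
        return ('in', include)

    print(class_filter(Layer.MANMADE))                  # ('not in', ['railway', 'natural', 'water', 'waterway'])
    print(class_filter(Layer.MANMADE | Layer.NATURAL))  # ('not in', ['railway'])
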


def _interpolated_position(table: SaFromClause, nr: SaColumn) -> SaColumn:
    pos = sa.cast(nr - table.c.startnumber, sa.Float) / (table.c.endnumber - table.c.startnumber)
    return sa.case(
        (table.c.endnumber == table.c.startnumber, table.c.linegeo.ST_Centroid()),
        else_=table.c.linegeo.ST_LineInterpolatePoint(pos)).label('centroid')
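The interpolation fraction is simply the position of the requested number within the start/end range; the `case` guards against division by zero on single-number lines. Worked numbers (made up for illustration):

    # House 7 on a line interpolated from 1 to 13 sits at the midpoint of
    # the way's geometry; the fraction is what ST_LineInterpolatePoint gets.
    startnumber, endnumber, nr = 1, 13, 7
    pos = (nr - startnumber) / (endnumber - startnumber)
    print(pos)  # 0.5
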


async def _get_placex_housenumbers(conn: SearchConnection,
                                   place_ids: List[int],
                                   details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
    t = conn.t.placex
    sql = _select_placex(t).add_columns(t.c.importance)\
        .where(t.c.place_id.in_(place_ids))

    if details.geometry_output:
        sql = _add_geometry_columns(sql, t.c.geometry, details)

    for row in await conn.execute(sql):
        result = nres.create_from_placex_row(row, nres.SearchResult)
        assert result
        result.bbox = Bbox.from_wkb(row.bbox)
        yield result


def _int_list_to_subquery(inp: List[int]) -> 'sa.Subquery':
    """ Create a subselect that returns the given list of integers
        as rows in the column 'nr'.
    """
    vtab = sa.func.JsonArrayEach(sa.type_coerce(inp, sa.JSON))\
             .table_valued(sa.column('value', type_=sa.JSON))
    return sa.select(sa.cast(sa.cast(vtab.c.value, sa.Text), sa.Integer).label('nr')).subquery()
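The JSON detour keeps the statement text identical no matter how many numbers are looked up: the whole list travels as a single bind parameter and is unnested server-side (`JsonArrayEach` is Nominatim's own dialect-neutral wrapper). In plain Python terms, the server-side effect is roughly:

    # Illustrative sketch, not the real SQL: one JSON bind parameter in,
    # one integer row per element out (the double cast mirrors the code above).
    import json

    param = json.dumps([2, 4, 8])                     # the single bind parameter
    rows = [int(str(v)) for v in json.loads(param)]   # unnest + cast to column 'nr'
    print(rows)   # [2, 4, 8]

A stable statement text like this plays well with prepared-statement and SQLAlchemy compilation caches.
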


async def _get_osmline(conn: SearchConnection, place_ids: List[int],
                       numerals: List[int],
                       details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
    t = conn.t.osmline

    values = _int_list_to_subquery(numerals)
    sql = sa.select(t.c.place_id, t.c.osm_id,
                    t.c.parent_place_id, t.c.address,
                    values.c.nr.label('housenumber'),
                    _interpolated_position(t, values.c.nr),
                    t.c.postcode, t.c.country_code)\
        .where(t.c.place_id.in_(place_ids))\
        .join(values, values.c.nr.between(t.c.startnumber, t.c.endnumber))

    if details.geometry_output:
        sub = sql.subquery()
        sql = _add_geometry_columns(sa.select(sub), sub.c.centroid, details)

    for row in await conn.execute(sql):
        result = nres.create_from_osmline_row(row, nres.SearchResult)
        assert result
        yield result


async def _get_tiger(conn: SearchConnection, place_ids: List[int],
                     numerals: List[int], osm_id: int,
                     details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
    t = conn.t.tiger
    values = _int_list_to_subquery(numerals)
    sql = sa.select(t.c.place_id, t.c.parent_place_id,
                    sa.literal('W').label('osm_type'),
                    sa.literal(osm_id).label('osm_id'),
                    values.c.nr.label('housenumber'),
                    _interpolated_position(t, values.c.nr),
                    t.c.postcode)\
        .where(t.c.place_id.in_(place_ids))\
        .join(values, values.c.nr.between(t.c.startnumber, t.c.endnumber))

    if details.geometry_output:
        sub = sql.subquery()
        sql = _add_geometry_columns(sa.select(sub), sub.c.centroid, details)

    for row in await conn.execute(sql):
        result = nres.create_from_tiger_row(row, nres.SearchResult)
        assert result
        yield result


class AbstractSearch(abc.ABC):
    """ Encapsulation of a single lookup in the database.
    """
    SEARCH_PRIO: int = 2

    def __init__(self, penalty: float) -> None:
        self.penalty = penalty

    @abc.abstractmethod
    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Find results for the search in the database.
        """


class NearSearch(AbstractSearch):
    """ Category search of a place type near the result of another search.
    """
    def __init__(self, penalty: float, categories: WeightedCategories,
                 search: AbstractSearch) -> None:
        super().__init__(penalty)
        self.search = search
        self.categories = categories

    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Find results for the search in the database.
        """
        results = nres.SearchResults()
        base = await self.search.lookup(conn, details)

        if not base:
            return results

        base.sort(key=lambda r: (r.accuracy, r.rank_search))
        max_accuracy = base[0].accuracy + 0.5
        if base[0].rank_address == 0:
            min_rank = 0
            max_rank = 0
        elif base[0].rank_address < 26:
            min_rank = 1
            max_rank = min(25, base[0].rank_address + 4)
        else:
            min_rank = 26
            max_rank = 30
        base = nres.SearchResults(r for r in base
                                  if (r.source_table == nres.SourceTable.PLACEX
                                      and r.accuracy <= max_accuracy
                                      and r.bbox and r.bbox.area < 20
                                      and r.rank_address >= min_rank
                                      and r.rank_address <= max_rank))

        if base:
            baseids = [b.place_id for b in base[:5] if b.place_id]

            for category, penalty in self.categories:
                await self.lookup_category(results, conn, baseids, category, penalty, details)
                if len(results) >= details.max_results:
                    break

        return results

    async def lookup_category(self, results: nres.SearchResults,
                              conn: SearchConnection, ids: List[int],
                              category: Tuple[str, str], penalty: float,
                              details: SearchDetails) -> None:
        """ Find places of the given category near the list of
            place ids and add the results to 'results'.
        """
        table = await conn.get_class_table(*category)

        tgeom = conn.t.placex.alias('pgeom')

        if table is None:
            # No classtype table available, do a simplified lookup in placex.
            table = conn.t.placex
            sql = sa.select(table.c.place_id,
                            sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
                              .label('dist'))\
                .join(tgeom, table.c.geometry.intersects(tgeom.c.centroid.ST_Expand(0.01)))\
                .where(table.c.class_ == category[0])\
                .where(table.c.type == category[1])
        else:
            # Use classtype table. We can afford to use a larger
            # radius for the lookup.
            sql = sa.select(table.c.place_id,
                            sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
                              .label('dist'))\
                .join(tgeom,
                      table.c.centroid.ST_CoveredBy(
                          sa.case((sa.and_(tgeom.c.rank_address > 9,
                                           tgeom.c.geometry.is_area()),
                                   tgeom.c.geometry),
                                  else_=tgeom.c.centroid.ST_Expand(0.05))))

        inner = sql.where(tgeom.c.place_id.in_(ids))\
                   .group_by(table.c.place_id).subquery()

        t = conn.t.placex
        sql = _select_placex(t).add_columns((-inner.c.dist).label('importance'))\
            .join(inner, inner.c.place_id == t.c.place_id)\
            .order_by(inner.c.dist)

        sql = sql.where(no_index(t.c.rank_address).between(MIN_RANK_PARAM, MAX_RANK_PARAM))
        if details.countries:
            sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM))
        if details.excluded:
            sql = sql.where(_exclude_places(t))
        if details.layers is not None:
            sql = sql.where(_filter_by_layer(t, details.layers))

        sql = sql.limit(LIMIT_PARAM)
        for row in await conn.execute(sql, _details_to_bind_params(details)):
            result = nres.create_from_placex_row(row, nres.SearchResult)
            assert result
            result.accuracy = self.penalty + penalty
            result.bbox = Bbox.from_wkb(row.bbox)
            results.append(result)


class PoiSearch(AbstractSearch):
    """ Category search in a geographic area.
    """
    def __init__(self, sdata: SearchData) -> None:
        super().__init__(sdata.penalty)
        self.qualifiers = sdata.qualifiers
        self.countries = sdata.countries

    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Find results for the search in the database.
        """
        bind_params = _details_to_bind_params(details)
        t = conn.t.placex

        rows: List[SaRow] = []

        if details.near and details.near_radius is not None and details.near_radius < 0.2:
            # simply search in placex table
            def _base_query() -> SaSelect:
                return _select_placex(t) \
                    .add_columns((-t.c.centroid.ST_Distance(NEAR_PARAM))
                                 .label('importance'))\
                    .where(t.c.linked_place_id == None) \
                    .where(t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
                    .order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) \
                    .limit(LIMIT_PARAM)

            classtype = self.qualifiers.values
            if len(classtype) == 1:
                cclass, ctype = classtype[0]
                sql: SaLambdaSelect = sa.lambda_stmt(
                    lambda: _base_query().where(t.c.class_ == cclass)
                                         .where(t.c.type == ctype))
            else:
                sql = _base_query().where(sa.or_(*(sa.and_(t.c.class_ == cls, t.c.type == typ)
                                                   for cls, typ in classtype)))

            if self.countries:
                sql = sql.where(t.c.country_code.in_(self.countries.values))

            if details.viewbox is not None and details.bounded_viewbox:
                sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))

            rows.extend(await conn.execute(sql, bind_params))
        else:
            # use the class type tables
            for category in self.qualifiers.values:
                table = await conn.get_class_table(*category)
                if table is not None:
                    sql = _select_placex(t)\
                        .add_columns(t.c.importance)\
                        .join(table, t.c.place_id == table.c.place_id)\
                        .where(t.c.class_ == category[0])\
                        .where(t.c.type == category[1])

                    if details.viewbox is not None and details.bounded_viewbox:
                        sql = sql.where(table.c.centroid.intersects(VIEWBOX_PARAM))

                    if details.near and details.near_radius is not None:
                        sql = sql.order_by(table.c.centroid.ST_Distance(NEAR_PARAM))\
                                 .where(table.c.centroid.within_distance(NEAR_PARAM,
                                                                         NEAR_RADIUS_PARAM))

                    if self.countries:
                        sql = sql.where(t.c.country_code.in_(self.countries.values))

                    sql = sql.limit(LIMIT_PARAM)
                    rows.extend(await conn.execute(sql, bind_params))

        results = nres.SearchResults()
        for row in rows:
            result = nres.create_from_placex_row(row, nres.SearchResult)
            assert result
            result.accuracy = self.penalty + self.qualifiers.get_penalty((row.class_, row.type))
            result.bbox = Bbox.from_wkb(row.bbox)
            results.append(result)

        return results


class CountrySearch(AbstractSearch):
    """ Search for a country name or country code.
    """
    SEARCH_PRIO = 0

    def __init__(self, sdata: SearchData) -> None:
        super().__init__(sdata.penalty)
        self.countries = sdata.countries

    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Find results for the search in the database.
        """
        t = conn.t.placex

        ccodes = self.countries.values
        sql = _select_placex(t)\
            .add_columns(t.c.importance)\
            .where(t.c.country_code.in_(ccodes))\
            .where(t.c.rank_address == 4)

        if details.geometry_output:
            sql = _add_geometry_columns(sql, t.c.geometry, details)

        if details.excluded:
            sql = sql.where(_exclude_places(t))

        sql = filter_by_area(sql, t, details)

        results = nres.SearchResults()
        for row in await conn.execute(sql, _details_to_bind_params(details)):
            result = nres.create_from_placex_row(row, nres.SearchResult)
            assert result
            result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
            result.bbox = Bbox.from_wkb(row.bbox)
            results.append(result)

        if not results:
            results = await self.lookup_in_country_table(conn, details)

        if results:
            details.min_rank = min(5, details.max_rank)
            details.max_rank = min(25, details.max_rank)

        return results

    async def lookup_in_country_table(self, conn: SearchConnection,
                                      details: SearchDetails) -> nres.SearchResults:
        """ Look up the country in the fallback country tables.
        """
        # Avoid the fallback search when this is a 'more' search (paging via
        # excluded places). Country results usually come in the first batch
        # of results and it is not possible to exclude these fallbacks.
        if details.excluded:
            return nres.SearchResults()

        t = conn.t.country_name
        tgrid = conn.t.country_grid

        sql = sa.select(tgrid.c.country_code,
                        tgrid.c.geometry.ST_Centroid().ST_Collect().ST_Centroid()
                             .label('centroid'),
                        tgrid.c.geometry.ST_Collect().ST_Expand(0).label('bbox'))\
            .where(tgrid.c.country_code.in_(self.countries.values))\
            .group_by(tgrid.c.country_code)

        sql = filter_by_area(sql, tgrid, details, avoid_index=True)

        sub = sql.subquery('grid')

        sql = sa.select(t.c.country_code,
                        t.c.name.merge(t.c.derived_name).label('name'),
                        sub.c.centroid, sub.c.bbox)\
            .join(sub, t.c.country_code == sub.c.country_code)

        if details.geometry_output:
            sql = _add_geometry_columns(sql, sub.c.centroid, details)

        results = nres.SearchResults()
        for row in await conn.execute(sql, _details_to_bind_params(details)):
            result = nres.create_from_country_row(row, nres.SearchResult)
            assert result
            result.bbox = Bbox.from_wkb(row.bbox)
            result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
            results.append(result)

        return results


class PostcodeSearch(AbstractSearch):
    """ Search for a postcode.
    """
    def __init__(self, extra_penalty: float, sdata: SearchData) -> None:
        super().__init__(sdata.penalty + extra_penalty)
        self.countries = sdata.countries
        self.postcodes = sdata.postcodes
        self.lookups = sdata.lookups
        self.rankings = sdata.rankings

    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Find results for the search in the database.
        """
        t = conn.t.postcode
        pcs = self.postcodes.values

        sql = sa.select(t.c.place_id, t.c.parent_place_id,
                        t.c.rank_search, t.c.rank_address,
                        t.c.postcode, t.c.country_code,
                        t.c.geometry.label('centroid'))\
            .where(t.c.postcode.in_(pcs))

        if details.geometry_output:
            sql = _add_geometry_columns(sql, t.c.geometry, details)

        penalty: SaExpression = sa.literal(self.penalty)

        if details.viewbox is not None and not details.bounded_viewbox:
            penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
                               (t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5),
                               else_=1.0)

        if details.near is not None:
            sql = sql.order_by(t.c.geometry.ST_Distance(NEAR_PARAM))

        sql = filter_by_area(sql, t, details)

        if self.countries:
            sql = sql.where(t.c.country_code.in_(self.countries.values))

        if details.excluded:
            sql = sql.where(_exclude_places(t))

        if self.lookups:
            assert len(self.lookups) == 1
            tsearch = conn.t.search_name
            sql = sql.where(tsearch.c.place_id == t.c.parent_place_id)\
                     .where((tsearch.c.name_vector + tsearch.c.nameaddress_vector)
                            .contains(sa.type_coerce(self.lookups[0].tokens,
                                                     IntArray)))

        for ranking in self.rankings:
            penalty += ranking.sql_penalty(conn.t.search_name)
        penalty += sa.case(*((t.c.postcode == v, p) for v, p in self.postcodes),
                           else_=1.0)

        sql = sql.add_columns(penalty.label('accuracy'))
        sql = sql.order_by('accuracy').limit(LIMIT_PARAM)

        results = nres.SearchResults()
        for row in await conn.execute(sql, _details_to_bind_params(details)):
            p = conn.t.placex
            placex_sql = _select_placex(p)\
                .add_columns(p.c.importance)\
                .where(sa.text("""class = 'boundary'
                                  AND type = 'postal_code'
                                  AND osm_type = 'R'"""))\
                .where(p.c.country_code == row.country_code)\
                .where(p.c.postcode == row.postcode)\
                .limit(1)

            if details.geometry_output:
                placex_sql = _add_geometry_columns(placex_sql, p.c.geometry, details)

            for prow in await conn.execute(placex_sql, _details_to_bind_params(details)):
                result = nres.create_from_placex_row(prow, nres.SearchResult)
                if result is not None:
                    result.bbox = Bbox.from_wkb(prow.bbox)
                break
            else:
                result = nres.create_from_postcode_row(row, nres.SearchResult)

            assert result
            if result.place_id not in details.excluded:
                result.accuracy = row.accuracy
                results.append(result)

        return results


class PlaceSearch(AbstractSearch):
    """ Generic search for an address or named place.
    """
    SEARCH_PRIO = 1

    def __init__(self, extra_penalty: float, sdata: SearchData, expected_count: int) -> None:
        super().__init__(sdata.penalty + extra_penalty)
        self.countries = sdata.countries
        self.postcodes = sdata.postcodes
        self.housenumbers = sdata.housenumbers
        self.qualifiers = sdata.qualifiers
        self.lookups = sdata.lookups
        self.rankings = sdata.rankings
        self.expected_count = expected_count

    def _inner_search_name_cte(self, conn: SearchConnection,
                               details: SearchDetails) -> 'sa.CTE':
        """ Create a subquery that preselects the rows in the search_name
            table.
        """
        t = conn.t.search_name

        penalty: SaExpression = sa.literal(self.penalty)
        for ranking in self.rankings:
            penalty += ranking.sql_penalty(t)

        sql = sa.select(t.c.place_id, t.c.search_rank, t.c.address_rank,
                        t.c.country_code, t.c.centroid,
                        t.c.name_vector, t.c.nameaddress_vector,
                        sa.case((t.c.importance > 0, t.c.importance),
                                else_=0.40001-(sa.cast(t.c.search_rank, sa.Float())/75))
                          .label('importance'),
                        penalty.label('penalty'))

        for lookup in self.lookups:
            sql = sql.where(lookup.sql_condition(t))

        if self.countries:
            sql = sql.where(t.c.country_code.in_(self.countries.values))

        if self.postcodes:
            # if a postcode is given, don't search for state or country level objects
            sql = sql.where(t.c.address_rank > 9)
            if self.expected_count > 10000:
                # Many results expected. Restrict by postcode.
                tpc = conn.t.postcode
                sql = sql.where(sa.select(tpc.c.postcode)
                                  .where(tpc.c.postcode.in_(self.postcodes.values))
                                  .where(t.c.centroid.within_distance(tpc.c.geometry, 0.4))
                                  .exists())

        if details.viewbox is not None:
            if details.bounded_viewbox:
                sql = sql.where(t.c.centroid
                                 .intersects(VIEWBOX_PARAM,
                                             use_index=details.viewbox.area < 0.2))
            elif not self.postcodes and not self.housenumbers and self.expected_count >= 10000:
                sql = sql.where(t.c.centroid
                                 .intersects(VIEWBOX2_PARAM,
                                             use_index=details.viewbox.area < 0.5))

        if details.near is not None and details.near_radius is not None:
            if details.near_radius < 0.1:
                sql = sql.where(t.c.centroid.within_distance(NEAR_PARAM,
                                                             NEAR_RADIUS_PARAM))
            else:
                sql = sql.where(t.c.centroid
                                 .ST_Distance(NEAR_PARAM) < NEAR_RADIUS_PARAM)

        if self.housenumbers:
            sql = sql.where(t.c.address_rank.between(16, 30))
        else:
            if details.excluded:
                sql = sql.where(_exclude_places(t))
            if details.min_rank > 0:
                sql = sql.where(sa.or_(t.c.address_rank >= MIN_RANK_PARAM,
                                       t.c.search_rank >= MIN_RANK_PARAM))
            if details.max_rank < 30:
                sql = sql.where(sa.or_(t.c.address_rank <= MAX_RANK_PARAM,
                                       t.c.search_rank <= MAX_RANK_PARAM))

        inner = sql.limit(10000).order_by(sa.desc(sa.text('importance'))).subquery()

        sql = sa.select(inner.c.place_id, inner.c.search_rank, inner.c.address_rank,
                        inner.c.country_code, inner.c.centroid, inner.c.importance,
                        inner.c.penalty)

        # If the query is not an address search or has a geographic preference,
        # preselect most important items to restrict the number of places
        # that need to be looked up in placex.
        if not self.housenumbers\
           and (details.viewbox is None or details.bounded_viewbox)\
           and (details.near is None or details.near_radius is not None)\
           and not self.qualifiers:
            sql = sql.add_columns(sa.func.first_value(inner.c.penalty - inner.c.importance)
                                    .over(order_by=inner.c.penalty - inner.c.importance)
                                    .label('min_penalty'))

            inner = sql.subquery()

            sql = sa.select(inner.c.place_id, inner.c.search_rank, inner.c.address_rank,
                            inner.c.country_code, inner.c.centroid, inner.c.importance,
                            inner.c.penalty)\
                .where(inner.c.penalty - inner.c.importance < inner.c.min_penalty + 0.5)

        return sql.cte('searches')
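The window function tacks the best (penalty - importance) score onto every preselected row, so the outer select can drop everything more than 0.5 behind the leader before the expensive placex join. The same filter in plain Python, with invented rows:

    # Illustrative sketch of the window-function preselection above.
    rows = [{'id': 1, 'penalty': 0.0, 'importance': 0.3},
            {'id': 2, 'penalty': 0.2, 'importance': 0.1},
            {'id': 3, 'penalty': 1.4, 'importance': 0.2}]

    min_penalty = min(r['penalty'] - r['importance'] for r in rows)  # first_value(...)
    keep = [r['id'] for r in rows if r['penalty'] - r['importance'] < min_penalty + 0.5]
    print(keep)  # [1, 2] - row 3 is too far behind to make the final result
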

    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Find results for the search in the database.
        """
        t = conn.t.placex
        tsearch = self._inner_search_name_cte(conn, details)

        sql = _select_placex(t).join(tsearch, t.c.place_id == tsearch.c.place_id)

        if details.geometry_output:
            sql = _add_geometry_columns(sql, t.c.geometry, details)

        penalty: SaExpression = tsearch.c.penalty

        if self.postcodes:
            tpc = conn.t.postcode
            pcs = self.postcodes.values

            pc_near = sa.select(sa.func.min(tpc.c.geometry.ST_Distance(t.c.centroid)))\
                .where(tpc.c.postcode.in_(pcs))\
                .scalar_subquery()
            penalty += sa.case((t.c.postcode.in_(pcs), 0.0),
                               else_=sa.func.coalesce(pc_near, cast(SaColumn, 2.0)))

        if details.viewbox is not None and not details.bounded_viewbox:
            penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM, use_index=False), 0.0),
                               (t.c.geometry.intersects(VIEWBOX2_PARAM, use_index=False), 0.5),
                               else_=1.0)

        if details.near is not None:
            sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM))
                                  .label('importance'))
            sql = sql.order_by(sa.desc(sa.text('importance')))
        else:
            sql = sql.order_by(penalty - tsearch.c.importance)
            sql = sql.add_columns(tsearch.c.importance)

        sql = sql.add_columns(penalty.label('accuracy'))\
                 .order_by(sa.text('accuracy'))

        if self.housenumbers:
            hnr_list = '|'.join(self.housenumbers.values)
            inner = sql.where(sa.or_(tsearch.c.address_rank < 30,
                                     sa.func.RegexpWord(hnr_list, t.c.housenumber)))\
                       .subquery()

            # Housenumbers from placex
            thnr = conn.t.placex.alias('hnr')
            pid_list = sa.func.ArrayAgg(thnr.c.place_id)
            place_sql = sa.select(pid_list)\
                .where(thnr.c.parent_place_id == inner.c.place_id)\
                .where(sa.func.RegexpWord(hnr_list, thnr.c.housenumber))\
                .where(thnr.c.linked_place_id == None)\
                .where(thnr.c.indexed_status == 0)

            if details.excluded:
                place_sql = place_sql.where(thnr.c.place_id.not_in(sa.bindparam('excluded')))
            if self.qualifiers:
                place_sql = place_sql.where(self.qualifiers.sql_restrict(thnr))

            numerals = [int(n) for n in self.housenumbers.values
                        if n.isdigit() and len(n) < 8]
            interpol_sql: SaColumn
            tiger_sql: SaColumn
            if numerals and \
               (not self.qualifiers or ('place', 'house') in self.qualifiers.values):
                # Housenumbers from interpolations
                interpol_sql = _make_interpolation_subquery(conn.t.osmline, inner,
                                                            numerals, details)
                # Housenumbers from Tiger
                tiger_sql = sa.case((inner.c.country_code == 'us',
                                     _make_interpolation_subquery(conn.t.tiger, inner,
                                                                  numerals, details)
                                     ), else_=None)
            else:
                interpol_sql = sa.null()
                tiger_sql = sa.null()

            unsort = sa.select(inner, place_sql.scalar_subquery().label('placex_hnr'),
                               interpol_sql.label('interpol_hnr'),
                               tiger_sql.label('tiger_hnr')).subquery('unsort')
            sql = sa.select(unsort)\
                .order_by(sa.case((unsort.c.placex_hnr != None, 1),
                                  (unsort.c.interpol_hnr != None, 2),
                                  (unsort.c.tiger_hnr != None, 3),
                                  else_=4),
                          unsort.c.accuracy)
        else:
            sql = sql.where(t.c.linked_place_id == None)\
                     .where(t.c.indexed_status == 0)
            if self.qualifiers:
                sql = sql.where(self.qualifiers.sql_restrict(t))
            if details.layers is not None:
                sql = sql.where(_filter_by_layer(t, details.layers))

        sql = sql.limit(LIMIT_PARAM)

        results = nres.SearchResults()
        for row in await conn.execute(sql, _details_to_bind_params(details)):
            result = nres.create_from_placex_row(row, nres.SearchResult)
            assert result
            result.bbox = Bbox.from_wkb(row.bbox)
            result.accuracy = row.accuracy
            if self.housenumbers and row.rank_address < 30:
                if row.placex_hnr:
                    subs = _get_placex_housenumbers(conn, row.placex_hnr, details)
                elif row.interpol_hnr:
                    subs = _get_osmline(conn, row.interpol_hnr, numerals, details)
                elif row.tiger_hnr:
                    subs = _get_tiger(conn, row.tiger_hnr, numerals, row.osm_id, details)
                else:
                    subs = None

                if subs is not None:
                    async for sub in subs:
                        assert sub.housenumber
                        sub.accuracy = result.accuracy
                        if not any(nr in self.housenumbers.values
                                   for nr in sub.housenumber.split(';')):
                            sub.accuracy += 0.6
                        results.append(sub)

                # Only add the street as a result if it meets all other
                # filter conditions.
                if (not details.excluded or result.place_id not in details.excluded)\
                   and (not self.qualifiers or result.category in self.qualifiers.values)\
                   and result.rank_address >= details.min_rank:
                    result.accuracy += 1.0  # penalty for missing housenumber
                    results.append(result)
            else:
                results.append(result)

        return results
@@ -1,17 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Module implementing the actual database accesses for forward search.
"""

from .base import AbstractSearch as AbstractSearch
from .near_search import NearSearch as NearSearch
from .poi_search import PoiSearch as PoiSearch
from .country_search import CountrySearch as CountrySearch
from .postcode_search import PostcodeSearch as PostcodeSearch
from .place_search import PlaceSearch as PlaceSearch
from .address_search import AddressSearch as AddressSearch
@@ -1,360 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of search for an address (search with housenumber).
"""
from typing import cast, List, AsyncIterator

import sqlalchemy as sa

from . import base
from ...typing import SaBind, SaExpression, SaColumn, SaFromClause, SaScalarSelect
from ...types import SearchDetails, Bbox
from ...sql.sqlalchemy_types import Geometry
from ...connection import SearchConnection
from ... import results as nres
from ..db_search_fields import SearchData


LIMIT_PARAM: SaBind = sa.bindparam('limit')
MIN_RANK_PARAM: SaBind = sa.bindparam('min_rank')
MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank')
VIEWBOX_PARAM: SaBind = sa.bindparam('viewbox', type_=Geometry)
VIEWBOX2_PARAM: SaBind = sa.bindparam('viewbox2', type_=Geometry)
NEAR_PARAM: SaBind = sa.bindparam('near', type_=Geometry)
NEAR_RADIUS_PARAM: SaBind = sa.bindparam('near_radius')
COUNTRIES_PARAM: SaBind = sa.bindparam('countries')


def _int_list_to_subquery(inp: List[int]) -> 'sa.Subquery':
    """ Create a subselect that returns the given list of integers
        as rows in the column 'nr'.
    """
    vtab = sa.func.JsonArrayEach(sa.type_coerce(inp, sa.JSON))\
             .table_valued(sa.column('value', type_=sa.JSON))
    return sa.select(sa.cast(sa.cast(vtab.c.value, sa.Text), sa.Integer).label('nr')).subquery()


def _interpolated_position(table: SaFromClause, nr: SaColumn) -> SaColumn:
    pos = sa.cast(nr - table.c.startnumber, sa.Float) / (table.c.endnumber - table.c.startnumber)
    return sa.case(
        (table.c.endnumber == table.c.startnumber, table.c.linegeo.ST_Centroid()),
        else_=table.c.linegeo.ST_LineInterpolatePoint(pos)).label('centroid')


def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause,
                                 numerals: List[int], details: SearchDetails) -> SaScalarSelect:
    all_ids = sa.func.ArrayAgg(table.c.place_id)
    sql = sa.select(all_ids).where(table.c.parent_place_id == inner.c.place_id)

    if len(numerals) == 1:
        sql = sql.where(sa.between(numerals[0], table.c.startnumber, table.c.endnumber))\
                 .where((numerals[0] - table.c.startnumber) % table.c.step == 0)
    else:
        sql = sql.where(sa.or_(
            *(sa.and_(sa.between(n, table.c.startnumber, table.c.endnumber),
                      (n - table.c.startnumber) % table.c.step == 0)
              for n in numerals)))

    if details.excluded:
        sql = sql.where(base.exclude_places(table))

    return sql.scalar_subquery()


async def _get_placex_housenumbers(conn: SearchConnection,
                                   place_ids: List[int],
                                   details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
    t = conn.t.placex
    sql = base.select_placex(t).add_columns(t.c.importance)\
        .where(t.c.place_id.in_(place_ids))

    if details.geometry_output:
        sql = base.add_geometry_columns(sql, t.c.geometry, details)

    for row in await conn.execute(sql):
        result = nres.create_from_placex_row(row, nres.SearchResult)
        assert result
        result.bbox = Bbox.from_wkb(row.bbox)
        yield result


async def _get_osmline(conn: SearchConnection, place_ids: List[int],
                       numerals: List[int],
                       details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
    t = conn.t.osmline

    values = _int_list_to_subquery(numerals)
    sql = sa.select(t.c.place_id, t.c.osm_id,
                    t.c.parent_place_id, t.c.address,
                    values.c.nr.label('housenumber'),
                    _interpolated_position(t, values.c.nr),
                    t.c.postcode, t.c.country_code)\
        .where(t.c.place_id.in_(place_ids))\
        .join(values, values.c.nr.between(t.c.startnumber, t.c.endnumber))

    if details.geometry_output:
        sub = sql.subquery()
        sql = base.add_geometry_columns(sa.select(sub), sub.c.centroid, details)

    for row in await conn.execute(sql):
        result = nres.create_from_osmline_row(row, nres.SearchResult)
        assert result
        yield result


async def _get_tiger(conn: SearchConnection, place_ids: List[int],
                     numerals: List[int], osm_id: int,
                     details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
    t = conn.t.tiger
    values = _int_list_to_subquery(numerals)
    sql = sa.select(t.c.place_id, t.c.parent_place_id,
                    sa.literal('W').label('osm_type'),
                    sa.literal(osm_id).label('osm_id'),
                    values.c.nr.label('housenumber'),
                    _interpolated_position(t, values.c.nr),
                    t.c.postcode)\
        .where(t.c.place_id.in_(place_ids))\
        .join(values, values.c.nr.between(t.c.startnumber, t.c.endnumber))

    if details.geometry_output:
        sub = sql.subquery()
        sql = base.add_geometry_columns(sa.select(sub), sub.c.centroid, details)

    for row in await conn.execute(sql):
        result = nres.create_from_tiger_row(row, nres.SearchResult)
        assert result
        yield result


class AddressSearch(base.AbstractSearch):
    """ Generic search for an address or named place.
    """
    SEARCH_PRIO = 1

    def __init__(self, extra_penalty: float, sdata: SearchData,
                 expected_count: int, has_address_terms: bool) -> None:
        assert sdata.housenumbers
        super().__init__(sdata.penalty + extra_penalty)
        self.countries = sdata.countries
        self.postcodes = sdata.postcodes
        self.housenumbers = sdata.housenumbers
        self.qualifiers = sdata.qualifiers
        self.lookups = sdata.lookups
        self.rankings = sdata.rankings
        self.expected_count = expected_count
        self.has_address_terms = has_address_terms

    def _inner_search_name_cte(self, conn: SearchConnection,
                               details: SearchDetails) -> 'sa.CTE':
        """ Create a subquery that preselects the rows in the search_name
            table.
        """
        t = conn.t.search_name

        penalty: SaExpression = sa.literal(self.penalty)
        for ranking in self.rankings:
            penalty += ranking.sql_penalty(t)

        sql = sa.select(t.c.place_id, t.c.search_rank, t.c.address_rank,
                        t.c.country_code, t.c.centroid,
                        t.c.name_vector, t.c.nameaddress_vector,
                        sa.case((t.c.importance > 0, t.c.importance),
                                else_=0.40001-(sa.cast(t.c.search_rank, sa.Float())/75))
                          .label('importance'),
                        penalty.label('penalty'))

        for lookup in self.lookups:
            sql = sql.where(lookup.sql_condition(t))

        if self.countries:
            sql = sql.where(t.c.country_code.in_(self.countries.values))

        if self.postcodes:
            if self.expected_count > 10000:
                tpc = conn.t.postcode
                sql = sql.where(sa.select(tpc.c.postcode)
                                  .where(tpc.c.postcode.in_(self.postcodes.values))
                                  .where(tpc.c.country_code == t.c.country_code)
                                  .where(t.c.centroid.within_distance(tpc.c.geometry, 0.4))
                                  .exists())

        if details.viewbox is not None:
            if details.bounded_viewbox:
                sql = sql.where(t.c.centroid
                                 .intersects(VIEWBOX_PARAM,
                                             use_index=details.viewbox.area < 0.2))

        if details.near is not None and details.near_radius is not None:
            if details.near_radius < 0.1:
                sql = sql.where(t.c.centroid.within_distance(NEAR_PARAM,
                                                             NEAR_RADIUS_PARAM))
            else:
                sql = sql.where(t.c.centroid
                                 .ST_Distance(NEAR_PARAM) < NEAR_RADIUS_PARAM)

        if self.has_address_terms:
            sql = sql.where(t.c.address_rank.between(16, 30))
        else:
            # If no further address terms are given, then the base street must
            # be in the name. No search for named POIs with the given house number.
            sql = sql.where(t.c.address_rank.between(16, 27))

        inner = sql.limit(10000).order_by(sa.desc(sa.text('importance'))).subquery()

        sql = sa.select(inner.c.place_id, inner.c.search_rank, inner.c.address_rank,
                        inner.c.country_code, inner.c.centroid, inner.c.importance,
                        inner.c.penalty)

        return sql.cte('searches')

    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Find results for the search in the database.
        """
        t = conn.t.placex
        tsearch = self._inner_search_name_cte(conn, details)

        sql = base.select_placex(t).join(tsearch, t.c.place_id == tsearch.c.place_id)

        if details.geometry_output:
            sql = base.add_geometry_columns(sql, t.c.geometry, details)

        penalty: SaExpression = tsearch.c.penalty

        if self.postcodes:
            tpc = conn.t.postcode
            pcs = self.postcodes.values

            pc_near = sa.select(sa.func.min(tpc.c.geometry.ST_Distance(t.c.centroid)
                                            * (tpc.c.rank_search - 19)))\
                .where(tpc.c.postcode.in_(pcs))\
                .where(tpc.c.country_code == t.c.country_code)\
                .scalar_subquery()
            penalty += sa.case((t.c.postcode.in_(pcs), 0.0),
                               else_=sa.func.coalesce(pc_near, cast(SaColumn, 2.0)))

        if details.viewbox is not None and not details.bounded_viewbox:
            penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM, use_index=False), 0.0),
                               (t.c.geometry.intersects(VIEWBOX2_PARAM, use_index=False), 0.5),
                               else_=1.0)

        if details.near is not None:
            sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM))
                                  .label('importance'))
            sql = sql.order_by(sa.desc(sa.text('importance')))
        else:
            sql = sql.order_by(penalty - tsearch.c.importance)
            sql = sql.add_columns(tsearch.c.importance)

        sql = sql.add_columns(penalty.label('accuracy'))\
                 .order_by(sa.text('accuracy'))

        hnr_list = '|'.join(self.housenumbers.values)

        if self.has_address_terms:
            sql = sql.where(sa.or_(tsearch.c.address_rank < 30,
|
||||
sa.func.RegexpWord(hnr_list, t.c.housenumber)))
|
||||
|
||||
inner = sql.subquery()
|
||||
|
||||
# Housenumbers from placex
|
||||
thnr = conn.t.placex.alias('hnr')
|
||||
pid_list = sa.func.ArrayAgg(thnr.c.place_id)
|
||||
place_sql = sa.select(pid_list)\
|
||||
.where(thnr.c.parent_place_id == inner.c.place_id)\
|
||||
.where(sa.func.RegexpWord(hnr_list, thnr.c.housenumber))\
|
||||
.where(thnr.c.linked_place_id == None)\
|
||||
.where(thnr.c.indexed_status == 0)
|
||||
|
||||
if details.excluded:
|
||||
place_sql = place_sql.where(thnr.c.place_id.not_in(sa.bindparam('excluded')))
|
||||
if self.qualifiers:
|
||||
place_sql = place_sql.where(self.qualifiers.sql_restrict(thnr))
|
||||
|
||||
numerals = [int(n) for n in self.housenumbers.values
|
||||
if n.isdigit() and len(n) < 8]
|
||||
interpol_sql: SaColumn
|
||||
tiger_sql: SaColumn
|
||||
if numerals and \
|
||||
(not self.qualifiers or ('place', 'house') in self.qualifiers.values):
|
||||
# Housenumbers from interpolations
|
||||
interpol_sql = _make_interpolation_subquery(conn.t.osmline, inner,
|
||||
numerals, details)
|
||||
# Housenumbers from Tiger
|
||||
tiger_sql = sa.case((inner.c.country_code == 'us',
|
||||
_make_interpolation_subquery(conn.t.tiger, inner,
|
||||
numerals, details)
|
||||
), else_=None)
|
||||
else:
|
||||
interpol_sql = sa.null()
|
||||
tiger_sql = sa.null()
|
||||
|
||||
unsort = sa.select(inner, place_sql.scalar_subquery().label('placex_hnr'),
|
||||
interpol_sql.label('interpol_hnr'),
|
||||
tiger_sql.label('tiger_hnr')).subquery('unsort')
|
||||
sql = sa.select(unsort)\
|
||||
.order_by(unsort.c.accuracy +
|
||||
sa.case((unsort.c.placex_hnr != None, 0),
|
||||
(unsort.c.interpol_hnr != None, 0),
|
||||
(unsort.c.tiger_hnr != None, 0),
|
||||
else_=1),
|
||||
sa.case((unsort.c.placex_hnr != None, 1),
|
||||
(unsort.c.interpol_hnr != None, 2),
|
||||
(unsort.c.tiger_hnr != None, 3),
|
||||
else_=4))
|
||||
|
||||
sql = sql.limit(LIMIT_PARAM)
|
||||
|
||||
bind_params = {
|
||||
'limit': details.max_results,
|
||||
'min_rank': details.min_rank,
|
||||
'max_rank': details.max_rank,
|
||||
'viewbox': details.viewbox,
|
||||
'viewbox2': details.viewbox_x2,
|
||||
'near': details.near,
|
||||
'near_radius': details.near_radius,
|
||||
'excluded': details.excluded,
|
||||
'countries': details.countries
|
||||
}
|
||||
|
||||
results = nres.SearchResults()
|
||||
for row in await conn.execute(sql, bind_params):
|
||||
result = nres.create_from_placex_row(row, nres.SearchResult)
|
||||
assert result
|
||||
result.bbox = Bbox.from_wkb(row.bbox)
|
||||
result.accuracy = row.accuracy
|
||||
if row.rank_address < 30:
|
||||
if row.placex_hnr:
|
||||
subs = _get_placex_housenumbers(conn, row.placex_hnr, details)
|
||||
elif row.interpol_hnr:
|
||||
subs = _get_osmline(conn, row.interpol_hnr, numerals, details)
|
||||
elif row.tiger_hnr:
|
||||
subs = _get_tiger(conn, row.tiger_hnr, numerals, row.osm_id, details)
|
||||
else:
|
||||
subs = None
|
||||
|
||||
if subs is not None:
|
||||
async for sub in subs:
|
||||
assert sub.housenumber
|
||||
sub.accuracy = result.accuracy
|
||||
if not any(nr in self.housenumbers.values
|
||||
for nr in sub.housenumber.split(';')):
|
||||
sub.accuracy += 0.6
|
||||
results.append(sub)
|
||||
|
||||
# Only add the street as a result, if it meets all other
|
||||
# filter conditions.
|
||||
if (not details.excluded or result.place_id not in details.excluded)\
|
||||
and (not self.qualifiers or result.category in self.qualifiers.values)\
|
||||
and result.rank_address >= details.min_rank:
|
||||
result.accuracy += 1.0 # penalty for missing housenumber
|
||||
results.append(result)
|
||||
else:
|
||||
results.append(result)
|
||||
|
||||
return results
|
||||
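Note on the final ORDER BY of the `unsort` query above: rows with any housenumber hit sort ahead of plain street matches (flat penalty of 1 for rows where all three housenumber columns are NULL), and among housenumber hits placex results are preferred over interpolations, which are preferred over Tiger data. A minimal pure-Python sketch of the same two-stage sort key follows; the sample rows are invented for illustration and are not Nominatim data:

```python
# Sketch of the two-stage ordering used in the 'unsort' query above:
# first accuracy plus a flat penalty for rows without any housenumber hit,
# then source preference (placex -> interpolation -> Tiger).
from typing import List, Optional, Tuple

def sort_key(accuracy: float, placex_hnr: Optional[List[int]],
             interpol_hnr: Optional[List[int]],
             tiger_hnr: Optional[List[int]]) -> Tuple[float, int]:
    has_hnr = any(h is not None for h in (placex_hnr, interpol_hnr, tiger_hnr))
    source_pref = 1 if placex_hnr is not None else \
                  2 if interpol_hnr is not None else \
                  3 if tiger_hnr is not None else 4
    return (accuracy + (0 if has_hnr else 1), source_pref)

rows = [(0.3, None, None, [101]),   # Tiger housenumber match
        (0.3, [55], None, None),    # placex housenumber match
        (0.2, None, None, None)]    # street without housenumber
print(sorted(rows, key=lambda r: sort_key(*r)))
# placex match first, then Tiger, then the bare street
```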
@@ -1,144 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Interface for classes implementing a database search.
"""
from typing import Callable, List
import abc

import sqlalchemy as sa

from ...typing import SaFromClause, SaSelect, SaColumn, SaExpression, SaLambdaSelect
from ...sql.sqlalchemy_types import Geometry
from ...connection import SearchConnection
from ...types import SearchDetails, DataLayer, GeometryFormat
from ...results import SearchResults


class AbstractSearch(abc.ABC):
    """ Encapsulation of a single lookup in the database.
    """
    SEARCH_PRIO: int = 2

    def __init__(self, penalty: float) -> None:
        self.penalty = penalty

    @abc.abstractmethod
    async def lookup(self, conn: SearchConnection, details: SearchDetails) -> SearchResults:
        """ Find results for the search in the database.
        """


def select_placex(t: SaFromClause) -> SaSelect:
    """ Return the basic select query for placex which returns all
        fields necessary to fill a Nominatim result. 't' must either be
        the placex table or a subquery returning appropriate fields from
        a placex-related query.
    """
    return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                     t.c.class_, t.c.type,
                     t.c.address, t.c.extratags,
                     t.c.housenumber, t.c.postcode, t.c.country_code,
                     t.c.wikipedia,
                     t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
                     t.c.linked_place_id, t.c.admin_level,
                     t.c.centroid,
                     t.c.geometry.ST_Expand(0).label('bbox'))


def exclude_places(t: SaFromClause) -> Callable[[], SaExpression]:
    """ Return an expression to exclude place IDs from the list in the
        SearchDetails.

        Requires the excluded IDs to be supplied as a bind parameter in SQL.
    """
    return lambda: t.c.place_id.not_in(sa.bindparam('excluded'))


def filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
    """ Return an expression that filters the given table by layers.
    """
    orexpr: List[SaExpression] = []
    if layers & DataLayer.ADDRESS and layers & DataLayer.POI:
        orexpr.append(no_index(table.c.rank_address).between(1, 30))
    elif layers & DataLayer.ADDRESS:
        orexpr.append(no_index(table.c.rank_address).between(1, 29))
        orexpr.append(sa.func.IsAddressPoint(table))
    elif layers & DataLayer.POI:
        orexpr.append(sa.and_(no_index(table.c.rank_address) == 30,
                              table.c.class_.not_in(('place', 'building'))))

    if layers & DataLayer.MANMADE:
        exclude = []
        if not layers & DataLayer.RAILWAY:
            exclude.append('railway')
        if not layers & DataLayer.NATURAL:
            exclude.extend(('natural', 'water', 'waterway'))
        orexpr.append(sa.and_(table.c.class_.not_in(tuple(exclude)),
                              no_index(table.c.rank_address) == 0))
    else:
        include = []
        if layers & DataLayer.RAILWAY:
            include.append('railway')
        if layers & DataLayer.NATURAL:
            include.extend(('natural', 'water', 'waterway'))
        orexpr.append(sa.and_(table.c.class_.in_(tuple(include)),
                              no_index(table.c.rank_address) == 0))

    if len(orexpr) == 1:
        return orexpr[0]

    return sa.or_(*orexpr)


def no_index(expr: SaColumn) -> SaColumn:
    """ Wrap the given expression, so that the query planner will
        refrain from using the expression for index lookup.
    """
    return sa.func.coalesce(sa.null(), expr)


def filter_by_area(sql: SaSelect, t: SaFromClause,
                   details: SearchDetails, avoid_index: bool = False) -> SaSelect:
    """ Apply SQL statements for filtering by viewbox and near point,
        if applicable.
    """
    if details.near is not None and details.near_radius is not None:
        if details.near_radius < 0.1 and not avoid_index:
            sql = sql.where(
                t.c.geometry.within_distance(sa.bindparam('near', type_=Geometry),
                                             sa.bindparam('near_radius')))
        else:
            sql = sql.where(
                t.c.geometry.ST_Distance(
                    sa.bindparam('near', type_=Geometry)) <= sa.bindparam('near_radius'))
    if details.viewbox is not None and details.bounded_viewbox:
        sql = sql.where(t.c.geometry.intersects(sa.bindparam('viewbox', type_=Geometry),
                                                use_index=not avoid_index and
                                                          details.viewbox.area < 0.2))

    return sql


def add_geometry_columns(sql: SaLambdaSelect, col: SaColumn, details: SearchDetails) -> SaSelect:
    """ Add columns for requested geometry formats and return the new query.
    """
    out = []

    if details.geometry_simplification > 0.0:
        col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification)

    if details.geometry_output & GeometryFormat.GEOJSON:
        out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
    if details.geometry_output & GeometryFormat.TEXT:
        out.append(sa.func.ST_AsText(col).label('geometry_text'))
    if details.geometry_output & GeometryFormat.KML:
        out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
    if details.geometry_output & GeometryFormat.SVG:
        out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))

    return sql.add_columns(*out)
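Side note on `no_index` above: PostgreSQL's planner cannot match an expression like `coalesce(NULL, rank_address)` against a plain index on `rank_address`, which is exactly the point of the wrapper. A minimal, self-contained sketch showing the two compiled forms (plain SQLAlchemy, no Nominatim dependencies; the table definition is invented for the demonstration):

```python
# Minimal demonstration of the no_index() trick: wrapping a column in
# coalesce(NULL, col) yields an expression the planner cannot match
# against a plain index on rank_address.
import sqlalchemy as sa

meta = sa.MetaData()
placex = sa.Table('placex', meta,
                  sa.Column('place_id', sa.BigInteger),
                  sa.Column('rank_address', sa.SmallInteger))

plain = placex.c.rank_address.between(1, 30)
hidden = sa.func.coalesce(sa.null(), placex.c.rank_address).between(1, 30)

print(plain)   # placex.rank_address BETWEEN :rank_address_1 AND :rank_address_2
print(hidden)  # coalesce(NULL, placex.rank_address) BETWEEN ...
```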
@@ -1,119 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of searches for a country.
"""

import sqlalchemy as sa

from . import base
from ..db_search_fields import SearchData
from ... import results as nres
from ...connection import SearchConnection
from ...types import SearchDetails, Bbox


class CountrySearch(base.AbstractSearch):
    """ Search for a country name or country code.
    """
    SEARCH_PRIO = 0

    def __init__(self, sdata: SearchData) -> None:
        super().__init__(sdata.penalty)
        self.countries = sdata.countries

    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Find results for the search in the database.
        """
        t = conn.t.placex

        ccodes = self.countries.values
        sql = base.select_placex(t)\
            .add_columns(t.c.importance)\
            .where(t.c.country_code.in_(ccodes))\
            .where(t.c.rank_address == 4)

        if details.geometry_output:
            sql = base.add_geometry_columns(sql, t.c.geometry, details)

        if details.excluded:
            sql = sql.where(base.exclude_places(t))

        sql = base.filter_by_area(sql, t, details)

        bind_params = {
            'excluded': details.excluded,
            'viewbox': details.viewbox,
            'near': details.near,
            'near_radius': details.near_radius
        }

        results = nres.SearchResults()
        for row in await conn.execute(sql, bind_params):
            result = nres.create_from_placex_row(row, nres.SearchResult)
            assert result
            result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
            result.bbox = Bbox.from_wkb(row.bbox)
            results.append(result)

        if not results:
            results = await self.lookup_in_country_table(conn, details)

        if results:
            details.min_rank = min(5, details.max_rank)
            details.max_rank = min(25, details.max_rank)

        return results

    async def lookup_in_country_table(self, conn: SearchConnection,
                                      details: SearchDetails) -> nres.SearchResults:
        """ Look up the country in the fallback country tables.
        """
        # Avoid the fallback search when this is a 'more results' search.
        # Country results usually are in the first batch of results and
        # it is not possible to exclude these fallbacks.
        if details.excluded:
            return nres.SearchResults()

        t = conn.t.country_name
        tgrid = conn.t.country_grid

        sql = sa.select(tgrid.c.country_code,
                        tgrid.c.geometry.ST_Centroid().ST_Collect().ST_Centroid()
                             .label('centroid'),
                        tgrid.c.geometry.ST_Collect().ST_Expand(0).label('bbox'))\
            .where(tgrid.c.country_code.in_(self.countries.values))\
            .group_by(tgrid.c.country_code)

        sql = base.filter_by_area(sql, tgrid, details, avoid_index=True)

        sub = sql.subquery('grid')

        sql = sa.select(t.c.country_code,
                        t.c.name.merge(t.c.derived_name).label('name'),
                        sub.c.centroid, sub.c.bbox)\
            .join(sub, t.c.country_code == sub.c.country_code)

        if details.geometry_output:
            sql = base.add_geometry_columns(sql, sub.c.centroid, details)

        bind_params = {
            'viewbox': details.viewbox,
            'near': details.near,
            'near_radius': details.near_radius
        }

        results = nres.SearchResults()
        for row in await conn.execute(sql, bind_params):
            result = nres.create_from_country_row(row, nres.SearchResult)
            assert result
            result.bbox = Bbox.from_wkb(row.bbox)
            result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
            results.append(result)

        return results
@@ -1,136 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of a category search around a place.
"""
from typing import List, Tuple

import sqlalchemy as sa

from . import base
from ...typing import SaBind
from ...types import SearchDetails, Bbox
from ...connection import SearchConnection
from ... import results as nres
from ..db_search_fields import WeightedCategories


LIMIT_PARAM: SaBind = sa.bindparam('limit')
MIN_RANK_PARAM: SaBind = sa.bindparam('min_rank')
MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank')
COUNTRIES_PARAM: SaBind = sa.bindparam('countries')


class NearSearch(base.AbstractSearch):
    """ Category search of a place type near the result of another search.
    """
    def __init__(self, penalty: float, categories: WeightedCategories,
                 search: base.AbstractSearch) -> None:
        super().__init__(penalty)
        self.search = search
        self.categories = categories

    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Find results for the search in the database.
        """
        results = nres.SearchResults()
        base = await self.search.lookup(conn, details)

        if not base:
            return results

        base.sort(key=lambda r: (r.accuracy, r.rank_search))
        max_accuracy = base[0].accuracy + 0.5
        if base[0].rank_address == 0:
            min_rank = 0
            max_rank = 0
        elif base[0].rank_address < 26:
            min_rank = 1
            max_rank = min(25, base[0].rank_address + 4)
        else:
            min_rank = 26
            max_rank = 30
        base = nres.SearchResults(r for r in base
                                  if (r.source_table == nres.SourceTable.PLACEX
                                      and r.accuracy <= max_accuracy
                                      and r.bbox and r.bbox.area < 20
                                      and r.rank_address >= min_rank
                                      and r.rank_address <= max_rank))

        if base:
            baseids = [b.place_id for b in base[:5] if b.place_id]

            for category, penalty in self.categories:
                await self.lookup_category(results, conn, baseids, category, penalty, details)
                if len(results) >= details.max_results:
                    break

        return results

    async def lookup_category(self, results: nres.SearchResults,
                              conn: SearchConnection, ids: List[int],
                              category: Tuple[str, str], penalty: float,
                              details: SearchDetails) -> None:
        """ Find places of the given category near the list of
            place ids and add the results to 'results'.
        """
        table = await conn.get_class_table(*category)

        tgeom = conn.t.placex.alias('pgeom')

        if table is None:
            # No classtype table available, do a simplified lookup in placex.
            table = conn.t.placex
            sql = sa.select(table.c.place_id,
                            sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
                              .label('dist'))\
                .join(tgeom, table.c.geometry.intersects(tgeom.c.centroid.ST_Expand(0.01)))\
                .where(table.c.class_ == category[0])\
                .where(table.c.type == category[1])
        else:
            # Use classtype table. We can afford to use a larger
            # radius for the lookup.
            sql = sa.select(table.c.place_id,
                            sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
                              .label('dist'))\
                .join(tgeom,
                      table.c.centroid.ST_CoveredBy(
                          sa.case((sa.and_(tgeom.c.rank_address > 9,
                                           tgeom.c.geometry.is_area()),
                                   tgeom.c.geometry),
                                  else_=tgeom.c.centroid.ST_Expand(0.05))))

        inner = sql.where(tgeom.c.place_id.in_(ids))\
            .group_by(table.c.place_id).subquery()

        t = conn.t.placex
        sql = base.select_placex(t).add_columns((-inner.c.dist).label('importance'))\
            .join(inner, inner.c.place_id == t.c.place_id)\
            .order_by(inner.c.dist)

        sql = sql.where(base.no_index(t.c.rank_address).between(MIN_RANK_PARAM, MAX_RANK_PARAM))
        if details.countries:
            sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM))
        if details.excluded:
            sql = sql.where(base.exclude_places(t))
        if details.layers is not None:
            sql = sql.where(base.filter_by_layer(t, details.layers))

        sql = sql.limit(LIMIT_PARAM)

        bind_params = {'limit': details.max_results,
                       'min_rank': details.min_rank,
                       'max_rank': details.max_rank,
                       'excluded': details.excluded,
                       'countries': details.countries}
        for row in await conn.execute(sql, bind_params):
            result = nres.create_from_placex_row(row, nres.SearchResult)
            assert result
            result.accuracy = self.penalty + penalty
            result.bbox = Bbox.from_wkb(row.bbox)
            results.append(result)
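The rank window in `NearSearch.lookup` above derives the admissible address ranks for nearby results from the anchor result's own rank. A tiny pure-Python restatement of that mapping, purely for illustration:

```python
# Restatement of the rank window chosen around the anchor result:
# rank 0 anchors allow nothing, mid-rank anchors allow up to four
# ranks below themselves, street-level anchors only allow POI ranks.
from typing import Tuple

def rank_window(anchor_rank_address: int) -> Tuple[int, int]:
    if anchor_rank_address == 0:
        return 0, 0
    if anchor_rank_address < 26:
        return 1, min(25, anchor_rank_address + 4)
    return 26, 30

for rank in (0, 12, 26):
    print(rank, rank_window(rank))  # (0, 0), (1, 16), (26, 30)
```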
@@ -1,214 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of search for a named place (without housenumber).
"""
from typing import cast

import sqlalchemy as sa

from . import base
from ...typing import SaBind, SaExpression, SaColumn
from ...types import SearchDetails, Bbox
from ...sql.sqlalchemy_types import Geometry
from ...connection import SearchConnection
from ... import results as nres
from ..db_search_fields import SearchData


LIMIT_PARAM: SaBind = sa.bindparam('limit')
MIN_RANK_PARAM: SaBind = sa.bindparam('min_rank')
MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank')
VIEWBOX_PARAM: SaBind = sa.bindparam('viewbox', type_=Geometry)
VIEWBOX2_PARAM: SaBind = sa.bindparam('viewbox2', type_=Geometry)
NEAR_PARAM: SaBind = sa.bindparam('near', type_=Geometry)
NEAR_RADIUS_PARAM: SaBind = sa.bindparam('near_radius')
COUNTRIES_PARAM: SaBind = sa.bindparam('countries')


class PlaceSearch(base.AbstractSearch):
    """ Generic search for a named place.
    """
    SEARCH_PRIO = 1

    def __init__(self, extra_penalty: float, sdata: SearchData,
                 expected_count: int, has_address_terms: bool) -> None:
        assert not sdata.housenumbers
        super().__init__(sdata.penalty + extra_penalty)
        self.countries = sdata.countries
        self.postcodes = sdata.postcodes
        self.qualifiers = sdata.qualifiers
        self.lookups = sdata.lookups
        self.rankings = sdata.rankings
        self.expected_count = expected_count
        self.has_address_terms = has_address_terms

    def _inner_search_name_cte(self, conn: SearchConnection,
                               details: SearchDetails) -> 'sa.CTE':
        """ Create a subquery that preselects the rows in the search_name
            table.
        """
        t = conn.t.search_name

        penalty: SaExpression = sa.literal(self.penalty)
        for ranking in self.rankings:
            penalty += ranking.sql_penalty(t)

        sql = sa.select(t.c.place_id, t.c.search_rank, t.c.address_rank,
                        t.c.country_code, t.c.centroid,
                        t.c.name_vector, t.c.nameaddress_vector,
                        sa.case((t.c.importance > 0, t.c.importance),
                                else_=0.40001-(sa.cast(t.c.search_rank, sa.Float())/75))
                          .label('importance'),
                        penalty.label('penalty'))

        for lookup in self.lookups:
            sql = sql.where(lookup.sql_condition(t))

        if self.countries:
            sql = sql.where(t.c.country_code.in_(self.countries.values))

        if self.postcodes:
            # if a postcode is given, don't search for state or country level objects
            sql = sql.where(t.c.address_rank > 9)
            if self.expected_count > 10000:
                # Many results expected. Restrict by postcode.
                tpc = conn.t.postcode
                sql = sql.where(sa.select(tpc.c.postcode)
                                  .where(tpc.c.postcode.in_(self.postcodes.values))
                                  .where(t.c.centroid.within_distance(tpc.c.geometry, 0.4))
                                  .exists())

        if details.viewbox is not None:
            if details.bounded_viewbox:
                sql = sql.where(t.c.centroid
                                 .intersects(VIEWBOX_PARAM,
                                             use_index=details.viewbox.area < 0.2))
            elif not self.postcodes and self.expected_count >= 10000:
                sql = sql.where(t.c.centroid
                                 .intersects(VIEWBOX2_PARAM,
                                             use_index=details.viewbox.area < 0.5))

        if details.near is not None and details.near_radius is not None:
            if details.near_radius < 0.1:
                sql = sql.where(t.c.centroid.within_distance(NEAR_PARAM,
                                                             NEAR_RADIUS_PARAM))
            else:
                sql = sql.where(t.c.centroid
                                 .ST_Distance(NEAR_PARAM) < NEAR_RADIUS_PARAM)

        if details.excluded:
            sql = sql.where(base.exclude_places(t))
        if details.min_rank > 0:
            sql = sql.where(sa.or_(t.c.address_rank >= MIN_RANK_PARAM,
                                   t.c.search_rank >= MIN_RANK_PARAM))
        if details.max_rank < 30:
            sql = sql.where(sa.or_(t.c.address_rank <= MAX_RANK_PARAM,
                                   t.c.search_rank <= MAX_RANK_PARAM))

        inner = sql.limit(5000 if self.qualifiers else 1000)\
                   .order_by(sa.desc(sa.text('importance')))\
                   .subquery()

        sql = sa.select(inner.c.place_id, inner.c.search_rank, inner.c.address_rank,
                        inner.c.country_code, inner.c.centroid, inner.c.importance,
                        inner.c.penalty)

        # If the query is not an address search or has a geographic preference,
        # preselect most important items to restrict the number of places
        # that need to be looked up in placex.
        if (details.viewbox is None or details.bounded_viewbox)\
           and (details.near is None or details.near_radius is not None)\
           and not self.qualifiers:
            sql = sql.add_columns(sa.func.first_value(inner.c.penalty - inner.c.importance)
                                    .over(order_by=inner.c.penalty - inner.c.importance)
                                    .label('min_penalty'))

            inner = sql.subquery()

            sql = sa.select(inner.c.place_id, inner.c.search_rank, inner.c.address_rank,
                            inner.c.country_code, inner.c.centroid, inner.c.importance,
                            inner.c.penalty)\
                .where(inner.c.penalty - inner.c.importance < inner.c.min_penalty + 0.5)

        return sql.cte('searches')

    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Find results for the search in the database.
        """
        t = conn.t.placex
        tsearch = self._inner_search_name_cte(conn, details)

        sql = base.select_placex(t).join(tsearch, t.c.place_id == tsearch.c.place_id)

        if details.geometry_output:
            sql = base.add_geometry_columns(sql, t.c.geometry, details)

        penalty: SaExpression = tsearch.c.penalty

        if self.postcodes:
            if self.has_address_terms:
                tpc = conn.t.postcode
                pcs = self.postcodes.values

                pc_near = sa.select(sa.func.min(tpc.c.geometry.ST_Distance(t.c.centroid)))\
                    .where(tpc.c.postcode.in_(pcs))\
                    .scalar_subquery()
                penalty += sa.case((t.c.postcode.in_(pcs), 0.0),
                                   else_=sa.func.coalesce(pc_near, cast(SaColumn, 2.0)))
            else:
                # High penalty if the postcode is not an exact match.
                # The postcode search needs to get priority here.
                penalty += sa.case((t.c.postcode.in_(self.postcodes.values), 0.0), else_=1.0)

        if details.viewbox is not None and not details.bounded_viewbox:
            penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM, use_index=False), 0.0),
                               (t.c.geometry.intersects(VIEWBOX2_PARAM, use_index=False), 0.5),
                               else_=1.0)

        if details.near is not None:
            sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM))
                                  .label('importance'))
            sql = sql.order_by(sa.desc(sa.text('importance')))
        else:
            sql = sql.order_by(penalty - tsearch.c.importance)
            sql = sql.add_columns(tsearch.c.importance)

        sql = sql.add_columns(penalty.label('accuracy'))\
                 .order_by(sa.text('accuracy'))

        sql = sql.where(t.c.linked_place_id == None)\
                 .where(t.c.indexed_status == 0)
        if self.qualifiers:
            sql = sql.where(self.qualifiers.sql_restrict(t))
        if details.layers is not None:
            sql = sql.where(base.filter_by_layer(t, details.layers))

        sql = sql.limit(LIMIT_PARAM)

        bind_params = {
            'limit': details.max_results,
            'min_rank': details.min_rank,
            'max_rank': details.max_rank,
            'viewbox': details.viewbox,
            'viewbox2': details.viewbox_x2,
            'near': details.near,
            'near_radius': details.near_radius,
            'excluded': details.excluded,
            'countries': details.countries
        }

        results = nres.SearchResults()
        for row in await conn.execute(sql, bind_params):
            result = nres.create_from_placex_row(row, nres.SearchResult)
            assert result
            result.bbox = Bbox.from_wkb(row.bbox)
            result.accuracy = row.accuracy
            results.append(result)

        return results
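The `min_penalty` column in `_inner_search_name_cte` above is a window function: every preselected row carries the best `penalty - importance` of the whole batch, so a plain WHERE can then drop rows more than 0.5 worse than the leader without a second query. A generic sketch of the pattern in plain SQLAlchemy; the table and column names here are invented for illustration:

```python
# Sketch of the first_value() window used for preselection: attach the
# best score of the result set to every row, then filter relative to it.
import sqlalchemy as sa

meta = sa.MetaData()
t = sa.Table('scores', meta,
             sa.Column('id', sa.Integer),
             sa.Column('score', sa.Float))

inner = sa.select(t.c.id, t.c.score,
                  sa.func.first_value(t.c.score)
                    .over(order_by=t.c.score)
                    .label('best')).subquery()

# Keep only rows within 0.5 of the best-scoring row.
stmt = sa.select(inner.c.id).where(inner.c.score < inner.c.best + 0.5)
print(stmt)
```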
@@ -1,114 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of category search.
"""
from typing import List

import sqlalchemy as sa

from . import base
from ..db_search_fields import SearchData
from ... import results as nres
from ...typing import SaBind, SaRow, SaSelect, SaLambdaSelect
from ...sql.sqlalchemy_types import Geometry
from ...connection import SearchConnection
from ...types import SearchDetails, Bbox


LIMIT_PARAM: SaBind = sa.bindparam('limit')
VIEWBOX_PARAM: SaBind = sa.bindparam('viewbox', type_=Geometry)
NEAR_PARAM: SaBind = sa.bindparam('near', type_=Geometry)
NEAR_RADIUS_PARAM: SaBind = sa.bindparam('near_radius')


class PoiSearch(base.AbstractSearch):
    """ Category search in a geographic area.
    """
    def __init__(self, sdata: SearchData) -> None:
        super().__init__(sdata.penalty)
        self.qualifiers = sdata.qualifiers
        self.countries = sdata.countries

    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Find results for the search in the database.
        """
        bind_params = {
            'limit': details.max_results,
            'viewbox': details.viewbox,
            'near': details.near,
            'near_radius': details.near_radius,
            'excluded': details.excluded
        }

        t = conn.t.placex

        rows: List[SaRow] = []

        if details.near and details.near_radius is not None and details.near_radius < 0.2:
            # simply search in placex table
            def _base_query() -> SaSelect:
                return base.select_placex(t) \
                    .add_columns((-t.c.centroid.ST_Distance(NEAR_PARAM))
                                 .label('importance'))\
                    .where(t.c.linked_place_id == None) \
                    .where(t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
                    .order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) \
                    .limit(LIMIT_PARAM)

            classtype = self.qualifiers.values
            if len(classtype) == 1:
                cclass, ctype = classtype[0]
                sql: SaLambdaSelect = sa.lambda_stmt(
                    lambda: _base_query().where(t.c.class_ == cclass)
                                         .where(t.c.type == ctype))
            else:
                sql = _base_query().where(sa.or_(*(sa.and_(t.c.class_ == cls, t.c.type == typ)
                                                   for cls, typ in classtype)))

            if self.countries:
                sql = sql.where(t.c.country_code.in_(self.countries.values))

            if details.viewbox is not None and details.bounded_viewbox:
                sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))

            rows.extend(await conn.execute(sql, bind_params))
        else:
            # use the class type tables
            for category in self.qualifiers.values:
                table = await conn.get_class_table(*category)
                if table is not None:
                    sql = base.select_placex(t)\
                        .add_columns(t.c.importance)\
                        .join(table, t.c.place_id == table.c.place_id)\
                        .where(t.c.class_ == category[0])\
                        .where(t.c.type == category[1])

                    if details.viewbox is not None and details.bounded_viewbox:
                        sql = sql.where(table.c.centroid.intersects(VIEWBOX_PARAM))

                    if details.near and details.near_radius is not None:
                        sql = sql.order_by(table.c.centroid.ST_Distance(NEAR_PARAM))\
                                 .where(table.c.centroid.within_distance(NEAR_PARAM,
                                                                         NEAR_RADIUS_PARAM))

                    if self.countries:
                        sql = sql.where(t.c.country_code.in_(self.countries.values))

                    sql = sql.limit(LIMIT_PARAM)
                    rows.extend(await conn.execute(sql, bind_params))

        results = nres.SearchResults()
        for row in rows:
            result = nres.create_from_placex_row(row, nres.SearchResult)
            assert result
            result.accuracy = self.penalty + self.qualifiers.get_penalty((row.class_, row.type))
            result.bbox = Bbox.from_wkb(row.bbox)
            results.append(result)

        return results
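`sa.lambda_stmt` in the single-category branch above lets SQLAlchemy cache the analyzed statement structure across calls while closure variables are rebound on each invocation. A generic, self-contained sketch of the pattern (table invented for illustration; assumes SQLAlchemy 1.4 or later):

```python
# Sketch of the lambda_stmt caching pattern used for the single-category
# branch: the statement structure is analyzed once, closure values rebound.
import sqlalchemy as sa

meta = sa.MetaData()
pois = sa.Table('pois', meta,
                sa.Column('class_', sa.Text),
                sa.Column('type', sa.Text))

def make_stmt(cclass: str, ctype: str) -> sa.sql.lambdas.StatementLambdaElement:
    return sa.lambda_stmt(lambda: sa.select(pois)
                          .where(pois.c.class_ == cclass)
                          .where(pois.c.type == ctype))

print(make_stmt('amenity', 'restaurant'))
```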
@@ -1,129 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of search for a postcode.
"""

import sqlalchemy as sa

from . import base
from ...typing import SaBind, SaExpression
from ...sql.sqlalchemy_types import Geometry, IntArray
from ...connection import SearchConnection
from ...types import SearchDetails, Bbox
from ... import results as nres
from ..db_search_fields import SearchData


LIMIT_PARAM: SaBind = sa.bindparam('limit')
VIEWBOX_PARAM: SaBind = sa.bindparam('viewbox', type_=Geometry)
VIEWBOX2_PARAM: SaBind = sa.bindparam('viewbox2', type_=Geometry)
NEAR_PARAM: SaBind = sa.bindparam('near', type_=Geometry)


class PostcodeSearch(base.AbstractSearch):
    """ Search for a postcode.
    """
    def __init__(self, extra_penalty: float, sdata: SearchData) -> None:
        super().__init__(sdata.penalty + extra_penalty)
        self.countries = sdata.countries
        self.postcodes = sdata.postcodes
        self.lookups = sdata.lookups
        self.rankings = sdata.rankings

    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Find results for the search in the database.
        """
        t = conn.t.postcode
        pcs = self.postcodes.values

        sql = sa.select(t.c.place_id, t.c.parent_place_id,
                        t.c.rank_search, t.c.rank_address,
                        t.c.postcode, t.c.country_code,
                        t.c.geometry.label('centroid'))\
            .where(t.c.postcode.in_(pcs))

        if details.geometry_output:
            sql = base.add_geometry_columns(sql, t.c.geometry, details)

        penalty: SaExpression = sa.literal(self.penalty)

        if details.viewbox is not None and not details.bounded_viewbox:
            penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
                               (t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5),
                               else_=1.0)

        if details.near is not None:
            sql = sql.order_by(t.c.geometry.ST_Distance(NEAR_PARAM))

        sql = base.filter_by_area(sql, t, details)

        if self.countries:
            sql = sql.where(t.c.country_code.in_(self.countries.values))

        if details.excluded:
            sql = sql.where(base.exclude_places(t))

        if self.lookups:
            assert len(self.lookups) == 1
            tsearch = conn.t.search_name
            sql = sql.where(tsearch.c.place_id == t.c.parent_place_id)\
                     .where((tsearch.c.name_vector + tsearch.c.nameaddress_vector)
                            .contains(sa.type_coerce(self.lookups[0].tokens,
                                                     IntArray)))
            # Do NOT add rerank penalties based on the address terms.
            # The standard rerank penalty only checks the address vector
            # while terms may appear in name and address vector. This would
            # lead to overly high penalties.
            # We assume that a postcode is precise enough to not require
            # additional full name matches.

        penalty += sa.case(*((t.c.postcode == v, p) for v, p in self.postcodes),
                           else_=1.0)

        sql = sql.add_columns(penalty.label('accuracy'))
        sql = sql.order_by('accuracy').limit(LIMIT_PARAM)

        bind_params = {
            'limit': details.max_results,
            'viewbox': details.viewbox,
            'viewbox2': details.viewbox_x2,
            'near': details.near,
            'near_radius': details.near_radius,
            'excluded': details.excluded
        }

        results = nres.SearchResults()
        for row in await conn.execute(sql, bind_params):
            p = conn.t.placex
            placex_sql = base.select_placex(p)\
                .add_columns(p.c.importance)\
                .where(sa.text("""class = 'boundary'
                                  AND type = 'postal_code'
                                  AND osm_type = 'R'"""))\
                .where(p.c.country_code == row.country_code)\
                .where(p.c.postcode == row.postcode)\
                .limit(1)

            if details.geometry_output:
                placex_sql = base.add_geometry_columns(placex_sql, p.c.geometry, details)

            for prow in await conn.execute(placex_sql, bind_params):
                result = nres.create_from_placex_row(prow, nres.SearchResult)
                if result is not None:
                    result.bbox = Bbox.from_wkb(prow.bbox)
                    break
            else:
                result = nres.create_from_postcode_row(row, nres.SearchResult)

            assert result
            if result.place_id not in details.excluded:
                result.accuracy = row.accuracy
                results.append(result)

        return results
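The CASE expression near the end of `PostcodeSearch.lookup` works because `self.postcodes` unpacks into (value, penalty) pairs, so each accepted spelling variant of the postcode carries its own penalty while anything else costs a flat 1.0. A plain-SQLAlchemy sketch of the same construction (table and sample variants invented for illustration):

```python
# Sketch of the per-variant CASE penalty: each accepted postcode spelling
# carries its own penalty, any other value costs a flat 1.0.
import sqlalchemy as sa

meta = sa.MetaData()
t = sa.Table('postcode', meta, sa.Column('postcode', sa.Text))

variants = [('12345', 0.0), ('1 2345', 0.1)]  # (value, penalty) pairs
penalty = sa.case(*((t.c.postcode == v, p) for v, p in variants), else_=1.0)
print(penalty)
```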
@@ -2,7 +2,7 @@
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
-# Copyright (C) 2025 by the Nominatim developer community.
+# Copyright (C) 2024 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 Public interface to the search code.
@@ -50,9 +50,6 @@ class ForwardGeocoder:
             self.query_analyzer = await make_query_analyzer(self.conn)
 
         query = await self.query_analyzer.analyze_query(phrases)
-        query.compute_direction_penalty()
-        log().var_dump('Query direction penalty',
-                       lambda: f"[{'LR' if query.dir_penalty < 0 else 'RL'}] {query.dir_penalty}")
 
         searches: List[AbstractSearch] = []
         if query.num_token_slots() > 0:
@@ -83,7 +80,7 @@ class ForwardGeocoder:
         min_ranking = searches[0].penalty + 2.0
         prev_penalty = 0.0
         for i, search in enumerate(searches):
-            if search.penalty > prev_penalty and (search.penalty > min_ranking or i > 15):
+            if search.penalty > prev_penalty and (search.penalty > min_ranking or i > 20):
                 break
             log().table_dump(f"{i + 1}. Search", _dump_searches([search], query))
             log().var_dump('Params', self.params)
@@ -118,20 +115,17 @@ class ForwardGeocoder:
         """ Remove badly matching results, sort by ranking and
             limit to the configured number of results.
         """
-        results.sort(key=lambda r: (r.ranking, 0 if r.bbox is None else -r.bbox.area))
+        if results:
+            results.sort(key=lambda r: (r.ranking, 0 if r.bbox is None else -r.bbox.area))
+            min_rank = results[0].rank_search
+            min_ranking = results[0].ranking
+            results = SearchResults(r for r in results
+                                    if (r.ranking + 0.03 * (r.rank_search - min_rank)
+                                        < min_ranking + 0.5))
 
-        final = SearchResults()
-        min_rank = results[0].rank_search
-        min_ranking = results[0].ranking
+            results = SearchResults(results[:self.limit])
 
-        for r in results:
-            if r.ranking + 0.03 * (r.rank_search - min_rank) < min_ranking + 0.5:
-                final.append(r)
-                min_rank = min(r.rank_search, min_rank)
-                if len(final) == self.limit:
-                    break
-
-        return final
+        return results
 
     def rerank_by_query(self, query: QueryStruct, results: SearchResults) -> None:
         """ Adjust the accuracy of the localized result according to how well
@@ -156,16 +150,17 @@ class ForwardGeocoder:
             if not words:
                 continue
             for qword in qwords:
-                # only add distance penalty if there is no perfect match
-                if qword not in words:
-                    wdist = max(difflib.SequenceMatcher(a=qword, b=w).quick_ratio() for w in words)
-                    distance += len(qword) if wdist < 0.4 else 1
+                wdist = max(difflib.SequenceMatcher(a=qword, b=w).quick_ratio() for w in words)
+                if wdist < 0.5:
+                    distance += len(qword)
+                else:
+                    distance += (1.0 - wdist) * len(qword)
             # Compensate for the fact that country names do not get a
            # match penalty yet by the tokenizer.
             # Temporary hack that needs to be removed!
             if result.rank_address == 4:
                 distance *= 2
-            result.accuracy += distance * 0.3 / sum(len(w) for w in qwords)
+            result.accuracy += distance * 0.4 / sum(len(w) for w in qwords)
 
     async def lookup_pois(self, categories: List[Tuple[str, str]],
                           phrases: List[Phrase]) -> SearchResults:
@@ -213,10 +208,9 @@ class ForwardGeocoder:
             results = self.pre_filter_results(results)
             await add_result_details(self.conn, results, self.params)
             log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
-            if len(results) > 1:
-                self.rerank_by_query(query, results)
-                log().result_dump('Results after reranking', ((r.accuracy, r) for r in results))
-            results = self.sort_and_cut_results(results)
+            self.rerank_by_query(query, results)
+            log().result_dump('Results after reranking', ((r.accuracy, r) for r in results))
+            results = self.sort_and_cut_results(results)
             log().result_dump('Final Results', ((r.accuracy, r) for r in results))
 
         return results
@@ -244,7 +238,7 @@ def _dump_searches(searches: List[AbstractSearch], query: QueryStruct,
         if not lk:
             return ''
 
-        return f"{lk.lookup_type.__name__}({lk.column}{tk(lk.tokens)})"
+        return f"{lk.lookup_type}({lk.column}{tk(lk.tokens)})"
 
     def fmt_cstr(c: Any) -> str:
        if not c:
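Both variants of `sort_and_cut_results` above apply the same cutoff: a result is kept while its ranking, corrected by 0.03 per address-rank step above the best result, stays within 0.5 of the best ranking. A small worked example with invented numbers:

```python
# Worked example of the result cutoff: ranking corrected by rank_search
# distance from the best result must stay within 0.5 of the best ranking.
min_rank = 16      # rank_search of the best result
min_ranking = 0.2  # ranking of the best result

candidates = [(0.3, 16), (0.55, 18), (0.8, 16)]  # (ranking, rank_search)
kept = [(rk, rs) for rk, rs in candidates
        if rk + 0.03 * (rs - min_rank) < min_ranking + 0.5]
print(kept)  # [(0.3, 16), (0.55, 18)] - the third candidate exceeds 0.7
```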
@@ -2,12 +2,13 @@
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2025 by the Nominatim developer community.
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of query analysis for the ICU tokenizer.
|
||||
"""
|
||||
from typing import Tuple, Dict, List, Optional, Iterator, Any, cast
|
||||
from collections import defaultdict
|
||||
import dataclasses
|
||||
import difflib
|
||||
import re
|
||||
@@ -24,30 +25,62 @@ from ..connection import SearchConnection
|
||||
from ..logging import log
|
||||
from . import query as qmod
|
||||
from ..query_preprocessing.config import QueryConfig
|
||||
from ..query_preprocessing.base import QueryProcessingFunc
|
||||
from .query_analyzer_factory import AbstractQueryAnalyzer
|
||||
from .postcode_parser import PostcodeParser
|
||||
|
||||
|
||||
DB_TO_TOKEN_TYPE = {
|
||||
'W': qmod.TOKEN_WORD,
|
||||
'w': qmod.TOKEN_PARTIAL,
|
||||
'H': qmod.TOKEN_HOUSENUMBER,
|
||||
'P': qmod.TOKEN_POSTCODE,
|
||||
'C': qmod.TOKEN_COUNTRY
|
||||
'W': qmod.TokenType.WORD,
|
||||
'w': qmod.TokenType.PARTIAL,
|
||||
'H': qmod.TokenType.HOUSENUMBER,
|
||||
'P': qmod.TokenType.POSTCODE,
|
||||
'C': qmod.TokenType.COUNTRY
|
||||
}
|
||||
|
||||
PENALTY_BREAK = {
|
||||
qmod.BREAK_START: -0.5,
|
||||
qmod.BREAK_END: -0.5,
|
||||
qmod.BREAK_PHRASE: -0.5,
|
||||
qmod.BREAK_SOFT_PHRASE: -0.5,
|
||||
qmod.BREAK_WORD: 0.1,
|
||||
qmod.BREAK_PART: 0.2,
|
||||
qmod.BREAK_TOKEN: 0.4
|
||||
PENALTY_IN_TOKEN_BREAK = {
|
||||
qmod.BreakType.START: 0.5,
|
||||
qmod.BreakType.END: 0.5,
|
||||
qmod.BreakType.PHRASE: 0.5,
|
||||
qmod.BreakType.SOFT_PHRASE: 0.5,
|
||||
qmod.BreakType.WORD: 0.1,
|
||||
qmod.BreakType.PART: 0.0,
|
||||
qmod.BreakType.TOKEN: 0.0
|
||||
}
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class QueryPart:
|
||||
""" Normalized and transliterated form of a single term in the query.
|
||||
When the term came out of a split during the transliteration,
|
||||
the normalized string is the full word before transliteration.
|
||||
The word number keeps track of the word before transliteration
|
||||
and can be used to identify partial transliterated terms.
|
||||
Penalty is the break penalty for the break following the token.
|
||||
"""
|
||||
token: str
|
||||
normalized: str
|
||||
word_number: int
|
||||
penalty: float
|
||||
|
||||
|
||||
QueryParts = List[QueryPart]
|
||||
WordDict = Dict[str, List[qmod.TokenRange]]
|
||||
|
||||
|
||||
def yield_words(terms: List[QueryPart], start: int) -> Iterator[Tuple[str, qmod.TokenRange]]:
|
||||
""" Return all combinations of words in the terms list after the
|
||||
given position.
|
||||
"""
|
||||
total = len(terms)
|
||||
for first in range(start, total):
|
||||
word = terms[first].token
|
||||
penalty = PENALTY_IN_TOKEN_BREAK[qmod.BreakType.WORD]
|
||||
yield word, qmod.TokenRange(first, first + 1, penalty=penalty)
|
||||
for last in range(first + 1, min(first + 20, total)):
|
||||
word = ' '.join((word, terms[last].token))
|
||||
penalty += terms[last - 1].penalty
|
||||
yield word, qmod.TokenRange(first, last + 1, penalty=penalty)
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class ICUToken(qmod.Token):
|
||||
""" Specialised token for ICU tokenizer.
|
||||
@@ -78,13 +111,13 @@ class ICUToken(qmod.Token):
|
||||
self.penalty += (distance/len(self.lookup_word))
|
||||
|
||||
@staticmethod
|
||||
def from_db_row(row: SaRow) -> 'ICUToken':
|
||||
def from_db_row(row: SaRow, base_penalty: float = 0.0) -> 'ICUToken':
|
||||
""" Create a ICUToken from the row of the word table.
|
||||
"""
|
||||
count = 1 if row.info is None else row.info.get('count', 1)
|
||||
addr_count = 1 if row.info is None else row.info.get('addr_count', 1)
|
||||
|
||||
penalty = 0.0
|
||||
penalty = base_penalty
|
||||
if row.type == 'w':
|
||||
penalty += 0.3
|
||||
elif row.type == 'W':
|
||||
@@ -113,51 +146,60 @@ class ICUToken(qmod.Token):
|
||||
addr_count=max(1, addr_count))
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class ICUAnalyzerConfig:
|
||||
postcode_parser: PostcodeParser
|
||||
normalizer: Transliterator
|
||||
transliterator: Transliterator
|
||||
preprocessors: List[QueryProcessingFunc]
|
||||
class ICUQueryAnalyzer(AbstractQueryAnalyzer):
|
||||
""" Converter for query strings into a tokenized query
|
||||
using the tokens created by a ICU tokenizer.
|
||||
"""
|
||||
def __init__(self, conn: SearchConnection) -> None:
|
||||
self.conn = conn
|
||||
|
||||
@staticmethod
|
||||
async def create(conn: SearchConnection) -> 'ICUAnalyzerConfig':
|
||||
rules = await conn.get_property('tokenizer_import_normalisation')
|
||||
normalizer = Transliterator.createFromRules("normalization", rules)
|
||||
async def setup(self) -> None:
|
||||
""" Set up static data structures needed for the analysis.
|
||||
"""
|
||||
async def _make_normalizer() -> Any:
|
||||
rules = await self.conn.get_property('tokenizer_import_normalisation')
|
||||
return Transliterator.createFromRules("normalization", rules)
|
||||
|
||||
rules = await conn.get_property('tokenizer_import_transliteration')
|
||||
transliterator = Transliterator.createFromRules("transliteration", rules)
|
||||
self.normalizer = await self.conn.get_cached_value('ICUTOK', 'normalizer',
|
||||
_make_normalizer)
|
||||
|
||||
preprocessing_rules = conn.config.load_sub_configuration('icu_tokenizer.yaml',
|
||||
config='TOKENIZER_CONFIG')\
|
||||
.get('query-preprocessing', [])
|
||||
async def _make_transliterator() -> Any:
|
||||
rules = await self.conn.get_property('tokenizer_import_transliteration')
|
||||
return Transliterator.createFromRules("transliteration", rules)
|
||||
|
||||
self.transliterator = await self.conn.get_cached_value('ICUTOK', 'transliterator',
|
||||
_make_transliterator)
|
||||
|
||||
await self._setup_preprocessing()
|
||||
|
||||
if 'word' not in self.conn.t.meta.tables:
|
||||
sa.Table('word', self.conn.t.meta,
|
||||
sa.Column('word_id', sa.Integer),
|
||||
sa.Column('word_token', sa.Text, nullable=False),
|
||||
sa.Column('type', sa.Text, nullable=False),
|
||||
sa.Column('word', sa.Text),
|
||||
sa.Column('info', Json))
|
||||
|
||||
async def _setup_preprocessing(self) -> None:
|
||||
""" Load the rules for preprocessing and set up the handlers.
|
||||
"""
|
||||
|
||||
rules = self.conn.config.load_sub_configuration('icu_tokenizer.yaml',
|
||||
config='TOKENIZER_CONFIG')
|
||||
preprocessing_rules = rules.get('query-preprocessing', [])
|
||||
|
||||
self.preprocessors = []
|
||||
|
||||
preprocessors: List[QueryProcessingFunc] = []
|
||||
for func in preprocessing_rules:
|
||||
if 'step' not in func:
|
||||
raise UsageError("Preprocessing rule is missing the 'step' attribute.")
|
||||
if not isinstance(func['step'], str):
|
||||
raise UsageError("'step' attribute must be a simple string.")
|
||||
|
||||
module = conn.config.load_plugin_module(
|
||||
module = self.conn.config.load_plugin_module(
|
||||
func['step'], 'nominatim_api.query_preprocessing')
|
||||
preprocessors.append(
|
||||
module.create(QueryConfig(func).set_normalizer(normalizer)))
|
||||
|
||||
return ICUAnalyzerConfig(PostcodeParser(conn.config),
|
||||
normalizer, transliterator, preprocessors)
|
||||
|
||||
|
||||
class ICUQueryAnalyzer(AbstractQueryAnalyzer):
|
||||
""" Converter for query strings into a tokenized query
|
||||
using the tokens created by a ICU tokenizer.
|
||||
"""
|
||||
def __init__(self, conn: SearchConnection, config: ICUAnalyzerConfig) -> None:
|
||||
self.conn = conn
|
||||
self.postcode_parser = config.postcode_parser
|
||||
self.normalizer = config.normalizer
|
||||
self.transliterator = config.transliterator
|
||||
self.preprocessors = config.preprocessors
|
||||
self.preprocessors.append(
|
||||
module.create(QueryConfig(func).set_normalizer(self.normalizer)))
|
||||
|
||||
async def analyze_query(self, phrases: List[qmod.Phrase]) -> qmod.QueryStruct:
|
||||
""" Analyze the given list of phrases and return the
|
||||
@@ -172,38 +214,26 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
|
||||
if not query.source:
|
||||
return query
|
||||
|
||||
self.split_query(query)
|
||||
log().var_dump('Transliterated query', lambda: query.get_transliterated_query())
|
||||
words = query.extract_words()
|
||||
parts, words = self.split_query(query)
|
||||
log().var_dump('Transliterated query', lambda: _dump_transliterated(query, parts))
|
||||
|
||||
for row in await self.lookup_in_db(list(words.keys())):
|
||||
for trange in words[row.word_token]:
|
||||
# Create a new token for each position because the token
|
||||
# penalty can vary depending on the position in the query.
|
||||
# (See rerank_tokens() below.)
|
||||
token = ICUToken.from_db_row(row)
|
||||
token = ICUToken.from_db_row(row, trange.penalty or 0.0)
|
||||
if row.type == 'S':
|
||||
if row.info['op'] in ('in', 'near'):
|
||||
if trange.start == 0:
|
||||
query.add_token(trange, qmod.TOKEN_NEAR_ITEM, token)
|
||||
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
|
||||
else:
|
||||
if trange.start == 0 and trange.end == query.num_token_slots():
|
||||
query.add_token(trange, qmod.TOKEN_NEAR_ITEM, token)
|
||||
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
|
||||
else:
|
||||
query.add_token(trange, qmod.TOKEN_QUALIFIER, token)
|
||||
query.add_token(trange, qmod.TokenType.QUALIFIER, token)
|
||||
else:
|
||||
query.add_token(trange, DB_TO_TOKEN_TYPE[row.type], token)
|
||||
|
||||
self.add_extra_tokens(query)
|
||||
for start, end, pc in self.postcode_parser.parse(query):
|
||||
term = ' '.join(n.term_lookup for n in query.nodes[start + 1:end + 1])
|
||||
query.add_token(qmod.TokenRange(start, end),
|
||||
qmod.TOKEN_POSTCODE,
|
||||
ICUToken(penalty=0.1, token=0, count=1, addr_count=1,
|
||||
lookup_word=pc, word_token=term,
|
||||
info=None))
|
||||
self.rerank_tokens(query)
|
||||
self.compute_break_penalties(query)
|
||||
self.add_extra_tokens(query, parts)
|
||||
self.rerank_tokens(query, parts)
|
||||
|
||||
log().table_dump('Word tokens', _dump_word_tokens(query))
|
||||
|
||||
@@ -214,11 +244,19 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
|
||||
standardized form search will work with. All information removed
|
||||
at this stage is inevitably lost.
|
||||
"""
|
||||
return cast(str, self.normalizer.transliterate(text)).strip('-: ')
|
||||
return cast(str, self.normalizer.transliterate(text))
|
||||
|
||||
def split_query(self, query: qmod.QueryStruct) -> None:
|
||||
def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]:
|
||||
""" Transliterate the phrases and split them into tokens.
|
||||
|
||||
Returns the list of transliterated tokens together with their
|
||||
normalized form and a dictionary of words for lookup together
|
||||
with their position.
|
||||
"""
|
||||
parts: QueryParts = []
|
||||
phrase_start = 0
|
||||
words = defaultdict(list)
|
||||
wordnr = 0
|
||||
for phrase in query.source:
|
||||
query.nodes[-1].ptype = phrase.ptype
|
||||
phrase_split = re.split('([ :-])', phrase.text)
|
||||
@@ -233,92 +271,78 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
                if trans:
                    for term in trans.split(' '):
                        if term:
                            query.add_node(qmod.BREAK_TOKEN, phrase.ptype, term, word)
                            query.nodes[-1].btype = breakchar
                            parts.append(QueryPart(term, word, wordnr,
                                                   PENALTY_IN_TOKEN_BREAK[qmod.BreakType.TOKEN]))
                            query.add_node(qmod.BreakType.TOKEN, phrase.ptype)
                    query.nodes[-1].btype = qmod.BreakType(breakchar)
                    parts[-1].penalty = PENALTY_IN_TOKEN_BREAK[qmod.BreakType(breakchar)]
                wordnr += 1

            query.nodes[-1].btype = qmod.BREAK_END
            for word, wrange in yield_words(parts, phrase_start):
                words[word].append(wrange)

            phrase_start = len(parts)
        query.nodes[-1].btype = qmod.BreakType.END

        return parts, words

    async def lookup_in_db(self, words: List[str]) -> 'sa.Result[Any]':
        """ Return the token information from the database for the
            given word tokens.

            This function excludes postcode tokens.
        """
        t = self.conn.t.meta.tables['word']
        return await self.conn.execute(t.select()
                                       .where(t.c.word_token.in_(words))
                                       .where(t.c.type != 'P'))
        return await self.conn.execute(t.select().where(t.c.word_token.in_(words)))

    def add_extra_tokens(self, query: qmod.QueryStruct) -> None:
    def add_extra_tokens(self, query: qmod.QueryStruct, parts: QueryParts) -> None:
        """ Add tokens to query that are not saved in the database.
        """
        need_hnr = False
        for i, node in enumerate(query.nodes):
            is_full_token = node.btype not in (qmod.BREAK_TOKEN, qmod.BREAK_PART)
            if need_hnr and is_full_token \
                    and len(node.term_normalized) <= 4 and node.term_normalized.isdigit():
                query.add_token(qmod.TokenRange(i-1, i), qmod.TOKEN_HOUSENUMBER,
        for part, node, i in zip(parts, query.nodes, range(1000)):
            if len(part.token) <= 4 and part.token.isdigit()\
               and not node.has_tokens(i+1, qmod.TokenType.HOUSENUMBER):
                query.add_token(qmod.TokenRange(i, i+1), qmod.TokenType.HOUSENUMBER,
                                ICUToken(penalty=0.5, token=0,
                                         count=1, addr_count=1,
                                         lookup_word=node.term_lookup,
                                         word_token=node.term_lookup, info=None))
                                         count=1, addr_count=1, lookup_word=part.token,
                                         word_token=part.token, info=None))

            need_hnr = is_full_token and not node.has_tokens(i+1, qmod.TOKEN_HOUSENUMBER)

    def rerank_tokens(self, query: qmod.QueryStruct) -> None:
    def rerank_tokens(self, query: qmod.QueryStruct, parts: QueryParts) -> None:
        """ Add penalties to tokens that depend on the presence of other tokens.
        """
        for start, end, tlist in query.iter_tokens_by_edge():
            if len(tlist) > 1:
                # If it looks like a Postcode, give preference.
                if qmod.TOKEN_POSTCODE in tlist:
                    for ttype, tokens in tlist.items():
                        if ttype != qmod.TOKEN_POSTCODE and \
                           (ttype != qmod.TOKEN_HOUSENUMBER or
                            start + 1 > end or
                            len(query.nodes[end].term_lookup) > 4):
                            for token in tokens:
                                token.penalty += 0.39
        for i, node, tlist in query.iter_token_lists():
            if tlist.ttype == qmod.TokenType.POSTCODE:
                for repl in node.starting:
                    if repl.end == tlist.end and repl.ttype != qmod.TokenType.POSTCODE \
                       and (repl.ttype != qmod.TokenType.HOUSENUMBER
                            or len(tlist.tokens[0].lookup_word) > 4):
                        repl.add_penalty(0.39)
            elif (tlist.ttype == qmod.TokenType.HOUSENUMBER
                  and len(tlist.tokens[0].lookup_word) <= 3):
                if any(c.isdigit() for c in tlist.tokens[0].lookup_word):
                    for repl in node.starting:
                        if repl.end == tlist.end and repl.ttype != qmod.TokenType.HOUSENUMBER:
                            repl.add_penalty(0.5 - tlist.tokens[0].penalty)
            elif tlist.ttype not in (qmod.TokenType.COUNTRY, qmod.TokenType.PARTIAL):
                norm = parts[i].normalized
                for j in range(i + 1, tlist.end):
                    if parts[j - 1].word_number != parts[j].word_number:
                        norm += ' ' + parts[j].normalized
                for token in tlist.tokens:
                    cast(ICUToken, token).rematch(norm)

                # If it looks like a simple housenumber, prefer that.
                if qmod.TOKEN_HOUSENUMBER in tlist:
                    hnr_lookup = tlist[qmod.TOKEN_HOUSENUMBER][0].lookup_word
                    if len(hnr_lookup) <= 3 and any(c.isdigit() for c in hnr_lookup):
                        penalty = 0.5 - tlist[qmod.TOKEN_HOUSENUMBER][0].penalty
                        for ttype, tokens in tlist.items():
                            if ttype != qmod.TOKEN_HOUSENUMBER:
                                for token in tokens:
                                    token.penalty += penalty

                # rerank tokens against the normalized form
                norm = ' '.join(n.term_normalized for n in query.nodes[start + 1:end + 1]
                                if n.btype != qmod.BREAK_TOKEN)
                if not norm:
                    # Can happen when the token only covers a partial term
                    norm = query.nodes[start + 1].term_normalized
                for ttype, tokens in tlist.items():
                    if ttype != qmod.TOKEN_COUNTRY:
                        for token in tokens:
                            cast(ICUToken, token).rematch(norm)

    def compute_break_penalties(self, query: qmod.QueryStruct) -> None:
        """ Set the break penalties for the nodes in the query.
        """
        for node in query.nodes:
            node.penalty = PENALTY_BREAK[node.btype]
def _dump_transliterated(query: qmod.QueryStruct, parts: QueryParts) -> str:
    out = query.nodes[0].btype.value
    for node, part in zip(query.nodes[1:], parts):
        out += part.token + node.btype.value
    return out


def _dump_word_tokens(query: qmod.QueryStruct) -> Iterator[List[Any]]:
    yield ['type', 'from', 'to', 'token', 'word_token', 'lookup_word', 'penalty', 'count', 'info']
    for i, node in enumerate(query.nodes):
        if node.partial is not None:
            t = cast(ICUToken, node.partial)
            yield [qmod.TOKEN_PARTIAL, str(i), str(i + 1), t.token,
                   t.word_token, t.lookup_word, t.penalty, t.count, t.info]
    yield ['type', 'token', 'word_token', 'lookup_word', 'penalty', 'count', 'info']
    for node in query.nodes:
        for tlist in node.starting:
            for token in tlist.tokens:
                t = cast(ICUToken, token)
                yield [tlist.ttype, str(i), str(tlist.end), t.token, t.word_token or '',
                yield [tlist.ttype.name, t.token, t.word_token or '',
                       t.lookup_word or '', t.penalty, t.count, t.info]


@@ -326,17 +350,7 @@ async def create_query_analyzer(conn: SearchConnection) -> AbstractQueryAnalyzer
    """ Create and set up a new query analyzer for a database based
        on the ICU tokenizer.
    """
    async def _get_config() -> ICUAnalyzerConfig:
        if 'word' not in conn.t.meta.tables:
            sa.Table('word', conn.t.meta,
                     sa.Column('word_id', sa.Integer),
                     sa.Column('word_token', sa.Text, nullable=False),
                     sa.Column('type', sa.Text, nullable=False),
                     sa.Column('word', sa.Text),
                     sa.Column('info', Json))
    out = ICUQueryAnalyzer(conn)
    await out.setup()

        return await ICUAnalyzerConfig.create(conn)

    config = await conn.get_cached_value('ICUTOK', 'config', _get_config)

    return ICUQueryAnalyzer(conn, config)
    return out
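
The factory above is typically driven like this; a hedged sketch, where 'conn' is an open SearchConnection and 'phrases' a list of qmod.Phrase (both names assumed here for illustration):

    # analyzer = await create_query_analyzer(conn)
    # query = await analyzer.analyze_query(phrases)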

@@ -1,104 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Handling of arbitrary postcode tokens in tokenized query string.
"""
from typing import Tuple, Set, Dict, List
import re
from collections import defaultdict

import yaml

from ..config import Configuration
from . import query as qmod


class PostcodeParser:
    """ Pattern-based parser for postcodes in tokenized queries.

        The postcode patterns are read from the country configuration.
        The parser currently does not return country restrictions.
    """

    def __init__(self, config: Configuration) -> None:
        # skip over includes here to avoid loading the complete country name data
        yaml.add_constructor('!include', lambda loader, node: [],
                             Loader=yaml.SafeLoader)
        cdata = yaml.safe_load(config.find_config_file('country_settings.yaml')
                                     .read_text(encoding='utf-8'))

        unique_patterns: Dict[str, Dict[str, List[str]]] = {}
        for cc, data in cdata.items():
            if data.get('postcode'):
                pat = data['postcode']['pattern'].replace('d', '[0-9]').replace('l', '[A-Z]')
                out = data['postcode'].get('output')
                if pat not in unique_patterns:
                    unique_patterns[pat] = defaultdict(list)
                unique_patterns[pat][out].append(cc.upper())

        self.global_pattern = re.compile(
            '(?:(?P<cc>[A-Z][A-Z])(?P<space>[ -]?))?(?P<pc>(?:(?:'
            + ')|(?:'.join(unique_patterns) + '))[:, >].*)')

        self.local_patterns = [(re.compile(f"{pat}[:, >]"), list(info.items()))
                               for pat, info in unique_patterns.items()]

    def parse(self, query: qmod.QueryStruct) -> Set[Tuple[int, int, str]]:
        """ Parse postcodes in the given list of query tokens taking into
            account the list of breaks from the nodes.

            The result is a sequence of tuples with
            [start node id, end node id, postcode token]
        """
        nodes = query.nodes
        outcodes: Set[Tuple[int, int, str]] = set()

        terms = [n.term_normalized.upper() + n.btype for n in nodes]
        for i in range(query.num_token_slots()):
            if nodes[i].btype in '<,: ' and nodes[i + 1].btype != '`' \
               and (i == 0 or nodes[i - 1].ptype != qmod.PHRASE_POSTCODE):
                if nodes[i].ptype == qmod.PHRASE_ANY:
                    word = terms[i + 1]
                    if word[-1] in ' -' and nodes[i + 2].btype != '`' \
                       and nodes[i + 1].ptype == qmod.PHRASE_ANY:
                        word += terms[i + 2]
                        if word[-1] in ' -' and nodes[i + 3].btype != '`' \
                           and nodes[i + 2].ptype == qmod.PHRASE_ANY:
                            word += terms[i + 3]

                    self._match_word(word, i, False, outcodes)
                elif nodes[i].ptype == qmod.PHRASE_POSTCODE:
                    word = terms[i + 1]
                    for j in range(i + 1, query.num_token_slots()):
                        if nodes[j].ptype != qmod.PHRASE_POSTCODE:
                            break
                        word += terms[j + 1]

                    self._match_word(word, i, True, outcodes)

        return outcodes

    def _match_word(self, word: str, pos: int, fullmatch: bool,
                    outcodes: Set[Tuple[int, int, str]]) -> None:
        # Use global pattern to check for presence of any postcode.
        m = self.global_pattern.fullmatch(word)
        if m:
            # If there was a match, check against each pattern separately
            # because multiple patterns might be matching at the end.
            cc = m.group('cc')
            pc_word = m.group('pc')
            cc_spaces = len(m.group('space') or '')
            for pattern, info in self.local_patterns:
                lm = pattern.fullmatch(pc_word) if fullmatch else pattern.match(pc_word)
                if lm:
                    trange = (pos, pos + cc_spaces + sum(c in ' ,-:>' for c in lm.group(0)))
                    for out, out_ccs in info:
                        if cc is None or cc in out_ccs:
                            if out:
                                outcodes.add((*trange, lm.expand(out)))
                            else:
                                outcodes.add((*trange, lm.group(0)[:-1]))
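
To make the pattern expansion in __init__ above concrete: the country configuration writes postcode patterns with 'd' standing for a digit and 'l' for a letter. A minimal standalone sketch (the pattern and sample postcode are invented, not taken from country_settings.yaml):

    import re

    pattern = 'ddd ll'.replace('d', '[0-9]').replace('l', '[A-Z]')
    # pattern is now '[0-9][0-9][0-9] [A-Z][A-Z]'
    # parse() matches against terms that end in a break character, so the
    # local pattern gets a break character class appended:
    assert re.compile(f"{pattern}[:, >]").match('123 AB>') is not None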
@@ -2,111 +2,99 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Datastructures for a tokenized query.
"""
from typing import Dict, List, Tuple, Optional, Iterator
from typing import List, Tuple, Optional, Iterator
from abc import ABC, abstractmethod
from collections import defaultdict
import dataclasses

# Precomputed denominator for the computation of the linear regression slope
# used to determine the query direction.
# The x value for the regression computation will be the position of the
# token in the query. Thus we know the x values will be [0, query length).
# As the denominator only depends on the x values, we can pre-compute here
# the denominator to use for a given query length.
# Note that query length of two or less is special cased and will not use
# the values from this array. Thus it is not a problem that they are 0.
LINFAC = [i * (sum(si * si for si in range(i)) - (i - 1) * i * (i - 1) / 4)
          for i in range(50)]
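
For reference, LINFAC[n] is just the usual least-squares slope denominator n*Σx² - (Σx)² for x = 0 .. n-1, written in factored form. A standalone sketch checking that identity (not part of the module):

    def regression_denominator(n: int) -> float:
        xs = range(n)
        return n * sum(x * x for x in xs) - sum(xs) ** 2

    assert all(regression_denominator(n) == LINFAC[n] for n in range(3, 50))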
import enum


BreakType = str
""" Type of break between tokens.
"""
BREAK_START = '<'
""" Begin of the query. """
BREAK_END = '>'
""" End of the query. """
BREAK_PHRASE = ','
""" Hard break between two phrases. Address parts cannot cross hard
    phrase boundaries."""
BREAK_SOFT_PHRASE = ':'
""" Likely break between two phrases. Address parts should not cross soft
    phrase boundaries. Soft breaks can be inserted by a preprocessor
    that is analysing the input string.
"""
BREAK_WORD = ' '
""" Break between words. """
BREAK_PART = '-'
""" Break inside a word, for example a hyphen or apostrophe. """
BREAK_TOKEN = '`'
""" Break created as a result of tokenization.
    This may happen in languages without spaces between words.
"""


TokenType = str
""" Type of token.
"""
TOKEN_WORD = 'W'
""" Full name of a place. """
TOKEN_PARTIAL = 'w'
""" Word term without breaks, does not necessarily represent a full name. """
TOKEN_HOUSENUMBER = 'H'
""" Housenumber term. """
TOKEN_POSTCODE = 'P'
""" Postal code term. """
TOKEN_COUNTRY = 'C'
""" Country name or reference. """
TOKEN_QUALIFIER = 'Q'
""" Special term used together with name (e.g. _Hotel_ Bellevue). """
TOKEN_NEAR_ITEM = 'N'
""" Special term used as searchable object (e.g. supermarket in ...). """


PhraseType = int
""" Designation of a phrase.
"""
PHRASE_ANY = 0
""" No specific designation (i.e. source is free-form query). """
PHRASE_AMENITY = 1
""" Contains name or type of a POI. """
PHRASE_STREET = 2
""" Contains a street name optionally with a housenumber. """
PHRASE_CITY = 3
""" Contains the postal city. """
PHRASE_COUNTY = 4
""" Contains the equivalent of a county. """
PHRASE_STATE = 5
""" Contains a state or province. """
PHRASE_POSTCODE = 6
""" Contains a postal code. """
PHRASE_COUNTRY = 7
""" Contains the country name or code. """


def _phrase_compatible_with(ptype: PhraseType, ttype: TokenType,
                            is_full_phrase: bool) -> bool:
    """ Check if the given token type can be used with the phrase type.
class BreakType(enum.Enum):
    """ Type of break between tokens.
    """
    START = '<'
    """ Begin of the query. """
    END = '>'
    """ End of the query. """
    PHRASE = ','
    """ Hard break between two phrases. Address parts cannot cross hard
        phrase boundaries."""
    SOFT_PHRASE = ':'
    """ Likely break between two phrases. Address parts should not cross soft
        phrase boundaries. Soft breaks can be inserted by a preprocessor
        that is analysing the input string.
    """
    WORD = ' '
    """ Break between words. """
    PART = '-'
    """ Break inside a word, for example a hyphen or apostrophe. """
    TOKEN = '`'
    """ Break created as a result of tokenization.
        This may happen in languages without spaces between words.
    """
    if ptype == PHRASE_ANY:
        return not is_full_phrase or ttype != TOKEN_QUALIFIER
    if ptype == PHRASE_AMENITY:
        return ttype in (TOKEN_WORD, TOKEN_PARTIAL)\
               or (is_full_phrase and ttype == TOKEN_NEAR_ITEM)\
               or (not is_full_phrase and ttype == TOKEN_QUALIFIER)
    if ptype == PHRASE_STREET:
        return ttype in (TOKEN_WORD, TOKEN_PARTIAL, TOKEN_HOUSENUMBER)
    if ptype == PHRASE_POSTCODE:
        return ttype == TOKEN_POSTCODE
    if ptype == PHRASE_COUNTRY:
        return ttype == TOKEN_COUNTRY

    return ttype in (TOKEN_WORD, TOKEN_PARTIAL)


class TokenType(enum.Enum):
    """ Type of token.
    """
    WORD = enum.auto()
    """ Full name of a place. """
    PARTIAL = enum.auto()
    """ Word term without breaks, does not necessarily represent a full name. """
    HOUSENUMBER = enum.auto()
    """ Housenumber term. """
    POSTCODE = enum.auto()
    """ Postal code term. """
    COUNTRY = enum.auto()
    """ Country name or reference. """
    QUALIFIER = enum.auto()
    """ Special term used together with name (e.g. _Hotel_ Bellevue). """
    NEAR_ITEM = enum.auto()
    """ Special term used as searchable object (e.g. supermarket in ...). """


class PhraseType(enum.Enum):
    """ Designation of a phrase.
    """
    NONE = 0
    """ No specific designation (i.e. source is free-form query). """
    AMENITY = enum.auto()
    """ Contains name or type of a POI. """
    STREET = enum.auto()
    """ Contains a street name optionally with a housenumber. """
    CITY = enum.auto()
    """ Contains the postal city. """
    COUNTY = enum.auto()
    """ Contains the equivalent of a county. """
    STATE = enum.auto()
    """ Contains a state or province. """
    POSTCODE = enum.auto()
    """ Contains a postal code. """
    COUNTRY = enum.auto()
    """ Contains the country name or code. """

    def compatible_with(self, ttype: TokenType,
                        is_full_phrase: bool) -> bool:
        """ Check if the given token type can be used with the phrase type.
        """
        if self == PhraseType.NONE:
            return not is_full_phrase or ttype != TokenType.QUALIFIER
        if self == PhraseType.AMENITY:
            return ttype in (TokenType.WORD, TokenType.PARTIAL)\
                   or (is_full_phrase and ttype == TokenType.NEAR_ITEM)\
                   or (not is_full_phrase and ttype == TokenType.QUALIFIER)
        if self == PhraseType.STREET:
            return ttype in (TokenType.WORD, TokenType.PARTIAL, TokenType.HOUSENUMBER)
        if self == PhraseType.POSTCODE:
            return ttype == TokenType.POSTCODE
        if self == PhraseType.COUNTRY:
            return ttype == TokenType.COUNTRY

        return ttype in (TokenType.WORD, TokenType.PARTIAL)
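
A small hedged illustration of the compatibility rules defined above (using the enum variant; the assertions simply restate the branches):

    # A free-form phrase may not consist solely of a qualifier ...
    assert not PhraseType.NONE.compatible_with(TokenType.QUALIFIER, is_full_phrase=True)
    # ... but a qualifier next to other tokens is fine.
    assert PhraseType.NONE.compatible_with(TokenType.QUALIFIER, is_full_phrase=False)
    # A strictly typed postcode phrase accepts nothing but postcode tokens.
    assert PhraseType.POSTCODE.compatible_with(TokenType.POSTCODE, is_full_phrase=True)
    assert not PhraseType.POSTCODE.compatible_with(TokenType.WORD, is_full_phrase=True)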


@dataclasses.dataclass
@@ -134,6 +122,7 @@ class TokenRange:
    """
    start: int
    end: int
    penalty: Optional[float] = None

    def __lt__(self, other: 'TokenRange') -> bool:
        return self.end <= other.start
@@ -182,58 +171,10 @@ class TokenList:
@dataclasses.dataclass
class QueryNode:
    """ A node of the query representing a break between terms.

        The node also contains information on the source term
        ending at the node. The tokens are created from this information.
    """
    btype: BreakType
    ptype: PhraseType

    penalty: float
    """ Penalty for having a word break at this position. The penalty
        may be negative, when a word break is more likely than continuing
        the word after the node.
    """
    term_lookup: str
    """ Transliterated term ending at this node.
    """
    term_normalized: str
    """ Normalised form of term ending at this node.
        When the token resulted from a split during transliteration,
        then this string contains the complete source term.
    """

    starting: List[TokenList] = dataclasses.field(default_factory=list)
    """ List of all full tokens starting at this node.
    """
    partial: Optional[Token] = None
    """ Base token going to the next node.
        May be None when the query has parts for which no words are known.
        Note that the query may still be parsable when there are other
        types of tokens spanning over the gap.
    """

    @property
    def word_break_penalty(self) -> float:
        """ Penalty to apply when a word ends at this node.
        """
        return max(0, self.penalty)

    @property
    def word_continuation_penalty(self) -> float:
        """ Penalty to apply when a word continues over this node
            (i.e. is a multi-term word).
        """
        return max(0, -self.penalty)

    def name_address_ratio(self) -> float:
        """ Return the probability that the partial token belonging to
            this node forms part of a name (as opposed to part of the address).
        """
        if self.partial is None:
            return 0.5

        return self.partial.count / (self.partial.count + self.partial.addr_count)

    def has_tokens(self, end: int, *ttypes: TokenType) -> bool:
        """ Check if there are tokens of the given types ending at the
@@ -270,36 +211,26 @@ class QueryStruct:
        need to be direct neighbours. Thus the query is represented as a
        directed acyclic graph.

        A query also has a direction penalty 'dir_penalty'. This describes
        the likelihood of whether the query should be read from left-to-right or
        vice versa. A negative 'dir_penalty' should be read as a penalty on
        right-to-left reading, while a positive value represents a penalty
        for left-to-right reading. The default value is 0, which is equivalent
        to having no information about the reading.

        When created, a query contains a single node: the start of the
        query. Further nodes can be added by appending to 'nodes'.
    """

    def __init__(self, source: List[Phrase]) -> None:
        self.source = source
        self.dir_penalty = 0.0
        self.nodes: List[QueryNode] = \
            [QueryNode(BREAK_START, source[0].ptype if source else PHRASE_ANY,
                       0.0, '', '')]
            [QueryNode(BreakType.START, source[0].ptype if source else PhraseType.NONE)]

    def num_token_slots(self) -> int:
        """ Return the length of the query in vertex steps.
        """
        return len(self.nodes) - 1

    def add_node(self, btype: BreakType, ptype: PhraseType,
                 term_lookup: str = '', term_normalized: str = '') -> None:
    def add_node(self, btype: BreakType, ptype: PhraseType) -> None:
        """ Append a new break node with the given break type.
            The phrase type denotes the type for any tokens starting
            at the node.
        """
        self.nodes.append(QueryNode(btype, ptype, 0.0, term_lookup, term_normalized))
        self.nodes.append(QueryNode(btype, ptype))

    def add_token(self, trange: TokenRange, ttype: TokenType, token: Token) -> None:
        """ Add a token to the query. 'start' and 'end' are the indexes of the
@@ -312,70 +243,37 @@ class QueryStruct:
            be added to, then the token is silently dropped.
        """
        snode = self.nodes[trange.start]
        if ttype == TOKEN_PARTIAL:
            assert snode.partial is None
            if _phrase_compatible_with(snode.ptype, TOKEN_PARTIAL, False):
                snode.partial = token
        else:
            full_phrase = snode.btype in (BREAK_START, BREAK_PHRASE)\
                and self.nodes[trange.end].btype in (BREAK_PHRASE, BREAK_END)
            if _phrase_compatible_with(snode.ptype, ttype, full_phrase):
                tlist = snode.get_tokens(trange.end, ttype)
                if tlist is None:
                    snode.starting.append(TokenList(trange.end, ttype, [token]))
                else:
                    tlist.append(token)

    def compute_direction_penalty(self) -> None:
        """ Recompute the direction probability from the partial tokens
            of each node.
        """
        n = len(self.nodes) - 1
        if n <= 1 or n >= 50:
            self.dir_penalty = 0
        elif n == 2:
            self.dir_penalty = (self.nodes[1].name_address_ratio()
                                - self.nodes[0].name_address_ratio()) / 3
        else:
            ratios = [n.name_address_ratio() for n in self.nodes[:-1]]
            self.dir_penalty = (n * sum(i * r for i, r in enumerate(ratios))
                                - sum(ratios) * n * (n - 1) / 2) / LINFAC[n]
        full_phrase = snode.btype in (BreakType.START, BreakType.PHRASE)\
            and self.nodes[trange.end].btype in (BreakType.PHRASE, BreakType.END)
        if snode.ptype.compatible_with(ttype, full_phrase):
            tlist = snode.get_tokens(trange.end, ttype)
            if tlist is None:
                snode.starting.append(TokenList(trange.end, ttype, [token]))
            else:
                tlist.append(token)

    def get_tokens(self, trange: TokenRange, ttype: TokenType) -> List[Token]:
        """ Get the list of tokens of a given type, spanning the given
            nodes. The nodes must exist. If no tokens exist, an
            empty list is returned.

            Cannot be used to get the partial token.
        """
        assert ttype != TOKEN_PARTIAL
        return self.nodes[trange.start].get_tokens(trange.end, ttype) or []

    def get_in_word_penalty(self, trange: TokenRange) -> float:
        """ Get the sum of penalties for all token transitions
            within the given range.
    def get_partials_list(self, trange: TokenRange) -> List[Token]:
        """ Create a list of partial tokens between the given nodes.
            The list is composed of the first token of type PARTIAL
            going to the subsequent node. Such PARTIAL tokens are
            assumed to exist.
        """
        return sum(n.word_continuation_penalty
                   for n in self.nodes[trange.start + 1:trange.end])
        return [next(iter(self.get_tokens(TokenRange(i, i+1), TokenType.PARTIAL)))
                for i in range(trange.start, trange.end)]

    def iter_partials(self, trange: TokenRange) -> Iterator[Token]:
        """ Iterate over the partial tokens between the given nodes.
            Missing partials are ignored.
        """
        return (n.partial for n in self.nodes[trange.start:trange.end] if n.partial is not None)

    def iter_tokens_by_edge(self) -> Iterator[Tuple[int, int, Dict[TokenType, List[Token]]]]:
        """ Iterator over all tokens except partial ones grouped by edge.

            Returns the start and end node indexes and a dictionary
            of list of tokens by token type.
    def iter_token_lists(self) -> Iterator[Tuple[int, QueryNode, TokenList]]:
        """ Iterator over all token lists in the query.
        """
        for i, node in enumerate(self.nodes):
            by_end: Dict[int, Dict[TokenType, List[Token]]] = defaultdict(dict)
            for tlist in node.starting:
                by_end[tlist.end][tlist.ttype] = tlist.tokens
            for end, endlist in by_end.items():
                yield i, end, endlist
                yield i, node, tlist

    def find_lookup_word_by_id(self, token: int) -> str:
        """ Find the first token with the given token ID and return
@@ -384,45 +282,8 @@ class QueryStruct:
            debugging.
        """
        for node in self.nodes:
            if node.partial is not None and node.partial.token == token:
                return f"[P]{node.partial.lookup_word}"
            for tlist in node.starting:
                for t in tlist.tokens:
                    if t.token == token:
                        return f"[{tlist.ttype}]{t.lookup_word}"
                        return f"[{tlist.ttype.name[0]}]{t.lookup_word}"
        return 'None'

    def get_transliterated_query(self) -> str:
        """ Return a string representation of the transliterated query
            with the character representation of the different break types.

            For debugging purposes only.
        """
        return ''.join(''.join((n.term_lookup, n.btype)) for n in self.nodes)

    def extract_words(self, start: int = 0,
                      endpos: Optional[int] = None) -> Dict[str, List[TokenRange]]:
        """ Add all combinations of words that can be formed from the terms
            between the given start and endnode. The terms are joined with
            spaces for each break. Words can never go across a BREAK_PHRASE.

            The function returns a dictionary of possible words with their
            position within the query.
        """
        if endpos is None:
            endpos = len(self.nodes)

        words: Dict[str, List[TokenRange]] = defaultdict(list)

        for first, first_node in enumerate(self.nodes[start + 1:endpos], start):
            word = first_node.term_lookup
            words[word].append(TokenRange(first, first + 1))
            if first_node.btype != BREAK_PHRASE:
                max_last = min(first + 20, endpos)
                for last, last_node in enumerate(self.nodes[first + 2:max_last], first + 2):
                    word = ' '.join((word, last_node.term_lookup))
                    words[word].append(TokenRange(first, last))
                    if last_node.btype == BREAK_PHRASE:
                        break

        return words
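
A hedged walk-through of extract_words() above (the terms are invented): for a query transliterated to the terms "rue", "de", "la" within a single phrase, the returned dictionary contains, among others:

    # word          -> positions (TokenRange(start, end))
    # 'rue'         -> [TokenRange(0, 1)]
    # 'rue de'      -> [TokenRange(0, 2)]
    # 'rue de la'   -> [TokenRange(0, 3)]
    # 'de'          -> [TokenRange(1, 2)]
    # 'de la'       -> [TokenRange(1, 3)]
    # 'la'          -> [TokenRange(2, 3)]
    # Combinations stop at a BREAK_PHRASE node and are capped at 20 terms.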

@@ -23,6 +23,16 @@ class TypedRange:
    trange: qmod.TokenRange


PENALTY_TOKENCHANGE = {
    qmod.BreakType.START: 0.0,
    qmod.BreakType.END: 0.0,
    qmod.BreakType.PHRASE: 0.0,
    qmod.BreakType.SOFT_PHRASE: 0.0,
    qmod.BreakType.WORD: 0.1,
    qmod.BreakType.PART: 0.2,
    qmod.BreakType.TOKEN: 0.4
}

TypedRangeSeq = List[TypedRange]


@@ -46,17 +56,17 @@ class TokenAssignment:
        """
        out = TokenAssignment()
        for token in ranges:
            if token.ttype == qmod.TOKEN_PARTIAL:
            if token.ttype == qmod.TokenType.PARTIAL:
                out.address.append(token.trange)
            elif token.ttype == qmod.TOKEN_HOUSENUMBER:
            elif token.ttype == qmod.TokenType.HOUSENUMBER:
                out.housenumber = token.trange
            elif token.ttype == qmod.TOKEN_POSTCODE:
            elif token.ttype == qmod.TokenType.POSTCODE:
                out.postcode = token.trange
            elif token.ttype == qmod.TOKEN_COUNTRY:
            elif token.ttype == qmod.TokenType.COUNTRY:
                out.country = token.trange
            elif token.ttype == qmod.TOKEN_NEAR_ITEM:
            elif token.ttype == qmod.TokenType.NEAR_ITEM:
                out.near_item = token.trange
            elif token.ttype == qmod.TOKEN_QUALIFIER:
            elif token.ttype == qmod.TokenType.QUALIFIER:
                out.qualifier = token.trange
        return out

@@ -74,7 +84,7 @@ class _TokenSequence:
        self.penalty = penalty

    def __str__(self) -> str:
        seq = ''.join(f'[{r.trange.start} - {r.trange.end}: {r.ttype}]' for r in self.seq)
        seq = ''.join(f'[{r.trange.start} - {r.trange.end}: {r.ttype.name}]' for r in self.seq)
        return f'{seq} (dir: {self.direction}, penalty: {self.penalty})'

    @property
@@ -95,7 +105,7 @@ class _TokenSequence:
        """
        # Country and category must be the final term for left-to-right
        return len(self.seq) > 1 and \
            self.seq[-1].ttype in (qmod.TOKEN_COUNTRY, qmod.TOKEN_NEAR_ITEM)
            self.seq[-1].ttype in (qmod.TokenType.COUNTRY, qmod.TokenType.NEAR_ITEM)

    def appendable(self, ttype: qmod.TokenType) -> Optional[int]:
        """ Check if the given token type is appendable to the existing sequence.
@@ -104,23 +114,23 @@ class _TokenSequence:
            new direction of the sequence after adding such a type. The
            token is not added.
        """
        if ttype == qmod.TOKEN_WORD:
        if ttype == qmod.TokenType.WORD:
            return None

        if not self.seq:
            # Append unconditionally to the empty list
            if ttype == qmod.TOKEN_COUNTRY:
            if ttype == qmod.TokenType.COUNTRY:
                return -1
            if ttype in (qmod.TOKEN_HOUSENUMBER, qmod.TOKEN_QUALIFIER):
            if ttype in (qmod.TokenType.HOUSENUMBER, qmod.TokenType.QUALIFIER):
                return 1
            return self.direction

        # Name tokens are always acceptable and don't change direction
        if ttype == qmod.TOKEN_PARTIAL:
        if ttype == qmod.TokenType.PARTIAL:
            # qualifiers cannot appear in the middle of the query. They need
            # to be near the next phrase.
            if self.direction == -1 \
               and any(t.ttype == qmod.TOKEN_QUALIFIER for t in self.seq[:-1]):
               and any(t.ttype == qmod.TokenType.QUALIFIER for t in self.seq[:-1]):
                return None
            return self.direction

@@ -128,61 +138,61 @@ class _TokenSequence:
        if self.has_types(ttype):
            return None

        if ttype == qmod.TOKEN_HOUSENUMBER:
        if ttype == qmod.TokenType.HOUSENUMBER:
            if self.direction == 1:
                if len(self.seq) == 1 and self.seq[0].ttype == qmod.TOKEN_QUALIFIER:
                if len(self.seq) == 1 and self.seq[0].ttype == qmod.TokenType.QUALIFIER:
                    return None
                if len(self.seq) > 2 \
                   or self.has_types(qmod.TOKEN_POSTCODE, qmod.TOKEN_COUNTRY):
                   or self.has_types(qmod.TokenType.POSTCODE, qmod.TokenType.COUNTRY):
                    return None  # direction left-to-right: housenumber must come before anything
            elif (self.direction == -1
                  or self.has_types(qmod.TOKEN_POSTCODE, qmod.TOKEN_COUNTRY)):
                  or self.has_types(qmod.TokenType.POSTCODE, qmod.TokenType.COUNTRY)):
                return -1  # force direction right-to-left if after other terms

            return self.direction

        if ttype == qmod.TOKEN_POSTCODE:
        if ttype == qmod.TokenType.POSTCODE:
            if self.direction == -1:
                if self.has_types(qmod.TOKEN_HOUSENUMBER, qmod.TOKEN_QUALIFIER):
                if self.has_types(qmod.TokenType.HOUSENUMBER, qmod.TokenType.QUALIFIER):
                    return None
                return -1
            if self.direction == 1:
                return None if self.has_types(qmod.TOKEN_COUNTRY) else 1
            if self.has_types(qmod.TOKEN_HOUSENUMBER, qmod.TOKEN_QUALIFIER):
                return None if self.has_types(qmod.TokenType.COUNTRY) else 1
            if self.has_types(qmod.TokenType.HOUSENUMBER, qmod.TokenType.QUALIFIER):
                return 1
            return self.direction

        if ttype == qmod.TOKEN_COUNTRY:
        if ttype == qmod.TokenType.COUNTRY:
            return None if self.direction == -1 else 1

        if ttype == qmod.TOKEN_NEAR_ITEM:
        if ttype == qmod.TokenType.NEAR_ITEM:
            return self.direction

        if ttype == qmod.TOKEN_QUALIFIER:
        if ttype == qmod.TokenType.QUALIFIER:
            if self.direction == 1:
                if (len(self.seq) == 1
                    and self.seq[0].ttype in (qmod.TOKEN_PARTIAL, qmod.TOKEN_NEAR_ITEM)) \
                    and self.seq[0].ttype in (qmod.TokenType.PARTIAL, qmod.TokenType.NEAR_ITEM)) \
                   or (len(self.seq) == 2
                       and self.seq[0].ttype == qmod.TOKEN_NEAR_ITEM
                       and self.seq[1].ttype == qmod.TOKEN_PARTIAL):
                       and self.seq[0].ttype == qmod.TokenType.NEAR_ITEM
                       and self.seq[1].ttype == qmod.TokenType.PARTIAL):
                    return 1
                return None
            if self.direction == -1:
                return -1

            tempseq = self.seq[1:] if self.seq[0].ttype == qmod.TOKEN_NEAR_ITEM else self.seq
            tempseq = self.seq[1:] if self.seq[0].ttype == qmod.TokenType.NEAR_ITEM else self.seq
            if len(tempseq) == 0:
                return 1
            if len(tempseq) == 1 and self.seq[0].ttype == qmod.TOKEN_HOUSENUMBER:
            if len(tempseq) == 1 and self.seq[0].ttype == qmod.TokenType.HOUSENUMBER:
                return None
            if len(tempseq) > 1 or self.has_types(qmod.TOKEN_POSTCODE, qmod.TOKEN_COUNTRY):
            if len(tempseq) > 1 or self.has_types(qmod.TokenType.POSTCODE, qmod.TokenType.COUNTRY):
                return -1
            return 0

        return None

    def advance(self, ttype: qmod.TokenType, end_pos: int,
                force_break: bool, break_penalty: float) -> Optional['_TokenSequence']:
                btype: qmod.BreakType) -> Optional['_TokenSequence']:
        """ Return a new token sequence state with the given token type
            extended.
        """
@@ -195,7 +205,7 @@ class _TokenSequence:
            new_penalty = 0.0
        else:
            last = self.seq[-1]
            if not force_break and last.ttype == ttype:
            if btype != qmod.BreakType.PHRASE and last.ttype == ttype:
                # extend the existing range
                newseq = self.seq[:-1] + [TypedRange(ttype, last.trange.replace_end(end_pos))]
                new_penalty = 0.0
@@ -203,7 +213,7 @@ class _TokenSequence:
                # start a new range
                newseq = list(self.seq) + [TypedRange(ttype,
                                                      qmod.TokenRange(last.trange.end, end_pos))]
                new_penalty = break_penalty
                new_penalty = PENALTY_TOKENCHANGE[btype]

        return _TokenSequence(newseq, newdir, self.penalty + new_penalty)

@@ -230,18 +240,18 @@ class _TokenSequence:
        # housenumbers may not be further than 2 words from the beginning.
        # If there are two words in front, give it a penalty.
        hnrpos = next((i for i, tr in enumerate(self.seq)
                       if tr.ttype == qmod.TOKEN_HOUSENUMBER),
                       if tr.ttype == qmod.TokenType.HOUSENUMBER),
                      None)
        if hnrpos is not None:
            if self.direction != -1:
                priors = sum(1 for t in self.seq[:hnrpos] if t.ttype == qmod.TOKEN_PARTIAL)
                priors = sum(1 for t in self.seq[:hnrpos] if t.ttype == qmod.TokenType.PARTIAL)
                if not self._adapt_penalty_from_priors(priors, -1):
                    return False
            if self.direction != 1:
                priors = sum(1 for t in self.seq[hnrpos+1:] if t.ttype == qmod.TOKEN_PARTIAL)
                priors = sum(1 for t in self.seq[hnrpos+1:] if t.ttype == qmod.TokenType.PARTIAL)
                if not self._adapt_penalty_from_priors(priors, 1):
                    return False
        if any(t.ttype == qmod.TOKEN_NEAR_ITEM for t in self.seq):
        if any(t.ttype == qmod.TokenType.NEAR_ITEM for t in self.seq):
            self.penalty += 1.0

        return True
@@ -259,9 +269,10 @@ class _TokenSequence:
        # <address>,<postcode> should give preference to address search
        if base.postcode.start == 0:
            penalty = self.penalty
            self.direction = -1  # name searches are only possible backwards
        else:
            penalty = self.penalty + 0.1
            penalty += 0.1 * max(0, len(base.address) - 1)
            self.direction = 1  # name searches are only possible forwards
        yield dataclasses.replace(base, penalty=penalty)

    def _get_assignments_address_forward(self, base: TokenAssignment,
@@ -271,17 +282,8 @@ class _TokenSequence:
        """
        first = base.address[0]

        # The postcode must come after the name.
        if base.postcode and base.postcode < first:
            log().var_dump('skip forward', (base.postcode, first))
            return

        penalty = self.penalty
        if not base.country and self.direction == 1 and query.dir_penalty > 0:
            penalty += query.dir_penalty

        log().comment('first word = name')
        yield dataclasses.replace(base, penalty=penalty,
        yield dataclasses.replace(base, penalty=self.penalty,
                                  name=first, address=base.address[1:])

        # To paraphrase:
@@ -291,23 +293,22 @@ class _TokenSequence:
        #  * the containing phrase is strictly typed
        if (base.housenumber and first.end < base.housenumber.start)\
           or (base.qualifier and base.qualifier > first)\
           or (query.nodes[first.start].ptype != qmod.PHRASE_ANY):
           or (query.nodes[first.start].ptype != qmod.PhraseType.NONE):
            return

        penalty = self.penalty

        # Penalty for:
        #  * <name>, <street>, <housenumber> , ...
        #  * queries that are comma-separated
        if (base.housenumber and base.housenumber > first) or len(query.source) > 1:
            penalty += 0.25

        if self.direction == 0 and query.dir_penalty > 0:
            penalty += query.dir_penalty

        for i in range(first.start + 1, first.end):
            name, addr = first.split(i)
            log().comment(f'split first word = name ({i - first.start})')
            yield dataclasses.replace(base, name=name, address=[addr] + base.address[1:],
                                      penalty=penalty + query.nodes[i].word_break_penalty)
                                      penalty=penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype])

    def _get_assignments_address_backward(self, base: TokenAssignment,
                                          query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
@@ -316,18 +317,9 @@ class _TokenSequence:
        """
        last = base.address[-1]

        # The postcode must come before the name for backward direction.
        if base.postcode and base.postcode > last:
            log().var_dump('skip backward', (base.postcode, last))
            return

        penalty = self.penalty
        if not base.country and self.direction == -1 and query.dir_penalty < 0:
            penalty -= query.dir_penalty

        if self.direction == -1 or len(base.address) > 1 or base.postcode:
        if self.direction == -1 or len(base.address) > 1:
            log().comment('last word = name')
            yield dataclasses.replace(base, penalty=penalty,
            yield dataclasses.replace(base, penalty=self.penalty,
                                      name=last, address=base.address[:-1])

        # To paraphrase:
@@ -337,22 +329,20 @@ class _TokenSequence:
        #  * the containing phrase is strictly typed
        if (base.housenumber and last.start > base.housenumber.end)\
           or (base.qualifier and base.qualifier < last)\
           or (query.nodes[last.start].ptype != qmod.PHRASE_ANY):
           or (query.nodes[last.start].ptype != qmod.PhraseType.NONE):
            return

        penalty = self.penalty
        if base.housenumber and base.housenumber < last:
            penalty += 0.4
        if len(query.source) > 1:
            penalty += 0.25

        if self.direction == 0 and query.dir_penalty < 0:
            penalty -= query.dir_penalty

        for i in range(last.start + 1, last.end):
            addr, name = last.split(i)
            log().comment(f'split last word = name ({i - last.start})')
            yield dataclasses.replace(base, name=name, address=base.address[:-1] + [addr],
                                      penalty=penalty + query.nodes[i].word_break_penalty)
                                      penalty=penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype])

    def get_assignments(self, query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
        """ Yield possible assignments for the current sequence.
@@ -380,11 +370,11 @@ class _TokenSequence:
        if base.postcode and base.postcode.start == 0:
            self.penalty += 0.1

        # Left-to-right reading of the address
        # Right-to-left reading of the address
        if self.direction != -1:
            yield from self._get_assignments_address_forward(base, query)

        # Right-to-left reading of the address
        # Left-to-right reading of the address
        if self.direction != 1:
            yield from self._get_assignments_address_backward(base, query)

@@ -403,32 +393,18 @@ def yield_token_assignments(query: qmod.QueryStruct) -> Iterator[TokenAssignment
    another. It does not include penalties for transitions within a
    type.
    """
    todo = [_TokenSequence([], direction=0 if query.source[0].ptype == qmod.PHRASE_ANY else 1)]
    todo = [_TokenSequence([], direction=0 if query.source[0].ptype == qmod.PhraseType.NONE else 1)]

    while todo:
        state = todo.pop()
        node = query.nodes[state.end_pos]

        for tlist in node.starting:
            yield from _append_state_to_todo(
                query, todo,
                state.advance(tlist.ttype, tlist.end,
                              True, node.word_break_penalty))

        if node.partial is not None:
            yield from _append_state_to_todo(
                query, todo,
                state.advance(qmod.TOKEN_PARTIAL, state.end_pos + 1,
                              node.btype == qmod.BREAK_PHRASE,
                              node.word_break_penalty))


def _append_state_to_todo(query: qmod.QueryStruct, todo: List[_TokenSequence],
                          newstate: Optional[_TokenSequence]) -> Iterator[TokenAssignment]:
    if newstate is not None:
        if newstate.end_pos == query.num_token_slots():
            if newstate.recheck_sequence():
                log().var_dump('Assignment', newstate)
                yield from newstate.get_assignments(query)
        elif not newstate.is_final():
            todo.append(newstate)
            newstate = state.advance(tlist.ttype, tlist.end, node.btype)
            if newstate is not None:
                if newstate.end_pos == query.num_token_slots():
                    if newstate.recheck_sequence():
                        log().var_dump('Assignment', newstate)
                        yield from newstate.get_assignments(query)
                elif not newstate.is_final():
                    todo.append(newstate)
|
||||
|
||||
@@ -143,7 +143,7 @@ def get_application(project_dir: Path,
|
||||
|
||||
log_file = config.LOG_FILE
|
||||
if log_file:
|
||||
middleware.append(Middleware(FileLoggingMiddleware, file_name=log_file)) # type: ignore
|
||||
middleware.append(Middleware(FileLoggingMiddleware, file_name=log_file))
|
||||
|
||||
exceptions: Dict[Any, Callable[[Request, Exception], Awaitable[Response]]] = {
|
||||
TimeoutError: timeout_error,

@@ -122,18 +122,15 @@ class IsAddressPoint(sa.sql.functions.GenericFunction[Any]):

    def __init__(self, table: sa.Table) -> None:
        super().__init__(table.c.rank_address,
                         table.c.housenumber, table.c.name, table.c.address)
                         table.c.housenumber, table.c.name)


@compiles(IsAddressPoint)
def default_is_address_point(element: IsAddressPoint,
                             compiler: 'sa.Compiled', **kw: Any) -> str:
    rank, hnr, name, address = list(element.clauses)
    return "(%s = 30 AND (%s IS NULL OR NOT %s ? '_inherited')" \
           " AND (%s IS NOT NULL OR %s ? 'addr:housename'))" % (
    rank, hnr, name = list(element.clauses)
    return "(%s = 30 AND (%s IS NOT NULL OR %s ? 'addr:housename'))" % (
        compiler.process(rank, **kw),
        compiler.process(address, **kw),
        compiler.process(address, **kw),
        compiler.process(hnr, **kw),
        compiler.process(name, **kw))


@@ -141,11 +138,9 @@ def default_is_address_point(element: IsAddressPoint,
@compiles(IsAddressPoint, 'sqlite')
def sqlite_is_address_point(element: IsAddressPoint,
                            compiler: 'sa.Compiled', **kw: Any) -> str:
    rank, hnr, name, address = list(element.clauses)
    return "(%s = 30 AND json_extract(%s, '$._inherited') IS NULL" \
           " AND coalesce(%s, json_extract(%s, '$.addr:housename')) IS NOT NULL)" % (
    rank, hnr, name = list(element.clauses)
    return "(%s = 30 AND coalesce(%s, json_extract(%s, '$.addr:housename')) IS NOT NULL)" % (
        compiler.process(rank, **kw),
        compiler.process(address, **kw),
        compiler.process(hnr, **kw),
        compiler.process(name, **kw))
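
For orientation, a hedged sketch of what the PostgreSQL variant above renders to (the 'placex' table object is an assumption for illustration; exact quoting depends on the compiler):

    from sqlalchemy.dialects import postgresql

    # expr = IsAddressPoint(placex)   # 'placex' is a hypothetical sa.Table
    # str(expr.compile(dialect=postgresql.dialect())) is roughly:
    #   (placex.rank_address = 30
    #    AND (placex.address IS NULL OR NOT placex.address ? '_inherited')
    #    AND (placex.housenumber IS NOT NULL OR placex.name ? 'addr:housename'))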


@@ -173,7 +173,7 @@ class Geometry(types.UserDefinedType):  # type: ignore[type-arg]
    def __init__(self, subtype: str = 'Geometry'):
        self.subtype = subtype

    def get_col_spec(self, **_: Any) -> str:
    def get_col_spec(self) -> str:
        return f'GEOMETRY({self.subtype}, 4326)'

    def bind_processor(self, dialect: 'sa.Dialect') -> Callable[[Any], str]:

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Common json type for different dialects.
@@ -24,6 +24,6 @@ class Json(sa.types.TypeDecorator[Any]):

    def load_dialect_impl(self, dialect: SaDialect) -> sa.types.TypeEngine[Any]:
        if dialect.name == 'postgresql':
            return JSONB(none_as_null=True)
            return JSONB(none_as_null=True)  # type: ignore[no-untyped-call]

        return sqlite_json(none_as_null=True)

@@ -144,7 +144,7 @@ class Point(NamedTuple):
        except ValueError as exc:
            raise UsageError('Point parameter needs to be numbers.') from exc

        if not -180 <= x <= 180 or not -90 <= y <= 90.0:
        if x < -180.0 or x > 180.0 or y < -90.0 or y > 90.0:
            raise UsageError('Point coordinates invalid.')

        return Point(x, y)
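
Both range guards above are equivalent; one just uses Python's chained comparisons. A quick hedged check with invented sample values:

    for x, y in [(181.0, 0.0), (0.0, -90.5), (180.0, 90.0), (12.5, 47.1)]:
        assert (not -180 <= x <= 180 or not -90 <= y <= 90.0) \
            == (x < -180.0 or x > 180.0 or y < -90.0 or y > 90.0)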

@@ -25,8 +25,8 @@ def get_label_tag(category: Tuple[str, str], extratags: Optional[Mapping[str, st
    elif rank < 26 and extratags and 'linked_place' in extratags:
        label = extratags['linked_place']
    elif category == ('boundary', 'administrative'):
        label = ADMIN_LABELS.get((country or '', rank // 2))\
                or ADMIN_LABELS.get(('', rank // 2))\
        label = ADMIN_LABELS.get((country or '', int(rank/2)))\
                or ADMIN_LABELS.get(('', int(rank/2)))\
                or 'Administrative'
    elif category[1] == 'postal_code':
        label = 'postcode'

@@ -84,9 +84,8 @@ def format_base_json(results: Union[ReverseResults, SearchResults],

        _write_osm_id(out, result.osm_object)

        # lat and lon must be string values
        out.keyval('lat', f"{result.centroid.lat:0.7f}")\
           .keyval('lon', f"{result.centroid.lon:0.7f}")\
        out.keyval('lat', f"{result.centroid.lat}")\
           .keyval('lon', f"{result.centroid.lon}")\
           .keyval(class_label, result.category[0])\
           .keyval('type', result.category[1])\
           .keyval('place_rank', result.rank_search)\
@@ -113,7 +112,6 @@ def format_base_json(results: Union[ReverseResults, SearchResults],
        if options.get('namedetails', False):
            out.keyval('namedetails', result.names)

        # must be string values
        bbox = cl.bbox_from_result(result)
        out.key('boundingbox').start_array()\
           .value(f"{bbox.minlat:0.7f}").next()\
@@ -251,9 +249,6 @@ def format_base_geocodejson(results: Union[ReverseResults, SearchResults],
                    out.keyval(f"level{line.admin_level}", line.local_name)
            out.end_object().next()

        if options.get('extratags', False):
            out.keyval('extra', result.extratags)

        out.end_object().next().end_object().next()

        out.key('geometry').raw(result.geometry.get('geojson')

@@ -90,7 +90,7 @@ def format_base_xml(results: Union[ReverseResults, SearchResults],
        result will be output, otherwise a list.
    """
    root = ET.Element(xml_root_tag)
    root.set('timestamp', dt.datetime.now(dt.timezone.utc).strftime('%a, %d %b %Y %H:%M:%S +00:00'))
    root.set('timestamp', dt.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S +00:00'))
    root.set('attribution', cl.OSM_ATTRIBUTION)
    for k, v in xml_extra_info.items():
        root.set(k, v)

@@ -374,17 +374,14 @@ async def deletable_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any
    """
    fmt = parse_format(params, RawDataList, 'json')

    results = RawDataList()
    async with api.begin() as conn:
        for osm_type in ('N', 'W', 'R'):
            sql = sa.text(""" SELECT p.place_id, country_code,
                                     name->'name' as name, i.*
                              FROM placex p, import_polygon_delete i
                              WHERE i.osm_type = :osm_type
                                AND p.osm_id = i.osm_id AND p.osm_type = :osm_type
                                AND p.class = i.class AND p.type = i.type
                          """)
            results.extend(r._asdict() for r in await conn.execute(sql, {'osm_type': osm_type}))
        sql = sa.text(""" SELECT p.place_id, country_code,
                                 name->'name' as name, i.*
                          FROM placex p, import_polygon_delete i
                          WHERE p.osm_id = i.osm_id AND p.osm_type = i.osm_type
                            AND p.class = i.class AND p.type = i.type
                      """)
        results = RawDataList(r._asdict() for r in await conn.execute(sql))

    return build_response(params, params.formatting().format_result(results, fmt, {}))


@@ -8,4 +8,4 @@
Version information for the Nominatim API.
"""

NOMINATIM_API_VERSION = '5.1.0'
NOMINATIM_API_VERSION = '5.0.0'

@@ -2,15 +2,16 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Command-line interface to the Nominatim functions for import, update,
database administration and querying.
"""
from typing import Optional, List, Mapping
from typing import Optional, Any
import importlib
import logging
import os
import sys
import argparse
import asyncio
@@ -80,14 +81,13 @@ class CommandlineParser:
        parser.set_defaults(command=cmd)
        cmd.add_args(parser)

    def run(self, cli_args: Optional[List[str]],
            environ: Optional[Mapping[str, str]]) -> int:
    def run(self, **kwargs: Any) -> int:
        """ Parse the command line arguments of the program and execute the
            appropriate subcommand.
        """
        args = NominatimArgs()
        try:
            self.parser.parse_args(args=cli_args, namespace=args)
            self.parser.parse_args(args=kwargs.get('cli_args'), namespace=args)
        except SystemExit:
            return 1

@@ -101,19 +101,23 @@ class CommandlineParser:

        args.project_dir = Path(args.project_dir).resolve()

        if cli_args is None:
        if 'cli_args' not in kwargs:
            logging.basicConfig(stream=sys.stderr,
                                format='%(asctime)s: %(message)s',
                                datefmt='%Y-%m-%d %H:%M:%S',
                                level=max(4 - args.verbose, 1) * 10)

        args.config = Configuration(args.project_dir, environ=environ)
        args.config = Configuration(args.project_dir,
                                    environ=kwargs.get('environ', os.environ))
        args.config.set_libdirs(osm2pgsql=kwargs['osm2pgsql_path'])

        log = logging.getLogger()
        log.warning('Using project directory: %s', str(args.project_dir))

        try:
            return args.command.run(args)
            ret = args.command.run(args)

            return ret
        except UsageError as exception:
            if log.isEnabledFor(logging.DEBUG):
                raise  # use Python's exception printing
@@ -229,16 +233,9 @@ def get_set_parser() -> CommandlineParser:
    return parser


def nominatim(cli_args: Optional[List[str]] = None,
              environ: Optional[Mapping[str, str]] = None) -> int:
def nominatim(**kwargs: Any) -> int:
    """\
    Command-line tools for importing, updating, administrating and
    querying the Nominatim database.

    'cli_args' is a list of parameters for the command to run. If not given,
    sys.args will be used.

    'environ' is the dictionary of environment variables containing the
    Nominatim configuration. When None, the os.environ is inherited.
    """
    return get_set_parser().run(cli_args=cli_args, environ=environ)
    return get_set_parser().run(**kwargs)

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Provides custom functions over command-line arguments.
@@ -136,7 +136,6 @@ class NominatimArgs:
    import_from_wiki: bool
    import_from_csv: Optional[str]
    no_replace: bool
    min: int

    # Arguments to all query functions
    format: str
@@ -187,7 +186,7 @@ class NominatimArgs:
            from the command line arguments. The resulting dict can be
            further customized and then used in `run_osm2pgsql()`.
        """
        return dict(osm2pgsql=self.config.OSM2PGSQL_BINARY,
        return dict(osm2pgsql=self.config.OSM2PGSQL_BINARY or self.config.lib_dir.osm2pgsql,
                    osm2pgsql_cache=self.osm2pgsql_cache or default_cache,
                    osm2pgsql_style=self.config.get_import_style_file(),
                    osm2pgsql_style_path=self.config.lib_dir.lua,

@@ -58,8 +58,6 @@ class ImportSpecialPhrases:
                           help='Import special phrases from a CSV file')
        group.add_argument('--no-replace', action='store_true',
                           help='Keep the old phrases and only add the new ones')
        group.add_argument('--min', type=int, default=0,
                           help='Restrict special phrases by minimum occurrence')

    def run(self, args: NominatimArgs) -> int:

@@ -84,9 +82,7 @@ class ImportSpecialPhrases:

        tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
        should_replace = not args.no_replace
        min = args.min

        with connect(args.config.get_libpq_dsn()) as db_connection:
            SPImporter(
                args.config, db_connection, loader
            ).import_phrases(tokenizer, should_replace, min)
            ).import_phrases(tokenizer, should_replace)

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Nominatim configuration accessor.
@@ -73,6 +73,7 @@ class Configuration:
            self.project_dir = None

        class _LibDirs:
            osm2pgsql: Path
            sql = paths.SQLLIB_DIR
            lua = paths.LUALIB_DIR
            data = paths.DATA_DIR

@@ -102,10 +102,10 @@ def server_version_tuple(conn: Connection) -> Tuple[int, int]:
        Converts correctly for pre-10 and post-10 PostgreSQL versions.
    """
    version = conn.info.server_version
    major, minor = divmod(version, 10000)
    if major < 10:
        minor //= 100
    return major, minor
    if version < 100000:
        return (int(version / 10000), int((version % 10000) / 100))

    return (int(version / 10000), version % 10000)
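
For orientation, PostgreSQL reports its server version as a single integer (e.g. 90621 for 9.6.21, 120004 for 12.4); both branches above decode that encoding:

    # pre-10 scheme: major, minor and patch packed together
    assert (int(90621 / 10000), int((90621 % 10000) / 100)) == (9, 6)
    # post-10 scheme: only major and minor are packed
    assert (int(120004 / 10000), 120004 % 10000) == (12, 4)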
|
||||
|
||||
|
||||
def postgis_version_tuple(conn: Connection) -> Tuple[int, int]:
|
||||
|
||||
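PostgreSQL packs its server version into a single integer (libpq's server_version), and both code paths above decode it for the pre-10 and post-10 schemes. A quick self-contained check of that arithmetic (version numbers are illustrative):

    def decode_pg_version(version: int) -> tuple:
        # Same arithmetic as server_version_tuple() above.
        major, minor = divmod(version, 10000)
        if major < 10:
            minor //= 100
        return major, minor

    assert decode_pg_version(90624) == (9, 6)    # 9.6.x, pre-10 encoding
    assert decode_pg_version(120005) == (12, 5)  # 12.5
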
@@ -50,8 +50,8 @@ class ProgressLogger:
            places_per_sec = self.done_places / done_time
            eta = (self.total_places - self.done_places) / places_per_sec

            LOG.warning("Done %d in %.0f @ %.3f per second - %s ETA (seconds): %.2f",
                        self.done_places, done_time,
            LOG.warning("Done %d in %d @ %.3f per second - %s ETA (seconds): %.2f",
                        self.done_places, int(done_time),
                        places_per_sec, self.name, eta)

            self.next_info += int(places_per_sec) * self.log_interval
@@ -68,8 +68,8 @@ class ProgressLogger:
        diff_seconds = (rank_end_time - self.rank_start_time).total_seconds()
        places_per_sec = self.done_places / diff_seconds

        LOG.warning("Done %d/%d in %.0f @ %.3f per second - FINISHED %s\n",
                    self.done_places, self.total_places, diff_seconds,
        LOG.warning("Done %d/%d in %d @ %.3f per second - FINISHED %s\n",
                    self.done_places, self.total_places, int(diff_seconds),
                    places_per_sec, self.name)

        return self.done_places

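The ETA printed by both log lines is plain rate arithmetic; with made-up numbers:

    done_places, total_places, done_time = 50_000, 200_000, 100.0
    places_per_sec = done_places / done_time              # 500.0
    eta = (total_places - done_places) / places_per_sec   # 300.0 seconds
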
@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Abstract class definitions for tokenizers. These base classes are here
@@ -10,6 +10,7 @@ mainly for documentation purposes.
"""
from abc import ABC, abstractmethod
from typing import List, Tuple, Dict, Any, Optional, Iterable
from pathlib import Path

from ..typing import Protocol
from ..config import Configuration
@@ -37,7 +38,7 @@ class AbstractAnalyzer(ABC):
    """

    @abstractmethod
    def get_word_token_info(self, words: List[str]) -> List[Tuple[str, str, Optional[int]]]:
    def get_word_token_info(self, words: List[str]) -> List[Tuple[str, str, int]]:
        """ Return token information for the given list of words.

            The function is used for testing and debugging only
@@ -231,6 +232,6 @@ class TokenizerModule(Protocol):
        own tokenizer.
    """

    def create(self, dsn: str) -> AbstractTokenizer:
    def create(self, dsn: str, data_dir: Path) -> AbstractTokenizer:
        """ Factory for new tokenizers.
        """

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Functions for creating a tokenizer or initialising the right one for an
@@ -52,10 +52,19 @@ def create_tokenizer(config: Configuration, init_db: bool = True,
    if module_name is None:
        module_name = config.TOKENIZER

    # Create the directory for the tokenizer data
    assert config.project_dir is not None
    basedir = config.project_dir / 'tokenizer'
    if not basedir.exists():
        basedir.mkdir()
    elif not basedir.is_dir():
        LOG.fatal("Tokenizer directory '%s' cannot be created.", basedir)
        raise UsageError("Tokenizer setup failed.")

    # Import and initialize the tokenizer.
    tokenizer_module = _import_tokenizer(module_name)

    tokenizer = tokenizer_module.create(config.get_libpq_dsn())
    tokenizer = tokenizer_module.create(config.get_libpq_dsn(), basedir)
    tokenizer.init_new_db(config, init_db=init_db)

    with connect(config.get_libpq_dsn()) as conn:
@@ -70,6 +79,12 @@ def get_tokenizer_for_db(config: Configuration) -> AbstractTokenizer:
        The function looks up the appropriate tokenizer in the database
        and initialises it.
    """
    assert config.project_dir is not None
    basedir = config.project_dir / 'tokenizer'
    if not basedir.is_dir():
        # Directory will be repopulated by tokenizer below.
        basedir.mkdir()

    with connect(config.get_libpq_dsn()) as conn:
        name = properties.get_property(conn, 'tokenizer')

@@ -79,7 +94,7 @@ def get_tokenizer_for_db(config: Configuration) -> AbstractTokenizer:

    tokenizer_module = _import_tokenizer(name)

    tokenizer = tokenizer_module.create(config.get_libpq_dsn())
    tokenizer = tokenizer_module.create(config.get_libpq_dsn(), basedir)
    tokenizer.init_from_project(config)

    return tokenizer

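Both hunks above change the factory contract so that the tokenizer receives its data directory explicitly. A hedged sketch of a caller on the two-argument side (the module path and directory are assumptions for illustration):

    from pathlib import Path
    from nominatim_db.tokenizer import icu_tokenizer  # assumed import path

    # basedir is derived from the project directory, as in create_tokenizer().
    basedir = Path('/srv/nominatim-project') / 'tokenizer'
    tokenizer = icu_tokenizer.create('dbname=nominatim', basedir)
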
@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tokenizer implementing normalisation as used before Nominatim 4 but using
@@ -12,6 +12,7 @@ from typing import Optional, Sequence, List, Tuple, Mapping, Any, cast, \
                   Dict, Set, Iterable
import itertools
import logging
from pathlib import Path

from psycopg.types.json import Jsonb
from psycopg import sql as pysql
@@ -37,10 +38,10 @@ WORD_TYPES = (('country_names', 'C'),
              ('housenumbers', 'H'))


def create(dsn: str) -> 'ICUTokenizer':
def create(dsn: str, data_dir: Path) -> 'ICUTokenizer':
    """ Create a new instance of the tokenizer provided by this module.
    """
    return ICUTokenizer(dsn)
    return ICUTokenizer(dsn, data_dir)


class ICUTokenizer(AbstractTokenizer):
@@ -49,8 +50,9 @@ class ICUTokenizer(AbstractTokenizer):
        normalization routines in Nominatim 3.
    """

    def __init__(self, dsn: str) -> None:
    def __init__(self, dsn: str, data_dir: Path) -> None:
        self.dsn = dsn
        self.data_dir = data_dir
        self.loader: Optional[ICURuleLoader] = None

    def init_new_db(self, config: Configuration, init_db: bool = True) -> None:
@@ -119,10 +121,10 @@ class ICUTokenizer(AbstractTokenizer):
                              SELECT unnest(nameaddress_vector) as id, count(*)
                              FROM search_name GROUP BY id)
                           SELECT coalesce(a.id, w.id) as id,
                                  (CASE WHEN w.count is null or w.count <= 1 THEN '{}'::JSONB
                                  (CASE WHEN w.count is null THEN '{}'::JSONB
                                        ELSE jsonb_build_object('count', w.count) END
                                   ||
                                   CASE WHEN a.count is null or a.count <= 1 THEN '{}'::JSONB
                                   CASE WHEN a.count is null THEN '{}'::JSONB
                                        ELSE jsonb_build_object('addr_count', a.count) END) as info
                           FROM word_freq w FULL JOIN addr_freq a ON a.id = w.id;
                           """)
@@ -132,10 +134,9 @@ class ICUTokenizer(AbstractTokenizer):
                drop_tables(conn, 'tmp_word')
                cur.execute("""CREATE TABLE tmp_word AS
                                SELECT word_id, word_token, type, word,
                                       coalesce(word.info, '{}'::jsonb)
                                       - 'count' - 'addr_count' ||
                                       coalesce(wf.info, '{}'::jsonb)
                                       as info
                                       (CASE WHEN wf.info is null THEN word.info
                                        ELSE coalesce(word.info, '{}'::jsonb) || wf.info
                                        END) as info
                                FROM word LEFT JOIN word_frequencies wf
                                     ON word.word_id = wf.id
                            """)

@@ -338,7 +339,7 @@ class ICUNameAnalyzer(AbstractAnalyzer):
        """
        return cast(str, self.token_analysis.normalizer.transliterate(name)).strip()

    def get_word_token_info(self, words: Sequence[str]) -> List[Tuple[str, str, Optional[int]]]:
    def get_word_token_info(self, words: Sequence[str]) -> List[Tuple[str, str, int]]:
        """ Return token information for the given list of words.
            If a word starts with # it is assumed to be a full name,
            otherwise a partial name.
@@ -362,11 +363,11 @@ class ICUNameAnalyzer(AbstractAnalyzer):
            cur.execute("""SELECT word_token, word_id
                           FROM word WHERE word_token = ANY(%s) and type = 'W'
                        """, (list(full_tokens.values()),))
            full_ids = {r[0]: cast(int, r[1]) for r in cur}
            full_ids = {r[0]: r[1] for r in cur}
            cur.execute("""SELECT word_token, word_id
                           FROM word WHERE word_token = ANY(%s) and type = 'w'""",
                        (list(partial_tokens.values()),))
            part_ids = {r[0]: cast(int, r[1]) for r in cur}
            part_ids = {r[0]: r[1] for r in cur}

        return [(k, v, full_ids.get(v, None)) for k, v in full_tokens.items()] \
            + [(k, v, part_ids.get(v, None)) for k, v in partial_tokens.items()]

@@ -380,15 +381,76 @@ class ICUNameAnalyzer(AbstractAnalyzer):
        return postcode.strip().upper()

    def update_postcodes_from_db(self) -> None:
        """ Postcode update.

            Removes all postcodes from the word table because they are not
            needed. Postcodes are recognised by pattern.
        """ Update postcode tokens in the word table from the location_postcode
            table.
        """
        assert self.conn is not None
        analyzer = self.token_analysis.analysis.get('@postcode')

        with self.conn.cursor() as cur:
            cur.execute("DELETE FROM word WHERE type = 'P'")
            # First get all postcode names currently in the word table.
            cur.execute("SELECT DISTINCT word FROM word WHERE type = 'P'")
            word_entries = set((entry[0] for entry in cur))

            # Then compute the required postcode names from the postcode table.
            needed_entries = set()
            cur.execute("SELECT country_code, postcode FROM location_postcode")
            for cc, postcode in cur:
                info = PlaceInfo({'country_code': cc,
                                  'class': 'place', 'type': 'postcode',
                                  'address': {'postcode': postcode}})
                address = self.sanitizer.process_names(info)[1]
                for place in address:
                    if place.kind == 'postcode':
                        if analyzer is None:
                            postcode_name = place.name.strip().upper()
                            variant_base = None
                        else:
                            postcode_name = analyzer.get_canonical_id(place)
                            variant_base = place.get_attr("variant")

                        if variant_base:
                            needed_entries.add(f'{postcode_name}@{variant_base}')
                        else:
                            needed_entries.add(postcode_name)
                        break

        # Now update the word table.
        self._delete_unused_postcode_words(word_entries - needed_entries)
        self._add_missing_postcode_words(needed_entries - word_entries)

    def _delete_unused_postcode_words(self, tokens: Iterable[str]) -> None:
        assert self.conn is not None
        if tokens:
            with self.conn.cursor() as cur:
                cur.execute("DELETE FROM word WHERE type = 'P' and word = any(%s)",
                            (list(tokens), ))

    def _add_missing_postcode_words(self, tokens: Iterable[str]) -> None:
        assert self.conn is not None
        if not tokens:
            return

        analyzer = self.token_analysis.analysis.get('@postcode')
        terms = []

        for postcode_name in tokens:
            if '@' in postcode_name:
                term, variant = postcode_name.split('@', 2)
                term = self._search_normalized(term)
                if analyzer is None:
                    variants = [term]
                else:
                    variants = analyzer.compute_variants(variant)
                    if term not in variants:
                        variants.append(term)
            else:
                variants = [self._search_normalized(postcode_name)]
            terms.append((postcode_name, variants))

        if terms:
            with self.conn.cursor() as cur:
                cur.executemany("""SELECT create_postcode_word(%s, %s)""", terms)

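The add/delete split in update_postcodes_from_db() is ordinary set arithmetic over postcode names; a toy illustration:

    existing = {'10117', '80331'}          # words currently of type 'P'
    needed = {'80331', '20095'}            # derived from location_postcode
    assert needed - existing == {'20095'}  # passed to _add_missing_postcode_words
    assert existing - needed == {'10117'}  # passed to _delete_unused_postcode_words
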
    def update_special_phrases(self, phrases: Iterable[Tuple[str, str, str, str]],
                               should_replace: bool) -> None:
@@ -583,14 +645,10 @@ class ICUNameAnalyzer(AbstractAnalyzer):
            if word_id:
                result = self._cache.housenumbers.get(word_id, result)
                if result[0] is None:
                    varout = analyzer.compute_variants(word_id)
                    if isinstance(varout, tuple):
                        variants = varout[0]
                    else:
                        variants = varout
                    variants = analyzer.compute_variants(word_id)
                    if variants:
                        hid = execute_scalar(self.conn, "SELECT create_analyzed_hnr_id(%s, %s)",
                                             (word_id, variants))
                                             (word_id, list(variants)))
                        result = hid, variants[0]
                        self._cache.housenumbers[word_id] = result

@@ -635,17 +693,13 @@ class ICUNameAnalyzer(AbstractAnalyzer):

            full, part = self._cache.names.get(token_id, (None, None))
            if full is None:
                varset = analyzer.compute_variants(word_id)
                if isinstance(varset, tuple):
                    variants, lookups = varset
                else:
                    variants, lookups = varset, None
                variants = analyzer.compute_variants(word_id)
                if not variants:
                    continue

                with self.conn.cursor() as cur:
                    cur.execute("SELECT * FROM getorcreate_full_word(%s, %s, %s)",
                                (token_id, variants, lookups))
                    cur.execute("SELECT * FROM getorcreate_full_word(%s, %s)",
                                (token_id, variants))
                    full, part = cast(Tuple[int, List[int]], cur.fetchone())

                self._cache.names[token_id] = (full, part)
@@ -664,9 +718,32 @@ class ICUNameAnalyzer(AbstractAnalyzer):
        analyzer = self.token_analysis.analysis.get('@postcode')

        if analyzer is None:
            return item.name.strip().upper()
            postcode_name = item.name.strip().upper()
            variant_base = None
        else:
            return analyzer.get_canonical_id(item)
            postcode_name = analyzer.get_canonical_id(item)
            variant_base = item.get_attr("variant")

        if variant_base:
            postcode = f'{postcode_name}@{variant_base}'
        else:
            postcode = postcode_name

        if postcode not in self._cache.postcodes:
            term = self._search_normalized(postcode_name)
            if not term:
                return None

            variants = {term}
            if analyzer is not None and variant_base:
                variants.update(analyzer.compute_variants(variant_base))

            with self.conn.cursor() as cur:
                cur.execute("SELECT create_postcode_word(%s, %s)",
                            (postcode, list(variants)))
            self._cache.postcodes.add(postcode)

        return postcode_name


class _TokenInfo:
@@ -759,4 +836,5 @@ class _TokenCache:
        self.names: Dict[str, Tuple[int, List[int]]] = {}
        self.partials: Dict[str, int] = {}
        self.fulls: Dict[str, List[int]] = {}
        self.postcodes: Set[str] = set()
        self.housenumbers: Dict[str, Tuple[Optional[int], Optional[str]]] = {}

@@ -7,7 +7,7 @@
"""
Common data types and protocols for analysers.
"""
from typing import Mapping, List, Any, Union, Tuple
from typing import Mapping, List, Any

from ...typing import Protocol
from ...data.place_name import PlaceName
@@ -33,7 +33,7 @@ class Analyzer(Protocol):
            for example because the character set in use does not match.
        """

    def compute_variants(self, canonical_id: str) -> Union[List[str], Tuple[List[str], List[str]]]:
    def compute_variants(self, canonical_id: str) -> List[str]:
        """ Compute the transliterated spelling variants for the given
            canonical ID.

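The protocol change above means callers cannot rely on a single return shape for compute_variants(): one side returns a flat list, the other may return a (variants, lookups) pair. A defensive call site, mirroring the isinstance checks in the ICU analyzer hunks earlier, might look like this sketch ('analyzer' stands for any object implementing the protocol):

    varout = analyzer.compute_variants('sankt pauli')
    variants = varout[0] if isinstance(varout, tuple) else varout
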
@@ -2,19 +2,20 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Generic processor for names that creates abbreviation variants.
"""
from typing import Mapping, Dict, Any, Iterable, Optional, List, cast, Tuple
from typing import Mapping, Dict, Any, Iterable, Iterator, Optional, List, cast
import itertools

import datrie

from ...errors import UsageError
from ...data.place_name import PlaceName
from .config_variants import get_variant_config
from .generic_mutation import MutationVariantGenerator
from .simple_trie import SimpleTrie

# Configuration section

@@ -24,7 +25,8 @@ def configure(rules: Mapping[str, Any], normalizer: Any, _: Any) -> Dict[str, An
    """
    config: Dict[str, Any] = {}

    config['replacements'], _ = get_variant_config(rules.get('variants'), normalizer)
    config['replacements'], config['chars'] = get_variant_config(rules.get('variants'),
                                                                 normalizer)
    config['variant_only'] = rules.get('mode', '') == 'variant-only'

    # parse mutation rules
@@ -66,8 +68,12 @@ class GenericTokenAnalysis:
        self.variant_only = config['variant_only']

        # Set up datrie
        self.replacements: Optional[SimpleTrie[List[str]]] = \
            SimpleTrie(config['replacements']) if config['replacements'] else None
        if config['replacements']:
            self.replacements = datrie.Trie(config['chars'])
            for src, repllist in config['replacements']:
                self.replacements[src] = repllist
        else:
            self.replacements = None

        # set up mutation rules
        self.mutations = [MutationVariantGenerator(*cfg) for cfg in config['mutations']]
@@ -78,7 +84,7 @@ class GenericTokenAnalysis:
        """
        return cast(str, self.norm.transliterate(name.name)).strip()

    def compute_variants(self, norm_name: str) -> Tuple[List[str], List[str]]:
    def compute_variants(self, norm_name: str) -> List[str]:
        """ Compute the spelling variants for the given normalized name
            and transliterate the result.
        """
@@ -87,20 +93,18 @@ class GenericTokenAnalysis:
        for mutation in self.mutations:
            variants = mutation.generate(variants)

        varset = set(map(str.strip, variants))
        return [name for name in self._transliterate_unique_list(norm_name, variants) if name]

    def _transliterate_unique_list(self, norm_name: str,
                                   iterable: Iterable[str]) -> Iterator[Optional[str]]:
        seen = set()
        if self.variant_only:
            varset.discard(norm_name)
            seen.add(norm_name)

        trans = []
        norm = []

        for var in varset:
            t = self.to_ascii.transliterate(var).strip()
            if t:
                trans.append(t)
                norm.append(var)

        return trans, norm
        for variant in map(str.strip, iterable):
            if variant not in seen:
                seen.add(variant)
                yield self.to_ascii.transliterate(variant).strip()

    def _generate_word_variants(self, norm_name: str) -> Iterable[str]:
        baseform = '^ ' + norm_name + ' ^'
@@ -112,10 +116,10 @@ class GenericTokenAnalysis:
        pos = 0
        force_space = False
        while pos < baselen:
            frm = pos
            repl, pos = self.replacements.longest_prefix(baseform, pos)
            if repl is not None:
                done = baseform[startpos:frm]
            full, repl = self.replacements.longest_prefix_item(baseform[pos:],
                                                               (None, None))
            if full is not None:
                done = baseform[startpos:pos]
                partials = [v + done + r
                            for v, r in itertools.product(partials, repl)
                            if not force_space or r.startswith(' ')]
@@ -124,10 +128,11 @@ class GenericTokenAnalysis:
                    # to be helpful. Only use the original term.
                    startpos = 0
                    break
                if baseform[pos - 1] == ' ':
                    pos -= 1
                startpos = pos + len(full)
                if full[-1] == ' ':
                    startpos -= 1
                    force_space = True
                startpos = pos
                pos = startpos
            else:
                pos += 1
                force_space = False

@@ -1,84 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Simple dict-based implementation of a trie structure.
"""
from typing import TypeVar, Generic, Tuple, Optional, List, Dict
from collections import defaultdict

T = TypeVar('T')


class SimpleTrie(Generic[T]):
    """ A simple read-only trie structure.
        This structure supports exactly one lookup operation,
        which is longest-prefix lookup.
    """

    def __init__(self, data: Optional[List[Tuple[str, T]]] = None) -> None:
        self._tree: Dict[str, 'SimpleTrie[T]'] = defaultdict(SimpleTrie[T])
        self._value: Optional[T] = None
        self._prefix = ''

        if data:
            for key, value in data:
                self._add(key, 0, value)

            self._make_compact()

    def _add(self, word: str, pos: int, value: T) -> None:
        """ (Internal) Add a sub-word to the trie.
            The word is added from index 'pos'. If the sub-word to add
            is empty, then the trie saves the given value.
        """
        if pos < len(word):
            self._tree[word[pos]]._add(word, pos + 1, value)
        else:
            self._value = value

    def _make_compact(self) -> None:
        """ (Internal) Compress tree where there is exactly one subtree
            and no value.

            Compression works recursively starting at the leaf.
        """
        for t in self._tree.values():
            t._make_compact()

        if len(self._tree) == 1 and self._value is None:
            assert not self._prefix
            for k, v in self._tree.items():
                self._prefix = k + v._prefix
                self._tree = v._tree
                self._value = v._value

    def longest_prefix(self, word: str, start: int = 0) -> Tuple[Optional[T], int]:
        """ Return the longest prefix match for the given word starting at
            the position 'start'.

            The function returns a tuple with the value for the longest match and
            the position of the word after the match. If no match was found at
            all, the function returns (None, start).
        """
        cur = self
        pos = start
        result: Tuple[Optional[T], int] = None, start

        while True:
            if cur._prefix:
                if not word.startswith(cur._prefix, pos):
                    return result
                pos += len(cur._prefix)

            if cur._value:
                result = cur._value, pos

            if pos >= len(word) or word[pos] not in cur._tree:
                return result

            cur = cur._tree[word[pos]]
            pos += 1

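The removed simple_trie.py above is self-contained, so its behaviour is easy to check in isolation; a small usage sketch (data made up):

    trie = SimpleTrie([('st ', ['street ', 'saint ']), ('rd', ['road'])])
    value, pos = trie.longest_prefix('st mary', 0)
    assert value == ['street ', 'saint '] and pos == 3    # matched 'st '
    assert trie.longest_prefix('avenue', 0) == (None, 0)  # no match at all
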
@@ -127,7 +127,7 @@ def import_osm_data(osm_files: Union[Path, Sequence[Path]],
                fsize += os.stat(str(fname)).st_size
        else:
            fsize = os.stat(str(osm_files)).st_size
        options['osm2pgsql_cache'] = int(min((mem.available + getattr(mem, 'cached', 0)) * 0.75,
        options['osm2pgsql_cache'] = int(min((mem.available + mem.cached) * 0.75,
                                             fsize * 2) / 1024 / 1024) + 1

    run_osm2pgsql(options)

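The cache default above caps osm2pgsql's memory at 75% of free RAM or twice the input size, whichever is smaller, expressed in MiB. With illustrative numbers:

    avail, cached = 8 * 1024**3, 0          # 8 GiB free, nothing in page cache
    fsize = 2 * 1024**3                     # 2 GiB OSM extract
    cache_mb = int(min((avail + cached) * 0.75, fsize * 2) / 1024 / 1024) + 1
    assert cache_mb == 4097                 # bounded by 2 * fsize here
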
@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Helper functions for executing external programs.
@@ -37,17 +37,21 @@ def run_osm2pgsql(options: Mapping[str, Any]) -> None:
           '--style', str(options['osm2pgsql_style'])
           ]

    env['LUA_PATH'] = ';'.join((str(options['osm2pgsql_style_path'] / '?.lua'),
                                os.environ.get('LUA_PATH', ';')))
    env['THEMEPARK_PATH'] = str(options['osm2pgsql_style_path'] / 'themes')
    if 'THEMEPARK_PATH' in os.environ:
        env['THEMEPARK_PATH'] += ':' + os.environ['THEMEPARK_PATH']
    cmd.extend(('--output', 'flex'))
    if str(options['osm2pgsql_style']).endswith('.lua'):
        env['LUA_PATH'] = ';'.join((str(options['osm2pgsql_style_path'] / '?.lua'),
                                    os.environ.get('LUA_PATH', ';')))
        env['THEMEPARK_PATH'] = str(options['osm2pgsql_style_path'] / 'themes')
        if 'THEMEPARK_PATH' in os.environ:
            env['THEMEPARK_PATH'] += ':' + os.environ['THEMEPARK_PATH']
        cmd.extend(('--output', 'flex'))

    for flavour in ('data', 'index'):
        if options['tablespaces'][f"main_{flavour}"]:
            env[f"NOMINATIM_TABLESPACE_PLACE_{flavour.upper()}"] = \
                options['tablespaces'][f"main_{flavour}"]
        for flavour in ('data', 'index'):
            if options['tablespaces'][f"main_{flavour}"]:
                env[f"NOMINATIM_TABLESPACE_PLACE_{flavour.upper()}"] = \
                    options['tablespaces'][f"main_{flavour}"]
    else:
        cmd.extend(('--output', 'gazetteer', '--hstore', '--latlon'))
    cmd.extend(_mk_tablespace_options('main', options))

    if options['flatnode_file']:
        cmd.extend(('--flat-nodes', options['flatnode_file']))
@@ -81,7 +85,7 @@ def _mk_tablespace_options(ttype: str, options: Mapping[str, Any]) -> List[str]:


def _find_osm2pgsql_cmd(cmdline: Optional[str]) -> str:
    if cmdline:
    if cmdline is not None:
        return cmdline

    in_path = shutil.which('osm2pgsql')

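The environment juggling above prepends Nominatim's Lua library directory to any user-supplied LUA_PATH; a sketch of the resulting value (the style path is illustrative, not taken from this diff):

    import os
    from pathlib import Path

    style_path = Path('/usr/share/nominatim/lualib')  # assumed location
    lua_path = ';'.join((str(style_path / '?.lua'), os.environ.get('LUA_PATH', ';')))
    # e.g. '/usr/share/nominatim/lualib/?.lua;;' when LUA_PATH is unset
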
@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Functions for importing, updating and otherwise maintaining the table
@@ -64,15 +64,11 @@ class _PostcodeCollector:
            if normalized:
                self.collected[normalized] += (x, y)

    def commit(self, conn: Connection, analyzer: AbstractAnalyzer,
               project_dir: Optional[Path]) -> None:
        """ Update postcodes for the country from the postcodes selected so far.

            When 'project_dir' is set, then any postcode files found in this
            directory are taken into account as well.
    def commit(self, conn: Connection, analyzer: AbstractAnalyzer, project_dir: Path) -> None:
        """ Update postcodes for the country from the postcodes selected so far
            as well as any externally supplied postcodes.
        """
        if project_dir is not None:
            self._update_from_external(analyzer, project_dir)
        self._update_from_external(analyzer, project_dir)
        to_add, to_delete, to_update = self._compute_changes(conn)

        LOG.info("Processing country '%s' (%s added, %s deleted, %s updated).",
@@ -174,7 +170,7 @@ class _PostcodeCollector:
        return None


def update_postcodes(dsn: str, project_dir: Optional[Path], tokenizer: AbstractTokenizer) -> None:
def update_postcodes(dsn: str, project_dir: Path, tokenizer: AbstractTokenizer) -> None:
    """ Update the table of artificial postcodes.

        Computes artificial postcode centroids from the placex table,

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Functions for bringing auxiliary data in the database up-to-date.
@@ -212,11 +212,6 @@ def recompute_importance(conn: Connection) -> None:
                   WHERE s.place_id = d.linked_place_id and d.wikipedia is not null
                         and (s.wikipedia is null or s.importance < d.importance);
                   """)
        cur.execute("""
                    UPDATE search_name s SET importance = p.importance
                    FROM placex p
                    WHERE s.place_id = p.place_id AND s.importance != p.importance
                    """)

        cur.execute('ALTER TABLE placex ENABLE TRIGGER ALL')
    conn.commit()

@@ -16,6 +16,7 @@
from typing import Iterable, Tuple, Mapping, Sequence, Optional, Set
import logging
import re

from psycopg.sql import Identifier, SQL

from ...typing import Protocol
@@ -64,32 +65,7 @@ class SPImporter():
        # special phrases class/type on the wiki.
        self.table_phrases_to_delete: Set[str] = set()

    def get_classtype_pairs(self, min: int = 0) -> Set[Tuple[str, str]]:
        """
        Returns the set of allowed special phrases from the database,
        restricting it to the combinations of class and type
        which occur at least the given number of times.

        The default of 0 allows everything in the database.
        """
        db_combinations = set()

        query = f"""
            SELECT class AS CLS, type AS typ
            FROM placex
            GROUP BY class, type
            HAVING COUNT(*) >= {min}
        """

        with self.db_connection.cursor() as db_cursor:
            db_cursor.execute(SQL(query))
            for row in db_cursor:
                db_combinations.add((row[0], row[1]))

        return db_combinations

    def import_phrases(self, tokenizer: AbstractTokenizer, should_replace: bool,
                       min: int = 0) -> None:
    def import_phrases(self, tokenizer: AbstractTokenizer, should_replace: bool) -> None:
        """
        Iterate through all SpecialPhrases extracted from the
        loader and import them into the database.
@@ -109,10 +85,9 @@ class SPImporter():
            if result:
                class_type_pairs.add(result)

        self._create_classtype_table_and_indexes(class_type_pairs, min)
        self._create_classtype_table_and_indexes(class_type_pairs)
        if should_replace:
            self._remove_non_existent_tables_from_db()

        self.db_connection.commit()

        with tokenizer.name_analyzer() as analyzer:
@@ -188,8 +163,7 @@ class SPImporter():
        return (phrase.p_class, phrase.p_type)

    def _create_classtype_table_and_indexes(self,
                                            class_type_pairs: Iterable[Tuple[str, str]],
                                            min: int = 0) -> None:
                                            class_type_pairs: Iterable[Tuple[str, str]]) -> None:
        """
        Create table place_classtype for each given pair.
        Also create indexes on place_id and centroid.
@@ -203,19 +177,10 @@ class SPImporter():
            with self.db_connection.cursor() as db_cursor:
                db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)")

        if min:
            allowed_special_phrases = self.get_classtype_pairs(min)

        for pair in class_type_pairs:
            phrase_class = pair[0]
            phrase_type = pair[1]

            # Will only filter if min is not 0
            if min and (phrase_class, phrase_type) not in allowed_special_phrases:
                LOG.warning("Skipping phrase %s=%s: not in allowed special phrases",
                            phrase_class, phrase_type)
                continue

            table_name = _classtype_table(phrase_class, phrase_type)

            if table_name in self.table_phrases_to_delete:

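The --min filter removed in this diff is a straightforward threshold over class/type counts from placex; a self-contained sketch of the same logic (counts are made up):

    placex_counts = {('amenity', 'restaurant'): 2500, ('amenity', 'prison'): 40}
    min_count = 100
    allowed = {pair for pair, n in placex_counts.items() if n >= min_count}
    assert allowed == {('amenity', 'restaurant')}  # 'prison' would be skipped
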
@@ -108,7 +108,8 @@ async def add_tiger_data(data_dir: str, config: Configuration, threads: int,

    async with QueryPool(dsn, place_threads, autocommit=True) as pool:
        with tokenizer.name_analyzer() as analyzer:
            for lineno, row in enumerate(tar, 1):
            lines = 0
            for row in tar:
                try:
                    address = dict(street=row['street'], postcode=row['postcode'])
                    args = ('SRID=4326;' + row['geometry'],
@@ -123,8 +124,10 @@ async def add_tiger_data(data_dir: str, config: Configuration, threads: int,
                                          %s::INT, %s::TEXT, %s::JSONB, %s::TEXT)""",
                                       args)

                if not lineno % 1000:
                lines += 1
                if lines == 1000:
                    print('.', end='', flush=True)
                    lines = 0

    print('', flush=True)

@@ -30,8 +30,8 @@ class PointsCentroid:
        if self.count == 0:
            raise ValueError("No points available for centroid.")

        return (self.sum_x / self.count / 10_000_000,
                self.sum_y / self.count / 10_000_000)
        return (float(self.sum_x/self.count)/10000000,
                float(self.sum_y/self.count)/10000000)

    def __len__(self) -> int:
        return self.count
@@ -40,8 +40,8 @@ class PointsCentroid:
        if isinstance(other, Collection) and len(other) == 2:
            if all(isinstance(p, (float, int)) for p in other):
                x, y = other
                self.sum_x += int(x * 10_000_000)
                self.sum_y += int(y * 10_000_000)
                self.sum_x += int(x * 10000000)
                self.sum_y += int(y * 10000000)
                self.count += 1
        return self

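Both versions of PointsCentroid accumulate coordinates as integers scaled by 1e7 and divide the scale out when the centroid is read; in miniature:

    pts = [(8.0, 47.0), (9.0, 48.0)]
    sum_x = sum(int(x * 10_000_000) for x, _ in pts)
    sum_y = sum(int(y * 10_000_000) for _, y in pts)
    assert (sum_x / len(pts) / 10_000_000,
            sum_y / len(pts) / 10_000_000) == (8.5, 47.5)
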
@@ -55,7 +55,7 @@ def parse_version(version: str) -> NominatimVersion:
    return NominatimVersion(*[int(x) for x in parts[:2] + parts[2].split('-')])


NOMINATIM_VERSION = parse_version('5.1.0-0')
NOMINATIM_VERSION = parse_version('5.0.0-0')

POSTGRESQL_REQUIRED_VERSION = (12, 0)
POSTGIS_REQUIRED_VERSION = (3, 0)

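parse_version() splits a version string such as '5.0.0-0' into four integers; assuming NominatimVersion is the usual four-field named tuple (major, minor, patch_level, db_patch_level), the decomposition works like this:

    parts = '5.0.0-0'.split('.')            # ['5', '0', '0-0']
    fields = [int(x) for x in parts[:2] + parts[2].split('-')]
    assert fields == [5, 0, 0, 0]
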
10  test/Makefile  Normal file
@@ -0,0 +1,10 @@
all: bdd python

bdd:
	cd bdd && behave -DREMOVE_TEMPLATE=1

python:
	pytest python


.PHONY: bdd python

3  test/bdd/.behaverc  Normal file
@@ -0,0 +1,3 @@
[behave]
show_skipped=False
default_tags=~@Fail
63  test/bdd/api/details/language.feature  Normal file
@@ -0,0 +1,63 @@
@SQLITE
@APIDB
Feature: Localization of search results

    Scenario: default language
        When sending details query for R1155955
        Then results contain
            | ID | localname     |
            | 0  | Liechtenstein |

    Scenario: accept-language first
        When sending details query for R1155955
            | accept-language |
            | zh,de           |
        Then results contain
            | ID | localname |
            | 0  | 列支敦士登 |

    Scenario: accept-language missing
        When sending details query for R1155955
            | accept-language |
            | xx,fr,en,de     |
        Then results contain
            | ID | localname     |
            | 0  | Liechtenstein |

    Scenario: http accept language header first
        Given the HTTP header
            | accept-language               |
            | fo;q=0.8,en-ca;q=0.5,en;q=0.3 |
        When sending details query for R1155955
        Then results contain
            | ID | localname   |
            | 0  | Liktinstein |

    Scenario: http accept language header and accept-language
        Given the HTTP header
            | accept-language                     |
            | fr-ca,fr;q=0.8,en-ca;q=0.5,en;q=0.3 |
        When sending details query for R1155955
            | accept-language |
            | fo,en           |
        Then results contain
            | ID | localname   |
            | 0  | Liktinstein |

    Scenario: http accept language header fallback
        Given the HTTP header
            | accept-language    |
            | fo-ca,en-ca;q=0.5  |
        When sending details query for R1155955
        Then results contain
            | ID | localname   |
            | 0  | Liktinstein |

    Scenario: http accept language header fallback (upper case)
        Given the HTTP header
            | accept-language         |
            | fo-FR;q=0.8,en-ca;q=0.5 |
        When sending details query for R1155955
        Then results contain
            | ID | localname   |
            | 0  | Liktinstein |
Some files were not shown because too many files have changed in this diff.