mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-14 18:37:58 +00:00
Compare commits
326 Commits
v3.7.3
...
helm-chart
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b0aaa25f0d | ||
|
|
c3ddc7579a | ||
|
|
fdff579188 | ||
|
|
d48793c22c | ||
|
|
001b2aa9f9 | ||
|
|
1db098c05d | ||
|
|
324b1b5575 | ||
|
|
e42878eeda | ||
|
|
eb6814d74e | ||
|
|
6ad35aca4a | ||
|
|
70f154be8b | ||
|
|
4342b28882 | ||
|
|
5394b1fa1b | ||
|
|
5ab0a63fd6 | ||
|
|
1618aba5f2 | ||
|
|
8377528952 | ||
|
|
34dcf02dee | ||
|
|
5d7d7f15d9 | ||
|
|
0c023fb4d2 | ||
|
|
1bd068d42d | ||
|
|
e42349c963 | ||
|
|
878835e4bd | ||
|
|
8096a1d67f | ||
|
|
e16c5d5f70 | ||
|
|
2c8242c8df | ||
|
|
e7d6f89aca | ||
|
|
379f5db516 | ||
|
|
ee32315378 | ||
|
|
cca912af4e | ||
|
|
86ea077092 | ||
|
|
5d6aabc457 | ||
|
|
b14ce959d9 | ||
|
|
a48ebd9b47 | ||
|
|
3cd85eaaf1 | ||
|
|
ec3f6c9c42 | ||
|
|
143ff14466 | ||
|
|
6070c3d1d5 | ||
|
|
bc8b2d4ae0 | ||
|
|
14f777da18 | ||
|
|
6f6681ce67 | ||
|
|
06602b4ec0 | ||
|
|
cf98cff2a1 | ||
|
|
b4fec57b6d | ||
|
|
f8b5a63de3 | ||
|
|
568316f07c | ||
|
|
daa597b300 | ||
|
|
47adb2a3fc | ||
|
|
fff0012249 | ||
|
|
d5a1883b62 | ||
|
|
a08ef43e40 | ||
|
|
bc5e15996a | ||
|
|
128ca800cd | ||
|
|
000d133af6 | ||
|
|
1e40d65aa9 | ||
|
|
bffbe68ec3 | ||
|
|
58b10074ad | ||
|
|
d933ead2b5 | ||
|
|
1cdc30c5e8 | ||
|
|
3661f7a321 | ||
|
|
27af9b102c | ||
|
|
500c61685b | ||
|
|
106d960f84 | ||
|
|
322fa19ceb | ||
|
|
5bea0b6086 | ||
|
|
a5970d7548 | ||
|
|
c216144dd1 | ||
|
|
42e08da7ca | ||
|
|
a2edbbf78a | ||
|
|
1e86dc1d93 | ||
|
|
54f295be52 | ||
|
|
8bc3c0a07c | ||
|
|
d75bc20174 | ||
|
|
fd8751658f | ||
|
|
4db5a1a0b8 | ||
|
|
4c52777ef0 | ||
|
|
d4c7bf20a2 | ||
|
|
affe1300d9 | ||
|
|
62d5984b1b | ||
|
|
c32551b4e0 | ||
|
|
e85f7e7aa9 | ||
|
|
7b0f6b7905 | ||
|
|
0894ce9dc3 | ||
|
|
4fd2e961b6 | ||
|
|
b9fbfeff67 | ||
|
|
5dd24b3ef0 | ||
|
|
62828fc5c1 | ||
|
|
a6aa6360e0 | ||
|
|
c4f6c06f44 | ||
|
|
0d80a9b897 | ||
|
|
f70930b1a0 | ||
|
|
9ff4f66f55 | ||
|
|
32ca631b74 | ||
|
|
2e81084f35 | ||
|
|
a0a7b05c9f | ||
|
|
2f6e4edcdb | ||
|
|
1bd9f455fc | ||
|
|
2e3c5d4c5b | ||
|
|
8413075249 | ||
|
|
6ba00e6aee | ||
|
|
de4fac33dc | ||
|
|
c9984669a7 | ||
|
|
63755c31ff | ||
|
|
161f5f5cee | ||
|
|
c7073a1fc0 | ||
|
|
e7b4fc70e7 | ||
|
|
457982e1d2 | ||
|
|
aa558e6080 | ||
|
|
fe11d3cbbd | ||
|
|
1ce223a83b | ||
|
|
3676310efe | ||
|
|
ddf866c4c7 | ||
|
|
9e07a197e9 | ||
|
|
1c175e3a67 | ||
|
|
47fb7cd3a8 | ||
|
|
e879814e43 | ||
|
|
a4733eed90 | ||
|
|
38fbc4fcbb | ||
|
|
c6fe91bfa5 | ||
|
|
7383f05e45 | ||
|
|
3aac51c81f | ||
|
|
f0a7850edf | ||
|
|
4336ca69c7 | ||
|
|
4bca5e838b | ||
|
|
bc981d0261 | ||
|
|
b1d33e6b49 | ||
|
|
38d442edf6 | ||
|
|
72625dc72a | ||
|
|
cc2f152d70 | ||
|
|
f74dc38766 | ||
|
|
7d9665d8d2 | ||
|
|
a0e85cc17c | ||
|
|
29b02f9e56 | ||
|
|
24c986c842 | ||
|
|
4f4d15c28a | ||
|
|
fa3e48c59f | ||
|
|
02f6afa51b | ||
|
|
10143e0ac7 | ||
|
|
8f3429939f | ||
|
|
00094c43d1 | ||
|
|
8bf15fa691 | ||
|
|
63dc503b8d | ||
|
|
430c316e45 | ||
|
|
01f5a9ff84 | ||
|
|
af52eed0dd | ||
|
|
f93d0fa957 | ||
|
|
c06a1d007a | ||
|
|
65bd749918 | ||
|
|
510eb53f53 | ||
|
|
507543a482 | ||
|
|
16bb007135 | ||
|
|
1ffb6bd5d0 | ||
|
|
799a4c9ab6 | ||
|
|
b2722650d4 | ||
|
|
54b06d7abc | ||
|
|
fef1bbb1a7 | ||
|
|
3206bf59df | ||
|
|
a33f2c0f5b | ||
|
|
8b8dfc46eb | ||
|
|
06aab389ed | ||
|
|
fb0ebb5bf0 | ||
|
|
925726222f | ||
|
|
550e7edb64 | ||
|
|
2992dea5c8 | ||
|
|
e76e4bd964 | ||
|
|
7d621389ee | ||
|
|
35efe3b41c | ||
|
|
e5ffc59cd5 | ||
|
|
d7f9d2bde9 | ||
|
|
5feece64c1 | ||
|
|
b9a09129fa | ||
|
|
96e6bbe3a1 | ||
|
|
fe39185894 | ||
|
|
fc860787dd | ||
|
|
63e35574d4 | ||
|
|
db2dbf15f7 | ||
|
|
f5977dac75 | ||
|
|
8f2746fe24 | ||
|
|
41b9bc9984 | ||
|
|
1ccd4360b4 | ||
|
|
bf864b2c54 | ||
|
|
4abaf71234 | ||
|
|
a4aba23a83 | ||
|
|
cae0cf3546 | ||
|
|
38f9e18afb | ||
|
|
9d83da830f | ||
|
|
00959fac57 | ||
|
|
40cb17d299 | ||
|
|
2ae293aeb6 | ||
|
|
d8ead78e03 | ||
|
|
b2c6eca2c8 | ||
|
|
872ab91421 | ||
|
|
a263e54b94 | ||
|
|
18c99a5c5f | ||
|
|
d55fc39275 | ||
|
|
ba8ed7967d | ||
|
|
f44af49df9 | ||
|
|
3c67bae868 | ||
|
|
3dade534fd | ||
|
|
8b1a509442 | ||
|
|
8bdb9aa607 | ||
|
|
36c624ec71 | ||
|
|
7fd871a74d | ||
|
|
ced8f0f4a2 | ||
|
|
388ebcbae2 | ||
|
|
20891abe1c | ||
|
|
6ce6f62b8e | ||
|
|
602728895e | ||
|
|
fc995ea6b9 | ||
|
|
be6262c6ce | ||
|
|
893490f94e | ||
|
|
044bb6afa5 | ||
|
|
3eb4d88057 | ||
|
|
23fd1d032a | ||
|
|
7cb7cf848d | ||
|
|
bef300305e | ||
|
|
dc700c25b6 | ||
|
|
0ba93e5ba9 | ||
|
|
0da481f207 | ||
|
|
d75a235c1f | ||
|
|
9e92759ac7 | ||
|
|
ffc2d82b0e | ||
|
|
d8ed1bfc60 | ||
|
|
d711f5a81e | ||
|
|
fa2bc60468 | ||
|
|
e1c5673ac3 | ||
|
|
1b1ed820c3 | ||
|
|
a73711f3cd | ||
|
|
9397bf54b8 | ||
|
|
fbbdd31399 | ||
|
|
b5540dc35c | ||
|
|
296a66558f | ||
|
|
af968d4903 | ||
|
|
5c7b9ef909 | ||
|
|
185d369404 | ||
|
|
51d20b19b6 | ||
|
|
46e8c6b112 | ||
|
|
c8fb25201a | ||
|
|
1fd483643b | ||
|
|
a21a0864f1 | ||
|
|
4457bf7528 | ||
|
|
5ed6f18d83 | ||
|
|
abb3d56b20 | ||
|
|
c5ecb9bae0 | ||
|
|
1b68152fb2 | ||
|
|
6812f397af | ||
|
|
68bd9c6091 | ||
|
|
754f9e3a20 | ||
|
|
b951b11336 | ||
|
|
89c90bedb9 | ||
|
|
b4fe7d7c7d | ||
|
|
5071710db7 | ||
|
|
9faaf3fc88 | ||
|
|
9c51c133f7 | ||
|
|
91d2fb6b1c | ||
|
|
280406c0d7 | ||
|
|
d5fc3b5e99 | ||
|
|
f8f8c7e534 | ||
|
|
3a642d50a4 | ||
|
|
9685c68e30 | ||
|
|
95e6ec091b | ||
|
|
34f5e4a199 | ||
|
|
788baafa26 | ||
|
|
4c31813398 | ||
|
|
b7bae80616 | ||
|
|
f7e4aa51d3 | ||
|
|
696c50459f | ||
|
|
50b6d7298c | ||
|
|
26a81654a8 | ||
|
|
6430371d7d | ||
|
|
18705b3f18 | ||
|
|
c6bd2bb7fb | ||
|
|
c4fd94bd1a | ||
|
|
b88b952f56 | ||
|
|
d68b02d36a | ||
|
|
b9b85eb208 | ||
|
|
1f898405a6 | ||
|
|
6f6910101e | ||
|
|
79d55357e8 | ||
|
|
4fa6c0ad53 | ||
|
|
8f63f9516b | ||
|
|
995ba2c7c2 | ||
|
|
830e3be1e6 | ||
|
|
29a314a092 | ||
|
|
abdba5fdc7 | ||
|
|
b2ae715699 | ||
|
|
a95c748363 | ||
|
|
ec859e41c6 | ||
|
|
7aeae9da81 | ||
|
|
2ca11ccc6b | ||
|
|
d74ae669e3 | ||
|
|
9fabc5572d | ||
|
|
da98a2102a | ||
|
|
fb3353b854 | ||
|
|
b7e5c54593 | ||
|
|
68beec5590 | ||
|
|
6ba06d1eb4 | ||
|
|
0f11e311c4 | ||
|
|
886a01c796 | ||
|
|
a632b9f86a | ||
|
|
76b1885595 | ||
|
|
c55b409cf6 | ||
|
|
c64193f839 | ||
|
|
28a2a795ba | ||
|
|
e90adfc7c3 | ||
|
|
16267dc021 | ||
|
|
e7266b52ae | ||
|
|
dc02610408 | ||
|
|
dc1bfe4a93 | ||
|
|
cf69daaafb | ||
|
|
49ee7505ed | ||
|
|
ae2b2cb9a5 | ||
|
|
8c2f287ce4 | ||
|
|
2351f36315 | ||
|
|
5ecae10713 | ||
|
|
2e3d657794 | ||
|
|
90f990b806 | ||
|
|
7666d48409 | ||
|
|
be4cb190e8 | ||
|
|
2f4eca8c46 | ||
|
|
71564fa1de | ||
|
|
ce08cb6cd7 | ||
|
|
1f0cf6311a | ||
|
|
1db468b6c3 | ||
|
|
534de5ba81 | ||
|
|
2bfea15fdc | ||
|
|
771b3377c0 |
15
.github/actions/build-nominatim/action.yml
vendored
15
.github/actions/build-nominatim/action.yml
vendored
@@ -1,13 +1,26 @@
|
||||
name: 'Build Nominatim'
|
||||
|
||||
inputs:
|
||||
ubuntu:
|
||||
description: 'Version of Ubuntu to install on'
|
||||
required: false
|
||||
default: '20'
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
|
||||
steps:
|
||||
- name: Install prerequisites
|
||||
run: |
|
||||
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev python3-psycopg2 python3-pyosmium python3-dotenv python3-psutil python3-jinja2 python3-icu python3-argparse-manpage
|
||||
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev
|
||||
if [ "x$UBUNTUVER" == "x18" ]; then
|
||||
pip3 install python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu osmium PyYAML==5.1 datrie
|
||||
else
|
||||
sudo apt-get install -y -qq python3-icu python3-datrie python3-pyosmium python3-jinja2 python3-psutil python3-psycopg2 python3-dotenv python3-yaml
|
||||
fi
|
||||
shell: bash
|
||||
env:
|
||||
UBUNTUVER: ${{ inputs.ubuntu }}
|
||||
|
||||
- name: Download dependencies
|
||||
run: |
|
||||
|
||||
4
.github/actions/setup-postgresql/action.yml
vendored
4
.github/actions/setup-postgresql/action.yml
vendored
@@ -14,8 +14,10 @@ runs:
|
||||
steps:
|
||||
- name: Remove existing PostgreSQL
|
||||
run: |
|
||||
sudo apt-get update -qq
|
||||
sudo apt-get purge -yq postgresql*
|
||||
sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
|
||||
sudo apt-get update -qq
|
||||
|
||||
shell: bash
|
||||
|
||||
- name: Install PostgreSQL
|
||||
|
||||
111
.github/workflows/ci-tests.yml
vendored
111
.github/workflows/ci-tests.yml
vendored
@@ -4,16 +4,22 @@ on: [ push, pull_request ]
|
||||
|
||||
jobs:
|
||||
tests:
|
||||
runs-on: ubuntu-20.04
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
postgresql: [9.5, 13]
|
||||
ubuntu: [18, 20]
|
||||
include:
|
||||
- postgresql: 9.5
|
||||
- ubuntu: 18
|
||||
postgresql: 9.5
|
||||
postgis: 2.5
|
||||
- postgresql: 13
|
||||
pytest: pytest
|
||||
php: 7.2
|
||||
- ubuntu: 20
|
||||
postgresql: 13
|
||||
postgis: 3
|
||||
pytest: py.test-3
|
||||
php: 7.4
|
||||
|
||||
runs-on: ubuntu-${{ matrix.ubuntu }}.04
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
@@ -24,8 +30,14 @@ jobs:
|
||||
- name: Setup PHP
|
||||
uses: shivammathur/setup-php@v2
|
||||
with:
|
||||
php-version: '7.4'
|
||||
tools: phpunit, phpcs
|
||||
php-version: ${{ matrix.php }}
|
||||
coverage: xdebug
|
||||
tools: phpunit, phpcs, composer
|
||||
|
||||
- uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: 3.6
|
||||
if: matrix.ubuntu == 18
|
||||
|
||||
- name: Get Date
|
||||
id: get-date
|
||||
@@ -43,33 +55,83 @@ jobs:
|
||||
with:
|
||||
postgresql-version: ${{ matrix.postgresql }}
|
||||
postgis-version: ${{ matrix.postgis }}
|
||||
|
||||
- uses: ./Nominatim/.github/actions/build-nominatim
|
||||
with:
|
||||
ubuntu: ${{ matrix.ubuntu }}
|
||||
|
||||
- name: Install test prerequsites
|
||||
run: sudo apt-get install -y -qq php-codesniffer pylint python3-pytest python3-behave
|
||||
run: sudo apt-get install -y -qq pylint python3-pytest python3-behave python3-pytest-cov php-codecoverage
|
||||
if: matrix.ubuntu == 20
|
||||
|
||||
- name: Install test prerequsites
|
||||
run: |
|
||||
pip3 install pylint==2.6.0 pytest pytest-cov behave==1.2.6
|
||||
if: matrix.ubuntu == 18
|
||||
|
||||
- name: PHP linting
|
||||
run: phpcs --report-width=120 .
|
||||
working-directory: Nominatim
|
||||
|
||||
- name: Python linting
|
||||
run: pylint --extension-pkg-whitelist=osmium nominatim
|
||||
run: pylint nominatim
|
||||
working-directory: Nominatim
|
||||
|
||||
- name: PHP unit tests
|
||||
run: phpunit ./
|
||||
run: phpunit --coverage-clover ../../coverage-php.xml ./
|
||||
working-directory: Nominatim/test/php
|
||||
if: matrix.ubuntu == 20
|
||||
|
||||
- name: Python unit tests
|
||||
run: py.test-3 test/python
|
||||
run: $PYTEST --cov=nominatim --cov-report=xml test/python
|
||||
working-directory: Nominatim
|
||||
env:
|
||||
PYTEST: ${{ matrix.pytest }}
|
||||
|
||||
- name: BDD tests
|
||||
run: behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3
|
||||
run: |
|
||||
mkdir cov
|
||||
behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3 -DPHPCOV=./cov
|
||||
composer require phpunit/phpcov:7.0.2
|
||||
vendor/bin/phpcov merge --clover ../../coverage-bdd.xml ./cov
|
||||
working-directory: Nominatim/test/bdd
|
||||
if: matrix.ubuntu == 20
|
||||
|
||||
- name: BDD tests
|
||||
run: |
|
||||
behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3
|
||||
working-directory: Nominatim/test/bdd
|
||||
if: matrix.ubuntu == 18
|
||||
|
||||
- name: BDD tests (legacy_icu tokenizer)
|
||||
run: |
|
||||
behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build -DTOKENIZER=legacy_icu --format=progress3
|
||||
working-directory: Nominatim/test/bdd
|
||||
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v1
|
||||
with:
|
||||
files: ./Nominatim/coverage*.xml
|
||||
directory: ./
|
||||
name: codecov-umbrella
|
||||
fail_ci_if_error: false
|
||||
path_to_write_report: ./coverage/codecov_report.txt
|
||||
verbose: true
|
||||
if: matrix.ubuntu == 20
|
||||
|
||||
import:
|
||||
runs-on: ubuntu-20.04
|
||||
strategy:
|
||||
matrix:
|
||||
ubuntu: [18, 20]
|
||||
include:
|
||||
- ubuntu: 18
|
||||
postgresql: 9.5
|
||||
postgis: 2.5
|
||||
- ubuntu: 20
|
||||
postgresql: 13
|
||||
postgis: 3
|
||||
|
||||
runs-on: ubuntu-${{ matrix.ubuntu }}.04
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
@@ -95,11 +157,18 @@ jobs:
|
||||
monaco-latest.osm.pbf
|
||||
key: nominatim-test-data-${{ steps.get-date.outputs.date }}
|
||||
|
||||
- uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: 3.6
|
||||
if: matrix.ubuntu == 18
|
||||
|
||||
- uses: ./Nominatim/.github/actions/setup-postgresql
|
||||
with:
|
||||
postgresql-version: 13
|
||||
postgis-version: 3
|
||||
postgresql-version: ${{ matrix.postgresql }}
|
||||
postgis-version: ${{ matrix.postgis }}
|
||||
- uses: ./Nominatim/.github/actions/build-nominatim
|
||||
with:
|
||||
ubuntu: ${{ matrix.ubuntu }}
|
||||
|
||||
- name: Clean installation
|
||||
run: rm -rf Nominatim build
|
||||
@@ -123,10 +192,14 @@ jobs:
|
||||
run: nominatim special-phrases --import-from-wiki
|
||||
working-directory: data-env
|
||||
|
||||
- name: Check import
|
||||
- name: Check full import
|
||||
run: nominatim admin --check-database
|
||||
working-directory: data-env
|
||||
|
||||
- name: Warm up database
|
||||
run: nominatim admin --warm
|
||||
working-directory: data-env
|
||||
|
||||
- name: Run update
|
||||
run: |
|
||||
nominatim replication --init
|
||||
@@ -134,7 +207,11 @@ jobs:
|
||||
working-directory: data-env
|
||||
|
||||
- name: Run reverse-only import
|
||||
run : nominatim import --osm-file ../monaco-latest.osm.pbf --reverse-only
|
||||
run : nominatim import --osm-file ../monaco-latest.osm.pbf --reverse-only --no-updates
|
||||
working-directory: data-env
|
||||
env:
|
||||
NOMINATIM_DATABASE_DSN: pgsql:dbname=reverse
|
||||
|
||||
- name: Check reverse import
|
||||
run: nominatim admin --check-database
|
||||
working-directory: data-env
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[MASTER]
|
||||
|
||||
extension-pkg-whitelist=osmium
|
||||
ignored-modules=icu
|
||||
ignored-modules=icu,datrie
|
||||
|
||||
[MESSAGES CONTROL]
|
||||
|
||||
@@ -10,3 +10,6 @@ ignored-modules=icu
|
||||
# closing added here because it sometimes triggers a false positive with
|
||||
# 'with' statements.
|
||||
ignored-classes=NominatimArgs,closing
|
||||
disable=too-few-public-methods,duplicate-code
|
||||
|
||||
good-names=i,x,y,fd,db
|
||||
|
||||
@@ -62,7 +62,7 @@ endif()
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
if (BUILD_IMPORTER)
|
||||
find_package(PythonInterp 3.5 REQUIRED)
|
||||
find_package(PythonInterp 3.6 REQUIRED)
|
||||
endif()
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
@@ -109,21 +109,6 @@ if (BUILD_IMPORTER)
|
||||
" wget -O ${PROJECT_SOURCE_DIR}/data/country_osm_grid.sql.gz https://www.nominatim.org/data/country_grid.sql.gz")
|
||||
endif()
|
||||
|
||||
set(CUSTOMSCRIPTS
|
||||
check_import_finished.php
|
||||
country_languages.php
|
||||
export.php
|
||||
query.php
|
||||
setup.php
|
||||
update.php
|
||||
warm.php
|
||||
)
|
||||
|
||||
foreach (script_source ${CUSTOMSCRIPTS})
|
||||
configure_file(${PROJECT_SOURCE_DIR}/cmake/script.tmpl
|
||||
${PROJECT_BINARY_DIR}/utils/${script_source})
|
||||
endforeach()
|
||||
|
||||
configure_file(${PROJECT_SOURCE_DIR}/cmake/tool.tmpl
|
||||
${PROJECT_BINARY_DIR}/nominatim)
|
||||
endif()
|
||||
@@ -273,4 +258,6 @@ install(FILES settings/env.defaults
|
||||
settings/import-address.style
|
||||
settings/import-full.style
|
||||
settings/import-extratags.style
|
||||
settings/legacy_icu_tokenizer.yaml
|
||||
settings/icu-rules/extended-unicode-to-asccii.yaml
|
||||
DESTINATION ${NOMINATIM_CONFIGDIR})
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
[](https://github.com/osm-search/Nominatim/actions?query=workflow%3A%22CI+Tests%22)
|
||||
[](https://codecov.io/gh/osm-search/Nominatim)
|
||||
|
||||
Nominatim
|
||||
=========
|
||||
|
||||
39
SECURITY.md
Normal file
39
SECURITY.md
Normal file
@@ -0,0 +1,39 @@
|
||||
# Security Policy
|
||||
|
||||
## Supported Versions
|
||||
|
||||
All Nominatim releases receive security updates for two years.
|
||||
|
||||
The following table lists the end of support for all currently supported
|
||||
versions.
|
||||
|
||||
| Version | End of support for security updates |
|
||||
| ------- | ----------------------------------- |
|
||||
| 3.7.x | 2023-04-05 |
|
||||
| 3.6.x | 2022-12-12 |
|
||||
| 3.5.x | 2022-06-05 |
|
||||
| 3.4.x | 2021-10-24 |
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
If you believe you have found an issue in Nominatim that has implications for
|
||||
security, please send a description of the issue to **security@nominatim.org**.
|
||||
You will receive an acknowledgement of your mail within 3 work days where we
|
||||
also notify you of the next steps.
|
||||
|
||||
## How we Disclose Security Issues
|
||||
|
||||
**The following section only applies to security issues found in released
|
||||
versions. Issues that concern the master development branch only will be
|
||||
fixed immediately on the branch with the corresponding PR containing the
|
||||
description of the nature and severity of the issue.**
|
||||
|
||||
Patches for identified security issues are applied to all affected versions and
|
||||
new minor versions are released. At the same time we release a statement at
|
||||
the [Nominatim blog](https://nominatim.org/blog/) describing the nature of the
|
||||
incident. Announcements will also be published at the
|
||||
[geocoding mailinglist](https://lists.openstreetmap.org/listinfo/geocoding).
|
||||
|
||||
## List of Previous Incidents
|
||||
|
||||
* 2020-05-04 - [SQL injection issue on /details endpoint](https://lists.openstreetmap.org/pipermail/geocoding/2020-May/002012.html)
|
||||
@@ -1,14 +0,0 @@
|
||||
#!@PHP_BIN@ -Cq
|
||||
<?php
|
||||
require('@CMAKE_SOURCE_DIR@/lib-php/dotenv_loader.php');
|
||||
|
||||
@define('CONST_Default_ModulePath', '@CMAKE_BINARY_DIR@/module');
|
||||
@define('CONST_Default_Osm2pgsql', '@CMAKE_BINARY_DIR@/osm2pgsql/osm2pgsql');
|
||||
@define('CONST_DataDir', '@CMAKE_SOURCE_DIR@/data');
|
||||
@define('CONST_SqlDir', '@CMAKE_SOURCE_DIR@/lib-sql');
|
||||
@define('CONST_ConfigDir', '@CMAKE_SOURCE_DIR@/settings');
|
||||
|
||||
loadDotEnv();
|
||||
$_SERVER['NOMINATIM_NOMINATIM_TOOL'] = '@CMAKE_BINARY_DIR@/nominatim';
|
||||
|
||||
require_once('@CMAKE_SOURCE_DIR@/lib-php/admin/@script_source@');
|
||||
14
codecov.yml
Normal file
14
codecov.yml
Normal file
@@ -0,0 +1,14 @@
|
||||
codecov:
|
||||
require_ci_to_pass: yes
|
||||
|
||||
coverage:
|
||||
status:
|
||||
project: off
|
||||
patch: off
|
||||
|
||||
comment:
|
||||
require_changes: true
|
||||
after_n_builds: 2
|
||||
|
||||
fixes:
|
||||
- "Nominatim/::"
|
||||
@@ -29787,7 +29787,7 @@ st 5557484
|
||||
|
||||
-- prefill word table
|
||||
|
||||
select count(make_keywords(v)) from (select distinct svals(name) as v from place) as w where v is not null;
|
||||
select count(precompute_words(v)) from (select distinct svals(name) as v from place) as w where v is not null;
|
||||
select count(getorcreate_housenumber_id(make_standard_name(v))) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w;
|
||||
|
||||
-- copy the word frequencies
|
||||
|
||||
101
docs/admin/Customization.md
Normal file
101
docs/admin/Customization.md
Normal file
@@ -0,0 +1,101 @@
|
||||
# Customization of the Database
|
||||
|
||||
This section explains in detail how to configure a Nominatim import and
|
||||
the various means to use external data.
|
||||
|
||||
## External postcode data
|
||||
|
||||
Nominatim creates a table of known postcode centroids during import. This table
|
||||
is used for searches of postcodes and for adding postcodes to places where the
|
||||
OSM data does not provide one. These postcode centroids are mainly computed
|
||||
from the OSM data itself. In addition, Nominatim supports reading postcode
|
||||
information from an external CSV file, to supplement the postcodes that are
|
||||
missing in OSM.
|
||||
|
||||
To enable external postcode support, simply put one CSV file per country into
|
||||
your project directory and name it `<CC>_postcodes.csv`. `<CC>` must be the
|
||||
two-letter country code for which to apply the file. The file may also be
|
||||
gzipped. Then it must be called `<CC>_postcodes.csv.gz`.
|
||||
|
||||
The CSV file must use commas as a delimiter and have a header line. Nominatim
|
||||
expects three columns to be present: `postcode`, `lat` and `lon`. All other
|
||||
columns are ignored. `lon` and `lat` must describe the x and y coordinates of the
|
||||
postcode centroids in WGS84.
|
||||
|
||||
The postcode files are loaded only when there is data for the given country
|
||||
in your database. For example, if there is a `us_postcodes.csv` file in your
|
||||
project directory but you import only an excerpt of Italy, then the US postcodes
|
||||
will simply be ignored.
|
||||
|
||||
As a rule, the external postcode data should be put into the project directory
|
||||
**before** starting the initial import. Still, you can add, remove and update the
|
||||
external postcode data at any time. Simply
|
||||
run:
|
||||
|
||||
```
|
||||
nominatim refresh --postcodes
|
||||
```
|
||||
|
||||
to make the changes visible in your database. Be aware, however, that the changes
|
||||
only have an immediate effect on searches for postcodes. Postcodes that were
|
||||
added to places are only updated when they are reindexed. That usually happens
|
||||
only during replication updates.
|
||||
|
||||
## Installing Tiger housenumber data for the US
|
||||
|
||||
Nominatim is able to use the official [TIGER](https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html)
|
||||
address set to complement the OSM house number data in the US. You can add
|
||||
TIGER data to your own Nominatim instance by following these steps. The
|
||||
entire US adds about 10GB to your database.
|
||||
|
||||
1. Get preprocessed TIGER 2020 data:
|
||||
|
||||
cd $PROJECT_DIR
|
||||
wget https://nominatim.org/data/tiger2020-nominatim-preprocessed.csv.tar.gz
|
||||
|
||||
2. Import the data into your Nominatim database:
|
||||
|
||||
nominatim add-data --tiger-data tiger2020-nominatim-preprocessed.csv.tar.gz
|
||||
|
||||
3. Enable use of the Tiger data in your `.env` by adding:
|
||||
|
||||
echo NOMINATIM_USE_US_TIGER_DATA=yes >> .env
|
||||
|
||||
4. Apply the new settings:
|
||||
|
||||
nominatim refresh --functions
|
||||
|
||||
|
||||
See the [developer's guide](../develop/data-sources.md#us-census-tiger) for more
|
||||
information on how the data got preprocessed.
|
||||
|
||||
## Special phrases import
|
||||
|
||||
As described in the [Importation chapter](Import.md), it is possible to
|
||||
import special phrases from the wiki with the following command:
|
||||
|
||||
```sh
|
||||
nominatim special-phrases --import-from-wiki
|
||||
```
|
||||
|
||||
But, it is also possible to import some phrases from a csv file.
|
||||
To do so, you have access to the following command:
|
||||
|
||||
```sh
|
||||
nominatim special-phrases --import-from-csv <csv file>
|
||||
```
|
||||
|
||||
Note that the two previous import commands will update the phrases from your database.
|
||||
This means that if you import some phrases from a csv file, only the phrases
|
||||
present in the csv file will be kept in the database. All other phrases will
|
||||
be removed.
|
||||
|
||||
If you want to only add new phrases and not update the other ones you can add
|
||||
the argument `--no-replace` to the import command. For example:
|
||||
|
||||
```sh
|
||||
nominatim special-phrases --import-from-csv <csv file> --no-replace
|
||||
```
|
||||
|
||||
This will add the phrases present in the csv file into the database without
|
||||
removing the other ones.
|
||||
@@ -1,7 +1,7 @@
|
||||
# Deploying Nominatim
|
||||
|
||||
The Nominatim API is implemented as a PHP application. The `website/` directory
|
||||
in the build directory contains the configured website. You can serve this
|
||||
in the project directory contains the configured website. You can serve this
|
||||
in a production environment with any web server that is capable of running
|
||||
PHP scripts.
|
||||
|
||||
@@ -13,10 +13,11 @@ to run a web service. Please refer to the documentation of
|
||||
for background information on configuring the services.
|
||||
|
||||
!!! Note
|
||||
Throughout this page, we assume that your Nominatim build directory is
|
||||
located in `/srv/nominatim/build` and the source code in
|
||||
`/srv/nominatim/Nominatim`. If you have put it somewhere else, you
|
||||
need to adjust the commands and configuration accordingly.
|
||||
Throughout this page, we assume that your Nominatim project directory is
|
||||
located in `/srv/nominatim-project` and that you have installed Nominatim
|
||||
using the default installation prefix `/usr/local`. If you have put it
|
||||
somewhere else, you need to adjust the commands and configuration
|
||||
accordingly.
|
||||
|
||||
We further assume that your web server runs as user `www-data`. Older
|
||||
versions of CentOS may still use the user name `apache`. You also need
|
||||
@@ -29,7 +30,7 @@ web server user. You can check that the permissions are correct by accessing
|
||||
one of the PHP files as the web server user:
|
||||
|
||||
``` sh
|
||||
sudo -u www-data head -n 1 /srv/nominatim/build/website/search.php
|
||||
sudo -u www-data head -n 1 /srv/nominatim-project/website/search.php
|
||||
```
|
||||
|
||||
If this shows a permission error, then you need to adapt the permissions of
|
||||
@@ -40,11 +41,11 @@ web server access. At a minimum the following SELinux labelling should be done
|
||||
for Nominatim:
|
||||
|
||||
``` sh
|
||||
sudo semanage fcontext -a -t httpd_sys_content_t "/srv/nominatim/Nominatim/(website|lib|settings)(/.*)?"
|
||||
sudo semanage fcontext -a -t httpd_sys_content_t "/srv/nominatim/build/(website|settings)(/.*)?"
|
||||
sudo semanage fcontext -a -t lib_t "/srv/nominatim/build/module/nominatim.so"
|
||||
sudo restorecon -R -v /srv/nominatim/Nominatim
|
||||
sudo restorecon -R -v /srv/nominatim/build
|
||||
sudo semanage fcontext -a -t httpd_sys_content_t "/usr/local/lib/nominatim/lib-php(/.*)?"
|
||||
sudo semanage fcontext -a -t httpd_sys_content_t "/srv/nominatim-project/website(/.*)?"
|
||||
sudo semanage fcontext -a -t lib_t "/srv/nominatim-project/module/nominatim.so"
|
||||
sudo restorecon -R -v /usr/local/lib/nominatim
|
||||
sudo restorecon -R -v /srv/nominatim-project
|
||||
```
|
||||
|
||||
## Nominatim with Apache
|
||||
@@ -65,13 +66,13 @@ Make sure your Apache configuration contains the required permissions for the
|
||||
directory and create an alias:
|
||||
|
||||
``` apache
|
||||
<Directory "/srv/nominatim/build/website">
|
||||
<Directory "/srv/nominatim-project/website">
|
||||
Options FollowSymLinks MultiViews
|
||||
AddType text/html .php
|
||||
DirectoryIndex search.php
|
||||
Require all granted
|
||||
</Directory>
|
||||
Alias /nominatim /srv/nominatim/build/website
|
||||
Alias /nominatim /srv/nominatim-project/website
|
||||
```
|
||||
|
||||
After making changes in the apache config you need to restart apache.
|
||||
@@ -110,7 +111,7 @@ Tell nginx that php files are special and to fastcgi_pass to the php-fpm
|
||||
unix socket by adding the location definition to the default configuration.
|
||||
|
||||
``` nginx
|
||||
root /srv/nominatim/build/website;
|
||||
root /srv/nominatim-project/website;
|
||||
index search.php;
|
||||
location / {
|
||||
try_files $uri $uri/ @php;
|
||||
|
||||
@@ -40,7 +40,7 @@ all commands from the project directory.
|
||||
|
||||
### Configuration setup in `.env`
|
||||
|
||||
The Nominatim server can be customized via an `.env` configuration file in the
|
||||
The Nominatim server can be customized via an `.env` configuration file in the
|
||||
project directory. This is a file in [dotenv](https://github.com/theskumar/python-dotenv)
|
||||
format which looks the same as variable settings in a standard shell environment.
|
||||
You can also set the same configuration via environment variables. All
|
||||
@@ -48,7 +48,7 @@ settings have a `NOMINATIM_` prefix to avoid conflicts with other environment
|
||||
variables.
|
||||
|
||||
There are lots of configuration settings you can tweak. Have a look
|
||||
at `settings/env.default` for a full list. Most should have a sensible default.
|
||||
at `Nominatim/settings/env.defaults` for a full list. Most should have a sensible default.
|
||||
|
||||
#### Flatnode files
|
||||
|
||||
@@ -83,15 +83,19 @@ The file is about 400MB and adds around 4GB to the Nominatim database.
|
||||
`nominatim refresh --wiki-data --importance`. Updating importances for
|
||||
a planet can take a couple of hours.
|
||||
|
||||
### Great Britain, USA postcodes
|
||||
### External postcodes
|
||||
|
||||
Nominatim can use postcodes from an external source to improve searches that
|
||||
involve a GB or US postcode. This data can be optionally downloaded into the
|
||||
project directory:
|
||||
Nominatim can use postcodes from an external source to improve searching with
|
||||
postcodes. We provide precomputed postcodes sets for the US (using TIGER data)
|
||||
and the UK (using the [CodePoint OpenData set](https://osdatahub.os.uk/downloads/open/CodePointOpen)).
|
||||
This data can be optionally downloaded into the project directory:
|
||||
|
||||
cd $PROJECT_DIR
|
||||
wget https://www.nominatim.org/data/gb_postcode_data.sql.gz
|
||||
wget https://www.nominatim.org/data/us_postcode_data.sql.gz
|
||||
wget https://www.nominatim.org/data/gb_postcodes.csv.gz
|
||||
wget https://www.nominatim.org/data/us_postcodes.csv.gz
|
||||
|
||||
You can also add your own custom postcode sources, see
|
||||
[Customization of postcodes](Customization.md#external-postcode-data).
|
||||
|
||||
## Choosing the data to import
|
||||
|
||||
@@ -189,12 +193,15 @@ can be found in the development section.
|
||||
[Geofabrik](https://download.geofabrik.de).
|
||||
|
||||
Download the data to import. Then issue the following command
|
||||
from the **build directory** to start the import:
|
||||
from the **project directory** to start the import:
|
||||
|
||||
```sh
|
||||
nominatim import --osm-file <data file> 2>&1 | tee setup.log
|
||||
```
|
||||
|
||||
The **project directory** is the one that you have set up at the beginning.
|
||||
See [creating the project directory](Import.md#creating-the-project-directory).
|
||||
|
||||
### Notes on full planet imports
|
||||
|
||||
Even on a perfectly configured machine
|
||||
@@ -248,6 +255,9 @@ to verify that your installation is working. Go to
|
||||
`http://localhost:8088/status.php` and you should see the message `OK`.
|
||||
You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`.
|
||||
|
||||
Note that search queries are not supported for reverse-only imports. You can run a
|
||||
reverse query, e.g. `http://localhost:8088/reverse.php?lat=27.1750090510034&lon=78.04209025`.
|
||||
|
||||
To run Nominatim via webservers like Apache or nginx, please read the
|
||||
[Deployment chapter](Deployment.md).
|
||||
|
||||
@@ -270,37 +280,12 @@ If you want to be able to search for places by their type through
|
||||
[special key phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases)
|
||||
you also need to import these key phrases like this:
|
||||
|
||||
nominatim special-phrases --import-from-wiki
|
||||
```sh
|
||||
nominatim special-phrases --import-from-wiki
|
||||
```
|
||||
|
||||
Note that this command downloads the phrases from the wiki link above. You
|
||||
need internet access for this step.
|
||||
|
||||
|
||||
## Installing Tiger housenumber data for the US
|
||||
|
||||
Nominatim is able to use the official [TIGER](https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html)
|
||||
address set to complement the OSM house number data in the US. You can add
|
||||
TIGER data to your own Nominatim instance by following these steps. The
|
||||
entire US adds about 10GB to your database.
|
||||
|
||||
1. Get preprocessed TIGER 2020 data:
|
||||
|
||||
cd $PROJECT_DIR
|
||||
wget https://nominatim.org/data/tiger2020-nominatim-preprocessed.tar.gz
|
||||
|
||||
2. Import the data into your Nominatim database:
|
||||
|
||||
nominatim add-data --tiger-data tiger2020-nominatim-preprocessed.tar.gz
|
||||
|
||||
3. Enable use of the Tiger data in your `.env` by adding:
|
||||
|
||||
echo NOMINATIM_USE_US_TIGER_DATA=yes >> .env
|
||||
|
||||
4. Apply the new settings:
|
||||
|
||||
nominatim refresh --functions
|
||||
|
||||
|
||||
See the [developer's guide](../develop/data-sources.md#us-census-tiger) for more
|
||||
information on how the data got preprocessed.
|
||||
|
||||
You can also import special phrases from a csv file, for more
|
||||
information please read the [Customization chapter](Customization.md).
|
||||
|
||||
@@ -17,6 +17,7 @@ and can't offer support.
|
||||
|
||||
* [Docker](https://github.com/mediagis/nominatim-docker)
|
||||
* [Docker on Kubernetes](https://github.com/peter-evans/nominatim-k8s)
|
||||
* [Kubernetes with Helm](https://github.com/robjuz/helm-charts/blob/master/charts/nominatim/README.md)
|
||||
* [Ansible](https://github.com/synthesio/infra-ansible-nominatim)
|
||||
|
||||
## Prerequisites
|
||||
@@ -37,14 +38,16 @@ For compiling:
|
||||
|
||||
For running Nominatim:
|
||||
|
||||
* [PostgreSQL](https://www.postgresql.org) (9.3+ will work, 11+ strongly recommended)
|
||||
* [PostgreSQL](https://www.postgresql.org) (9.5+ will work, 11+ strongly recommended)
|
||||
* [PostGIS](https://postgis.net) (2.2+)
|
||||
* [Python 3](https://www.python.org/) (3.5+)
|
||||
* [Python 3](https://www.python.org/) (3.6+)
|
||||
* [Psycopg2](https://www.psycopg.org) (2.7+)
|
||||
* [Python Dotenv](https://github.com/theskumar/python-dotenv)
|
||||
* [psutil](https://github.com/giampaolo/psutil)
|
||||
* [Jinja2](https://palletsprojects.com/p/jinja/)
|
||||
* [PyICU](https://pypi.org/project/PyICU/)
|
||||
* [PyYaml](https://pyyaml.org/) (5.1+)
|
||||
* [datrie](https://github.com/pytries/datrie)
|
||||
* [PHP](https://php.net) (7.0 or later)
|
||||
* PHP-pgsql
|
||||
* PHP-intl (bundled with PHP)
|
||||
|
||||
@@ -17,6 +17,17 @@ breaking changes. **Please read them before running the migration.**
|
||||
|
||||
## 3.6.0 -> 3.7.0
|
||||
|
||||
### New format and name of configuration file
|
||||
|
||||
The configuration for an import is now saved in a `.env` file in the project
|
||||
directory. This file follows the dotenv format. For more information, see
|
||||
the [installation chapter](Import.md#configuration-setup-in-env).
|
||||
|
||||
To migrate to the new system, create a new project directory, add the `.env`
|
||||
file and port your custom configuration from `settings/local.php`. Most
|
||||
settings are named similar and only have received a `NOMINATIM_` prefix.
|
||||
Use the default settings in `settings/env.defaults` as a reference.
|
||||
|
||||
### New location for data files
|
||||
|
||||
External data files for Wikipedia importance, postcodes etc. are no longer
|
||||
@@ -113,6 +124,14 @@ configuration file, run the following command after updating:
|
||||
./utils/setup.php --setup-website
|
||||
```
|
||||
|
||||
### Update SQL code
|
||||
|
||||
To update the SQL code to the latest version run:
|
||||
|
||||
```
|
||||
./utils/setup.php --create-functions --enable-diff-updates --create-partition-functions
|
||||
```
|
||||
|
||||
## 3.4.0 -> 3.5.0
|
||||
|
||||
### New Wikipedia/Wikidata importance tables
|
||||
|
||||
@@ -10,12 +10,11 @@ installation. For more details, please also have a look at the
|
||||
|
||||
## Installing nominatim-ui
|
||||
|
||||
nominatim-ui does not need any special installation, just download, configure
|
||||
and run it.
|
||||
|
||||
Clone the source from github:
|
||||
|
||||
git clone https://github.com/osm-search/nominatim-ui
|
||||
We provide regular releases of nominatim-ui that contain the packaged website.
|
||||
They do not need any special installation. Just download, configure
|
||||
and run it. Grab the latest release from
|
||||
[nominatim-ui's Github release page](https://github.com/osm-search/nominatim-ui/releases)
|
||||
and unpack it. You can use `nominatim-ui-x.x.x.tar.gz` or `nominatim-ui-x.x.x.zip`.
|
||||
|
||||
Copy the example configuration into the right place:
|
||||
|
||||
|
||||
205
docs/admin/Tokenizers.md
Normal file
205
docs/admin/Tokenizers.md
Normal file
@@ -0,0 +1,205 @@
|
||||
# Tokenizers
|
||||
|
||||
The tokenizer module in Nominatim is responsible for analysing the names given
|
||||
to OSM objects and the terms of an incoming query in order to make sure they
|
||||
can be matched appropriately.
|
||||
|
||||
Nominatim offers different tokenizer modules, which behave differently and have
|
||||
different configuration options. This section describes the tokenizers and how
|
||||
they can be configured.
|
||||
|
||||
!!! important
|
||||
The use of a tokenizer is tied to a database installation. You need to choose
|
||||
and configure the tokenizer before starting the initial import. Once the import
|
||||
is done, you cannot switch to another tokenizer anymore. Reconfiguring the
|
||||
chosen tokenizer is very limited as well. See the comments in each tokenizer
|
||||
section.
|
||||
|
||||
## Legacy tokenizer
|
||||
|
||||
The legacy tokenizer implements the analysis algorithms of older Nominatim
|
||||
versions. It uses a special PostgreSQL module to normalize names and queries.
|
||||
This tokenizer is currently the default.
|
||||
|
||||
To enable the tokenizer add the following line to your project configuration:
|
||||
|
||||
```
|
||||
NOMINATIM_TOKENIZER=legacy
|
||||
```
|
||||
|
||||
The PostgreSQL module for the tokenizer is available in the `module` directory
|
||||
and also installed with the remainder of the software under
|
||||
`lib/nominatim/module/nominatim.so`. You can specify a custom location for
|
||||
the module with
|
||||
|
||||
```
|
||||
NOMINATIM_DATABASE_MODULE_PATH=<path to directory where nominatim.so resides>
|
||||
```
|
||||
|
||||
This is in particular useful when the database runs on a different server.
|
||||
See [Advanced installations](Advanced-Installations.md#importing-nominatim-to-an-external-postgresql-database) for details.
|
||||
|
||||
There are no other configuration options for the legacy tokenizer. All
|
||||
normalization functions are hard-coded.
|
||||
|
||||
## ICU tokenizer
|
||||
|
||||
!!! danger
|
||||
This tokenizer is currently in active development and still subject
|
||||
to backwards-incompatible changes.
|
||||
|
||||
The ICU tokenizer uses the [ICU library](http://site.icu-project.org/) to
|
||||
normalize names and queries. It also offers configurable decomposition and
|
||||
abbreviation handling.
|
||||
|
||||
### How it works
|
||||
|
||||
On import the tokenizer processes names in the following four stages:
|
||||
|
||||
1. The **Normalization** part removes all non-relevant information from the
|
||||
input.
|
||||
2. Incoming names are now converted to **full names**. This process is currently
|
||||
hard coded and mostly serves to handle name tags from OSM that contain
|
||||
multiple names (e.g. [Biel/Bienne](https://www.openstreetmap.org/node/240097197)).
|
||||
3. Next the tokenizer creates **variants** from the full names. These variants
|
||||
cover decomposition and abbreviation handling. Variants are saved to the
|
||||
database, so that it is not necessary to create the variants for a search
|
||||
query.
|
||||
4. The final **Tokenization** step converts the names to a simple ASCII form,
|
||||
potentially removing further spelling variants for better matching.
|
||||
|
||||
At query time only stages 1) and 4) are used. The query is normalized and
|
||||
tokenized and the resulting string used for searching in the database.
|
||||
|
||||
### Configuration
|
||||
|
||||
The ICU tokenizer is configured using a YAML file, which can be selected using
|
||||
`NOMINATIM_TOKENIZER_CONFIG`. The configuration is read on import and then
|
||||
saved as part of the internal database status. Later changes to the variable
|
||||
have no effect.
|
||||
|
||||
Here is an example configuration file:
|
||||
|
||||
``` yaml
|
||||
normalization:
|
||||
- ":: lower ()"
|
||||
- "ß > 'ss'" # German eszett is unambiguously equal to double s
|
||||
transliteration:
|
||||
- !include /etc/nominatim/icu-rules/extended-unicode-to-asccii.yaml
|
||||
- ":: Ascii ()"
|
||||
variants:
|
||||
- language: de
|
||||
words:
|
||||
- ~haus => haus
|
||||
- ~strasse -> str
|
||||
- language: en
|
||||
words:
|
||||
- road -> rd
|
||||
- bridge -> bdge,br,brdg,bri,brg
|
||||
```
|
||||
|
||||
The configuration file contains three sections:
|
||||
`normalization`, `transliteration`, `variants`.
|
||||
|
||||
The normalization and transliteration sections each must contain a list of
|
||||
[ICU transformation rules](https://unicode-org.github.io/icu/userguide/transforms/general/rules.html).
|
||||
The rules are applied in the order in which they appear in the file.
|
||||
You can also include additional rules from an external YAML file using the
|
||||
`!include` tag. The included file must contain a valid YAML list of ICU rules
|
||||
and may again include other files.
|
||||
|
||||
!!! warning
|
||||
The ICU rule syntax contains special characters that conflict with the
|
||||
YAML syntax. You should therefore always enclose the ICU rules in
|
||||
double-quotes.
|
||||
|
||||
The variants section defines lists of replacements which create alternative
|
||||
spellings of a name. To create the variants, a name is scanned from left to
|
||||
right and the longest matching replacement is applied until the end of the
|
||||
string is reached.
|
||||
|
||||
The variants section must contain a list of replacement groups. Each group
|
||||
defines a set of properties that describes where the replacements are
|
||||
applicable. In addition, the `words` section defines the list of replacements
|
||||
to be made. The basic replacement description is of the form:
|
||||
|
||||
```
|
||||
<source>[,<source>[...]] => <target>[,<target>[...]]
|
||||
```
|
||||
|
||||
The left side contains one or more `source` terms to be replaced. The right side
|
||||
lists one or more replacements. Each source is replaced with each replacement
|
||||
term.
|
||||
|
||||
!!! tip
|
||||
The source and target terms are internally normalized using the
|
||||
normalization rules given in the configuration. This ensures that the
|
||||
strings match as expected. In fact, it is better to use unnormalized
|
||||
words in the configuration because then it is possible to change the
|
||||
rules for normalization later without having to adapt the variant rules.
|
||||
|
||||
#### Decomposition
|
||||
|
||||
In its standard form, only full words match against the source. There
|
||||
is a special notation to match the prefix and suffix of a word:
|
||||
|
||||
``` yaml
|
||||
- ~strasse => str # matches "strasse" as full word and in suffix position
|
||||
- hinter~ => hntr # matches "hinter" as full word and in prefix position
|
||||
```
|
||||
|
||||
There is no facility to match a string in the middle of the word. The suffix
|
||||
and prefix notation automatically trigger the decomposition mode: two variants
|
||||
are created for each replacement, one with the replacement attached to the word
|
||||
and one separate. So in the above example, the tokenization of "hauptstrasse" will
|
||||
create the variants "hauptstr" and "haupt str". Similarly, the name "rote strasse"
|
||||
triggers the variants "rote str" and "rotestr". By having decomposition work
|
||||
both ways, it is sufficient to create the variants at index time. The variant
|
||||
rules are not applied at query time.
|
||||
|
||||
To avoid automatic decomposition, use the '|' notation:
|
||||
|
||||
``` yaml
|
||||
- ~strasse |=> str
|
||||
```
|
||||
|
||||
simply changes "hauptstrasse" to "hauptstr" and "rote strasse" to "rote str".
|
||||
|
||||
#### Initial and final terms
|
||||
|
||||
It is also possible to restrict replacements to the beginning and end of a
|
||||
name:
|
||||
|
||||
``` yaml
|
||||
- ^south => s # matches only at the beginning of the name
|
||||
- road$ => rd # matches only at the end of the name
|
||||
```
|
||||
|
||||
So the first example would trigger a replacement for "south 45th street" but
|
||||
not for "the south beach restaurant".
|
||||
|
||||
#### Replacements vs. variants
|
||||
|
||||
The replacement syntax `source => target` works as a pure replacement. It changes
|
||||
the name instead of creating a variant. To create an additional version, you'd
|
||||
have to write `source => source,target`. As this is a frequent case, there is
|
||||
a shortcut notation for it:
|
||||
|
||||
```
|
||||
<source>[,<source>[...]] -> <target>[,<target>[...]]
|
||||
```
|
||||
|
||||
The simple arrow causes an additional variant to be added. Note that
|
||||
decomposition has an effect here on the source as well. So a rule
|
||||
|
||||
``` yaml
|
||||
- "~strasse -> str"
|
||||
```
|
||||
|
||||
means that for a word like `hauptstrasse` four variants are created:
|
||||
`hauptstrasse`, `haupt strasse`, `hauptstr` and `haupt str`.
|
||||
|
||||
### Reconfiguration
|
||||
|
||||
Changing the configuration after the import is currently not possible, although
|
||||
this feature may be added at a later time.
|
||||
@@ -30,9 +30,9 @@ diffs for Ireland from Geofabrik add the following:
|
||||
|
||||
# base URL of the replication service
|
||||
NOMINATIM_REPLICATION_URL="https://download.geofabrik.de/europe/ireland-and-northern-ireland-updates"
|
||||
# How often upstream publishes diffs
|
||||
# How often upstream publishes diffs (in seconds)
|
||||
NOMINATIM_REPLICATION_UPDATE_INTERVAL=86400
|
||||
# How long to sleep if no update found yet
|
||||
# How long to sleep if no update found yet (in seconds)
|
||||
NOMINATIM_REPLICATION_RECHECK_INTERVAL=900
|
||||
|
||||
To set up the update process now run the following command:
|
||||
|
||||
@@ -29,7 +29,7 @@ The Nominatim test suite consists of behavioural tests (using behave) and
|
||||
unit tests (using PHPUnit for PHP code and pytest for Python code).
|
||||
It has the following additional requirements:
|
||||
|
||||
* [behave test framework](https://behave.readthedocs.io) >= 1.2.5
|
||||
* [behave test framework](https://behave.readthedocs.io) >= 1.2.6
|
||||
* [phpunit](https://phpunit.de) >= 7.3
|
||||
* [PHP CodeSniffer](https://github.com/squizlabs/PHP_CodeSniffer)
|
||||
* [Pylint](https://pylint.org/) (2.6.0 is used for the CI)
|
||||
|
||||
@@ -19,6 +19,8 @@ pages:
|
||||
- 'Import' : 'admin/Import.md'
|
||||
- 'Update' : 'admin/Update.md'
|
||||
- 'Deploy' : 'admin/Deployment.md'
|
||||
- 'Customize Imports' : 'admin/Customization.md'
|
||||
- 'Tokenizers' : 'admin/Tokenizers.md'
|
||||
- 'Nominatim UI' : 'admin/Setup-Nominatim-UI.md'
|
||||
- 'Advanced Installations' : 'admin/Advanced-Installations.md'
|
||||
- 'Migration from older Versions' : 'admin/Migration.md'
|
||||
|
||||
@@ -61,7 +61,7 @@ class AddressDetails
|
||||
return join(', ', $aParts);
|
||||
}
|
||||
|
||||
public function getAddressNames($sCountry = null)
|
||||
public function getAddressNames()
|
||||
{
|
||||
$aAddress = array();
|
||||
|
||||
@@ -79,13 +79,11 @@ class AddressDetails
|
||||
$sName = $aLine['housenumber'];
|
||||
}
|
||||
|
||||
if (isset($sName)) {
|
||||
$sTypeLabel = strtolower(str_replace(' ', '_', $sTypeLabel));
|
||||
if (!isset($aAddress[$sTypeLabel])
|
||||
|| $aLine['class'] == 'place'
|
||||
) {
|
||||
$aAddress[$sTypeLabel] = $sName;
|
||||
}
|
||||
if (isset($sName)
|
||||
&& (!isset($aAddress[$sTypeLabel])
|
||||
|| $aLine['class'] == 'place')
|
||||
) {
|
||||
$aAddress[$sTypeLabel] = $sName;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -39,7 +39,9 @@ class DB
|
||||
$conn->exec("SET DateStyle TO 'sql,european'");
|
||||
$conn->exec("SET client_encoding TO 'utf-8'");
|
||||
$iMaxExecution = ini_get('max_execution_time');
|
||||
if ($iMaxExecution > 0) $conn->setAttribute(\PDO::ATTR_TIMEOUT, $iMaxExecution); // seconds
|
||||
if ($iMaxExecution > 0) {
|
||||
$conn->setAttribute(\PDO::ATTR_TIMEOUT, $iMaxExecution); // seconds
|
||||
}
|
||||
|
||||
$this->connection = $conn;
|
||||
return true;
|
||||
@@ -95,7 +97,9 @@ class DB
|
||||
try {
|
||||
$stmt = $this->getQueryStatement($sSQL, $aInputVars, $sErrMessage);
|
||||
$row = $stmt->fetch(\PDO::FETCH_NUM);
|
||||
if ($row === false) return false;
|
||||
if ($row === false) {
|
||||
return false;
|
||||
}
|
||||
} catch (\PDOException $e) {
|
||||
throw new \Nominatim\DatabaseError($sErrMessage, 500, null, $e, $sSQL);
|
||||
}
|
||||
@@ -306,9 +310,13 @@ class DB
|
||||
if (preg_match('/^pgsql:(.+)$/', $sDSN, $aMatches)) {
|
||||
foreach (explode(';', $aMatches[1]) as $sKeyVal) {
|
||||
list($sKey, $sVal) = explode('=', $sKeyVal, 2);
|
||||
if ($sKey == 'host') $sKey = 'hostspec';
|
||||
if ($sKey == 'dbname') $sKey = 'database';
|
||||
if ($sKey == 'user') $sKey = 'username';
|
||||
if ($sKey == 'host') {
|
||||
$sKey = 'hostspec';
|
||||
} elseif ($sKey == 'dbname') {
|
||||
$sKey = 'database';
|
||||
} elseif ($sKey == 'user') {
|
||||
$sKey = 'username';
|
||||
}
|
||||
$aInfo[$sKey] = $sVal;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@ namespace Nominatim;
|
||||
class DatabaseError extends \Exception
|
||||
{
|
||||
|
||||
public function __construct($message, $code = 500, Exception $previous = null, $oPDOErr, $sSql = null)
|
||||
public function __construct($message, $code, $previous, $oPDOErr, $sSql = null)
|
||||
{
|
||||
parent::__construct($message, $code, $previous);
|
||||
// https://secure.php.net/manual/en/class.pdoexception.php
|
||||
|
||||
@@ -78,7 +78,7 @@ class Debug
|
||||
echo '<th>Address Tokens</th><th>Address Not</th>';
|
||||
echo '<th>country</th><th>operator</th>';
|
||||
echo '<th>class</th><th>type</th><th>postcode</th><th>housenumber</th></tr>';
|
||||
foreach ($aSearches as $iRank => $aRankedSet) {
|
||||
foreach ($aSearches as $aRankedSet) {
|
||||
foreach ($aRankedSet as $aRow) {
|
||||
$aRow->dumpAsHtmlTableRow($aWordsIDs);
|
||||
}
|
||||
|
||||
@@ -7,18 +7,20 @@ require_once(CONST_LibDir.'/Phrase.php');
|
||||
require_once(CONST_LibDir.'/ReverseGeocode.php');
|
||||
require_once(CONST_LibDir.'/SearchDescription.php');
|
||||
require_once(CONST_LibDir.'/SearchContext.php');
|
||||
require_once(CONST_LibDir.'/SearchPosition.php');
|
||||
require_once(CONST_LibDir.'/TokenList.php');
|
||||
require_once(CONST_TokenizerDir.'/tokenizer.php');
|
||||
|
||||
class Geocode
|
||||
{
|
||||
protected $oDB;
|
||||
|
||||
protected $oPlaceLookup;
|
||||
protected $oTokenizer;
|
||||
|
||||
protected $aLangPrefOrder = array();
|
||||
|
||||
protected $aExcludePlaceIDs = array();
|
||||
protected $bReverseInPlan = true;
|
||||
|
||||
protected $iLimit = 20;
|
||||
protected $iFinalLimit = 10;
|
||||
@@ -42,28 +44,12 @@ class Geocode
|
||||
protected $sQuery = false;
|
||||
protected $aStructuredQuery = false;
|
||||
|
||||
protected $oNormalizer = null;
|
||||
|
||||
|
||||
public function __construct(&$oDB)
|
||||
{
|
||||
$this->oDB =& $oDB;
|
||||
$this->oPlaceLookup = new PlaceLookup($this->oDB);
|
||||
$this->oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules);
|
||||
}
|
||||
|
||||
private function normTerm($sTerm)
|
||||
{
|
||||
if ($this->oNormalizer === null) {
|
||||
return $sTerm;
|
||||
}
|
||||
|
||||
return $this->oNormalizer->transliterate($sTerm);
|
||||
}
|
||||
|
||||
public function setReverseInPlan($bReverse)
|
||||
{
|
||||
$this->bReverseInPlan = $bReverse;
|
||||
$this->oTokenizer = new \Nominatim\Tokenizer($this->oDB);
|
||||
}
|
||||
|
||||
public function setLanguagePreference($aLangPref)
|
||||
@@ -85,7 +71,9 @@ class Geocode
|
||||
$aParams['exclude_place_ids'] = implode(',', $this->aExcludePlaceIDs);
|
||||
}
|
||||
|
||||
if ($this->bBoundedSearch) $aParams['bounded'] = '1';
|
||||
if ($this->bBoundedSearch) {
|
||||
$aParams['bounded'] = '1';
|
||||
}
|
||||
|
||||
if ($this->aCountryCodes) {
|
||||
$aParams['countrycodes'] = implode(',', $this->aCountryCodes);
|
||||
@@ -100,8 +88,11 @@ class Geocode
|
||||
|
||||
public function setLimit($iLimit = 10)
|
||||
{
|
||||
if ($iLimit > 50) $iLimit = 50;
|
||||
if ($iLimit < 1) $iLimit = 1;
|
||||
if ($iLimit > 50) {
|
||||
$iLimit = 50;
|
||||
} elseif ($iLimit < 1) {
|
||||
$iLimit = 1;
|
||||
}
|
||||
|
||||
$this->iFinalLimit = $iLimit;
|
||||
$this->iLimit = $iLimit + min($iLimit, 10);
|
||||
@@ -196,18 +187,24 @@ class Geocode
|
||||
if ($sExcluded) {
|
||||
foreach ($sExcluded as $iExcludedPlaceID) {
|
||||
$iExcludedPlaceID = (int)$iExcludedPlaceID;
|
||||
if ($iExcludedPlaceID)
|
||||
if ($iExcludedPlaceID) {
|
||||
$aExcludePlaceIDs[$iExcludedPlaceID] = $iExcludedPlaceID;
|
||||
}
|
||||
}
|
||||
|
||||
if (isset($aExcludePlaceIDs))
|
||||
if (isset($aExcludePlaceIDs)) {
|
||||
$this->aExcludePlaceIDs = $aExcludePlaceIDs;
|
||||
}
|
||||
}
|
||||
|
||||
// Only certain ranks of feature
|
||||
$sFeatureType = $oParams->getString('featureType');
|
||||
if (!$sFeatureType) $sFeatureType = $oParams->getString('featuretype');
|
||||
if ($sFeatureType) $this->setFeatureType($sFeatureType);
|
||||
if (!$sFeatureType) {
|
||||
$sFeatureType = $oParams->getString('featuretype');
|
||||
}
|
||||
if ($sFeatureType) {
|
||||
$this->setFeatureType($sFeatureType);
|
||||
}
|
||||
|
||||
// Country code list
|
||||
$sCountries = $oParams->getStringList('countrycodes');
|
||||
@@ -217,8 +214,9 @@ class Geocode
|
||||
$aCountries[] = strtolower($sCountryCode);
|
||||
}
|
||||
}
|
||||
if (isset($aCountries))
|
||||
if (isset($aCountries)) {
|
||||
$this->aCountryCodes = $aCountries;
|
||||
}
|
||||
}
|
||||
|
||||
$aViewbox = $oParams->getStringList('viewboxlbrt');
|
||||
@@ -262,7 +260,6 @@ class Geocode
|
||||
$oParams->getString('country'),
|
||||
$oParams->getString('postalcode')
|
||||
);
|
||||
$this->setReverseInPlan(false);
|
||||
} else {
|
||||
$this->setQuery($sQuery);
|
||||
}
|
||||
@@ -271,13 +268,17 @@ class Geocode
|
||||
public function loadStructuredAddressElement($sValue, $sKey, $iNewMinAddressRank, $iNewMaxAddressRank, $aItemListValues)
|
||||
{
|
||||
$sValue = trim($sValue);
|
||||
if (!$sValue) return false;
|
||||
if (!$sValue) {
|
||||
return false;
|
||||
}
|
||||
$this->aStructuredQuery[$sKey] = $sValue;
|
||||
if ($this->iMinAddressRank == 0 && $this->iMaxAddressRank == 30) {
|
||||
$this->iMinAddressRank = $iNewMinAddressRank;
|
||||
$this->iMaxAddressRank = $iNewMaxAddressRank;
|
||||
}
|
||||
if ($aItemListValues) $this->aAddressRankList = array_merge($this->aAddressRankList, $aItemListValues);
|
||||
if ($aItemListValues) {
|
||||
$this->aAddressRankList = array_merge($this->aAddressRankList, $aItemListValues);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -311,11 +312,11 @@ class Geocode
|
||||
|
||||
public function fallbackStructuredQuery()
|
||||
{
|
||||
if (!$this->aStructuredQuery) return false;
|
||||
|
||||
$aParams = $this->aStructuredQuery;
|
||||
|
||||
if (count($aParams) == 1) return false;
|
||||
if (!$aParams || count($aParams) == 1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
$aOrderToFallback = array('postalcode', 'street', 'city', 'county', 'state');
|
||||
|
||||
@@ -330,7 +331,7 @@ class Geocode
|
||||
return false;
|
||||
}
|
||||
|
||||
public function getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bIsStructured)
|
||||
public function getGroupedSearches($aSearches, $aPhrases, $oValidTokens)
|
||||
{
|
||||
/*
|
||||
Calculate all searches using oValidTokens i.e.
|
||||
@@ -345,52 +346,26 @@ class Geocode
|
||||
*/
|
||||
foreach ($aPhrases as $iPhrase => $oPhrase) {
|
||||
$aNewPhraseSearches = array();
|
||||
$sPhraseType = $bIsStructured ? $oPhrase->getPhraseType() : '';
|
||||
$oPosition = new SearchPosition(
|
||||
$oPhrase->getPhraseType(),
|
||||
$iPhrase,
|
||||
count($aPhrases)
|
||||
);
|
||||
|
||||
foreach ($oPhrase->getWordSets() as $aWordset) {
|
||||
$aWordsetSearches = $aSearches;
|
||||
|
||||
// Add all words from this wordset
|
||||
foreach ($aWordset as $iToken => $sToken) {
|
||||
//echo "<br><b>$sToken</b>";
|
||||
$aNewWordsetSearches = array();
|
||||
$oPosition->setTokenPosition($iToken, count($aWordset));
|
||||
|
||||
foreach ($aWordsetSearches as $oCurrentSearch) {
|
||||
//echo "<i>";
|
||||
//var_dump($oCurrentSearch);
|
||||
//echo "</i>";
|
||||
|
||||
// Tokens with full name matches.
|
||||
foreach ($oValidTokens->get(' '.$sToken) as $oSearchTerm) {
|
||||
$aNewSearches = $oCurrentSearch->extendWithFullTerm(
|
||||
$oSearchTerm,
|
||||
$oValidTokens->contains($sToken)
|
||||
&& strpos($sToken, ' ') === false,
|
||||
$sPhraseType,
|
||||
$iToken == 0 && $iPhrase == 0,
|
||||
$iPhrase == 0,
|
||||
$iToken + 1 == count($aWordset)
|
||||
&& $iPhrase + 1 == count($aPhrases)
|
||||
);
|
||||
|
||||
foreach ($aNewSearches as $oSearch) {
|
||||
if ($oSearch->getRank() < $this->iMaxRank) {
|
||||
$aNewWordsetSearches[] = $oSearch;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Look for partial matches.
|
||||
// Note that there is no point in adding country terms here
|
||||
// because country is omitted in the address.
|
||||
if ($sPhraseType != 'country') {
|
||||
// Allow searching for a word - but at extra cost
|
||||
foreach ($oValidTokens->get($sToken) as $oSearchTerm) {
|
||||
$aNewSearches = $oCurrentSearch->extendWithPartialTerm(
|
||||
$sToken,
|
||||
$oSearchTerm,
|
||||
$bIsStructured,
|
||||
$iPhrase,
|
||||
$oValidTokens->get(' '.$sToken)
|
||||
foreach ($oValidTokens->get($sToken) as $oSearchTerm) {
|
||||
if ($oSearchTerm->isExtendable($oCurrentSearch, $oPosition)) {
|
||||
$aNewSearches = $oSearchTerm->extendSearch(
|
||||
$oCurrentSearch,
|
||||
$oPosition
|
||||
);
|
||||
|
||||
foreach ($aNewSearches as $oSearch) {
|
||||
@@ -405,7 +380,6 @@ class Geocode
|
||||
usort($aNewWordsetSearches, array('Nominatim\SearchDescription', 'bySearchRank'));
|
||||
$aWordsetSearches = array_slice($aNewWordsetSearches, 0, 50);
|
||||
}
|
||||
//var_Dump('<hr>',count($aWordsetSearches)); exit;
|
||||
|
||||
$aNewPhraseSearches = array_merge($aNewPhraseSearches, $aNewWordsetSearches);
|
||||
usort($aNewPhraseSearches, array('Nominatim\SearchDescription', 'bySearchRank'));
|
||||
@@ -413,8 +387,11 @@ class Geocode
|
||||
$aSearchHash = array();
|
||||
foreach ($aNewPhraseSearches as $iSearch => $aSearch) {
|
||||
$sHash = serialize($aSearch);
|
||||
if (isset($aSearchHash[$sHash])) unset($aNewPhraseSearches[$iSearch]);
|
||||
else $aSearchHash[$sHash] = 1;
|
||||
if (isset($aSearchHash[$sHash])) {
|
||||
unset($aNewPhraseSearches[$iSearch]);
|
||||
} else {
|
||||
$aSearchHash[$sHash] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
$aNewPhraseSearches = array_slice($aNewPhraseSearches, 0, 50);
|
||||
@@ -435,10 +412,12 @@ class Geocode
|
||||
|
||||
$iSearchCount = 0;
|
||||
$aSearches = array();
|
||||
foreach ($aGroupedSearches as $iScore => $aNewSearches) {
|
||||
foreach ($aGroupedSearches as $aNewSearches) {
|
||||
$iSearchCount += count($aNewSearches);
|
||||
$aSearches = array_merge($aSearches, $aNewSearches);
|
||||
if ($iSearchCount > 50) break;
|
||||
if ($iSearchCount > 50) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -495,7 +474,9 @@ class Geocode
|
||||
public function lookup()
|
||||
{
|
||||
Debug::newFunction('Geocode::lookup');
|
||||
if (!$this->sQuery && !$this->aStructuredQuery) return array();
|
||||
if (!$this->sQuery && !$this->aStructuredQuery) {
|
||||
return array();
|
||||
}
|
||||
|
||||
Debug::printDebugArray('Geocode', $this);
|
||||
|
||||
@@ -517,16 +498,10 @@ class Geocode
|
||||
if ($this->aCountryCodes) {
|
||||
$oCtx->setCountryList($this->aCountryCodes);
|
||||
}
|
||||
$this->oTokenizer->setCountryRestriction($this->aCountryCodes);
|
||||
|
||||
Debug::newSection('Query Preprocessing');
|
||||
|
||||
$sNormQuery = $this->normTerm($this->sQuery);
|
||||
Debug::printVar('Normalized query', $sNormQuery);
|
||||
|
||||
$sLanguagePrefArraySQL = $this->oDB->getArraySQL(
|
||||
$this->oDB->getDBQuotedList($this->aLangPrefOrder)
|
||||
);
|
||||
|
||||
$sQuery = $this->sQuery;
|
||||
if (!preg_match('//u', $sQuery)) {
|
||||
userError('Query string is not UTF-8 encoded.');
|
||||
@@ -576,117 +551,62 @@ class Geocode
|
||||
}
|
||||
|
||||
if ($sSpecialTerm && !$aSearches[0]->hasOperator()) {
|
||||
$sSpecialTerm = pg_escape_string($sSpecialTerm);
|
||||
$sToken = $this->oDB->getOne(
|
||||
'SELECT make_standard_name(:term)',
|
||||
array(':term' => $sSpecialTerm),
|
||||
'Cannot decode query. Wrong encoding?'
|
||||
);
|
||||
$sSQL = 'SELECT class, type FROM word ';
|
||||
$sSQL .= ' WHERE word_token in (\' '.$sToken.'\')';
|
||||
$sSQL .= ' AND class is not null AND class not in (\'place\')';
|
||||
$aTokens = $this->oTokenizer->tokensForSpecialTerm($sSpecialTerm);
|
||||
|
||||
Debug::printSQL($sSQL);
|
||||
$aSearchWords = $this->oDB->getAll($sSQL);
|
||||
$aNewSearches = array();
|
||||
foreach ($aSearches as $oSearch) {
|
||||
foreach ($aSearchWords as $aSearchTerm) {
|
||||
$oNewSearch = clone $oSearch;
|
||||
$oNewSearch->setPoiSearch(
|
||||
Operator::TYPE,
|
||||
$aSearchTerm['class'],
|
||||
$aSearchTerm['type']
|
||||
);
|
||||
$aNewSearches[] = $oNewSearch;
|
||||
if (!empty($aTokens)) {
|
||||
$aNewSearches = array();
|
||||
$oPosition = new SearchPosition('', 0, 1);
|
||||
$oPosition->setTokenPosition(0, 1);
|
||||
|
||||
foreach ($aSearches as $oSearch) {
|
||||
foreach ($aTokens as $oToken) {
|
||||
$aNewSearches = array_merge(
|
||||
$aNewSearches,
|
||||
$oToken->extendSearch($oSearch, $oPosition)
|
||||
);
|
||||
}
|
||||
}
|
||||
$aSearches = $aNewSearches;
|
||||
}
|
||||
$aSearches = $aNewSearches;
|
||||
}
|
||||
|
||||
// Split query into phrases
|
||||
// Commas are used to reduce the search space by indicating where phrases split
|
||||
$aPhrases = array();
|
||||
if ($this->aStructuredQuery) {
|
||||
$aInPhrases = $this->aStructuredQuery;
|
||||
$bStructuredPhrases = true;
|
||||
foreach ($this->aStructuredQuery as $iPhrase => $sPhrase) {
|
||||
$aPhrases[] = new Phrase($sPhrase, $iPhrase);
|
||||
}
|
||||
} else {
|
||||
$aInPhrases = explode(',', $sQuery);
|
||||
$bStructuredPhrases = false;
|
||||
foreach (explode(',', $sQuery) as $sPhrase) {
|
||||
$aPhrases[] = new Phrase($sPhrase, '');
|
||||
}
|
||||
}
|
||||
|
||||
Debug::printDebugArray('Search context', $oCtx);
|
||||
Debug::printDebugArray('Base search', empty($aSearches) ? null : $aSearches[0]);
|
||||
Debug::printVar('Final query phrases', $aInPhrases);
|
||||
|
||||
// Convert each phrase to standard form
|
||||
// Create a list of standard words
|
||||
// Get all 'sets' of words
|
||||
// Generate a complete list of all
|
||||
Debug::newSection('Tokenization');
|
||||
$aTokens = array();
|
||||
$aPhrases = array();
|
||||
foreach ($aInPhrases as $iPhrase => $sPhrase) {
|
||||
$sPhrase = $this->oDB->getOne(
|
||||
'SELECT make_standard_name(:phrase)',
|
||||
array(':phrase' => $sPhrase),
|
||||
'Cannot normalize query string (is it a UTF-8 string?)'
|
||||
);
|
||||
if (trim($sPhrase)) {
|
||||
$oPhrase = new Phrase($sPhrase, is_string($iPhrase) ? $iPhrase : '');
|
||||
$oPhrase->addTokens($aTokens);
|
||||
$aPhrases[] = $oPhrase;
|
||||
}
|
||||
}
|
||||
|
||||
Debug::printVar('Tokens', $aTokens);
|
||||
|
||||
$oValidTokens = new TokenList();
|
||||
|
||||
if (!empty($aTokens)) {
|
||||
$oValidTokens->addTokensFromDB(
|
||||
$this->oDB,
|
||||
$aTokens,
|
||||
$this->aCountryCodes,
|
||||
$sNormQuery,
|
||||
$this->oNormalizer
|
||||
);
|
||||
$oValidTokens = $this->oTokenizer->extractTokensFromPhrases($aPhrases);
|
||||
|
||||
if ($oValidTokens->count() > 0) {
|
||||
$oCtx->setFullNameWords($oValidTokens->getFullWordIDs());
|
||||
|
||||
// Try more interpretations for Tokens that could not be matched.
|
||||
foreach ($aTokens as $sToken) {
|
||||
if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
|
||||
if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
|
||||
// US ZIP+4 codes - merge in the 5-digit ZIP code
|
||||
$oValidTokens->addToken(
|
||||
$sToken,
|
||||
new Token\Postcode(null, $aData[1], 'us')
|
||||
);
|
||||
} elseif (preg_match('/^ [0-9]+$/', $sToken)) {
|
||||
// Unknown single word token with a number.
|
||||
// Assume it is a house number.
|
||||
$oValidTokens->addToken(
|
||||
$sToken,
|
||||
new Token\HouseNumber(null, trim($sToken))
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
$aPhrases = array_filter($aPhrases, function ($oPhrase) {
|
||||
return $oPhrase->getWordSets() !== null;
|
||||
});
|
||||
|
||||
// Any words that have failed completely?
|
||||
// TODO: suggestions
|
||||
|
||||
Debug::printGroupTable('Valid Tokens', $oValidTokens->debugInfo());
|
||||
|
||||
foreach ($aPhrases as $oPhrase) {
|
||||
$oPhrase->computeWordSets($oValidTokens);
|
||||
}
|
||||
Debug::printDebugTable('Phrases', $aPhrases);
|
||||
|
||||
Debug::newSection('Search candidates');
|
||||
|
||||
$aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bStructuredPhrases);
|
||||
$aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens);
|
||||
|
||||
if ($this->bReverseInPlan) {
|
||||
if (!$this->aStructuredQuery) {
|
||||
// Reverse phrase array and also reverse the order of the wordsets in
|
||||
// the first and final phrase. Don't bother about phrases in the middle
|
||||
// because order in the address doesn't matter.
|
||||
@@ -695,7 +615,7 @@ class Geocode
|
||||
if (count($aPhrases) > 1) {
|
||||
$aPhrases[count($aPhrases)-1]->invertWordSets();
|
||||
}
|
||||
$aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, false);
|
||||
$aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens);
|
||||
|
||||
foreach ($aGroupedSearches as $aSearches) {
|
||||
foreach ($aSearches as $aSearch) {
|
||||
@@ -714,7 +634,9 @@ class Geocode
|
||||
$aGroupedSearches = array();
|
||||
foreach ($aSearches as $aSearch) {
|
||||
if ($aSearch->getRank() < $this->iMaxRank) {
|
||||
if (!isset($aGroupedSearches[$aSearch->getRank()])) $aGroupedSearches[$aSearch->getRank()] = array();
|
||||
if (!isset($aGroupedSearches[$aSearch->getRank()])) {
|
||||
$aGroupedSearches[$aSearch->getRank()] = array();
|
||||
}
|
||||
$aGroupedSearches[$aSearch->getRank()][] = $aSearch;
|
||||
}
|
||||
}
|
||||
@@ -728,7 +650,9 @@ class Geocode
|
||||
$sHash = serialize($aSearch);
|
||||
if (isset($aSearchHash[$sHash])) {
|
||||
unset($aGroupedSearches[$iGroup][$iSearch]);
|
||||
if (empty($aGroupedSearches[$iGroup])) unset($aGroupedSearches[$iGroup]);
|
||||
if (empty($aGroupedSearches[$iGroup])) {
|
||||
unset($aGroupedSearches[$iGroup]);
|
||||
}
|
||||
} else {
|
||||
$aSearchHash[$sHash] = 1;
|
||||
}
|
||||
@@ -772,7 +696,9 @@ class Geocode
|
||||
}
|
||||
}
|
||||
|
||||
if ($iQueryLoop > 20) break;
|
||||
if ($iQueryLoop > 20) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!empty($aResults)) {
|
||||
@@ -838,7 +764,6 @@ class Geocode
|
||||
foreach ($aResults as $oResult) {
|
||||
if (($this->iMaxAddressRank == 30 &&
|
||||
($oResult->iTable == Result::TABLE_OSMLINE
|
||||
|| $oResult->iTable == Result::TABLE_AUX
|
||||
|| $oResult->iTable == Result::TABLE_TIGER))
|
||||
|| in_array($oResult->iId, $aFilteredIDs)
|
||||
) {
|
||||
@@ -848,9 +773,9 @@ class Geocode
|
||||
$aResults = $tempIDs;
|
||||
}
|
||||
|
||||
if (!empty($aResults)) break;
|
||||
if ($iGroupLoop > 4) break;
|
||||
if ($iQueryLoop > 30) break;
|
||||
if (!empty($aResults) || $iGroupLoop > 4 || $iQueryLoop > 30) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Just interpret as a reverse geocode
|
||||
@@ -868,10 +793,8 @@ class Geocode
|
||||
|
||||
// No results? Done
|
||||
if (empty($aResults)) {
|
||||
if ($this->bFallback) {
|
||||
if ($this->fallbackStructuredQuery()) {
|
||||
return $this->lookup();
|
||||
}
|
||||
if ($this->bFallback && $this->fallbackStructuredQuery()) {
|
||||
return $this->lookup();
|
||||
}
|
||||
|
||||
return array();
|
||||
@@ -890,7 +813,9 @@ class Geocode
|
||||
|
||||
$aRecheckWords = preg_split('/\b[\s,\\-]*/u', $sQuery);
|
||||
foreach ($aRecheckWords as $i => $sWord) {
|
||||
if (!preg_match('/[\pL\pN]/', $sWord)) unset($aRecheckWords[$i]);
|
||||
if (!preg_match('/[\pL\pN]/', $sWord)) {
|
||||
unset($aRecheckWords[$i]);
|
||||
}
|
||||
}
|
||||
|
||||
Debug::printVar('Recheck words', $aRecheckWords);
|
||||
@@ -950,7 +875,9 @@ class Geocode
|
||||
foreach ($aRecheckWords as $i => $sWord) {
|
||||
if (stripos($sAddress, $sWord)!==false) {
|
||||
$iCountWords++;
|
||||
if (preg_match('/(^|,)\s*'.preg_quote($sWord, '/').'\s*(,|$)/', $sAddress)) $iCountWords += 0.1;
|
||||
if (preg_match('/(^|,)\s*'.preg_quote($sWord, '/').'\s*(,|$)/', $sAddress)) {
|
||||
$iCountWords += 0.1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -967,15 +894,8 @@ class Geocode
|
||||
$aToFilter = $aSearchResults;
|
||||
$aSearchResults = array();
|
||||
|
||||
$bFirst = true;
|
||||
foreach ($aToFilter as $aResult) {
|
||||
$this->aExcludePlaceIDs[$aResult['place_id']] = $aResult['place_id'];
|
||||
if ($bFirst) {
|
||||
$fLat = $aResult['lat'];
|
||||
$fLon = $aResult['lon'];
|
||||
if (isset($aResult['zoom'])) $iZoom = $aResult['zoom'];
|
||||
$bFirst = false;
|
||||
}
|
||||
if (!$this->oPlaceLookup->doDeDupe() || (!isset($aOSMIDDone[$aResult['osm_type'].$aResult['osm_id']])
|
||||
&& !isset($aClassTypeNameDone[$aResult['osm_type'].$aResult['class'].$aResult['type'].$aResult['name'].$aResult['admin_level']]))
|
||||
) {
|
||||
@@ -985,7 +905,9 @@ class Geocode
|
||||
}
|
||||
|
||||
// Absolute limit on number of results
|
||||
if (count($aSearchResults) >= $this->iFinalLimit) break;
|
||||
if (count($aSearchResults) >= $this->iFinalLimit) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Debug::printVar('Post-filter results', $aSearchResults);
|
||||
@@ -999,7 +921,6 @@ class Geocode
|
||||
'Structured query' => $this->aStructuredQuery,
|
||||
'Name keys' => Debug::fmtArrayVals($this->aLangPrefOrder),
|
||||
'Excluded place IDs' => Debug::fmtArrayVals($this->aExcludePlaceIDs),
|
||||
'Try reversed query'=> $this->bReverseInPlan,
|
||||
'Limit (for searches)' => $this->iLimit,
|
||||
'Limit (for results)'=> $this->iFinalLimit,
|
||||
'Country codes' => Debug::fmtArrayVals($this->aCountryCodes),
|
||||
|
||||
@@ -90,14 +90,16 @@ class ParameterParser
|
||||
$aLanguages = array();
|
||||
$sLangString = $this->getString('accept-language', $sFallback);
|
||||
|
||||
if ($sLangString) {
|
||||
if (preg_match_all('/(([a-z]{1,8})([-_][a-z]{1,8})?)\s*(;\s*q\s*=\s*(1|0\.[0-9]+))?/i', $sLangString, $aLanguagesParse, PREG_SET_ORDER)) {
|
||||
foreach ($aLanguagesParse as $iLang => $aLanguage) {
|
||||
$aLanguages[$aLanguage[1]] = isset($aLanguage[5])?(float)$aLanguage[5]:1 - ($iLang/100);
|
||||
if (!isset($aLanguages[$aLanguage[2]])) $aLanguages[$aLanguage[2]] = $aLanguages[$aLanguage[1]]/10;
|
||||
if ($sLangString
|
||||
&& preg_match_all('/(([a-z]{1,8})([-_][a-z]{1,8})?)\s*(;\s*q\s*=\s*(1|0\.[0-9]+))?/i', $sLangString, $aLanguagesParse, PREG_SET_ORDER)
|
||||
) {
|
||||
foreach ($aLanguagesParse as $iLang => $aLanguage) {
|
||||
$aLanguages[$aLanguage[1]] = isset($aLanguage[5])?(float)$aLanguage[5]:1 - ($iLang/100);
|
||||
if (!isset($aLanguages[$aLanguage[2]])) {
|
||||
$aLanguages[$aLanguage[2]] = $aLanguages[$aLanguage[1]]/10;
|
||||
}
|
||||
arsort($aLanguages);
|
||||
}
|
||||
arsort($aLanguages);
|
||||
}
|
||||
if (empty($aLanguages) && CONST_Default_Language) {
|
||||
$aLanguages[CONST_Default_Language] = 1;
|
||||
|
||||
@@ -16,8 +16,6 @@ class Phrase
|
||||
private $sPhrase;
|
||||
// Element type for structured searches.
|
||||
private $sPhraseType;
|
||||
// Space-separated words of the phrase.
|
||||
private $aWords;
|
||||
// Possible segmentations of the phrase.
|
||||
private $aWordSets;
|
||||
|
||||
@@ -38,7 +36,14 @@ class Phrase
|
||||
{
|
||||
$this->sPhrase = trim($sPhrase);
|
||||
$this->sPhraseType = $sPhraseType;
|
||||
$this->aWords = explode(' ', $this->sPhrase);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the orginal phrase of the string.
|
||||
*/
|
||||
public function getPhrase()
|
||||
{
|
||||
return $this->sPhrase;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -63,30 +68,6 @@ class Phrase
|
||||
return $this->aWordSets;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add the tokens from this phrase to the given list of tokens.
|
||||
*
|
||||
* @param string[] $aTokens List of tokens to append.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
public function addTokens(&$aTokens)
|
||||
{
|
||||
$iNumWords = count($this->aWords);
|
||||
|
||||
for ($i = 0; $i < $iNumWords; $i++) {
|
||||
$sPhrase = $this->aWords[$i];
|
||||
$aTokens[' '.$sPhrase] = ' '.$sPhrase;
|
||||
$aTokens[$sPhrase] = $sPhrase;
|
||||
|
||||
for ($j = $i + 1; $j < $iNumWords; $j++) {
|
||||
$sPhrase .= ' '.$this->aWords[$j];
|
||||
$aTokens[' '.$sPhrase] = ' '.$sPhrase;
|
||||
$aTokens[$sPhrase] = $sPhrase;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Invert the set of possible segmentations.
|
||||
*
|
||||
@@ -99,21 +80,27 @@ class Phrase
|
||||
}
|
||||
}
|
||||
|
||||
public function computeWordSets($oTokens)
|
||||
public function computeWordSets($aWords, $oTokens)
|
||||
{
|
||||
$iNumWords = count($this->aWords);
|
||||
$iNumWords = count($aWords);
|
||||
|
||||
if ($iNumWords == 0) {
|
||||
$this->aWordSets = null;
|
||||
return;
|
||||
}
|
||||
|
||||
// Caches the word set for the partial phrase up to word i.
|
||||
$aSetCache = array_fill(0, $iNumWords, array());
|
||||
|
||||
// Initialise first element of cache. There can only be the word.
|
||||
if ($oTokens->containsAny($this->aWords[0])) {
|
||||
$aSetCache[0][] = array($this->aWords[0]);
|
||||
if ($oTokens->containsAny($aWords[0])) {
|
||||
$aSetCache[0][] = array($aWords[0]);
|
||||
}
|
||||
|
||||
// Now do the next elements using what we already have.
|
||||
for ($i = 1; $i < $iNumWords; $i++) {
|
||||
for ($j = $i; $j > 0; $j--) {
|
||||
$sPartial = $j == $i ? $this->aWords[$j] : $this->aWords[$j].' '.$sPartial;
|
||||
$sPartial = $j == $i ? $aWords[$j] : $aWords[$j].' '.$sPartial;
|
||||
if (!empty($aSetCache[$j - 1]) && $oTokens->containsAny($sPartial)) {
|
||||
$aPartial = array($sPartial);
|
||||
foreach ($aSetCache[$j - 1] as $aSet) {
|
||||
@@ -136,7 +123,7 @@ class Phrase
|
||||
}
|
||||
|
||||
// finally the current full phrase
|
||||
$sPartial = $this->aWords[0].' '.$sPartial;
|
||||
$sPartial = $aWords[0].' '.$sPartial;
|
||||
if ($oTokens->containsAny($sPartial)) {
|
||||
$aSetCache[$i][] = array($sPartial);
|
||||
}
|
||||
@@ -153,7 +140,6 @@ class Phrase
|
||||
return array(
|
||||
'Type' => $this->sPhraseType,
|
||||
'Phrase' => $this->sPhrase,
|
||||
'Words' => $this->aWords,
|
||||
'WordSets' => $this->aWordSets
|
||||
);
|
||||
}
|
||||
|
||||
@@ -89,20 +89,36 @@ class PlaceLookup
|
||||
{
|
||||
$aParams = array();
|
||||
|
||||
if ($this->bAddressDetails) $aParams['addressdetails'] = '1';
|
||||
if ($this->bExtraTags) $aParams['extratags'] = '1';
|
||||
if ($this->bNameDetails) $aParams['namedetails'] = '1';
|
||||
if ($this->bAddressDetails) {
|
||||
$aParams['addressdetails'] = '1';
|
||||
}
|
||||
if ($this->bExtraTags) {
|
||||
$aParams['extratags'] = '1';
|
||||
}
|
||||
if ($this->bNameDetails) {
|
||||
$aParams['namedetails'] = '1';
|
||||
}
|
||||
|
||||
if ($this->bIncludePolygonAsText) $aParams['polygon_text'] = '1';
|
||||
if ($this->bIncludePolygonAsGeoJSON) $aParams['polygon_geojson'] = '1';
|
||||
if ($this->bIncludePolygonAsKML) $aParams['polygon_kml'] = '1';
|
||||
if ($this->bIncludePolygonAsSVG) $aParams['polygon_svg'] = '1';
|
||||
if ($this->bIncludePolygonAsText) {
|
||||
$aParams['polygon_text'] = '1';
|
||||
}
|
||||
if ($this->bIncludePolygonAsGeoJSON) {
|
||||
$aParams['polygon_geojson'] = '1';
|
||||
}
|
||||
if ($this->bIncludePolygonAsKML) {
|
||||
$aParams['polygon_kml'] = '1';
|
||||
}
|
||||
if ($this->bIncludePolygonAsSVG) {
|
||||
$aParams['polygon_svg'] = '1';
|
||||
}
|
||||
|
||||
if ($this->fPolygonSimplificationThreshold > 0.0) {
|
||||
$aParams['polygon_threshold'] = $this->fPolygonSimplificationThreshold;
|
||||
}
|
||||
|
||||
if (!$this->bDeDupe) $aParams['dedupe'] = '0';
|
||||
if (!$this->bDeDupe) {
|
||||
$aParams['dedupe'] = '0';
|
||||
}
|
||||
|
||||
return $aParams;
|
||||
}
|
||||
@@ -147,8 +163,9 @@ class PlaceLookup
|
||||
|
||||
private function langAddressSql($sHousenumber)
|
||||
{
|
||||
if ($this->bAddressDetails)
|
||||
if ($this->bAddressDetails) {
|
||||
return ''; // langaddress will be computed from address details
|
||||
}
|
||||
|
||||
return 'get_address_by_language(place_id,'.$sHousenumber.','.$this->aLangPrefOrderSql.') AS langaddress,';
|
||||
}
|
||||
@@ -234,12 +251,20 @@ class PlaceLookup
|
||||
$sSQL .= ' housenumber,';
|
||||
$sSQL .= ' country_code, ';
|
||||
$sSQL .= ' importance, ';
|
||||
if (!$this->bDeDupe) $sSQL .= 'place_id,';
|
||||
if (!$this->bAddressDetails) $sSQL .= 'langaddress, ';
|
||||
if (!$this->bDeDupe) {
|
||||
$sSQL .= 'place_id,';
|
||||
}
|
||||
if (!$this->bAddressDetails) {
|
||||
$sSQL .= 'langaddress, ';
|
||||
}
|
||||
$sSQL .= ' placename, ';
|
||||
$sSQL .= ' ref, ';
|
||||
if ($this->bExtraTags) $sSQL .= 'extratags, ';
|
||||
if ($this->bNameDetails) $sSQL .= 'name, ';
|
||||
if ($this->bExtraTags) {
|
||||
$sSQL .= 'extratags, ';
|
||||
}
|
||||
if ($this->bNameDetails) {
|
||||
$sSQL .= 'name, ';
|
||||
}
|
||||
$sSQL .= ' extra_place ';
|
||||
|
||||
$aSubSelects[] = $sSQL;
|
||||
@@ -260,8 +285,12 @@ class PlaceLookup
|
||||
$sSQL .= $this->langAddressSql('-1');
|
||||
$sSQL .= ' postcode as placename,';
|
||||
$sSQL .= ' postcode as ref,';
|
||||
if ($this->bExtraTags) $sSQL .= 'null::text AS extra,';
|
||||
if ($this->bNameDetails) $sSQL .= 'null::text AS names,';
|
||||
if ($this->bExtraTags) {
|
||||
$sSQL .= 'null::text AS extra,';
|
||||
}
|
||||
if ($this->bNameDetails) {
|
||||
$sSQL .= 'null::text AS names,';
|
||||
}
|
||||
$sSQL .= ' ST_x(geometry) AS lon, ST_y(geometry) AS lat,';
|
||||
$sSQL .= ' (0.75-(rank_search::float/40)) AS importance, ';
|
||||
$sSQL .= $this->addressImportanceSql('geometry', 'lp.parent_place_id');
|
||||
@@ -298,8 +327,12 @@ class PlaceLookup
|
||||
$sSQL .= $this->langAddressSql('housenumber_for_place');
|
||||
$sSQL .= ' null::text AS placename, ';
|
||||
$sSQL .= ' null::text AS ref, ';
|
||||
if ($this->bExtraTags) $sSQL .= 'null::text AS extra,';
|
||||
if ($this->bNameDetails) $sSQL .= 'null::text AS names,';
|
||||
if ($this->bExtraTags) {
|
||||
$sSQL .= 'null::text AS extra,';
|
||||
}
|
||||
if ($this->bNameDetails) {
|
||||
$sSQL .= 'null::text AS names,';
|
||||
}
|
||||
$sSQL .= ' st_x(centroid) AS lon, ';
|
||||
$sSQL .= ' st_y(centroid) AS lat,';
|
||||
$sSQL .= ' -1.15 AS importance, ';
|
||||
@@ -344,8 +377,12 @@ class PlaceLookup
|
||||
$sSQL .= $this->langAddressSql('housenumber_for_place');
|
||||
$sSQL .= ' null::text AS placename, ';
|
||||
$sSQL .= ' null::text AS ref, ';
|
||||
if ($this->bExtraTags) $sSQL .= 'null::text AS extra, ';
|
||||
if ($this->bNameDetails) $sSQL .= 'null::text AS names, ';
|
||||
if ($this->bExtraTags) {
|
||||
$sSQL .= 'null::text AS extra, ';
|
||||
}
|
||||
if ($this->bNameDetails) {
|
||||
$sSQL .= 'null::text AS names, ';
|
||||
}
|
||||
$sSQL .= ' st_x(centroid) AS lon, ';
|
||||
$sSQL .= ' st_y(centroid) AS lat, ';
|
||||
// slightly smaller than the importance for normal houses
|
||||
@@ -373,42 +410,6 @@ class PlaceLookup
|
||||
|
||||
$aSubSelects[] = $sSQL;
|
||||
}
|
||||
|
||||
if (CONST_Use_Aux_Location_data) {
|
||||
$sPlaceIDs = Result::joinIdsByTable($aResults, Result::TABLE_AUX);
|
||||
if ($sPlaceIDs) {
|
||||
$sHousenumbers = Result::sqlHouseNumberTable($aResults, Result::TABLE_AUX);
|
||||
$sSQL = ' SELECT ';
|
||||
$sSQL .= " 'L' AS osm_type, ";
|
||||
$sSQL .= ' place_id AS osm_id, ';
|
||||
$sSQL .= " 'place' AS class,";
|
||||
$sSQL .= " 'house' AS type, ";
|
||||
$sSQL .= ' null::smallint AS admin_level, ';
|
||||
$sSQL .= ' 30 AS rank_search,';
|
||||
$sSQL .= ' 30 AS rank_address, ';
|
||||
$sSQL .= ' place_id,';
|
||||
$sSQL .= ' parent_place_id, ';
|
||||
$sSQL .= ' housenumber,';
|
||||
$sSQL .= " 'us' AS country_code, ";
|
||||
$sSQL .= $this->langAddressSql('-1');
|
||||
$sSQL .= ' null::text AS placename, ';
|
||||
$sSQL .= ' null::text AS ref, ';
|
||||
if ($this->bExtraTags) $sSQL .= 'null::text AS extra, ';
|
||||
if ($this->bNameDetails) $sSQL .= 'null::text AS names, ';
|
||||
$sSQL .= ' ST_X(centroid) AS lon, ';
|
||||
$sSQL .= ' ST_Y(centroid) AS lat, ';
|
||||
$sSQL .= ' -1.10 AS importance, ';
|
||||
$sSQL .= $this->addressImportanceSql(
|
||||
'centroid',
|
||||
'location_property_aux.parent_place_id'
|
||||
);
|
||||
$sSQL .= ' null::text AS extra_place ';
|
||||
$sSQL .= ' FROM location_property_aux ';
|
||||
$sSQL .= " WHERE place_id in ($sPlaceIDs) ";
|
||||
|
||||
$aSubSelects[] = $sSQL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (empty($aSubSelects)) {
|
||||
@@ -484,7 +485,9 @@ class PlaceLookup
|
||||
{
|
||||
|
||||
$aOutlineResult = array();
|
||||
if (!$iPlaceID) return $aOutlineResult;
|
||||
if (!$iPlaceID) {
|
||||
return $aOutlineResult;
|
||||
}
|
||||
|
||||
// Get the bounding box and outline polygon
|
||||
$sSQL = 'select place_id,0 as numfeatures,st_area(geometry) as area,';
|
||||
@@ -496,10 +499,18 @@ class PlaceLookup
|
||||
}
|
||||
$sSQL .= ' ST_YMin(geometry) as minlat,ST_YMax(geometry) as maxlat,';
|
||||
$sSQL .= ' ST_XMin(geometry) as minlon,ST_XMax(geometry) as maxlon';
|
||||
if ($this->bIncludePolygonAsGeoJSON) $sSQL .= ',ST_AsGeoJSON(geometry) as asgeojson';
|
||||
if ($this->bIncludePolygonAsKML) $sSQL .= ',ST_AsKML(geometry) as askml';
|
||||
if ($this->bIncludePolygonAsSVG) $sSQL .= ',ST_AsSVG(geometry) as assvg';
|
||||
if ($this->bIncludePolygonAsText) $sSQL .= ',ST_AsText(geometry) as astext';
|
||||
if ($this->bIncludePolygonAsGeoJSON) {
|
||||
$sSQL .= ',ST_AsGeoJSON(geometry) as asgeojson';
|
||||
}
|
||||
if ($this->bIncludePolygonAsKML) {
|
||||
$sSQL .= ',ST_AsKML(geometry) as askml';
|
||||
}
|
||||
if ($this->bIncludePolygonAsSVG) {
|
||||
$sSQL .= ',ST_AsSVG(geometry) as assvg';
|
||||
}
|
||||
if ($this->bIncludePolygonAsText) {
|
||||
$sSQL .= ',ST_AsText(geometry) as astext';
|
||||
}
|
||||
if ($fLonReverse != null && $fLatReverse != null) {
|
||||
$sFrom = ' from (SELECT * , CASE WHEN (class = \'highway\') AND (ST_GeometryType(geometry) = \'ST_LineString\') THEN ';
|
||||
$sFrom .=' ST_ClosestPoint(geometry, ST_SetSRID(ST_Point('.$fLatReverse.','.$fLonReverse.'),4326))';
|
||||
@@ -522,10 +533,18 @@ class PlaceLookup
|
||||
$aOutlineResult['lon'] = $aPointPolygon['centrelon'];
|
||||
}
|
||||
|
||||
if ($this->bIncludePolygonAsGeoJSON) $aOutlineResult['asgeojson'] = $aPointPolygon['asgeojson'];
|
||||
if ($this->bIncludePolygonAsKML) $aOutlineResult['askml'] = $aPointPolygon['askml'];
|
||||
if ($this->bIncludePolygonAsSVG) $aOutlineResult['assvg'] = $aPointPolygon['assvg'];
|
||||
if ($this->bIncludePolygonAsText) $aOutlineResult['astext'] = $aPointPolygon['astext'];
|
||||
if ($this->bIncludePolygonAsGeoJSON) {
|
||||
$aOutlineResult['asgeojson'] = $aPointPolygon['asgeojson'];
|
||||
}
|
||||
if ($this->bIncludePolygonAsKML) {
|
||||
$aOutlineResult['askml'] = $aPointPolygon['askml'];
|
||||
}
|
||||
if ($this->bIncludePolygonAsSVG) {
|
||||
$aOutlineResult['assvg'] = $aPointPolygon['assvg'];
|
||||
}
|
||||
if ($this->bIncludePolygonAsText) {
|
||||
$aOutlineResult['astext'] = $aPointPolygon['astext'];
|
||||
}
|
||||
|
||||
if (abs($aPointPolygon['minlat'] - $aPointPolygon['maxlat']) < 0.0000001) {
|
||||
$aPointPolygon['minlat'] = $aPointPolygon['minlat'] - $fRadius;
|
||||
|
||||
@@ -13,8 +13,7 @@ class Result
|
||||
const TABLE_PLACEX = 0;
|
||||
const TABLE_POSTCODE = 1;
|
||||
const TABLE_OSMLINE = 2;
|
||||
const TABLE_AUX = 3;
|
||||
const TABLE_TIGER = 4;
|
||||
const TABLE_TIGER = 3;
|
||||
|
||||
/// Database table that contains the result.
|
||||
public $iTable;
|
||||
@@ -56,6 +55,27 @@ class Result
|
||||
}
|
||||
)));
|
||||
}
|
||||
|
||||
public static function joinIdsByTableMinRank($aResults, $iTable, $iMinAddressRank)
|
||||
{
|
||||
return join(',', array_keys(array_filter(
|
||||
$aResults,
|
||||
function ($aValue) use ($iTable, $iMinAddressRank) {
|
||||
return $aValue->iTable == $iTable && $aValue->iAddressRank >= $iMinAddressRank;
|
||||
}
|
||||
)));
|
||||
}
|
||||
|
||||
public static function joinIdsByTableMaxRank($aResults, $iTable, $iMaxAddressRank)
|
||||
{
|
||||
return join(',', array_keys(array_filter(
|
||||
$aResults,
|
||||
function ($aValue) use ($iTable, $iMaxAddressRank) {
|
||||
return $aValue->iTable == $iTable && $aValue->iAddressRank <= $iMaxAddressRank;
|
||||
}
|
||||
)));
|
||||
}
|
||||
|
||||
public static function sqlHouseNumberTable($aResults, $iTable)
|
||||
{
|
||||
$sHousenumbers = '';
|
||||
|
||||
@@ -74,8 +74,6 @@ class ReverseGeocode
|
||||
|
||||
protected function lookupLargeArea($sPointSQL, $iMaxRank)
|
||||
{
|
||||
$oResult = null;
|
||||
|
||||
if ($iMaxRank > 4) {
|
||||
$aPlace = $this->lookupPolygon($sPointSQL, $iMaxRank);
|
||||
if ($aPlace) {
|
||||
@@ -167,9 +165,13 @@ class ReverseGeocode
|
||||
{
|
||||
Debug::newFunction('lookupPolygon');
|
||||
// polygon search begins at suburb-level
|
||||
if ($iMaxRank > 25) $iMaxRank = 25;
|
||||
if ($iMaxRank > 25) {
|
||||
$iMaxRank = 25;
|
||||
}
|
||||
// no polygon search over country-level
|
||||
if ($iMaxRank < 5) $iMaxRank = 5;
|
||||
if ($iMaxRank < 5) {
|
||||
$iMaxRank = 5;
|
||||
}
|
||||
// search for polygon
|
||||
$sSQL = 'SELECT place_id, parent_place_id, rank_address, rank_search FROM';
|
||||
$sSQL .= '(select place_id, parent_place_id, rank_address, rank_search, country_code, geometry';
|
||||
@@ -190,7 +192,6 @@ class ReverseGeocode
|
||||
|
||||
if ($aPoly) {
|
||||
// if a polygon is found, search for placenodes begins ...
|
||||
$iParentPlaceID = $aPoly['parent_place_id'];
|
||||
$iRankAddress = $aPoly['rank_address'];
|
||||
$iRankSearch = $aPoly['rank_search'];
|
||||
$iPlaceID = $aPoly['place_id'];
|
||||
@@ -242,26 +243,24 @@ class ReverseGeocode
|
||||
public function lookupPoint($sPointSQL, $bDoInterpolation = true)
|
||||
{
|
||||
Debug::newFunction('lookupPoint');
|
||||
// starts if the search is on POI or street level,
|
||||
// searches for the nearest POI or street,
|
||||
// if a street is found and a POI is searched for,
|
||||
// the nearest POI which the found street is a parent of is choosen.
|
||||
$iMaxRank = $this->iMaxRank;
|
||||
|
||||
// Find the nearest point
|
||||
$fSearchDiam = 0.006;
|
||||
$oResult = null;
|
||||
$aPlace = null;
|
||||
|
||||
// for POI or street level
|
||||
if ($iMaxRank >= 26) {
|
||||
if ($this->iMaxRank >= 26) {
|
||||
// starts if the search is on POI or street level,
|
||||
// searches for the nearest POI or street,
|
||||
// if a street is found and a POI is searched for,
|
||||
// the nearest POI which the found street is a parent of is choosen.
|
||||
$sSQL = 'select place_id,parent_place_id,rank_address,country_code,';
|
||||
$sSQL .= ' ST_distance('.$sPointSQL.', geometry) as distance';
|
||||
$sSQL .= ' FROM ';
|
||||
$sSQL .= ' placex';
|
||||
$sSQL .= ' WHERE ST_DWithin('.$sPointSQL.', geometry, '.$fSearchDiam.')';
|
||||
$sSQL .= ' AND';
|
||||
$sSQL .= ' rank_address between 26 and '.$iMaxRank;
|
||||
$sSQL .= ' rank_address between 26 and '.$this->iMaxRank;
|
||||
$sSQL .= ' and (name is not null or housenumber is not null';
|
||||
$sSQL .= ' or rank_address between 26 and 27)';
|
||||
$sSQL .= ' and (rank_address between 26 and 27';
|
||||
@@ -284,7 +283,7 @@ class ReverseGeocode
|
||||
|
||||
if ($aPlace) {
|
||||
// if street and maxrank > streetlevel
|
||||
if ($iRankAddress <= 27 && $iMaxRank > 27) {
|
||||
if ($iRankAddress <= 27 && $this->iMaxRank > 27) {
|
||||
// find the closest object (up to a certain radius) of which the street is a parent of
|
||||
$sSQL = ' select place_id,';
|
||||
$sSQL .= ' ST_distance('.$sPointSQL.', geometry) as distance';
|
||||
@@ -338,7 +337,7 @@ class ReverseGeocode
|
||||
}
|
||||
}
|
||||
|
||||
if ($bDoInterpolation && $iMaxRank >= 30) {
|
||||
if ($bDoInterpolation && $this->iMaxRank >= 30) {
|
||||
$fDistance = $fSearchDiam;
|
||||
if ($aPlace) {
|
||||
// We can't reliably go from the closest street to an
|
||||
@@ -356,7 +355,6 @@ class ReverseGeocode
|
||||
$oResult = new Result($aHouse['place_id'], Result::TABLE_OSMLINE);
|
||||
$oResult->iHouseNumber = closestHouseNumber($aHouse);
|
||||
$aPlace = $aHouse;
|
||||
$iRankAddress = 30;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -366,7 +364,7 @@ class ReverseGeocode
|
||||
}
|
||||
} else {
|
||||
// lower than street level ($iMaxRank < 26 )
|
||||
$oResult = $this->lookupLargeArea($sPointSQL, $iMaxRank);
|
||||
$oResult = $this->lookupLargeArea($sPointSQL, $this->iMaxRank);
|
||||
}
|
||||
|
||||
Debug::printVar('Final result', $oResult);
|
||||
|
||||
@@ -67,35 +67,6 @@ class SearchDescription
|
||||
return $this->iSearchRank;
|
||||
}
|
||||
|
||||
/**
|
||||
* Make this search a POI search.
|
||||
*
|
||||
* In a POI search, objects are not (only) searched by their name
|
||||
* but also by the primary OSM key/value pair (class and type in Nominatim).
|
||||
*
|
||||
* @param integer $iOperator Type of POI search
|
||||
* @param string $sClass Class (or OSM tag key) of POI.
|
||||
* @param string $sType Type (or OSM tag value) of POI.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
public function setPoiSearch($iOperator, $sClass, $sType)
|
||||
{
|
||||
$this->iOperator = $iOperator;
|
||||
$this->sClass = $sClass;
|
||||
$this->sType = $sType;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if any operator is set.
|
||||
*
|
||||
* @return bool True, if this is a special search operation.
|
||||
*/
|
||||
public function hasOperator()
|
||||
{
|
||||
return $this->iOperator != Operator::NONE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract key/value pairs from a query.
|
||||
*
|
||||
@@ -148,251 +119,234 @@ class SearchDescription
|
||||
|
||||
/////////// Search building functions
|
||||
|
||||
|
||||
/**
|
||||
* Derive new searches by adding a full term to the existing search.
|
||||
* Create a copy of this search description adding to search rank.
|
||||
*
|
||||
* @param object $oSearchTerm Description of the token.
|
||||
* @param bool $bHasPartial True if there are also tokens of partial terms
|
||||
* with the same name.
|
||||
* @param string $sPhraseType Type of phrase the token is contained in.
|
||||
* @param bool $bFirstToken True if the token is at the beginning of the
|
||||
* query.
|
||||
* @param bool $bFirstPhrase True if the token is in the first phrase of
|
||||
* the query.
|
||||
* @param bool $bLastToken True if the token is at the end of the query.
|
||||
* @param integer $iTermCost Cost to add to the current search rank.
|
||||
*
|
||||
* @return SearchDescription[] List of derived search descriptions.
|
||||
* @return object Cloned search description.
|
||||
*/
|
||||
public function extendWithFullTerm($oSearchTerm, $bHasPartial, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken)
|
||||
public function clone($iTermCost)
|
||||
{
|
||||
$aNewSearches = array();
|
||||
$oSearch = clone $this;
|
||||
$oSearch->iSearchRank += $iTermCost;
|
||||
|
||||
if (($sPhraseType == '' || $sPhraseType == 'country')
|
||||
&& is_a($oSearchTerm, '\Nominatim\Token\Country')
|
||||
) {
|
||||
if (!$this->sCountryCode) {
|
||||
$oSearch = clone $this;
|
||||
$oSearch->iSearchRank++;
|
||||
$oSearch->sCountryCode = $oSearchTerm->sCountryCode;
|
||||
// Country is almost always at the end of the string
|
||||
// - increase score for finding it anywhere else (optimisation)
|
||||
if (!$bLastToken) {
|
||||
$oSearch->iSearchRank += 5;
|
||||
$oSearch->iNamePhrase = -1;
|
||||
}
|
||||
$aNewSearches[] = $oSearch;
|
||||
}
|
||||
} elseif (($sPhraseType == '' || $sPhraseType == 'postalcode')
|
||||
&& is_a($oSearchTerm, '\Nominatim\Token\Postcode')
|
||||
) {
|
||||
if (!$this->sPostcode) {
|
||||
// If we have structured search or this is the first term,
|
||||
// make the postcode the primary search element.
|
||||
if ($this->iOperator == Operator::NONE && $bFirstToken) {
|
||||
$oSearch = clone $this;
|
||||
$oSearch->iSearchRank++;
|
||||
$oSearch->iOperator = Operator::POSTCODE;
|
||||
$oSearch->aAddress = array_merge($this->aAddress, $this->aName);
|
||||
$oSearch->aName =
|
||||
array($oSearchTerm->iId => $oSearchTerm->sPostcode);
|
||||
$aNewSearches[] = $oSearch;
|
||||
}
|
||||
|
||||
// If we have a structured search or this is not the first term,
|
||||
// add the postcode as an addendum.
|
||||
if ($this->iOperator != Operator::POSTCODE
|
||||
&& ($sPhraseType == 'postalcode' || !empty($this->aName))
|
||||
) {
|
||||
$oSearch = clone $this;
|
||||
$oSearch->iSearchRank++;
|
||||
$oSearch->iNamePhrase = -1;
|
||||
if (strlen($oSearchTerm->sPostcode) < 4) {
|
||||
$oSearch->iSearchRank += 4 - strlen($oSearchTerm->sPostcode);
|
||||
}
|
||||
$oSearch->sPostcode = $oSearchTerm->sPostcode;
|
||||
$aNewSearches[] = $oSearch;
|
||||
}
|
||||
}
|
||||
} elseif (($sPhraseType == '' || $sPhraseType == 'street')
|
||||
&& is_a($oSearchTerm, '\Nominatim\Token\HouseNumber')
|
||||
) {
|
||||
if (!$this->sHouseNumber && $this->iOperator != Operator::POSTCODE) {
|
||||
$oSearch = clone $this;
|
||||
$oSearch->iSearchRank++;
|
||||
$oSearch->iNamePhrase = -1;
|
||||
$oSearch->sHouseNumber = $oSearchTerm->sToken;
|
||||
if ($this->iOperator != Operator::NONE) {
|
||||
$oSearch->iSearchRank++;
|
||||
}
|
||||
// sanity check: if the housenumber is not mainly made
|
||||
// up of numbers, add a penalty
|
||||
if (preg_match('/\\d/', $oSearch->sHouseNumber) === 0
|
||||
|| preg_match_all('/[^0-9]/', $oSearch->sHouseNumber, $aMatches) > 2) {
|
||||
$oSearch->iSearchRank++;
|
||||
}
|
||||
if (empty($oSearchTerm->iId)) {
|
||||
$oSearch->iSearchRank++;
|
||||
}
|
||||
// also must not appear in the middle of the address
|
||||
if (!empty($this->aAddress)
|
||||
|| (!empty($this->aAddressNonSearch))
|
||||
|| $this->sPostcode
|
||||
) {
|
||||
$oSearch->iSearchRank++;
|
||||
}
|
||||
$aNewSearches[] = $oSearch;
|
||||
// Housenumbers may appear in the name when the place has its own
|
||||
// address terms.
|
||||
if ($oSearchTerm->iId !== null
|
||||
&& ($this->iNamePhrase >= 0 || empty($this->aName))
|
||||
&& empty($this->aAddress)
|
||||
) {
|
||||
$oSearch = clone $this;
|
||||
$oSearch->iSearchRank++;
|
||||
$oSearch->aAddress = $this->aName;
|
||||
$oSearch->bRareName = false;
|
||||
$oSearch->aName = array($oSearchTerm->iId => $oSearchTerm->iId);
|
||||
$aNewSearches[] = $oSearch;
|
||||
}
|
||||
}
|
||||
} elseif ($sPhraseType == ''
|
||||
&& is_a($oSearchTerm, '\Nominatim\Token\SpecialTerm')
|
||||
) {
|
||||
if ($this->iOperator == Operator::NONE) {
|
||||
$oSearch = clone $this;
|
||||
$oSearch->iSearchRank += 2;
|
||||
$oSearch->iNamePhrase = -1;
|
||||
|
||||
$iOp = $oSearchTerm->iOperator;
|
||||
if ($iOp == Operator::NONE) {
|
||||
if (!empty($this->aName) || $this->oContext->isBoundedSearch()) {
|
||||
$iOp = Operator::NAME;
|
||||
} else {
|
||||
$iOp = Operator::NEAR;
|
||||
}
|
||||
$oSearch->iSearchRank += 2;
|
||||
} elseif (!$bFirstToken && !$bLastToken) {
|
||||
$oSearch->iSearchRank += 2;
|
||||
}
|
||||
if ($this->sHouseNumber) {
|
||||
$oSearch->iSearchRank++;
|
||||
}
|
||||
|
||||
$oSearch->setPoiSearch(
|
||||
$iOp,
|
||||
$oSearchTerm->sClass,
|
||||
$oSearchTerm->sType
|
||||
);
|
||||
$aNewSearches[] = $oSearch;
|
||||
}
|
||||
} elseif ($sPhraseType != 'country'
|
||||
&& is_a($oSearchTerm, '\Nominatim\Token\Word')
|
||||
) {
|
||||
$iWordID = $oSearchTerm->iId;
|
||||
// Full words can only be a name if they appear at the beginning
|
||||
// of the phrase. In structured search the name must necessarily be in
|
||||
// the first phrase. In unstructured search it may be in a later
|
||||
// phrase when the first phrase is a house number.
|
||||
if (!empty($this->aName) || !($bFirstPhrase || $sPhraseType == '')) {
|
||||
if (($sPhraseType == '' || !$bFirstPhrase) && !$bHasPartial) {
|
||||
$oSearch = clone $this;
|
||||
$oSearch->iNamePhrase = -1;
|
||||
$oSearch->iSearchRank += 3 * $oSearchTerm->iTermCount;
|
||||
$oSearch->aAddress[$iWordID] = $iWordID;
|
||||
$aNewSearches[] = $oSearch;
|
||||
}
|
||||
} elseif (empty($this->aNameNonSearch)) {
|
||||
$oSearch = clone $this;
|
||||
$oSearch->iSearchRank++;
|
||||
$oSearch->aName = array($iWordID => $iWordID);
|
||||
if (CONST_Search_NameOnlySearchFrequencyThreshold) {
|
||||
$oSearch->bRareName =
|
||||
$oSearchTerm->iSearchNameCount
|
||||
< CONST_Search_NameOnlySearchFrequencyThreshold;
|
||||
}
|
||||
$aNewSearches[] = $oSearch;
|
||||
}
|
||||
}
|
||||
|
||||
return $aNewSearches;
|
||||
return $oSearch;
|
||||
}
|
||||
|
||||
/**
|
||||
* Derive new searches by adding a partial term to the existing search.
|
||||
* Check if the search currently includes a name.
|
||||
*
|
||||
* @param string $sToken Term for the token.
|
||||
* @param object $oSearchTerm Description of the token.
|
||||
* @param bool $bStructuredPhrases True if the search is structured.
|
||||
* @param integer $iPhrase Number of the phrase the token is in.
|
||||
* @param array[] $aFullTokens List of full term tokens with the
|
||||
* same name.
|
||||
* @param bool bIncludeNonNames If true stop-word tokens are taken into
|
||||
* account, too.
|
||||
*
|
||||
* @return SearchDescription[] List of derived search descriptions.
|
||||
* @return bool True, if search has a name.
|
||||
*/
|
||||
public function extendWithPartialTerm($sToken, $oSearchTerm, $bStructuredPhrases, $iPhrase, $aFullTokens)
|
||||
public function hasName($bIncludeNonNames = false)
|
||||
{
|
||||
// Only allow name terms.
|
||||
if (!(is_a($oSearchTerm, '\Nominatim\Token\Word'))) {
|
||||
return array();
|
||||
return !empty($this->aName)
|
||||
|| (!empty($this->aNameNonSearch) && $bIncludeNonNames);
|
||||
}
|
||||
|
||||
/**
 * Tell whether at least one address term has been added to the search.
 *
 * @return bool True, if the list of searchable address terms or the list
 *              of non-searchable (stop-word) address terms is non-empty.
 */
public function hasAddress()
{
    return !(empty($this->aAddress) && empty($this->aAddressNonSearch));
}
|
||||
|
||||
/**
 * Tell whether the search already carries a country restriction.
 *
 * @return bool True, if the stored country code is anything other than
 *              the empty string.
 */
public function hasCountry()
{
    return !($this->sCountryCode === '');
}
|
||||
|
||||
/**
 * Tell whether the search already carries a postcode constraint.
 *
 * @return bool True, if the stored postcode is anything other than the
 *              empty string.
 */
public function hasPostcode()
{
    return !($this->sPostcode === '');
}
|
||||
|
||||
/**
 * Tell whether the search already carries a house number.
 *
 * @return bool True, if the stored house number is anything other than
 *              the empty string.
 */
public function hasHousenumber()
{
    return !($this->sHouseNumber === '');
}
|
||||
|
||||
/**
 * Check if a special type of place is requested.
 *
 * @param integer $iOperator When set, check for the particular
 *                           operator used for the special type.
 *
 * @return bool Without an argument: true, if any operator other than
 *              Operator::NONE is set. With an argument: true, if the
 *              operator of the search equals the given one.
 */
public function hasOperator($iOperator = null)
{
    if ($iOperator === null) {
        return $this->iOperator != Operator::NONE;
    }

    return $this->iOperator == $iOperator;
}
|
||||
|
||||
/**
 * Add the given token to the list of terms to search for in the address.
 *
 * @param integer $iId         ID of term to add.
 * @param bool    $bSearchable Term should be used to search for result
 *                             (i.e. term is not a stop word).
 *
 * @return void
 */
public function addAddressToken($iId, $bSearchable = true)
{
    // Stop words are only remembered, not used for the lookup itself.
    if (!$bSearchable) {
        $this->aAddressNonSearch[$iId] = $iId;
        return;
    }

    $this->aAddress[$iId] = $iId;
}
|
||||
|
||||
$aNewSearches = array();
|
||||
$iWordID = $oSearchTerm->iId;
|
||||
/**
 * Add the given full-word token to the list of terms to search for in the
 * name.
 *
 * @param integer $iId       ID of term to add.
 * @param bool    $bRareName True if the term is infrequent enough to not
 *                           require other constraints for efficient search.
 *
 * @return void
 */
public function addNameToken($iId, $bRareName)
{
    $this->bRareName = $bRareName;
    $this->aName[$iId] = $iId;
}
|
||||
|
||||
if ((!$bStructuredPhrases || $iPhrase > 0)
|
||||
&& (!empty($this->aName))
|
||||
) {
|
||||
$oSearch = clone $this;
|
||||
$oSearch->iSearchRank++;
|
||||
if (preg_match('#^[0-9 ]+$#', $sToken)) {
|
||||
$oSearch->iSearchRank++;
|
||||
}
|
||||
if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
|
||||
$oSearch->aAddress[$iWordID] = $iWordID;
|
||||
} else {
|
||||
$oSearch->aAddressNonSearch[$iWordID] = $iWordID;
|
||||
if (!empty($aFullTokens)) {
|
||||
$oSearch->iSearchRank++;
|
||||
}
|
||||
}
|
||||
$aNewSearches[] = $oSearch;
|
||||
/**
 * Add the given partial token to the list of terms to search for in
 * the name.
 *
 * @param integer $iId           ID of term to add.
 * @param bool    $bSearchable   Term should be used to search for result
 *                               (i.e. term is not a stop word).
 * @param integer $iPhraseNumber Index of phrase, where the partial term
 *                               appears.
 *
 * @return void
 */
public function addPartialNameToken($iId, $bSearchable, $iPhraseNumber)
{
    // Remember in which phrase the name was found.
    $this->iNamePhrase = $iPhraseNumber;

    if (!$bSearchable) {
        $this->aNameNonSearch[$iId] = $iId;
        return;
    }

    $this->aName[$iId] = $iId;
}
|
||||
|
||||
if ((!$this->sPostcode && !$this->aAddress && !$this->aAddressNonSearch)
|
||||
&& ((empty($this->aName) && empty($this->aNameNonSearch)) || $this->iNamePhrase == $iPhrase)
|
||||
&& strpos($sToken, ' ') === false
|
||||
) {
|
||||
$oSearch = clone $this;
|
||||
$oSearch->iSearchRank++;
|
||||
if (empty($this->aName) && empty($this->aNameNonSearch)) {
|
||||
$oSearch->iSearchRank++;
|
||||
}
|
||||
if (preg_match('#^[0-9 ]+$#', $sToken)) {
|
||||
$oSearch->iSearchRank++;
|
||||
}
|
||||
if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
|
||||
if (empty($this->aName)
|
||||
&& CONST_Search_NameOnlySearchFrequencyThreshold
|
||||
) {
|
||||
$oSearch->bRareName =
|
||||
$oSearchTerm->iSearchNameCount
|
||||
< CONST_Search_NameOnlySearchFrequencyThreshold;
|
||||
} else {
|
||||
$oSearch->bRareName = false;
|
||||
}
|
||||
$oSearch->aName[$iWordID] = $iWordID;
|
||||
} else {
|
||||
if (!empty($aFullTokens)) {
|
||||
$oSearch->iSearchRank++;
|
||||
}
|
||||
$oSearch->aNameNonSearch[$iWordID] = $iWordID;
|
||||
}
|
||||
$oSearch->iNamePhrase = $iPhrase;
|
||||
$aNewSearches[] = $oSearch;
|
||||
}
|
||||
/**
 * Set country restriction for the search.
 *
 * Also resets the recorded name phrase to -1.
 *
 * @param string $sCountryCode Country code of country to restrict search to.
 *
 * @return void
 */
public function setCountry($sCountryCode)
{
    $this->iNamePhrase = -1;
    $this->sCountryCode = $sCountryCode;
}
|
||||
|
||||
return $aNewSearches;
|
||||
/**
 * Set postcode search constraint.
 *
 * Also resets the recorded name phrase to -1.
 *
 * @param string $sPostcode Postcode the result should have.
 *
 * @return void
 */
public function setPostcode($sPostcode)
{
    $this->iNamePhrase = -1;
    $this->sPostcode = $sPostcode;
}
|
||||
|
||||
/**
 * Make this search a search for a postcode object.
 *
 * The current name terms are appended to the address terms and the name
 * is replaced by the postcode.
 *
 * @param integer $iId       Token Id for the postcode.
 * @param string  $sPostcode Postcode to look for.
 *
 * @return void
 */
public function setPostcodeAsName($iId, $sPostcode)
{
    // Must be computed before the name list is overwritten below.
    $aMergedAddress = array_merge($this->aAddress, $this->aName);

    $this->aAddress = $aMergedAddress;
    $this->aName = array($iId => $sPostcode);
    $this->iOperator = Operator::POSTCODE;
    $this->iNamePhrase = -1;
    $this->bRareName = true;
}
|
||||
|
||||
/**
 * Set house number search constraint.
 *
 * Also resets the recorded name phrase to -1.
 *
 * @param string $sNumber House number the result should have.
 *
 * @return void
 */
public function setHousenumber($sNumber)
{
    $this->iNamePhrase = -1;
    $this->sHouseNumber = $sNumber;
}
|
||||
|
||||
/**
 * Make this search a search for a house number.
 *
 * The current name terms are appended to the address terms and the name
 * is replaced by the house number token.
 *
 * @param integer $iId Token Id for the house number.
 *
 * @return void
 */
public function setHousenumberAsName($iId)
{
    // Must be computed before the name list is overwritten below.
    $aMergedAddress = array_merge($this->aAddress, $this->aName);

    $this->aAddress = $aMergedAddress;
    $this->aName = array($iId => $iId);
    $this->iNamePhrase = -1;
    $this->bRareName = false;
}
|
||||
|
||||
/**
 * Make this search a POI search.
 *
 * In a POI search, objects are not (only) searched by their name
 * but also by the primary OSM key/value pair (class and type in Nominatim).
 * The recorded name phrase is reset to -1.
 *
 * @param integer $iOperator Type of POI search
 * @param string  $sClass    Class (or OSM tag key) of POI.
 * @param string  $sType     Type (or OSM tag value) of POI.
 *
 * @return void
 */
public function setPoiSearch($iOperator, $sClass, $sType)
{
    $this->iNamePhrase = -1;

    $this->sType = $sType;
    $this->sClass = $sClass;
    $this->iOperator = $iOperator;
}
|
||||
|
||||
/**
 * Get the phrase number currently recorded for the name.
 *
 * @return integer The stored value of iNamePhrase.
 */
public function getNamePhrase()
{
    return $this->iNamePhrase;
}
|
||||
|
||||
/**
 * Get the global search context.
 *
 * @return object Object holding the global search constraints.
 */
public function getContext()
{
    return $this->oContext;
}
|
||||
|
||||
/////////// Query functions
|
||||
@@ -413,7 +367,6 @@ class SearchDescription
|
||||
public function query(&$oDB, $iMinRank, $iMaxRank, $iLimit)
|
||||
{
|
||||
$aResults = array();
|
||||
$iHousenumber = -1;
|
||||
|
||||
if ($this->sCountryCode
|
||||
&& empty($this->aName)
|
||||
@@ -446,23 +399,24 @@ class SearchDescription
|
||||
|
||||
// Now search for housenumber, if housenumber provided. Can be zero.
|
||||
if (($this->sHouseNumber || $this->sHouseNumber === '0') && !empty($aResults)) {
|
||||
// Downgrade the rank of the street results, they are missing
|
||||
// the housenumber.
|
||||
foreach ($aResults as $oRes) {
|
||||
if ($oRes->iAddressRank >= 26) {
|
||||
$oRes->iResultRank++;
|
||||
} else {
|
||||
$oRes->iResultRank += 2;
|
||||
}
|
||||
}
|
||||
|
||||
$aHnResults = $this->queryHouseNumber($oDB, $aResults);
|
||||
|
||||
if (!empty($aHnResults)) {
|
||||
foreach ($aHnResults as $oRes) {
|
||||
$aResults[$oRes->iId] = $oRes;
|
||||
// Downgrade the rank of the street results, they are missing
|
||||
// the housenumber. Also drop POI places (rank 30) here, they
|
||||
// cannot be a parent place and therefore must not be shown
|
||||
// as a result for a search with a missing housenumber.
|
||||
foreach ($aResults as $oRes) {
|
||||
if ($oRes->iAddressRank < 28) {
|
||||
if ($oRes->iAddressRank >= 26) {
|
||||
$oRes->iResultRank++;
|
||||
} else {
|
||||
$oRes->iResultRank += 2;
|
||||
}
|
||||
$aHnResults[$oRes->iId] = $oRes;
|
||||
}
|
||||
}
|
||||
|
||||
$aResults = $aHnResults;
|
||||
}
|
||||
|
||||
// finally get POIs if requested
|
||||
@@ -742,16 +696,33 @@ class SearchDescription
|
||||
private function queryHouseNumber(&$oDB, $aRoadPlaceIDs)
|
||||
{
|
||||
$aResults = array();
|
||||
$sPlaceIDs = Result::joinIdsByTable($aRoadPlaceIDs, Result::TABLE_PLACEX);
|
||||
$sRoadPlaceIDs = Result::joinIdsByTableMaxRank(
|
||||
$aRoadPlaceIDs,
|
||||
Result::TABLE_PLACEX,
|
||||
27
|
||||
);
|
||||
$sPOIPlaceIDs = Result::joinIdsByTableMinRank(
|
||||
$aRoadPlaceIDs,
|
||||
Result::TABLE_PLACEX,
|
||||
30
|
||||
);
|
||||
|
||||
if (!$sPlaceIDs) {
|
||||
$aIDCondition = array();
|
||||
if ($sRoadPlaceIDs) {
|
||||
$aIDCondition[] = 'parent_place_id in ('.$sRoadPlaceIDs.')';
|
||||
}
|
||||
if ($sPOIPlaceIDs) {
|
||||
$aIDCondition[] = 'place_id in ('.$sPOIPlaceIDs.')';
|
||||
}
|
||||
|
||||
if (empty($aIDCondition)) {
|
||||
return $aResults;
|
||||
}
|
||||
|
||||
$sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M';
|
||||
$sSQL = 'SELECT place_id FROM placex ';
|
||||
$sSQL .= 'WHERE parent_place_id in ('.$sPlaceIDs.')';
|
||||
$sSQL .= " AND housenumber ~* E'".$sHouseNumberRegex."'";
|
||||
$sSQL = 'SELECT place_id FROM placex WHERE';
|
||||
$sSQL .= " housenumber ~* E'".$sHouseNumberRegex."'";
|
||||
$sSQL .= ' AND ('.join(' OR ', $aIDCondition).')';
|
||||
$sSQL .= $this->oContext->excludeSQL(' AND place_id');
|
||||
|
||||
Debug::printSQL($sSQL);
|
||||
@@ -763,11 +734,11 @@ class SearchDescription
|
||||
|
||||
$bIsIntHouseNumber= (bool) preg_match('/[0-9]+/', $this->sHouseNumber);
|
||||
$iHousenumber = intval($this->sHouseNumber);
|
||||
if ($bIsIntHouseNumber && empty($aResults)) {
|
||||
if ($bIsIntHouseNumber && $sRoadPlaceIDs && empty($aResults)) {
|
||||
// if nothing found, search in the interpolation line table
|
||||
$sSQL = 'SELECT distinct place_id FROM location_property_osmline';
|
||||
$sSQL .= ' WHERE startnumber is not NULL';
|
||||
$sSQL .= ' AND parent_place_id in ('.$sPlaceIDs.') AND (';
|
||||
$sSQL .= ' AND parent_place_id in ('.$sRoadPlaceIDs.') AND (';
|
||||
if ($iHousenumber % 2 == 0) {
|
||||
// If housenumber is even, look for housenumber in streets
|
||||
// with interpolationtype even or all.
|
||||
@@ -790,24 +761,10 @@ class SearchDescription
|
||||
}
|
||||
}
|
||||
|
||||
// If nothing found try the aux fallback table
|
||||
if (CONST_Use_Aux_Location_data && empty($aResults)) {
|
||||
$sSQL = 'SELECT place_id FROM location_property_aux';
|
||||
$sSQL .= ' WHERE parent_place_id in ('.$sPlaceIDs.')';
|
||||
$sSQL .= " AND housenumber = '".$this->sHouseNumber."'";
|
||||
$sSQL .= $this->oContext->excludeSQL(' AND place_id');
|
||||
|
||||
Debug::printSQL($sSQL);
|
||||
|
||||
foreach ($oDB->getCol($sSQL) as $iPlaceId) {
|
||||
$aResults[$iPlaceId] = new Result($iPlaceId, Result::TABLE_AUX);
|
||||
}
|
||||
}
|
||||
|
||||
// If nothing found then search in Tiger data (location_property_tiger)
|
||||
if (CONST_Use_US_Tiger_Data && $bIsIntHouseNumber && empty($aResults)) {
|
||||
if (CONST_Use_US_Tiger_Data && $sRoadPlaceIDs && $bIsIntHouseNumber && empty($aResults)) {
|
||||
$sSQL = 'SELECT place_id FROM location_property_tiger';
|
||||
$sSQL .= ' WHERE parent_place_id in ('.$sPlaceIDs.') and (';
|
||||
$sSQL .= ' WHERE parent_place_id in ('.$sRoadPlaceIDs.') and (';
|
||||
if ($iHousenumber % 2 == 0) {
|
||||
$sSQL .= "interpolationtype='even'";
|
||||
} else {
|
||||
|
||||
87
lib-php/SearchPosition.php
Normal file
87
lib-php/SearchPosition.php
Normal file
@@ -0,0 +1,87 @@
|
||||
<?php
|
||||
|
||||
namespace Nominatim;
|
||||
|
||||
/**
 * Description of the position of a token within a query.
 *
 * The phrase-related values are fixed by the constructor; the token-related
 * values are supplied afterwards through setTokenPosition().
 */
class SearchPosition
{
    /// Type of the phrase the token is in ('' for an untyped phrase).
    private $sPhraseType;

    /// Zero-based index of the phrase within the query.
    private $iPhrase;
    /// Total number of phrases in the query.
    private $iNumPhrases;

    /// Zero-based index of the token (presumably within its phrase).
    private $iToken;
    /// Number of tokens the index above is counted against.
    private $iNumTokens;


    public function __construct($sPhraseType, $iPhrase, $iNumPhrases)
    {
        $this->iNumPhrases = $iNumPhrases;
        $this->iPhrase = $iPhrase;
        $this->sPhraseType = $sPhraseType;
    }

    /**
     * Set the position of the token.
     *
     * @param integer $iToken     Index of the token.
     * @param integer $iNumTokens Total number of tokens.
     *
     * @return void
     */
    public function setTokenPosition($iToken, $iNumTokens)
    {
        $this->iNumTokens = $iNumTokens;
        $this->iToken = $iToken;
    }

    /**
     * Check if the phrase can be of the given type.
     *
     * @param string $sType Type of phrase requested.
     *
     * @return bool True if the phrase is untyped or of the given type.
     */
    public function maybePhrase($sType)
    {
        return $this->isPhrase('') || $this->isPhrase($sType);
    }

    /**
     * Check if the phrase is exactly of the given type.
     *
     * @param string $sType Type of phrase requested.
     *
     * @return bool True if the phrase is of the given type.
     */
    public function isPhrase($sType)
    {
        return $this->sPhraseType == $sType;
    }

    /**
     * Return true if the token is the very first in the query.
     *
     * @return bool True for token 0 of phrase 0.
     */
    public function isFirstToken()
    {
        return $this->isFirstPhrase() && $this->iToken == 0;
    }

    /**
     * Check if the token is the final one in the query.
     *
     * @return bool True if both the token index and the phrase index are
     *              the last ones according to the stored totals.
     */
    public function isLastToken()
    {
        if ($this->iToken + 1 != $this->iNumTokens) {
            return false;
        }

        return $this->iPhrase + 1 == $this->iNumPhrases;
    }

    /**
     * Check if the current token is part of the first phrase in the query.
     *
     * @return bool True if the phrase index is 0.
     */
    public function isFirstPhrase()
    {
        return $this->iPhrase == 0;
    }

    /**
     * Get the phrase position in the query.
     *
     * @return integer Index of the phrase as handed to the constructor.
     */
    public function getPhrase()
    {
        return $this->iPhrase;
    }
}
|
||||
@@ -33,7 +33,9 @@ class Shell
|
||||
public function addEnvPair($sKey, $sVal)
|
||||
{
|
||||
if (isset($sKey) && $sKey && isset($sVal)) {
|
||||
if (!isset($this->aEnv)) $this->aEnv = $_ENV;
|
||||
if (!isset($this->aEnv)) {
|
||||
$this->aEnv = $_ENV;
|
||||
}
|
||||
$this->aEnv = array_merge($this->aEnv, array($sKey => $sVal), $_ENV);
|
||||
}
|
||||
return $this;
|
||||
@@ -75,11 +77,8 @@ class Shell
|
||||
return $iStat;
|
||||
}
|
||||
|
||||
|
||||
|
||||
private function escapeParam($sParam)
|
||||
{
|
||||
if (preg_match('/^-*\w+$/', $sParam)) return $sParam;
|
||||
return escapeshellarg($sParam);
|
||||
return (preg_match('/^-*\w+$/', $sParam)) ? $sParam : escapeshellarg($sParam);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
|
||||
namespace Nominatim;
|
||||
|
||||
require_once(CONST_TokenizerDir.'/tokenizer.php');
|
||||
|
||||
use Exception;
|
||||
|
||||
class Status
|
||||
@@ -25,24 +27,8 @@ class Status
|
||||
throw new Exception('Database connection failed', 700);
|
||||
}
|
||||
|
||||
$sStandardWord = $this->oDB->getOne("SELECT make_standard_name('a')");
|
||||
if ($sStandardWord === false) {
|
||||
throw new Exception('Module failed', 701);
|
||||
}
|
||||
|
||||
if ($sStandardWord != 'a') {
|
||||
throw new Exception('Module call failed', 702);
|
||||
}
|
||||
|
||||
$sSQL = 'SELECT word_id, word_token, word, class, type, country_code, ';
|
||||
$sSQL .= "operator, search_name_count FROM word WHERE word_token IN (' a')";
|
||||
$iWordID = $this->oDB->getOne($sSQL);
|
||||
if ($iWordID === false) {
|
||||
throw new Exception('Query failed', 703);
|
||||
}
|
||||
if (!$iWordID) {
|
||||
throw new Exception('No value', 704);
|
||||
}
|
||||
$oTokenizer = new \Nominatim\Tokenizer($this->oDB);
|
||||
$oTokenizer->checkStatus();
|
||||
}
|
||||
|
||||
public function dataDate()
|
||||
@@ -51,7 +37,7 @@ class Status
|
||||
$iDataDateEpoch = $this->oDB->getOne($sSQL);
|
||||
|
||||
if ($iDataDateEpoch === false) {
|
||||
throw Exception('Data date query failed '.$iDataDateEpoch->getMessage(), 705);
|
||||
throw new Exception('Import date is not available', 705);
|
||||
}
|
||||
|
||||
return $iDataDateEpoch;
|
||||
|
||||
@@ -8,9 +8,9 @@ namespace Nominatim\Token;
|
||||
class Country
|
||||
{
|
||||
/// Database word id, if available.
|
||||
public $iId;
|
||||
private $iId;
|
||||
/// Two-letter country code (lower-cased).
|
||||
public $sCountryCode;
|
||||
private $sCountryCode;
|
||||
|
||||
public function __construct($iId, $sCountryCode)
|
||||
{
|
||||
@@ -18,6 +18,44 @@ class Country
|
||||
$this->sCountryCode = $sCountryCode;
|
||||
}
|
||||
|
||||
/**
 * Get the database word id of the token.
 *
 * @return integer|null The id handed to the constructor.
 */
public function getId()
{
    return $this->iId;
}
|
||||
|
||||
/**
 * Check if the token can be added to the given search.
 *
 * @param object $oSearch   Partial search description derived so far.
 * @param object $oPosition Description of the token position within
 *                          the query.
 *
 * @return bool True if the search has no country restriction yet and the
 *              phrase at the given position may be a country phrase.
 */
public function isExtendable($oSearch, $oPosition)
{
    // Only one country restriction per search.
    if ($oSearch->hasCountry()) {
        return false;
    }

    return (bool) $oPosition->maybePhrase('country');
}
|
||||
|
||||
/**
 * Derive new searches by adding this token to an existing search.
 *
 * @param object $oSearch   Partial search description derived so far.
 * @param object $oPosition Description of the token position within
 *                          the query.
 *
 * @return SearchDescription[] List of derived search descriptions
 *                             (always exactly one entry).
 */
public function extendSearch($oSearch, $oPosition)
{
    // Value handed to clone(): 1 for the last token of the query, 6
    // otherwise (presumably the rank penalty of the new search).
    $iCost = $oPosition->isLastToken() ? 1 : 6;

    $oNewSearch = $oSearch->clone($iCost);
    $oNewSearch->setCountry($this->sCountryCode);

    return array($oNewSearch);
}
|
||||
|
||||
public function debugInfo()
|
||||
{
|
||||
return array(
|
||||
@@ -26,4 +64,9 @@ class Country
|
||||
'Info' => $this->sCountryCode
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Get the one-letter code identifying this token type in debug output.
 *
 * @return string Always 'C'.
 */
public function debugCode()
{
    return 'C';
}
|
||||
}
|
||||
|
||||
@@ -8,9 +8,9 @@ namespace Nominatim\Token;
|
||||
class HouseNumber
|
||||
{
|
||||
/// Database word id, if available.
|
||||
public $iId;
|
||||
private $iId;
|
||||
/// Normalized house number.
|
||||
public $sToken;
|
||||
private $sToken;
|
||||
|
||||
public function __construct($iId, $sToken)
|
||||
{
|
||||
@@ -18,6 +18,80 @@ class HouseNumber
|
||||
$this->sToken = $sToken;
|
||||
}
|
||||
|
||||
/**
 * Get the database word id of the token.
 *
 * @return integer|null The id handed to the constructor.
 */
public function getId()
{
    return $this->iId;
}
|
||||
|
||||
/**
 * Check if the token can be added to the given search.
 *
 * @param object $oSearch   Partial search description derived so far.
 * @param object $oPosition Description of the token position within
 *                          the query.
 *
 * @return bool True if the search has no house number yet, is not a
 *              postcode search and the phrase at the given position may
 *              be a street phrase.
 */
public function isExtendable($oSearch, $oPosition)
{
    if ($oSearch->hasHousenumber()) {
        return false;
    }

    if ($oSearch->hasOperator(\Nominatim\Operator::POSTCODE)) {
        return false;
    }

    return (bool) $oPosition->maybePhrase('street');
}
|
||||
|
||||
/**
 * Derive new searches by adding this token to an existing search.
 *
 * Always yields a search with the token as house number constraint and,
 * where possible, a second one with the token used as the name.
 *
 * @param object $oSearch   Partial search description derived so far.
 * @param object $oPosition Description of the token position within
 *                          the query.
 *
 * @return SearchDescription[] List of derived search descriptions.
 */
public function extendSearch($oSearch, $oPosition)
{
    // sanity check: a housenumber that is not mainly made up of
    // numbers is penalised
    $bUnusualNumber = preg_match('/\\d/', $this->sToken) === 0
                      || preg_match_all('/[^0-9]/', $this->sToken, $aMatches) > 2;

    // Every condition that holds adds one to the base cost of 1.
    $aPenalties = array(
        $bUnusualNumber,
        !$oSearch->hasOperator(\Nominatim\Operator::NONE),
        empty($this->iId),
        // also must not appear in the middle of the address
        $oSearch->hasAddress() || $oSearch->hasPostcode()
    );
    $iSearchCost = 1 + count(array_filter($aPenalties));

    $oNumberSearch = $oSearch->clone($iSearchCost);
    $oNumberSearch->setHousenumber($this->sToken);
    $aNewSearches = array($oNumberSearch);

    // Housenumbers may appear in the name when the place has its own
    // address terms.
    $bMayBeName = $this->iId !== null
                  && ($oSearch->getNamePhrase() >= 0 || !$oSearch->hasName())
                  && !$oSearch->hasAddress();

    if ($bMayBeName) {
        $oNameSearch = $oSearch->clone($iSearchCost);
        $oNameSearch->setHousenumberAsName($this->iId);
        $aNewSearches[] = $oNameSearch;
    }

    return $aNewSearches;
}
|
||||
|
||||
|
||||
public function debugInfo()
|
||||
{
|
||||
return array(
|
||||
@@ -26,4 +100,9 @@ class HouseNumber
|
||||
'Info' => array('nr' => $this->sToken)
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Get the one-letter code identifying this token type in debug output.
 *
 * @return string Always 'H'.
 */
public function debugCode()
{
    return 'H';
}
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ require_once(CONST_LibDir.'/TokenHousenumber.php');
|
||||
require_once(CONST_LibDir.'/TokenPostcode.php');
|
||||
require_once(CONST_LibDir.'/TokenSpecialTerm.php');
|
||||
require_once(CONST_LibDir.'/TokenWord.php');
|
||||
require_once(CONST_LibDir.'/TokenPartial.php');
|
||||
require_once(CONST_LibDir.'/SpecialSearchOperator.php');
|
||||
|
||||
/**
|
||||
@@ -17,15 +18,6 @@ require_once(CONST_LibDir.'/SpecialSearchOperator.php');
|
||||
* tokens do not have a common base class. All tokens need to have a field
|
||||
* with the word id that points to an entry in the `word` database table
|
||||
* but otherwise the information saved about a token can be very different.
|
||||
*
|
||||
* There are two different kinds of token words: full words and partial terms.
|
||||
*
|
||||
* Full words start with a space. They represent a complete name of a place.
|
||||
* All special tokens are normally full words.
|
||||
*
|
||||
* Partial terms have no space at the beginning. They may represent a part of
|
||||
* a name of a place (e.g. in the name 'World Trade Center' a partial term
|
||||
* would be 'Trade' or 'Trade Center'). They are only used in TokenWord.
|
||||
*/
|
||||
class TokenList
|
||||
{
|
||||
@@ -64,7 +56,7 @@ class TokenList
|
||||
*/
|
||||
public function containsAny($sWord)
|
||||
{
|
||||
return isset($this->aTokens[$sWord]) || isset($this->aTokens[' '.$sWord]);
|
||||
return isset($this->aTokens[$sWord]);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -86,8 +78,8 @@ class TokenList
|
||||
|
||||
foreach ($this->aTokens as $aTokenList) {
|
||||
foreach ($aTokenList as $oToken) {
|
||||
if (is_a($oToken, '\Nominatim\Token\Word') && !$oToken->bPartial) {
|
||||
$ids[$oToken->iId] = $oToken->iId;
|
||||
if (is_a($oToken, '\Nominatim\Token\Word')) {
|
||||
$ids[$oToken->getId()] = $oToken->getId();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -95,88 +87,6 @@ class TokenList
|
||||
return $ids;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add token information from the word table in the database.
|
||||
*
|
||||
* @param object $oDB Nominatim::DB instance.
|
||||
* @param string[] $aTokens List of tokens to look up in the database.
|
||||
* @param string[] $aCountryCodes List of country restrictions.
|
||||
* @param string $sNormQuery Normalized query string.
|
||||
* @param object $oNormalizer Normalizer function to use on tokens.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
public function addTokensFromDB(&$oDB, &$aTokens, &$aCountryCodes, $sNormQuery, $oNormalizer)
|
||||
{
|
||||
// Check which tokens we have, get the ID numbers
|
||||
$sSQL = 'SELECT word_id, word_token, word, class, type, country_code,';
|
||||
$sSQL .= ' operator, coalesce(search_name_count, 0) as count';
|
||||
$sSQL .= ' FROM word WHERE word_token in (';
|
||||
$sSQL .= join(',', $oDB->getDBQuotedList($aTokens)).')';
|
||||
|
||||
Debug::printSQL($sSQL);
|
||||
|
||||
$aDBWords = $oDB->getAll($sSQL, null, 'Could not get word tokens.');
|
||||
|
||||
foreach ($aDBWords as $aWord) {
|
||||
$oToken = null;
|
||||
$iId = (int) $aWord['word_id'];
|
||||
|
||||
if ($aWord['class']) {
|
||||
// Special terms need to appear in their normalized form.
|
||||
if ($aWord['word']) {
|
||||
$sNormWord = $aWord['word'];
|
||||
if ($oNormalizer != null) {
|
||||
$sNormWord = $oNormalizer->transliterate($aWord['word']);
|
||||
}
|
||||
if (strpos($sNormQuery, $sNormWord) === false) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if ($aWord['class'] == 'place' && $aWord['type'] == 'house') {
|
||||
$oToken = new Token\HouseNumber($iId, trim($aWord['word_token']));
|
||||
} elseif ($aWord['class'] == 'place' && $aWord['type'] == 'postcode') {
|
||||
if ($aWord['word']
|
||||
&& pg_escape_string($aWord['word']) == $aWord['word']
|
||||
) {
|
||||
$oToken = new Token\Postcode(
|
||||
$iId,
|
||||
$aWord['word'],
|
||||
$aWord['country_code']
|
||||
);
|
||||
}
|
||||
} else {
|
||||
// near and in operator the same at the moment
|
||||
$oToken = new Token\SpecialTerm(
|
||||
$iId,
|
||||
$aWord['class'],
|
||||
$aWord['type'],
|
||||
$aWord['operator'] ? Operator::NEAR : Operator::NONE
|
||||
);
|
||||
}
|
||||
} elseif ($aWord['country_code']) {
|
||||
// Filter country tokens that do not match restricted countries.
|
||||
if (!$aCountryCodes
|
||||
|| in_array($aWord['country_code'], $aCountryCodes)
|
||||
) {
|
||||
$oToken = new Token\Country($iId, $aWord['country_code']);
|
||||
}
|
||||
} else {
|
||||
$oToken = new Token\Word(
|
||||
$iId,
|
||||
$aWord['word_token'][0] != ' ',
|
||||
(int) $aWord['count'],
|
||||
substr_count($aWord['word_token'], ' ')
|
||||
);
|
||||
}
|
||||
|
||||
if ($oToken) {
|
||||
$this->addToken($aWord['word_token'], $oToken);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new token for the given word.
|
||||
*
|
||||
@@ -199,9 +109,9 @@ class TokenList
|
||||
$aWordsIDs = array();
|
||||
foreach ($this->aTokens as $sToken => $aWords) {
|
||||
foreach ($aWords as $aToken) {
|
||||
if ($aToken->iId !== null) {
|
||||
$aWordsIDs[$aToken->iId] =
|
||||
'#'.$sToken.'('.$aToken->iId.')#';
|
||||
$iId = $aToken->getId();
|
||||
if ($iId !== null) {
|
||||
$aWordsIDs[$iId] = '#'.$sToken.'('.$aToken->debugCode().' '.$iId.')#';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
118
lib-php/TokenPartial.php
Normal file
118
lib-php/TokenPartial.php
Normal file
@@ -0,0 +1,118 @@
|
||||
<?php
|
||||
|
||||
namespace Nominatim\Token;
|
||||
|
||||
/**
|
||||
* A partial word token.
|
||||
*/
|
||||
class Partial
|
||||
{
|
||||
/// Database word id, if applicable.
|
||||
private $iId;
|
||||
/// Number of appearances in the database.
|
||||
private $iSearchNameCount;
|
||||
/// True, if the token consists exclusively of digits and spaces.
|
||||
private $bNumberToken;
|
||||
|
||||
/**
 * Create a partial token.
 *
 * @param integer $iId              Database word id, if applicable.
 * @param string  $sToken           Token text; only inspected to find out
 *                                  whether it is made up of digits and
 *                                  spaces exclusively.
 * @param integer $iSearchNameCount Number of appearances in the database.
 */
public function __construct($iId, $sToken, $iSearchNameCount)
{
    $this->iId = $iId;
    $this->iSearchNameCount = $iSearchNameCount;
    // preg_match() yields 1 only on a match; 0 and false both mean "no".
    $this->bNumberToken = preg_match('#^[0-9 ]+$#', $sToken) === 1;
}
|
||||
|
||||
public function getId()
|
||||
{
|
||||
return $this->iId;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the token can be added to the given search.
|
||||
* Derive new searches by adding this token to an existing search.
|
||||
*
|
||||
* @param object $oSearch Partial search description derived so far.
|
||||
* @param object $oPosition Description of the token position within
|
||||
the query.
|
||||
*
|
||||
* @return True if the token is compatible with the search configuration
|
||||
* given the position.
|
||||
*/
|
||||
public function isExtendable($oSearch, $oPosition)
|
||||
{
|
||||
return !$oPosition->isPhrase('country');
|
||||
}
|
||||
|
||||
/**
|
||||
* Derive new searches by adding this token to an existing search.
|
||||
*
|
||||
* @param object $oSearch Partial search description derived so far.
|
||||
* @param object $oPosition Description of the token position within
|
||||
the query.
|
||||
*
|
||||
* @return SearchDescription[] List of derived search descriptions.
|
||||
*/
|
||||
public function extendSearch($oSearch, $oPosition)
|
||||
{
|
||||
$aNewSearches = array();
|
||||
|
||||
// Partial token in Address.
|
||||
if (($oPosition->isPhrase('') || !$oPosition->isFirstPhrase())
|
||||
&& $oSearch->hasName()
|
||||
) {
|
||||
$iSearchCost = $this->bNumberToken ? 2 : 1;
|
||||
if ($this->iSearchNameCount >= CONST_Max_Word_Frequency) {
|
||||
$iSearchCost += 1;
|
||||
}
|
||||
|
||||
$oNewSearch = $oSearch->clone($iSearchCost);
|
||||
$oNewSearch->addAddressToken(
|
||||
$this->iId,
|
||||
$this->iSearchNameCount < CONST_Max_Word_Frequency
|
||||
);
|
||||
|
||||
$aNewSearches[] = $oNewSearch;
|
||||
}
|
||||
|
||||
// Partial token in Name.
|
||||
if ((!$oSearch->hasPostcode() && !$oSearch->hasAddress())
|
||||
&& (!$oSearch->hasName(true)
|
||||
|| $oSearch->getNamePhrase() == $oPosition->getPhrase())
|
||||
) {
|
||||
$iSearchCost = 1;
|
||||
if (!$oSearch->hasName(true)) {
|
||||
$iSearchCost += 1;
|
||||
}
|
||||
if ($this->bNumberToken) {
|
||||
$iSearchCost += 1;
|
||||
}
|
||||
|
||||
$oNewSearch = $oSearch->clone($iSearchCost);
|
||||
$oNewSearch->addPartialNameToken(
|
||||
$this->iId,
|
||||
$this->iSearchNameCount < CONST_Max_Word_Frequency,
|
||||
$oPosition->getPhrase()
|
||||
);
|
||||
|
||||
$aNewSearches[] = $oNewSearch;
|
||||
}
|
||||
|
||||
return $aNewSearches;
|
||||
}
|
||||
|
||||
|
||||
public function debugInfo()
|
||||
{
|
||||
return array(
|
||||
'ID' => $this->iId,
|
||||
'Type' => 'partial',
|
||||
'Info' => array(
|
||||
'count' => $this->iSearchNameCount
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
public function debugCode()
|
||||
{
|
||||
return 'w';
|
||||
}
|
||||
}
|
||||
@@ -8,11 +8,11 @@ namespace Nominatim\Token;
|
||||
class Postcode
|
||||
{
|
||||
/// Database word id, if available.
|
||||
public $iId;
|
||||
private $iId;
|
||||
/// Full normalized postcode (upper cased).
|
||||
public $sPostcode;
|
||||
private $sPostcode;
|
||||
// Optional country code the postcode belongs to (currently unused).
|
||||
public $sCountryCode;
|
||||
private $sCountryCode;
|
||||
|
||||
public function __construct($iId, $sPostcode, $sCountryCode = '')
|
||||
{
|
||||
@@ -21,6 +21,67 @@ class Postcode
|
||||
$this->sCountryCode = empty($sCountryCode) ? '' : $sCountryCode;
|
||||
}
|
||||
|
||||
/**
 * Return the database word id of the token.
 */
public function getId()
{
    return $this->iId;
}
|
||||
|
||||
/**
 * Check if the token can be added to the given search.
 *
 * @param object $oSearch   Partial search description derived so far.
 * @param object $oPosition Description of the token position within
 *                          the query.
 *
 * @return bool True if the search has no postcode yet and the position
 *              may be a 'postalcode' phrase.
 */
public function isExtendable($oSearch, $oPosition)
{
    // A search can only carry a single postcode.
    if ($oSearch->hasPostcode()) {
        return false;
    }

    return (bool) $oPosition->maybePhrase('postalcode');
}
|
||||
|
||||
/**
 * Derive new searches by adding this token to an existing search.
 *
 * Up to two searches are derived: one where the postcode is the primary
 * search element and one where it is an addendum to the search.
 *
 * @param object $oSearch   Partial search description derived so far.
 * @param object $oPosition Description of the token position within
 *                          the query.
 *
 * @return SearchDescription[] List of derived search descriptions.
 */
public function extendSearch($oSearch, $oPosition)
{
    $aDerived = array();

    // Postcode as primary search element: only for the first token and
    // while hasOperator(Operator::NONE) holds for the search.
    if ($oSearch->hasOperator(\Nominatim\Operator::NONE)
        && $oPosition->isFirstToken()
    ) {
        $oPrimary = $oSearch->clone(1);
        $oPrimary->setPostcodeAsName($this->iId, $this->sPostcode);

        $aDerived[] = $oPrimary;
    }

    // Postcode as addendum: needs either an explicit 'postalcode' phrase
    // or a search that already has a name.
    $bPostcodeOperator = $oSearch->hasOperator(\Nominatim\Operator::POSTCODE);
    if (!$bPostcodeOperator
        && ($oPosition->isPhrase('postalcode') || $oSearch->hasName())
    ) {
        // Postcodes shorter than 4 characters are penalised by one
        // extra point per missing character.
        $iCost = 1 + max(0, 4 - strlen($this->sPostcode));

        $oAddendum = $oSearch->clone($iCost);
        $oAddendum->setPostcode($this->sPostcode);

        $aDerived[] = $oAddendum;
    }

    return $aDerived;
}
|
||||
|
||||
public function debugInfo()
|
||||
{
|
||||
return array(
|
||||
@@ -29,4 +90,9 @@ class Postcode
|
||||
'Info' => $this->sPostcode.'('.$this->sCountryCode.')'
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Return the one-letter code that marks this token type in debug output.
 */
public function debugCode()
{
    return 'P';
}
|
||||
}
|
||||
|
||||
@@ -10,13 +10,13 @@ require_once(CONST_LibDir.'/SpecialSearchOperator.php');
|
||||
class SpecialTerm
|
||||
{
|
||||
/// Database word id, if applicable.
|
||||
public $iId;
|
||||
private $iId;
|
||||
/// Class (or OSM tag key) of the place to look for.
|
||||
public $sClass;
|
||||
private $sClass;
|
||||
/// Type (or OSM tag value) of the place to look for.
|
||||
public $sType;
|
||||
private $sType;
|
||||
/// Relationship of the operator to the object (see Operator class).
|
||||
public $iOperator;
|
||||
private $iOperator;
|
||||
|
||||
public function __construct($iID, $sClass, $sType, $iOperator)
|
||||
{
|
||||
@@ -26,6 +26,62 @@ class SpecialTerm
|
||||
$this->iOperator = $iOperator;
|
||||
}
|
||||
|
||||
/**
 * Return the database word id of the token.
 */
public function getId()
{
    return $this->iId;
}
|
||||
|
||||
/**
 * Check if the token can be added to the given search.
 *
 * @param object $oSearch   Partial search description derived so far.
 * @param object $oPosition Description of the token position within
 *                          the query.
 *
 * @return bool True if the search has no operator yet and the token
 *              is inside an untyped ('') phrase.
 */
public function isExtendable($oSearch, $oPosition)
{
    if ($oSearch->hasOperator()) {
        return false;
    }

    return (bool) $oPosition->isPhrase('');
}
|
||||
|
||||
/**
 * Derive new searches by adding this token to an existing search.
 *
 * @param object $oSearch   Partial search description derived so far.
 * @param object $oPosition Description of the token position within
 *                          the query.
 *
 * @return SearchDescription[] List with the single derived search
 *                             description.
 */
public function extendSearch($oSearch, $oPosition)
{
    $iOperator = $this->iOperator;

    if ($iOperator == \Nominatim\Operator::NONE) {
        // The term has no operator of its own: choose one from the state
        // of the search. This guess always costs extra.
        $bAsName = $oSearch->hasName()
                   || $oSearch->getContext()->isBoundedSearch();
        $iOperator = $bAsName
                     ? \Nominatim\Operator::NAME
                     : \Nominatim\Operator::NEAR;
        $bPenalize = true;
    } else {
        // Terms with an explicit operator are penalised when they are
        // neither the first nor the last token of the query.
        $bPenalize = !($oPosition->isFirstToken() || $oPosition->isLastToken());
    }

    // Base cost 2, plus 2 for the penalty above, plus 1 when the search
    // already has a house number.
    $iCost = 2 + ($bPenalize ? 2 : 0) + ($oSearch->hasHousenumber() ? 1 : 0);

    $oPoiSearch = $oSearch->clone($iCost);
    $oPoiSearch->setPoiSearch($iOperator, $this->sClass, $this->sType);

    return array($oPoiSearch);
}
|
||||
|
||||
|
||||
public function debugInfo()
|
||||
{
|
||||
return array(
|
||||
@@ -38,4 +94,9 @@ class SpecialTerm
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Return the one-letter code that marks this token type in debug output.
 */
public function debugCode()
{
    return 'S';
}
|
||||
}
|
||||
|
||||
@@ -8,31 +8,95 @@ namespace Nominatim\Token;
|
||||
class Word
|
||||
{
|
||||
/// Database word id, if applicable.
|
||||
public $iId;
|
||||
/// If true, the word may represent only part of a place name.
|
||||
public $bPartial;
|
||||
private $iId;
|
||||
/// Number of appearances in the database.
|
||||
public $iSearchNameCount;
|
||||
private $iSearchNameCount;
|
||||
/// Number of terms in the word.
|
||||
public $iTermCount;
|
||||
private $iTermCount;
|
||||
|
||||
public function __construct($iId, $bPartial, $iSearchNameCount, $iTermCount)
|
||||
public function __construct($iId, $iSearchNameCount, $iTermCount)
|
||||
{
|
||||
$this->iId = $iId;
|
||||
$this->bPartial = $bPartial;
|
||||
$this->iSearchNameCount = $iSearchNameCount;
|
||||
$this->iTermCount = $iTermCount;
|
||||
}
|
||||
|
||||
/**
 * Return the database word id of the token.
 */
public function getId()
{
    return $this->iId;
}
|
||||
|
||||
/**
 * Check if the token can be added to the given search.
 *
 * @param object $oSearch   Partial search description derived so far
 *                          (not inspected by this token type).
 * @param object $oPosition Description of the token position within
 *                          the query.
 *
 * @return bool True unless the position is inside a 'country' phrase.
 */
public function isExtendable($oSearch, $oPosition)
{
    $bInCountryPhrase = $oPosition->isPhrase('country');

    return !$bInCountryPhrase;
}
|
||||
|
||||
/**
 * Derive new searches by adding this token to an existing search.
 *
 * @param object $oSearch   Partial search description derived so far.
 * @param object $oPosition Description of the token position within
 *                          the query.
 *
 * @return SearchDescription[] List of derived search descriptions
 *                             (empty or with a single entry).
 */
public function extendSearch($oSearch, $oPosition)
{
    // Full words can only be a name if they appear at the beginning
    // of the phrase. In structured search the name must necessarily be
    // in the first phrase. In unstructured search it may be in a later
    // phrase when the first phrase is a house number.
    $bAddressOnly = $oSearch->hasName()
                    || !($oPosition->isFirstPhrase() || $oPosition->isPhrase(''));

    if ($bAddressOnly) {
        // Only words with more than one term are used as address terms,
        // and never in the first phrase of a typed query.
        if ($this->iTermCount <= 1
            || !($oPosition->isPhrase('') || !$oPosition->isFirstPhrase())
        ) {
            return array();
        }

        $oAddressSearch = $oSearch->clone(1);
        $oAddressSearch->addAddressToken($this->iId);

        return array($oAddressSearch);
    }

    // The word may only become the name when no name was started yet.
    if ($oSearch->hasName(true)) {
        return array();
    }

    $oNameSearch = $oSearch->clone(1);
    $oNameSearch->addNameToken(
        $this->iId,
        CONST_Search_NameOnlySearchFrequencyThreshold
        && $this->iSearchNameCount
                  < CONST_Search_NameOnlySearchFrequencyThreshold
    );

    return array($oNameSearch);
}
|
||||
|
||||
public function debugInfo()
|
||||
{
|
||||
return array(
|
||||
'ID' => $this->iId,
|
||||
'Type' => 'word',
|
||||
'Info' => array(
|
||||
'partial' => $this->bPartial,
|
||||
'count' => $this->iSearchNameCount
|
||||
'count' => $this->iSearchNameCount,
|
||||
'terms' => $this->iTermCount
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Return the one-letter code that marks this token type in debug output.
 */
public function debugCode()
{
    return 'W';
}
|
||||
}
|
||||
|
||||
@@ -1,10 +0,0 @@
|
||||
<?php
|
||||
@define('CONST_LibDir', dirname(dirname(__FILE__)));
|
||||
|
||||
require_once(CONST_LibDir.'/init-cmd.php');
|
||||
|
||||
loadSettings(getcwd());
|
||||
|
||||
(new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
|
||||
->addParams('admin', '--check-database')
|
||||
->run();
|
||||
@@ -49,7 +49,9 @@
|
||||
$oDB->connect();
|
||||
|
||||
if (isset($aCMDResult['output-type'])) {
|
||||
if (!isset($aRankmap[$aCMDResult['output-type']])) fail('unknown output-type: '.$aCMDResult['output-type']);
|
||||
if (!isset($aRankmap[$aCMDResult['output-type']])) {
|
||||
fail('unknown output-type: '.$aCMDResult['output-type']);
|
||||
}
|
||||
$iOutputRank = $aRankmap[$aCMDResult['output-type']];
|
||||
} else {
|
||||
$iOutputRank = $aRankmap['street'];
|
||||
@@ -58,14 +60,18 @@
|
||||
|
||||
// Preferred language
|
||||
$oParams = new Nominatim\ParameterParser();
|
||||
if (!isset($aCMDResult['language'])) $aCMDResult['language'] = 'xx';
|
||||
if (!isset($aCMDResult['language'])) {
|
||||
$aCMDResult['language'] = 'xx';
|
||||
}
|
||||
$aLangPrefOrder = $oParams->getPreferredLanguages($aCMDResult['language']);
|
||||
$sLanguagePrefArraySQL = $oDB->getArraySQL($oDB->getDBQuotedList($aLangPrefOrder));
|
||||
|
||||
// output formatting: build up a lookup table that maps address ranks to columns
|
||||
$aColumnMapping = array();
|
||||
$iNumCol = 0;
|
||||
if (!isset($aCMDResult['output-format'])) $aCMDResult['output-format'] = 'street;suburb;city;county;state;country';
|
||||
if (!isset($aCMDResult['output-format'])) {
|
||||
$aCMDResult['output-format'] = 'street;suburb;city;county;state;country';
|
||||
}
|
||||
foreach (preg_split('/\s*;\s*/', $aCMDResult['output-format']) as $sColumn) {
|
||||
$bHasData = false;
|
||||
foreach (preg_split('/\s*,\s*/', $sColumn) as $sRank) {
|
||||
@@ -80,7 +86,9 @@
|
||||
}
|
||||
}
|
||||
}
|
||||
if ($bHasData) $iNumCol++;
|
||||
if ($bHasData) {
|
||||
$iNumCol++;
|
||||
}
|
||||
}
|
||||
|
||||
// build the query for objects
|
||||
@@ -122,7 +130,9 @@
|
||||
if ($sOsmType) {
|
||||
$sSQL = 'select place_id from placex where osm_type = :osm_type and osm_id = :osm_id';
|
||||
$sParentId = $oDB->getOne($sSQL, array('osm_type' => $sOsmType, 'osm_id' => $sOsmId));
|
||||
if (!$sParentId) fail('Could not find place '.$sOsmType.' '.$sOsmId);
|
||||
if (!$sParentId) {
|
||||
fail('Could not find place '.$sOsmType.' '.$sOsmId);
|
||||
}
|
||||
}
|
||||
if ($sParentId) {
|
||||
$sPlacexSQL .= ' and place_id in (select place_id from place_addressline where address_place_id = '.$sParentId.' and isaddress)';
|
||||
@@ -136,7 +146,6 @@
|
||||
$oResults = $oDB->getQueryStatement($sPlacexSQL);
|
||||
$fOutstream = fopen('php://output', 'w');
|
||||
while ($aRow = $oResults->fetch()) {
|
||||
//var_dump($aRow);
|
||||
$iPlaceID = $aRow['place_id'];
|
||||
$sSQL = "select rank_address,get_name_by_language(name,$sLanguagePrefArraySQL) as localname from get_addressdata(:place_id, -1)";
|
||||
$sSQL .= ' WHERE isaddress';
|
||||
|
||||
@@ -1,93 +0,0 @@
|
||||
<?php
|
||||
@define('CONST_LibDir', dirname(dirname(__FILE__)));
|
||||
|
||||
require_once(CONST_LibDir.'/init-cmd.php');
|
||||
require_once(CONST_LibDir.'/Geocode.php');
|
||||
require_once(CONST_LibDir.'/ParameterParser.php');
|
||||
ini_set('memory_limit', '800M');
|
||||
|
||||
$aCMDOptions
|
||||
= array(
|
||||
'Query database from command line. Returns search result as JSON.',
|
||||
array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
|
||||
array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
|
||||
array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
|
||||
|
||||
array('search', '', 0, 1, 1, 1, 'string', 'Search for given term or coordinate'),
|
||||
array('country', '', 0, 1, 1, 1, 'string', 'Structured search: country'),
|
||||
array('state', '', 0, 1, 1, 1, 'string', 'Structured search: state'),
|
||||
array('county', '', 0, 1, 1, 1, 'string', 'Structured search: county'),
|
||||
array('city', '', 0, 1, 1, 1, 'string', 'Structured search: city'),
|
||||
array('street', '', 0, 1, 1, 1, 'string', 'Structured search: street'),
|
||||
array('amenity', '', 0, 1, 1, 1, 'string', 'Structured search: amenity'),
|
||||
array('postalcode', '', 0, 1, 1, 1, 'string', 'Structured search: postal code'),
|
||||
|
||||
array('accept-language', '', 0, 1, 1, 1, 'string', 'Preferred language order for showing search results'),
|
||||
array('bounded', '', 0, 1, 0, 0, 'bool', 'Restrict results to given viewbox'),
|
||||
array('nodedupe', '', 0, 1, 0, 0, 'bool', 'Do not remove duplicate results'),
|
||||
array('limit', '', 0, 1, 1, 1, 'int', 'Maximum number of results returned (default: 10)'),
|
||||
array('exclude_place_ids', '', 0, 1, 1, 1, 'string', 'Comma-separated list of place ids to exclude from results'),
|
||||
array('featureType', '', 0, 1, 1, 1, 'string', 'Restrict results to certain features (country, state,city,settlement)'),
|
||||
array('countrycodes', '', 0, 1, 1, 1, 'string', 'Comma-separated list of countries to restrict search to'),
|
||||
array('viewbox', '', 0, 1, 1, 1, 'string', 'Prefer results in given view box'),
|
||||
|
||||
array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
|
||||
);
|
||||
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
|
||||
|
||||
loadSettings($aCMDResult['project-dir'] ?? getcwd());
|
||||
|
||||
@define('CONST_Database_DSN', getSetting('DATABASE_DSN'));
|
||||
@define('CONST_Default_Language', getSetting('DEFAULT_LANGUAGE', false));
|
||||
@define('CONST_Log_DB', getSettingBool('LOG_DB'));
|
||||
@define('CONST_Log_File', getSetting('LOG_FILE', false));
|
||||
@define('CONST_Max_Word_Frequency', getSetting('MAX_WORD_FREQUENCY'));
|
||||
@define('CONST_NoAccessControl', getSettingBool('CORS_NOACCESSCONTROL'));
|
||||
@define('CONST_Places_Max_ID_count', getSetting('LOOKUP_MAX_COUNT'));
|
||||
@define('CONST_PolygonOutput_MaximumTypes', getSetting('POLYGON_OUTPUT_MAX_TYPES'));
|
||||
@define('CONST_Search_BatchMode', getSettingBool('SEARCH_BATCH_MODE'));
|
||||
@define('CONST_Search_NameOnlySearchFrequencyThreshold', getSetting('SEARCH_NAME_ONLY_THRESHOLD'));
|
||||
@define('CONST_Term_Normalization_Rules', getSetting('TERM_NORMALIZATION'));
|
||||
@define('CONST_Use_Aux_Location_data', getSettingBool('USE_AUX_LOCATION_DATA'));
|
||||
@define('CONST_Use_US_Tiger_Data', getSettingBool('USE_US_TIGER_DATA'));
|
||||
@define('CONST_MapIcon_URL', getSetting('MAPICON_URL', false));
|
||||
|
||||
|
||||
$oDB = new Nominatim\DB;
|
||||
$oDB->connect();
|
||||
|
||||
if (isset($aCMDResult['nodedupe'])) $aCMDResult['dedupe'] = 'false';
|
||||
|
||||
$oParams = new Nominatim\ParameterParser($aCMDResult);
|
||||
|
||||
$aSearchParams = array(
|
||||
'search',
|
||||
'amenity',
|
||||
'street',
|
||||
'city',
|
||||
'county',
|
||||
'state',
|
||||
'country',
|
||||
'postalcode'
|
||||
);
|
||||
|
||||
if (!$oParams->hasSetAny($aSearchParams)) {
|
||||
showUsage($aCMDOptions, true);
|
||||
return 1;
|
||||
}
|
||||
|
||||
$oGeocode = new Nominatim\Geocode($oDB);
|
||||
|
||||
$oGeocode->setLanguagePreference($oParams->getPreferredLanguages(false));
|
||||
$oGeocode->setReverseInPlan(true);
|
||||
$oGeocode->loadParamArray($oParams);
|
||||
|
||||
if ($oParams->getBool('search')) {
|
||||
$oGeocode->setQuery($aCMDResult['search']);
|
||||
} else {
|
||||
$oGeocode->setQueryFromParams($oParams);
|
||||
}
|
||||
|
||||
$aSearchResults = $oGeocode->lookup();
|
||||
|
||||
echo json_encode($aSearchResults, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE)."\n";
|
||||
@@ -1,218 +0,0 @@
|
||||
<?php
|
||||
@define('CONST_LibDir', dirname(dirname(__FILE__)));
|
||||
|
||||
require_once(CONST_LibDir.'/init-cmd.php');
|
||||
require_once(CONST_LibDir.'/setup/SetupClass.php');
|
||||
require_once(CONST_LibDir.'/setup_functions.php');
|
||||
ini_set('memory_limit', '800M');
|
||||
|
||||
use Nominatim\Setup\SetupFunctions as SetupFunctions;
|
||||
|
||||
// (long-opt, short-opt, min-occurs, max-occurs, num-arguments, num-arguments, type, help)
|
||||
$aCMDOptions
|
||||
= array(
|
||||
'Create and setup nominatim search system',
|
||||
array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
|
||||
array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
|
||||
array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
|
||||
|
||||
array('osm-file', '', 0, 1, 1, 1, 'realpath', 'File to import'),
|
||||
array('threads', '', 0, 1, 1, 1, 'int', 'Number of threads (where possible)'),
|
||||
|
||||
array('all', '', 0, 1, 0, 0, 'bool', 'Do the complete process'),
|
||||
|
||||
array('create-db', '', 0, 1, 0, 0, 'bool', 'Create nominatim db'),
|
||||
array('setup-db', '', 0, 1, 0, 0, 'bool', 'Build a blank nominatim db'),
|
||||
array('import-data', '', 0, 1, 0, 0, 'bool', 'Import a osm file'),
|
||||
array('osm2pgsql-cache', '', 0, 1, 1, 1, 'int', 'Cache size used by osm2pgsql'),
|
||||
array('reverse-only', '', 0, 1, 0, 0, 'bool', 'Do not create search tables and indexes'),
|
||||
array('create-functions', '', 0, 1, 0, 0, 'bool', 'Create functions'),
|
||||
array('enable-diff-updates', '', 0, 1, 0, 0, 'bool', 'Turn on the code required to make diff updates work'),
|
||||
array('enable-debug-statements', '', 0, 1, 0, 0, 'bool', 'Include debug warning statements in pgsql commands'),
|
||||
array('ignore-errors', '', 0, 1, 0, 0, 'bool', 'Continue import even when errors in SQL are present (EXPERT)'),
|
||||
array('create-tables', '', 0, 1, 0, 0, 'bool', 'Create main tables'),
|
||||
array('create-partition-tables', '', 0, 1, 0, 0, 'bool', 'Create required partition tables'),
|
||||
array('create-partition-functions', '', 0, 1, 0, 0, 'bool', 'Create required partition triggers'),
|
||||
array('no-partitions', '', 0, 1, 0, 0, 'bool', 'Do not partition search indices (speeds up import of single country extracts)'),
|
||||
array('import-wikipedia-articles', '', 0, 1, 0, 0, 'bool', 'Import wikipedia article dump'),
|
||||
array('load-data', '', 0, 1, 0, 0, 'bool', 'Copy data to live tables from import table'),
|
||||
array('disable-token-precalc', '', 0, 1, 0, 0, 'bool', 'Disable name precalculation (EXPERT)'),
|
||||
array('import-tiger-data', '', 0, 1, 0, 0, 'bool', 'Import tiger data (not included in \'all\')'),
|
||||
array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'),
|
||||
array('index', '', 0, 1, 0, 0, 'bool', 'Index the data'),
|
||||
array('index-noanalyse', '', 0, 1, 0, 0, 'bool', 'Do not perform analyse operations during index (EXPERT)'),
|
||||
array('create-search-indices', '', 0, 1, 0, 0, 'bool', 'Create additional indices required for search and update'),
|
||||
array('create-country-names', '', 0, 1, 0, 0, 'bool', 'Create default list of searchable country names'),
|
||||
array('drop', '', 0, 1, 0, 0, 'bool', 'Drop tables needed for updates, making the database readonly (EXPERIMENTAL)'),
|
||||
array('setup-website', '', 0, 1, 0, 0, 'bool', 'Used to compile environment variables for the website'),
|
||||
array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
|
||||
);
|
||||
|
||||
// $aCMDOptions passed to getCmdOpt by reference
|
||||
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
|
||||
|
||||
loadSettings($aCMDResult['project-dir'] ?? getcwd());
|
||||
setupHTTPProxy();
|
||||
|
||||
$bDidSomething = false;
|
||||
|
||||
$oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
|
||||
|
||||
// by default, use all but one processor, but never more than 15.
|
||||
$iInstances = max(1, $aCMDResult['threads'] ?? (min(16, getProcessorCount()) - 1));
|
||||
|
||||
/**
 * Run the given command with the common command line options added.
 *
 * Adds the thread count and forwards the --ignore-errors, --quiet and
 * --verbose switches when they were set for this script. Nothing is
 * returned.
 *
 * @param object $oCmd Command object providing addParams() and run().
 */
function run($oCmd)
{
    global $iInstances, $aCMDResult;

    $oCmd->addParams('--threads', $iInstances);

    // Script option => switch handed on to the command, in this order.
    $aForwarded = array(
                   'ignore-errors' => '--ignore-errors',
                   'quiet' => '--quiet',
                   'verbose' => '--verbose'
                  );
    foreach ($aForwarded as $sOption => $sSwitch) {
        if ($aCMDResult[$sOption] ?? false) {
            $oCmd->addParams($sSwitch);
        }
    }

    $oCmd->run(true);
}
|
||||
|
||||
|
||||
//*******************************************************
|
||||
// Making some sanity check:
|
||||
// Check if osm-file is set and points to a valid file
|
||||
if ($aCMDResult['import-data'] || $aCMDResult['all']) {
|
||||
// to remain in /lib/setup_functions.php function
|
||||
checkInFile($aCMDResult['osm-file']);
|
||||
}
|
||||
|
||||
// ******************************************************
|
||||
// instantiate Setup class
|
||||
$oSetup = new SetupFunctions($aCMDResult);
|
||||
|
||||
// *******************************************************
|
||||
// go through complete process if 'all' is selected or start selected functions
|
||||
if ($aCMDResult['create-db'] || $aCMDResult['all']) {
|
||||
$bDidSomething = true;
|
||||
run((clone($oNominatimCmd))->addParams('transition', '--create-db'));
|
||||
}
|
||||
|
||||
if ($aCMDResult['setup-db'] || $aCMDResult['all']) {
|
||||
$bDidSomething = true;
|
||||
$oCmd = (clone($oNominatimCmd))->addParams('transition', '--setup-db');
|
||||
|
||||
if ($aCMDResult['no-partitions'] ?? false) {
|
||||
$oCmd->addParams('--no-partitions');
|
||||
}
|
||||
|
||||
run($oCmd);
|
||||
}
|
||||
|
||||
if ($aCMDResult['import-data'] || $aCMDResult['all']) {
|
||||
$bDidSomething = true;
|
||||
$oCmd = (clone($oNominatimCmd))
|
||||
->addParams('transition', '--import-data')
|
||||
->addParams('--osm-file', $aCMDResult['osm-file']);
|
||||
if ($aCMDResult['drop'] ?? false) {
|
||||
$oCmd->addParams('--drop');
|
||||
}
|
||||
|
||||
run($oCmd);
|
||||
}
|
||||
|
||||
if ($aCMDResult['create-functions'] || $aCMDResult['all']) {
|
||||
$bDidSomething = true;
|
||||
$oSetup->createSqlFunctions();
|
||||
}
|
||||
|
||||
if ($aCMDResult['create-tables'] || $aCMDResult['all']) {
|
||||
$bDidSomething = true;
|
||||
$oCmd = (clone($oNominatimCmd))->addParams('transition', '--create-tables');
|
||||
|
||||
if ($aCMDResult['reverse-only'] ?? false) {
|
||||
$oCmd->addParams('--reverse-only');
|
||||
}
|
||||
|
||||
run($oCmd);
|
||||
}
|
||||
|
||||
if ($aCMDResult['create-partition-tables'] || $aCMDResult['all']) {
|
||||
$bDidSomething = true;
|
||||
run((clone($oNominatimCmd))->addParams('transition', '--create-partition-tables'));
|
||||
}
|
||||
|
||||
if ($aCMDResult['create-partition-functions'] || $aCMDResult['all']) {
|
||||
$bDidSomething = true;
|
||||
$oSetup->createSqlFunctions(); // also create partition functions
|
||||
}
|
||||
|
||||
if ($aCMDResult['import-wikipedia-articles'] || $aCMDResult['all']) {
|
||||
$bDidSomething = true;
|
||||
// ignore errors!
|
||||
(clone($oNominatimCmd))->addParams('refresh', '--wiki-data')->run();
|
||||
}
|
||||
|
||||
if ($aCMDResult['load-data'] || $aCMDResult['all']) {
|
||||
$bDidSomething = true;
|
||||
run((clone($oNominatimCmd))->addParams('transition', '--load-data'));
|
||||
}
|
||||
|
||||
if ($aCMDResult['import-tiger-data']) {
|
||||
$bDidSomething = true;
|
||||
$sTigerPath = getSetting('TIGER_DATA_PATH', CONST_InstallDir.'/tiger');
|
||||
run((clone($oNominatimCmd))->addParams('transition', '--tiger-data', $sTigerPath));
|
||||
}
|
||||
|
||||
if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all']) {
|
||||
$bDidSomething = true;
|
||||
$oSetup->calculatePostcodes($aCMDResult['all']);
|
||||
}
|
||||
|
||||
if ($aCMDResult['index'] || $aCMDResult['all']) {
|
||||
$bDidSomething = true;
|
||||
$oCmd = (clone($oNominatimCmd))->addParams('transition', '--index');
|
||||
if ($aCMDResult['index-noanalyse'] ?? false) {
|
||||
$oCmd->addParams('--no-analyse');
|
||||
}
|
||||
|
||||
run($oCmd);
|
||||
}
|
||||
|
||||
if ($aCMDResult['drop']) {
|
||||
$bDidSomething = true;
|
||||
run((clone($oNominatimCmd))->addParams('freeze'));
|
||||
}
|
||||
|
||||
if ($aCMDResult['create-search-indices'] || $aCMDResult['all']) {
|
||||
$bDidSomething = true;
|
||||
|
||||
$oCmd = (clone($oNominatimCmd))->addParams('transition', '--create-search-indices');
|
||||
|
||||
if ($aCMDResult['drop'] ?? false) {
|
||||
$oCmd->addParams('--drop');
|
||||
}
|
||||
|
||||
run($oCmd);
|
||||
}
|
||||
|
||||
// Create the default list of searchable country names.
if ($aCMDResult['create-country-names'] || $aCMDResult['all']) {
    $bDidSomething = true;
    // The parameters must be added to the cloned command before it is
    // handed to run(): run() returns nothing, so chaining addParams()
    // onto its result would fail and the command would be started
    // without the 'transition --create-country-names' arguments.
    run((clone($oNominatimCmd))->addParams('transition', '--create-country-names'));
}
|
||||
|
||||
if ($aCMDResult['setup-website'] || $aCMDResult['all']) {
|
||||
$bDidSomething = true;
|
||||
run((clone($oNominatimCmd))->addParams('refresh', '--website'));
|
||||
}
|
||||
|
||||
// ******************************************************
|
||||
// If we did something, repeat the warnings
|
||||
if (!$bDidSomething) {
|
||||
showUsage($aCMDOptions, true);
|
||||
} else {
|
||||
echo "Summary of warnings:\n\n";
|
||||
repeatWarnings();
|
||||
echo "\n";
|
||||
info('Setup finished.');
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
|
||||
<?php
|
||||
@define('CONST_LibDir', dirname(dirname(__FILE__)));
|
||||
|
||||
require_once(CONST_LibDir.'/init-cmd.php');
|
||||
|
||||
loadSettings(getcwd());
|
||||
|
||||
(new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
|
||||
->addParams('special-phrases', '--import-from-wiki')
|
||||
->run();
|
||||
@@ -1,236 +0,0 @@
|
||||
<?php
|
||||
@define('CONST_LibDir', dirname(dirname(__FILE__)));
|
||||
|
||||
require_once(CONST_LibDir.'/init-cmd.php');
|
||||
require_once(CONST_LibDir.'/setup_functions.php');
|
||||
require_once(CONST_LibDir.'/setup/SetupClass.php');
|
||||
|
||||
ini_set('memory_limit', '800M');
|
||||
|
||||
use Nominatim\Setup\SetupFunctions as SetupFunctions;
|
||||
|
||||
// (long-opt, short-opt, min-occurs, max-occurs, num-arguments, num-arguments, type, help)
|
||||
$aCMDOptions
|
||||
= array(
|
||||
'Import / update / index osm data',
|
||||
array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
|
||||
array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
|
||||
array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
|
||||
|
||||
array('init-updates', '', 0, 1, 0, 0, 'bool', 'Set up database for updating'),
|
||||
array('check-for-updates', '', 0, 1, 0, 0, 'bool', 'Check if new updates are available'),
|
||||
array('no-update-functions', '', 0, 1, 0, 0, 'bool', 'Do not update trigger functions to support differential updates (assuming the diff update logic is already present)'),
|
||||
array('import-osmosis', '', 0, 1, 0, 0, 'bool', 'Import updates once'),
|
||||
array('import-osmosis-all', '', 0, 1, 0, 0, 'bool', 'Import updates forever'),
|
||||
array('no-index', '', 0, 1, 0, 0, 'bool', 'Do not index the new data'),
|
||||
|
||||
array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Update postcode centroid table'),
|
||||
|
||||
array('import-file', '', 0, 1, 1, 1, 'realpath', 'Re-import data from an OSM file'),
|
||||
array('import-diff', '', 0, 1, 1, 1, 'realpath', 'Import a diff (osc) file from local file system'),
|
||||
array('osm2pgsql-cache', '', 0, 1, 1, 1, 'int', 'Cache size used by osm2pgsql'),
|
||||
|
||||
array('import-node', '', 0, 1, 1, 1, 'int', 'Re-import node'),
|
||||
array('import-way', '', 0, 1, 1, 1, 'int', 'Re-import way'),
|
||||
array('import-relation', '', 0, 1, 1, 1, 'int', 'Re-import relation'),
|
||||
array('import-from-main-api', '', 0, 1, 0, 0, 'bool', 'Use OSM API instead of Overpass to download objects'),
|
||||
|
||||
array('index', '', 0, 1, 0, 0, 'bool', 'Index'),
|
||||
array('index-rank', '', 0, 1, 1, 1, 'int', 'Rank to start indexing from'),
|
||||
array('index-instances', '', 0, 1, 1, 1, 'int', 'Number of indexing instances (threads)'),
|
||||
|
||||
array('recompute-word-counts', '', 0, 1, 0, 0, 'bool', 'Compute frequency of full-word search terms'),
|
||||
array('update-address-levels', '', 0, 1, 0, 0, 'bool', 'Reimport address level configuration (EXPERT)'),
|
||||
array('recompute-importance', '', 0, 1, 0, 0, 'bool', 'Recompute place importances'),
|
||||
|
||||
array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
|
||||
);
|
||||
|
||||
// Parse the command line; the parsed options end up in $aResult.
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true);

// Read the project directory from the array that was actually filled
// above. Reading it from a different, unset variable would make the
// '??' fallback always pick getcwd() and silently ignore --project-dir.
loadSettings($aResult['project-dir'] ?? getcwd());
|
||||
setupHTTPProxy();
|
||||
|
||||
if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1;
|
||||
if (!isset($aResult['index-rank'])) $aResult['index-rank'] = 0;
|
||||
|
||||
date_default_timezone_set('Etc/UTC');
|
||||
|
||||
$oDB = new Nominatim\DB();
|
||||
$oDB->connect();
|
||||
$fPostgresVersion = $oDB->getPostgresVersion();
|
||||
|
||||
$aDSNInfo = Nominatim\DB::parseDSN(getSetting('DATABASE_DSN'));
|
||||
if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
|
||||
|
||||
// cache memory to be used by osm2pgsql, should not be more than the available memory
|
||||
$iCacheMemory = (isset($aResult['osm2pgsql-cache'])?$aResult['osm2pgsql-cache']:2000);
|
||||
if ($iCacheMemory + 500 > getTotalMemoryMB()) {
|
||||
$iCacheMemory = getCacheMemoryMB();
|
||||
echo "WARNING: resetting cache memory to $iCacheMemory\n";
|
||||
}
|
||||
|
||||
$oOsm2pgsqlCmd = (new \Nominatim\Shell(getOsm2pgsqlBinary()))
|
||||
->addParams('--hstore')
|
||||
->addParams('--latlong')
|
||||
->addParams('--append')
|
||||
->addParams('--slim')
|
||||
->addParams('--with-forward-dependencies', 'false')
|
||||
->addParams('--log-progress', 'true')
|
||||
->addParams('--number-processes', 1)
|
||||
->addParams('--cache', $iCacheMemory)
|
||||
->addParams('--output', 'gazetteer')
|
||||
->addParams('--style', getImportStyle())
|
||||
->addParams('--database', $aDSNInfo['database'])
|
||||
->addParams('--port', $aDSNInfo['port']);
|
||||
|
||||
if (isset($aDSNInfo['hostspec']) && $aDSNInfo['hostspec']) {
|
||||
$oOsm2pgsqlCmd->addParams('--host', $aDSNInfo['hostspec']);
|
||||
}
|
||||
if (isset($aDSNInfo['username']) && $aDSNInfo['username']) {
|
||||
$oOsm2pgsqlCmd->addParams('--user', $aDSNInfo['username']);
|
||||
}
|
||||
if (isset($aDSNInfo['password']) && $aDSNInfo['password']) {
|
||||
$oOsm2pgsqlCmd->addEnvPair('PGPASSWORD', $aDSNInfo['password']);
|
||||
}
|
||||
if (getSetting('FLATNODE_FILE')) {
|
||||
$oOsm2pgsqlCmd->addParams('--flat-nodes', getSetting('FLATNODE_FILE'));
|
||||
}
|
||||
if ($fPostgresVersion >= 11.0) {
|
||||
$oOsm2pgsqlCmd->addEnvPair(
|
||||
'PGOPTIONS',
|
||||
'-c jit=off -c max_parallel_workers_per_gather=0'
|
||||
);
|
||||
}
|
||||
|
||||
$oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
|
||||
|
||||
function run($oCmd)
|
||||
{
|
||||
global $aCMDResult;
|
||||
if ($aCMDResult['quiet'] ?? false) {
|
||||
$oCmd->addParams('--quiet');
|
||||
}
|
||||
if ($aCMDResult['verbose'] ?? false) {
|
||||
$oCmd->addParams('--verbose');
|
||||
}
|
||||
$oCmd->run(true);
|
||||
}
|
||||
|
||||
|
||||
if ($aResult['init-updates']) {
|
||||
$oCmd = (clone($oNominatimCmd))->addParams('replication', '--init');
|
||||
|
||||
if ($aResult['no-update-functions']) {
|
||||
$oCmd->addParams('--no-update-functions');
|
||||
}
|
||||
|
||||
run($oCmd);
|
||||
}
|
||||
|
||||
if ($aResult['check-for-updates']) {
|
||||
exit((clone($oNominatimCmd))->addParams('replication', '--check-for-updates')->run());
|
||||
}
|
||||
|
||||
if (isset($aResult['import-diff']) || isset($aResult['import-file'])) {
|
||||
// import diffs and files directly (e.g. from osmosis --rri)
|
||||
$sNextFile = isset($aResult['import-diff']) ? $aResult['import-diff'] : $aResult['import-file'];
|
||||
|
||||
if (!file_exists($sNextFile)) {
|
||||
fail("Cannot open $sNextFile\n");
|
||||
}
|
||||
|
||||
// Import the file
|
||||
$oCMD = (clone $oOsm2pgsqlCmd)->addParams($sNextFile);
|
||||
echo $oCMD->escapedCmd()."\n";
|
||||
$iRet = $oCMD->run();
|
||||
|
||||
if ($iRet) {
|
||||
fail("Error from osm2pgsql, $iRet\n");
|
||||
}
|
||||
|
||||
// Don't update the import status - we don't know what this file contains
|
||||
}
|
||||
|
||||
if ($aResult['calculate-postcodes']) {
|
||||
run((clone($oNominatimCmd))->addParams('refresh', '--postcodes'));
|
||||
}
|
||||
|
||||
$sTemporaryFile = CONST_InstallDir.'/osmosischange.osc';
|
||||
$bHaveDiff = false;
|
||||
$bUseOSMApi = isset($aResult['import-from-main-api']) && $aResult['import-from-main-api'];
|
||||
$sContentURL = '';
|
||||
if (isset($aResult['import-node']) && $aResult['import-node']) {
|
||||
if ($bUseOSMApi) {
|
||||
$sContentURL = 'https://www.openstreetmap.org/api/0.6/node/'.$aResult['import-node'];
|
||||
} else {
|
||||
$sContentURL = 'https://overpass-api.de/api/interpreter?data=node('.$aResult['import-node'].');out%20meta;';
|
||||
}
|
||||
}
|
||||
|
||||
if (isset($aResult['import-way']) && $aResult['import-way']) {
|
||||
if ($bUseOSMApi) {
|
||||
$sContentURL = 'https://www.openstreetmap.org/api/0.6/way/'.$aResult['import-way'].'/full';
|
||||
} else {
|
||||
$sContentURL = 'https://overpass-api.de/api/interpreter?data=(way('.$aResult['import-way'].');%3E;);out%20meta;';
|
||||
}
|
||||
}
|
||||
|
||||
if (isset($aResult['import-relation']) && $aResult['import-relation']) {
|
||||
if ($bUseOSMApi) {
|
||||
$sContentURL = 'https://www.openstreetmap.org/api/0.6/relation/'.$aResult['import-relation'].'/full';
|
||||
} else {
|
||||
$sContentURL = 'https://overpass-api.de/api/interpreter?data=(rel(id:'.$aResult['import-relation'].');%3E;);out%20meta;';
|
||||
}
|
||||
}
|
||||
|
||||
if ($sContentURL) {
|
||||
file_put_contents($sTemporaryFile, file_get_contents($sContentURL));
|
||||
$bHaveDiff = true;
|
||||
}
|
||||
|
||||
if ($bHaveDiff) {
|
||||
// import generated change file
|
||||
|
||||
$oCMD = (clone $oOsm2pgsqlCmd)->addParams($sTemporaryFile);
|
||||
echo $oCMD->escapedCmd()."\n";
|
||||
|
||||
$iRet = $oCMD->run();
|
||||
if ($iRet) {
|
||||
fail("osm2pgsql exited with error level $iRet\n");
|
||||
}
|
||||
}
|
||||
|
||||
if ($aResult['recompute-word-counts']) {
|
||||
run((clone($oNominatimCmd))->addParams('refresh', '--word-counts'));
|
||||
}
|
||||
|
||||
if ($aResult['index']) {
|
||||
run((clone $oNominatimCmd)
|
||||
->addParams('index', '--minrank', $aResult['index-rank'])
|
||||
->addParams('--threads', $aResult['index-instances']));
|
||||
}
|
||||
|
||||
if ($aResult['update-address-levels']) {
|
||||
run((clone($oNominatimCmd))->addParams('refresh', '--address-levels'));
|
||||
}
|
||||
|
||||
if ($aResult['recompute-importance']) {
|
||||
run((clone($oNominatimCmd))->addParams('refresh', '--importance'));
|
||||
}
|
||||
|
||||
if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
|
||||
$oCmd = (clone($oNominatimCmd))
|
||||
->addParams('replication')
|
||||
->addParams('--threads', $aResult['index-instances']);
|
||||
|
||||
if (!$aResult['import-osmosis-all']) {
|
||||
$oCmd->addParams('--once');
|
||||
}
|
||||
|
||||
if ($aResult['no-index']) {
|
||||
$oCmd->addParams('--no-index');
|
||||
}
|
||||
|
||||
run($oCmd);
|
||||
}
|
||||
@@ -3,7 +3,6 @@
|
||||
|
||||
require_once(CONST_LibDir.'/init-cmd.php');
|
||||
require_once(CONST_LibDir.'/log.php');
|
||||
require_once(CONST_LibDir.'/Geocode.php');
|
||||
require_once(CONST_LibDir.'/PlaceLookup.php');
|
||||
require_once(CONST_LibDir.'/ReverseGeocode.php');
|
||||
|
||||
@@ -26,17 +25,16 @@ loadSettings($aCMDResult['project-dir'] ?? getcwd());
|
||||
@define('CONST_Default_Language', getSetting('DEFAULT_LANGUAGE', false));
|
||||
@define('CONST_Log_DB', getSettingBool('LOG_DB'));
|
||||
@define('CONST_Log_File', getSetting('LOG_FILE', false));
|
||||
@define('CONST_Max_Word_Frequency', getSetting('MAX_WORD_FREQUENCY'));
|
||||
@define('CONST_NoAccessControl', getSettingBool('CORS_NOACCESSCONTROL'));
|
||||
@define('CONST_Places_Max_ID_count', getSetting('LOOKUP_MAX_COUNT'));
|
||||
@define('CONST_PolygonOutput_MaximumTypes', getSetting('POLYGON_OUTPUT_MAX_TYPES'));
|
||||
@define('CONST_Search_BatchMode', getSettingBool('SEARCH_BATCH_MODE'));
|
||||
@define('CONST_Search_NameOnlySearchFrequencyThreshold', getSetting('SEARCH_NAME_ONLY_THRESHOLD'));
|
||||
@define('CONST_Term_Normalization_Rules', getSetting('TERM_NORMALIZATION'));
|
||||
@define('CONST_Use_Aux_Location_data', getSettingBool('USE_AUX_LOCATION_DATA'));
|
||||
@define('CONST_Use_US_Tiger_Data', getSettingBool('USE_US_TIGER_DATA'));
|
||||
@define('CONST_MapIcon_URL', getSetting('MAPICON_URL', false));
|
||||
@define('CONST_TokenizerDir', CONST_InstallDir.'/tokenizer');
|
||||
|
||||
require_once(CONST_LibDir.'/Geocode.php');
|
||||
|
||||
$oDB = new Nominatim\DB();
|
||||
$oDB->connect();
|
||||
@@ -64,11 +62,15 @@ if (!$aResult['search-only']) {
|
||||
$oPlaceLookup->setLanguagePreference(array('en'));
|
||||
|
||||
echo 'Warm reverse: ';
|
||||
if ($bVerbose) echo "\n";
|
||||
if ($bVerbose) {
|
||||
echo "\n";
|
||||
}
|
||||
for ($i = 0; $i < 1000; $i++) {
|
||||
$fLat = rand(-9000, 9000) / 100;
|
||||
$fLon = rand(-18000, 18000) / 100;
|
||||
if ($bVerbose) echo "$fLat, $fLon = ";
|
||||
if ($bVerbose) {
|
||||
echo "$fLat, $fLon = ";
|
||||
}
|
||||
|
||||
$oLookup = $oReverseGeocode->lookup($fLat, $fLon);
|
||||
$aSearchResults = $oLookup ? $oPlaceLookup->lookup(array($oLookup->iId => $oLookup)) : null;
|
||||
@@ -81,10 +83,14 @@ if (!$aResult['reverse-only']) {
|
||||
$oGeocode = new Nominatim\Geocode($oDB);
|
||||
|
||||
echo 'Warm search: ';
|
||||
if ($bVerbose) echo "\n";
|
||||
if ($bVerbose) {
|
||||
echo "\n";
|
||||
}
|
||||
$sSQL = 'SELECT word FROM word WHERE word is not null ORDER BY search_name_count DESC LIMIT 1000';
|
||||
foreach ($oDB->getCol($sSQL) as $sWord) {
|
||||
if ($bVerbose) echo "$sWord = ";
|
||||
if ($bVerbose) {
|
||||
echo "$sWord = ";
|
||||
}
|
||||
|
||||
$oGeocode->setLanguagePreference(array('en'));
|
||||
$oGeocode->setQuery($sWord);
|
||||
|
||||
103
lib-php/cmd.php
103
lib-php/cmd.php
@@ -9,8 +9,12 @@ function getCmdOpt($aArg, $aSpec, &$aResult, $bExitOnError = false, $bExitOnUnkn
|
||||
|
||||
foreach ($aSpec as $aLine) {
|
||||
if (is_array($aLine)) {
|
||||
if ($aLine[0]) $aQuick['--'.$aLine[0]] = $aLine;
|
||||
if ($aLine[1]) $aQuick['-'.$aLine[1]] = $aLine;
|
||||
if ($aLine[0]) {
|
||||
$aQuick['--'.$aLine[0]] = $aLine;
|
||||
}
|
||||
if ($aLine[1]) {
|
||||
$aQuick['-'.$aLine[1]] = $aLine;
|
||||
}
|
||||
$aCounts[$aLine[0]] = 0;
|
||||
}
|
||||
}
|
||||
@@ -28,7 +32,9 @@ function getCmdOpt($aArg, $aSpec, &$aResult, $bExitOnError = false, $bExitOnUnkn
|
||||
$xVal = array();
|
||||
for ($n = $aLine[4]; $i < $iSize && $n; $n--) {
|
||||
$i++;
|
||||
if ($i >= $iSize || $aArg[$i][0] == '-') showUsage($aSpec, $bExitOnError, 'Parameter of \''.$aLine[0].'\' is missing');
|
||||
if ($i >= $iSize || $aArg[$i][0] == '-') {
|
||||
showUsage($aSpec, $bExitOnError, 'Parameter of \''.$aLine[0].'\' is missing');
|
||||
}
|
||||
|
||||
switch ($aLine[6]) {
|
||||
case 'realpath':
|
||||
@@ -56,7 +62,9 @@ function getCmdOpt($aArg, $aSpec, &$aResult, $bExitOnError = false, $bExitOnUnkn
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ($aLine[4] == 1) $xVal = $xVal[0];
|
||||
if ($aLine[4] == 1) {
|
||||
$xVal = $xVal[0];
|
||||
}
|
||||
} else {
|
||||
$xVal = true;
|
||||
}
|
||||
@@ -65,7 +73,9 @@ function getCmdOpt($aArg, $aSpec, &$aResult, $bExitOnError = false, $bExitOnUnkn
|
||||
}
|
||||
|
||||
if ($aLine[3] > 1) {
|
||||
if (!array_key_exists($aLine[0], $aResult)) $aResult[$aLine[0]] = array();
|
||||
if (!array_key_exists($aLine[0], $aResult)) {
|
||||
$aResult[$aLine[0]] = array();
|
||||
}
|
||||
$aResult[$aLine[0]][] = $xVal;
|
||||
} else {
|
||||
$aResult[$aLine[0]] = $xVal;
|
||||
@@ -75,18 +85,23 @@ function getCmdOpt($aArg, $aSpec, &$aResult, $bExitOnError = false, $bExitOnUnkn
|
||||
}
|
||||
}
|
||||
|
||||
if (array_key_exists('help', $aResult)) showUsage($aSpec);
|
||||
if ($bUnknown && $bExitOnUnknown) showUsage($aSpec, $bExitOnError, 'Unknown option \''.$bUnknown.'\'');
|
||||
if (array_key_exists('help', $aResult)) {
|
||||
showUsage($aSpec);
|
||||
}
|
||||
if ($bUnknown && $bExitOnUnknown) {
|
||||
showUsage($aSpec, $bExitOnError, 'Unknown option \''.$bUnknown.'\'');
|
||||
}
|
||||
|
||||
foreach ($aSpec as $aLine) {
|
||||
if (is_array($aLine)) {
|
||||
if ($aCounts[$aLine[0]] < $aLine[2]) showUsage($aSpec, $bExitOnError, 'Option \''.$aLine[0].'\' is missing');
|
||||
if ($aCounts[$aLine[0]] > $aLine[3]) showUsage($aSpec, $bExitOnError, 'Option \''.$aLine[0].'\' is pressent too many times');
|
||||
switch ($aLine[6]) {
|
||||
case 'bool':
|
||||
if (!array_key_exists($aLine[0], $aResult))
|
||||
$aResult[$aLine[0]] = false;
|
||||
break;
|
||||
if ($aCounts[$aLine[0]] < $aLine[2]) {
|
||||
showUsage($aSpec, $bExitOnError, 'Option \''.$aLine[0].'\' is missing');
|
||||
}
|
||||
if ($aCounts[$aLine[0]] > $aLine[3]) {
|
||||
showUsage($aSpec, $bExitOnError, 'Option \''.$aLine[0].'\' is pressent too many times');
|
||||
}
|
||||
if ($aLine[6] == 'bool' && !array_key_exists($aLine[0], $aResult)) {
|
||||
$aResult[$aLine[0]] = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -109,8 +124,12 @@ function showUsage($aSpec, $bExit = false, $sError = false)
|
||||
echo "\n";
|
||||
}
|
||||
$aNames = array();
|
||||
if ($aLine[1]) $aNames[] = '-'.$aLine[1];
|
||||
if ($aLine[0]) $aNames[] = '--'.$aLine[0];
|
||||
if ($aLine[1]) {
|
||||
$aNames[] = '-'.$aLine[1];
|
||||
}
|
||||
if ($aLine[0]) {
|
||||
$aNames[] = '--'.$aLine[0];
|
||||
}
|
||||
$sName = join(', ', $aNames);
|
||||
echo ' '.$sName.str_repeat(' ', 30-strlen($sName)).$aLine[7]."\n";
|
||||
} else {
|
||||
@@ -144,58 +163,6 @@ function repeatWarnings()
|
||||
}
|
||||
|
||||
|
||||
function runSQLScript($sScript, $bfatal = true, $bVerbose = false, $bIgnoreErrors = false)
|
||||
{
|
||||
// Convert database DSN to psql parameters
|
||||
$aDSNInfo = \Nominatim\DB::parseDSN(getSetting('DATABASE_DSN'));
|
||||
if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
|
||||
|
||||
$oCmd = new \Nominatim\Shell('psql');
|
||||
$oCmd->addParams('--port', $aDSNInfo['port']);
|
||||
$oCmd->addParams('--dbname', $aDSNInfo['database']);
|
||||
if (isset($aDSNInfo['hostspec']) && $aDSNInfo['hostspec']) {
|
||||
$oCmd->addParams('--host', $aDSNInfo['hostspec']);
|
||||
}
|
||||
if (isset($aDSNInfo['username']) && $aDSNInfo['username']) {
|
||||
$oCmd->addParams('--username', $aDSNInfo['username']);
|
||||
}
|
||||
if (isset($aDSNInfo['password'])) {
|
||||
$oCmd->addEnvPair('PGPASSWORD', $aDSNInfo['password']);
|
||||
}
|
||||
if (!$bVerbose) {
|
||||
$oCmd->addParams('--quiet');
|
||||
}
|
||||
if ($bfatal && !$bIgnoreErrors) {
|
||||
$oCmd->addParams('-v', 'ON_ERROR_STOP=1');
|
||||
}
|
||||
|
||||
$aDescriptors = array(
|
||||
0 => array('pipe', 'r'),
|
||||
1 => STDOUT,
|
||||
2 => STDERR
|
||||
);
|
||||
$ahPipes = null;
|
||||
$hProcess = @proc_open($oCmd->escapedCmd(), $aDescriptors, $ahPipes, null, $oCmd->aEnv);
|
||||
if (!is_resource($hProcess)) {
|
||||
fail('unable to start pgsql');
|
||||
}
|
||||
|
||||
if (!$bVerbose) {
|
||||
fwrite($ahPipes[0], 'set client_min_messages to WARNING;');
|
||||
}
|
||||
|
||||
while (strlen($sScript)) {
|
||||
$iWritten = fwrite($ahPipes[0], $sScript);
|
||||
if ($iWritten <= 0) break;
|
||||
$sScript = substr($sScript, $iWritten);
|
||||
}
|
||||
fclose($ahPipes[0]);
|
||||
$iReturn = proc_close($hProcess);
|
||||
if ($bfatal && $iReturn > 0) {
|
||||
fail("pgsql returned with error code ($iReturn)");
|
||||
}
|
||||
}
|
||||
|
||||
function setupHTTPProxy()
|
||||
{
|
||||
if (!getSettingBool('HTTP_PROXY')) {
|
||||
|
||||
@@ -12,7 +12,7 @@ require_once(CONST_Debug ? 'DebugHtml.php' : 'DebugNone.php');
|
||||
|
||||
function userError($sMsg)
|
||||
{
|
||||
throw new Exception($sMsg, 400);
|
||||
throw new \Exception($sMsg, 400);
|
||||
}
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ function shutdown_exception_handler_xml()
|
||||
{
|
||||
$error = error_get_last();
|
||||
if ($error !== null && $error['type'] === E_ERROR) {
|
||||
exception_handler_xml(new Exception($error['message'], 500));
|
||||
exception_handler_xml(new \Exception($error['message'], 500));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -45,7 +45,7 @@ function shutdown_exception_handler_json()
|
||||
{
|
||||
$error = error_get_last();
|
||||
if ($error !== null && $error['type'] === E_ERROR) {
|
||||
exception_handler_json(new Exception($error['message'], 500));
|
||||
exception_handler_json(new \Exception($error['message'], 500));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -81,6 +81,10 @@ if (CONST_NoAccessControl) {
|
||||
header('Access-Control-Allow-Headers: '.$_SERVER['HTTP_ACCESS_CONTROL_REQUEST_HEADERS']);
|
||||
}
|
||||
}
|
||||
if (isset($_SERVER['REQUEST_METHOD']) && $_SERVER['REQUEST_METHOD'] == 'OPTIONS') exit;
|
||||
if (isset($_SERVER['REQUEST_METHOD']) && $_SERVER['REQUEST_METHOD'] == 'OPTIONS') {
|
||||
exit;
|
||||
}
|
||||
|
||||
if (CONST_Debug) header('Content-type: text/html; charset=utf-8');
|
||||
if (CONST_Debug) {
|
||||
header('Content-type: text/html; charset=utf-8');
|
||||
}
|
||||
|
||||
@@ -6,10 +6,7 @@ function loadSettings($sProjectDir)
|
||||
// Temporary hack to set the direcory via environment instead of
|
||||
// the installed scripts. Neither setting is part of the official
|
||||
// set of settings.
|
||||
defined('CONST_DataDir') or define('CONST_DataDir', $_SERVER['NOMINATIM_DATADIR']);
|
||||
defined('CONST_SqlDir') or define('CONST_SqlDir', $_SERVER['NOMINATIM_SQLDIR']);
|
||||
defined('CONST_ConfigDir') or define('CONST_ConfigDir', $_SERVER['NOMINATIM_CONFIGDIR']);
|
||||
defined('CONST_Default_ModulePath') or define('CONST_Default_ModulePath', $_SERVER['NOMINATIM_DATABASE_MODULE_SRC_PATH']);
|
||||
}
|
||||
|
||||
function getSetting($sConfName, $sDefault = null)
|
||||
@@ -32,22 +29,14 @@ function getSettingBool($sConfName)
|
||||
|| strcmp($sVal, '1') == 0;
|
||||
}
|
||||
|
||||
function getSettingConfig($sConfName, $sSystemConfig)
|
||||
{
|
||||
$sValue = $_SERVER['NOMINATIM_'.$sConfName];
|
||||
|
||||
if (!$sValue) {
|
||||
return CONST_ConfigDir.'/'.$sSystemConfig;
|
||||
}
|
||||
|
||||
return $sValue;
|
||||
}
|
||||
|
||||
function fail($sError, $sUserError = false)
|
||||
{
|
||||
if (!$sUserError) $sUserError = $sError;
|
||||
if (!$sUserError) {
|
||||
$sUserError = $sError;
|
||||
}
|
||||
error_log('ERROR: '.$sError);
|
||||
var_dump($sUserError)."\n";
|
||||
var_dump($sUserError);
|
||||
echo "\n";
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
@@ -95,8 +84,9 @@ function getDatabaseDate(&$oDB)
|
||||
|
||||
function byImportance($a, $b)
|
||||
{
|
||||
if ($a['importance'] != $b['importance'])
|
||||
if ($a['importance'] != $b['importance']) {
|
||||
return ($a['importance'] > $b['importance']?-1:1);
|
||||
}
|
||||
|
||||
return $a['foundorder'] <=> $b['foundorder'];
|
||||
}
|
||||
@@ -227,3 +217,12 @@ function closestHouseNumber($aRow)
|
||||
|
||||
return max(min($aRow['endnumber'], $iHn), $aRow['startnumber']);
|
||||
}
|
||||
|
||||
if (!function_exists('array_key_last')) {
|
||||
function array_key_last(array $array)
|
||||
{
|
||||
if (!empty($array)) {
|
||||
return key(array_slice($array, -1, 1, true));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,15 +5,23 @@ function logStart(&$oDB, $sType = '', $sQuery = '', $aLanguageList = array())
|
||||
{
|
||||
$fStartTime = microtime(true);
|
||||
$aStartTime = explode('.', $fStartTime);
|
||||
if (!isset($aStartTime[1])) $aStartTime[1] = '0';
|
||||
if (!isset($aStartTime[1])) {
|
||||
$aStartTime[1] = '0';
|
||||
}
|
||||
|
||||
$sOutputFormat = '';
|
||||
if (isset($_GET['format'])) $sOutputFormat = $_GET['format'];
|
||||
if (isset($_GET['format'])) {
|
||||
$sOutputFormat = $_GET['format'];
|
||||
}
|
||||
|
||||
if ($sType == 'reverse') {
|
||||
$sOutQuery = (isset($_GET['lat'])?$_GET['lat']:'').'/';
|
||||
if (isset($_GET['lon'])) $sOutQuery .= $_GET['lon'];
|
||||
if (isset($_GET['zoom'])) $sOutQuery .= '/'.$_GET['zoom'];
|
||||
if (isset($_GET['lon'])) {
|
||||
$sOutQuery .= $_GET['lon'];
|
||||
}
|
||||
if (isset($_GET['zoom'])) {
|
||||
$sOutQuery .= '/'.$_GET['zoom'];
|
||||
}
|
||||
} else {
|
||||
$sOutQuery = $sQuery;
|
||||
}
|
||||
@@ -28,13 +36,15 @@ function logStart(&$oDB, $sType = '', $sQuery = '', $aLanguageList = array())
|
||||
);
|
||||
|
||||
if (CONST_Log_DB) {
|
||||
if (isset($_GET['email']))
|
||||
if (isset($_GET['email'])) {
|
||||
$sUserAgent = $_GET['email'];
|
||||
elseif (isset($_SERVER['HTTP_REFERER']))
|
||||
} elseif (isset($_SERVER['HTTP_REFERER'])) {
|
||||
$sUserAgent = $_SERVER['HTTP_REFERER'];
|
||||
elseif (isset($_SERVER['HTTP_USER_AGENT']))
|
||||
} elseif (isset($_SERVER['HTTP_USER_AGENT'])) {
|
||||
$sUserAgent = $_SERVER['HTTP_USER_AGENT'];
|
||||
else $sUserAgent = '';
|
||||
} else {
|
||||
$sUserAgent = '';
|
||||
}
|
||||
$sSQL = 'insert into new_query_log (type,starttime,query,ipaddress,useragent,language,format,searchterm)';
|
||||
$sSQL .= ' values (';
|
||||
$sSQL .= join(',', $oDB->getDBQuotedList(array(
|
||||
@@ -60,7 +70,9 @@ function logEnd(&$oDB, $hLog, $iNumResults)
|
||||
|
||||
if (CONST_Log_DB) {
|
||||
$aEndTime = explode('.', $fEndTime);
|
||||
if (!$aEndTime[1]) $aEndTime[1] = '0';
|
||||
if (!$aEndTime[1]) {
|
||||
$aEndTime[1] = '0';
|
||||
}
|
||||
$sEndTime = date('Y-m-d H:i:s', $aEndTime[0]).'.'.$aEndTime[1];
|
||||
|
||||
$sSQL = 'update new_query_log set endtime = '.$oDB->getDBQuoted($sEndTime).', results = '.$iNumResults;
|
||||
|
||||
@@ -8,10 +8,12 @@ if (file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile)
|
||||
|
||||
$data = array();
|
||||
|
||||
if (isset($aTagsBlacklist))
|
||||
if (isset($aTagsBlacklist)) {
|
||||
$data['blackList'] = $aTagsBlacklist;
|
||||
if (isset($aTagsWhitelist))
|
||||
}
|
||||
if (isset($aTagsWhitelist)) {
|
||||
$data['whiteList'] = $aTagsWhitelist;
|
||||
}
|
||||
|
||||
$jsonFile = fopen($jsonPhraseSettingsFile, 'w');
|
||||
fwrite($jsonFile, json_encode($data));
|
||||
|
||||
@@ -3,14 +3,26 @@
|
||||
|
||||
function formatOSMType($sType, $bIncludeExternal = true)
|
||||
{
|
||||
if ($sType == 'N') return 'node';
|
||||
if ($sType == 'W') return 'way';
|
||||
if ($sType == 'R') return 'relation';
|
||||
if ($sType == 'N') {
|
||||
return 'node';
|
||||
}
|
||||
if ($sType == 'W') {
|
||||
return 'way';
|
||||
}
|
||||
if ($sType == 'R') {
|
||||
return 'relation';
|
||||
}
|
||||
|
||||
if (!$bIncludeExternal) return '';
|
||||
if (!$bIncludeExternal) {
|
||||
return '';
|
||||
}
|
||||
|
||||
if ($sType == 'T') return 'way';
|
||||
if ($sType == 'I') return 'way';
|
||||
if ($sType == 'T') {
|
||||
return 'way';
|
||||
}
|
||||
if ($sType == 'I') {
|
||||
return 'way';
|
||||
}
|
||||
|
||||
// not handled: P, L
|
||||
|
||||
|
||||
@@ -1,261 +0,0 @@
|
||||
<?php
|
||||
|
||||
namespace Nominatim\Setup;
|
||||
|
||||
require_once(CONST_LibDir.'/Shell.php');
|
||||
|
||||
class SetupFunctions
|
||||
{
|
||||
protected $iInstances;
|
||||
protected $aDSNInfo;
|
||||
protected $bQuiet;
|
||||
protected $bVerbose;
|
||||
protected $sIgnoreErrors;
|
||||
protected $bEnableDiffUpdates;
|
||||
protected $bEnableDebugStatements;
|
||||
protected $bDrop;
|
||||
protected $oDB = null;
|
||||
protected $oNominatimCmd;
|
||||
|
||||
public function __construct(array $aCMDResult)
|
||||
{
|
||||
// by default, use all but one processor, but never more than 15.
|
||||
$this->iInstances = isset($aCMDResult['threads'])
|
||||
? $aCMDResult['threads']
|
||||
: (min(16, getProcessorCount()) - 1);
|
||||
|
||||
if ($this->iInstances < 1) {
|
||||
$this->iInstances = 1;
|
||||
warn('resetting threads to '.$this->iInstances);
|
||||
}
|
||||
|
||||
// parse database string
|
||||
$this->aDSNInfo = \Nominatim\DB::parseDSN(getSetting('DATABASE_DSN'));
|
||||
if (!isset($this->aDSNInfo['port'])) {
|
||||
$this->aDSNInfo['port'] = 5432;
|
||||
}
|
||||
|
||||
// setting member variables based on command line options stored in $aCMDResult
|
||||
$this->bQuiet = isset($aCMDResult['quiet']) && $aCMDResult['quiet'];
|
||||
$this->bVerbose = $aCMDResult['verbose'];
|
||||
|
||||
//setting default values which are not set by the update.php array
|
||||
if (isset($aCMDResult['ignore-errors'])) {
|
||||
$this->sIgnoreErrors = $aCMDResult['ignore-errors'];
|
||||
} else {
|
||||
$this->sIgnoreErrors = false;
|
||||
}
|
||||
if (isset($aCMDResult['enable-debug-statements'])) {
|
||||
$this->bEnableDebugStatements = $aCMDResult['enable-debug-statements'];
|
||||
} else {
|
||||
$this->bEnableDebugStatements = false;
|
||||
}
|
||||
if (isset($aCMDResult['enable-diff-updates'])) {
|
||||
$this->bEnableDiffUpdates = $aCMDResult['enable-diff-updates'];
|
||||
} else {
|
||||
$this->bEnableDiffUpdates = false;
|
||||
}
|
||||
|
||||
$this->bDrop = isset($aCMDResult['drop']) && $aCMDResult['drop'];
|
||||
|
||||
$this->oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
|
||||
if ($this->bQuiet) {
|
||||
$this->oNominatimCmd->addParams('--quiet');
|
||||
}
|
||||
if ($this->bVerbose) {
|
||||
$this->oNominatimCmd->addParams('--verbose');
|
||||
}
|
||||
}
|
||||
|
||||
public function calculatePostcodes($bCMDResultAll)
|
||||
{
|
||||
info('Calculate Postcodes');
|
||||
$this->pgsqlRunScriptFile(CONST_SqlDir.'/postcode_tables.sql');
|
||||
|
||||
$sPostcodeFilename = CONST_InstallDir.'/gb_postcode_data.sql.gz';
|
||||
if (file_exists($sPostcodeFilename)) {
|
||||
$this->pgsqlRunScriptFile($sPostcodeFilename);
|
||||
} else {
|
||||
warn('optional external GB postcode table file ('.$sPostcodeFilename.') not found. Skipping.');
|
||||
}
|
||||
|
||||
$sPostcodeFilename = CONST_InstallDir.'/us_postcode_data.sql.gz';
|
||||
if (file_exists($sPostcodeFilename)) {
|
||||
$this->pgsqlRunScriptFile($sPostcodeFilename);
|
||||
} else {
|
||||
warn('optional external US postcode table file ('.$sPostcodeFilename.') not found. Skipping.');
|
||||
}
|
||||
|
||||
|
||||
$this->db()->exec('TRUNCATE location_postcode');
|
||||
|
||||
$sSQL = 'INSERT INTO location_postcode';
|
||||
$sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
|
||||
$sSQL .= "SELECT nextval('seq_place'), 1, country_code,";
|
||||
$sSQL .= " upper(trim (both ' ' from address->'postcode')) as pc,";
|
||||
$sSQL .= ' ST_Centroid(ST_Collect(ST_Centroid(geometry)))';
|
||||
$sSQL .= ' FROM placex';
|
||||
$sSQL .= " WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'";
|
||||
$sSQL .= ' AND geometry IS NOT null';
|
||||
$sSQL .= ' GROUP BY country_code, pc';
|
||||
$this->db()->exec($sSQL);
|
||||
|
||||
// only add postcodes that are not yet available in OSM
|
||||
$sSQL = 'INSERT INTO location_postcode';
|
||||
$sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
|
||||
$sSQL .= "SELECT nextval('seq_place'), 1, 'us', postcode,";
|
||||
$sSQL .= ' ST_SetSRID(ST_Point(x,y),4326)';
|
||||
$sSQL .= ' FROM us_postcode WHERE postcode NOT IN';
|
||||
$sSQL .= ' (SELECT postcode FROM location_postcode';
|
||||
$sSQL .= " WHERE country_code = 'us')";
|
||||
$this->db()->exec($sSQL);
|
||||
|
||||
// add missing postcodes for GB (if available)
|
||||
$sSQL = 'INSERT INTO location_postcode';
|
||||
$sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
|
||||
$sSQL .= "SELECT nextval('seq_place'), 1, 'gb', postcode, geometry";
|
||||
$sSQL .= ' FROM gb_postcode WHERE postcode NOT IN';
|
||||
$sSQL .= ' (SELECT postcode FROM location_postcode';
|
||||
$sSQL .= " WHERE country_code = 'gb')";
|
||||
$this->db()->exec($sSQL);
|
||||
|
||||
if (!$bCMDResultAll) {
|
||||
$sSQL = "DELETE FROM word WHERE class='place' and type='postcode'";
|
||||
$sSQL .= 'and word NOT IN (SELECT postcode FROM location_postcode)';
|
||||
$this->db()->exec($sSQL);
|
||||
}
|
||||
|
||||
$sSQL = 'SELECT count(getorcreate_postcode_id(v)) FROM ';
|
||||
$sSQL .= '(SELECT distinct(postcode) as v FROM location_postcode) p';
|
||||
$this->db()->exec($sSQL);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the connection to the database.
|
||||
*
|
||||
* @return Database object.
|
||||
*
|
||||
* Creates a new connection if none exists yet. Otherwise reuses the
|
||||
* already established connection.
|
||||
*/
|
||||
private function db()
|
||||
{
|
||||
if (is_null($this->oDB)) {
|
||||
$this->oDB = new \Nominatim\DB();
|
||||
$this->oDB->connect();
|
||||
}
|
||||
|
||||
return $this->oDB;
|
||||
}
|
||||
|
||||
private function pgsqlRunScript($sScript, $bfatal = true)
|
||||
{
|
||||
runSQLScript(
|
||||
$sScript,
|
||||
$bfatal,
|
||||
$this->bVerbose,
|
||||
$this->sIgnoreErrors
|
||||
);
|
||||
}
|
||||
|
||||
public function createSqlFunctions()
|
||||
{
|
||||
$oCmd = (clone($this->oNominatimCmd))
|
||||
->addParams('refresh', '--functions');
|
||||
|
||||
if (!$this->bEnableDiffUpdates) {
|
||||
$oCmd->addParams('--no-diff-updates');
|
||||
}
|
||||
|
||||
if ($this->bEnableDebugStatements) {
|
||||
$oCmd->addParams('--enable-debug-statements');
|
||||
}
|
||||
|
||||
$oCmd->run(!$this->sIgnoreErrors);
|
||||
}
|
||||
|
||||
private function pgsqlRunScriptFile($sFilename)
|
||||
{
|
||||
if (!file_exists($sFilename)) fail('unable to find '.$sFilename);
|
||||
|
||||
$oCmd = (new \Nominatim\Shell('psql'))
|
||||
->addParams('--port', $this->aDSNInfo['port'])
|
||||
->addParams('--dbname', $this->aDSNInfo['database']);
|
||||
|
||||
if (!$this->bVerbose) {
|
||||
$oCmd->addParams('--quiet');
|
||||
}
|
||||
if (isset($this->aDSNInfo['hostspec'])) {
|
||||
$oCmd->addParams('--host', $this->aDSNInfo['hostspec']);
|
||||
}
|
||||
if (isset($this->aDSNInfo['username'])) {
|
||||
$oCmd->addParams('--username', $this->aDSNInfo['username']);
|
||||
}
|
||||
if (isset($this->aDSNInfo['password'])) {
|
||||
$oCmd->addEnvPair('PGPASSWORD', $this->aDSNInfo['password']);
|
||||
}
|
||||
$ahGzipPipes = null;
|
||||
if (preg_match('/\\.gz$/', $sFilename)) {
|
||||
$aDescriptors = array(
|
||||
0 => array('pipe', 'r'),
|
||||
1 => array('pipe', 'w'),
|
||||
2 => array('file', '/dev/null', 'a')
|
||||
);
|
||||
$oZcatCmd = new \Nominatim\Shell('zcat', $sFilename);
|
||||
|
||||
$hGzipProcess = proc_open($oZcatCmd->escapedCmd(), $aDescriptors, $ahGzipPipes);
|
||||
if (!is_resource($hGzipProcess)) fail('unable to start zcat');
|
||||
$aReadPipe = $ahGzipPipes[1];
|
||||
fclose($ahGzipPipes[0]);
|
||||
} else {
|
||||
$oCmd->addParams('--file', $sFilename);
|
||||
$aReadPipe = array('pipe', 'r');
|
||||
}
|
||||
$aDescriptors = array(
|
||||
0 => $aReadPipe,
|
||||
1 => array('pipe', 'w'),
|
||||
2 => array('file', '/dev/null', 'a')
|
||||
);
|
||||
$ahPipes = null;
|
||||
|
||||
$hProcess = proc_open($oCmd->escapedCmd(), $aDescriptors, $ahPipes, null, $oCmd->aEnv);
|
||||
if (!is_resource($hProcess)) fail('unable to start pgsql');
|
||||
// TODO: error checking
|
||||
while (!feof($ahPipes[1])) {
|
||||
echo fread($ahPipes[1], 4096);
|
||||
}
|
||||
fclose($ahPipes[1]);
|
||||
$iReturn = proc_close($hProcess);
|
||||
if ($iReturn > 0) {
|
||||
fail("pgsql returned with error code ($iReturn)");
|
||||
}
|
||||
if ($ahGzipPipes) {
|
||||
fclose($ahGzipPipes[1]);
|
||||
proc_close($hGzipProcess);
|
||||
}
|
||||
}
|
||||
|
||||
private function replaceSqlPatterns($sSql)
|
||||
{
|
||||
$sSql = str_replace('{www-user}', getSetting('DATABASE_WEBUSER'), $sSql);
|
||||
|
||||
$aPatterns = array(
|
||||
'{ts:address-data}' => getSetting('TABLESPACE_ADDRESS_DATA'),
|
||||
'{ts:address-index}' => getSetting('TABLESPACE_ADDRESS_INDEX'),
|
||||
'{ts:search-data}' => getSetting('TABLESPACE_SEARCH_DATA'),
|
||||
'{ts:search-index}' => getSetting('TABLESPACE_SEARCH_INDEX'),
|
||||
'{ts:aux-data}' => getSetting('TABLESPACE_AUX_DATA'),
|
||||
'{ts:aux-index}' => getSetting('TABLESPACE_AUX_INDEX')
|
||||
);
|
||||
|
||||
foreach ($aPatterns as $sPattern => $sTablespace) {
|
||||
if ($sTablespace) {
|
||||
$sSql = str_replace($sPattern, 'TABLESPACE "'.$sTablespace.'"', $sSql);
|
||||
} else {
|
||||
$sSql = str_replace($sPattern, '', $sSql);
|
||||
}
|
||||
}
|
||||
|
||||
return $sSql;
|
||||
}
|
||||
}
|
||||
@@ -1,20 +1,5 @@
|
||||
<?php
|
||||
|
||||
function checkInFile($sOSMFile)
|
||||
{
|
||||
if (!isset($sOSMFile)) {
|
||||
fail('missing --osm-file for data import');
|
||||
}
|
||||
|
||||
if (!file_exists($sOSMFile)) {
|
||||
fail('the path supplied to --osm-file does not exist');
|
||||
}
|
||||
|
||||
if (!is_readable($sOSMFile)) {
|
||||
fail('osm-file "' . $aCMDResult['osm-file'] . '" not readable');
|
||||
}
|
||||
}
|
||||
|
||||
function getOsm2pgsqlBinary()
|
||||
{
|
||||
$sBinary = getSetting('OSM2PGSQL_BINARY');
|
||||
|
||||
@@ -5,9 +5,11 @@
|
||||
$aFilteredPlaces = array();
|
||||
|
||||
if (empty($aPlace)) {
|
||||
if (isset($sError))
|
||||
if (isset($sError)) {
|
||||
$aFilteredPlaces['error'] = $sError;
|
||||
else $aFilteredPlaces['error'] = 'Unable to geocode';
|
||||
} else {
|
||||
$aFilteredPlaces['error'] = 'Unable to geocode';
|
||||
}
|
||||
javascript_renderData($aFilteredPlaces);
|
||||
} else {
|
||||
$aFilteredPlaces = array(
|
||||
@@ -17,7 +19,9 @@ if (empty($aPlace)) {
|
||||
)
|
||||
);
|
||||
|
||||
if (isset($aPlace['place_id'])) $aFilteredPlaces['properties']['geocoding']['place_id'] = $aPlace['place_id'];
|
||||
if (isset($aPlace['place_id'])) {
|
||||
$aFilteredPlaces['properties']['geocoding']['place_id'] = $aPlace['place_id'];
|
||||
}
|
||||
$sOSMType = formatOSMType($aPlace['osm_type']);
|
||||
if ($sOSMType) {
|
||||
$aFilteredPlaces['properties']['geocoding']['osm_type'] = $sOSMType;
|
||||
|
||||
@@ -3,9 +3,11 @@
|
||||
$aFilteredPlaces = array();
|
||||
|
||||
if (empty($aPlace)) {
|
||||
if (isset($sError))
|
||||
if (isset($sError)) {
|
||||
$aFilteredPlaces['error'] = $sError;
|
||||
else $aFilteredPlaces['error'] = 'Unable to geocode';
|
||||
} else {
|
||||
$aFilteredPlaces['error'] = 'Unable to geocode';
|
||||
}
|
||||
javascript_renderData($aFilteredPlaces);
|
||||
} else {
|
||||
$aFilteredPlaces = array(
|
||||
@@ -13,7 +15,9 @@ if (empty($aPlace)) {
|
||||
'properties' => array()
|
||||
);
|
||||
|
||||
if (isset($aPlace['place_id'])) $aFilteredPlaces['properties']['place_id'] = $aPlace['place_id'];
|
||||
if (isset($aPlace['place_id'])) {
|
||||
$aFilteredPlaces['properties']['place_id'] = $aPlace['place_id'];
|
||||
}
|
||||
$sOSMType = formatOSMType($aPlace['osm_type']);
|
||||
if ($sOSMType) {
|
||||
$aFilteredPlaces['properties']['osm_type'] = $sOSMType;
|
||||
@@ -36,8 +40,12 @@ if (empty($aPlace)) {
|
||||
if (isset($aPlace['address'])) {
|
||||
$aFilteredPlaces['properties']['address'] = $aPlace['address']->getAddressNames();
|
||||
}
|
||||
if (isset($aPlace['sExtraTags'])) $aFilteredPlaces['properties']['extratags'] = $aPlace['sExtraTags'];
|
||||
if (isset($aPlace['sNameDetails'])) $aFilteredPlaces['properties']['namedetails'] = $aPlace['sNameDetails'];
|
||||
if (isset($aPlace['sExtraTags'])) {
|
||||
$aFilteredPlaces['properties']['extratags'] = $aPlace['sExtraTags'];
|
||||
}
|
||||
if (isset($aPlace['sNameDetails'])) {
|
||||
$aFilteredPlaces['properties']['namedetails'] = $aPlace['sNameDetails'];
|
||||
}
|
||||
|
||||
if (isset($aPlace['aBoundingBox'])) {
|
||||
$aFilteredPlaces['bbox'] = array(
|
||||
|
||||
@@ -3,19 +3,27 @@
|
||||
$aFilteredPlaces = array();
|
||||
|
||||
if (empty($aPlace)) {
|
||||
if (isset($sError))
|
||||
if (isset($sError)) {
|
||||
$aFilteredPlaces['error'] = $sError;
|
||||
else $aFilteredPlaces['error'] = 'Unable to geocode';
|
||||
} else {
|
||||
$aFilteredPlaces['error'] = 'Unable to geocode';
|
||||
}
|
||||
} else {
|
||||
if (isset($aPlace['place_id'])) $aFilteredPlaces['place_id'] = $aPlace['place_id'];
|
||||
if (isset($aPlace['place_id'])) {
|
||||
$aFilteredPlaces['place_id'] = $aPlace['place_id'];
|
||||
}
|
||||
$aFilteredPlaces['licence'] = 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright';
|
||||
$sOSMType = formatOSMType($aPlace['osm_type']);
|
||||
if ($sOSMType) {
|
||||
$aFilteredPlaces['osm_type'] = $sOSMType;
|
||||
$aFilteredPlaces['osm_id'] = $aPlace['osm_id'];
|
||||
}
|
||||
if (isset($aPlace['lat'])) $aFilteredPlaces['lat'] = $aPlace['lat'];
|
||||
if (isset($aPlace['lon'])) $aFilteredPlaces['lon'] = $aPlace['lon'];
|
||||
if (isset($aPlace['lat'])) {
|
||||
$aFilteredPlaces['lat'] = $aPlace['lat'];
|
||||
}
|
||||
if (isset($aPlace['lon'])) {
|
||||
$aFilteredPlaces['lon'] = $aPlace['lon'];
|
||||
}
|
||||
|
||||
if ($sOutputFormat == 'jsonv2' || $sOutputFormat == 'geojson') {
|
||||
$aFilteredPlaces['place_rank'] = $aPlace['rank_search'];
|
||||
@@ -35,8 +43,12 @@ if (empty($aPlace)) {
|
||||
if (isset($aPlace['address'])) {
|
||||
$aFilteredPlaces['address'] = $aPlace['address']->getAddressNames();
|
||||
}
|
||||
if (isset($aPlace['sExtraTags'])) $aFilteredPlaces['extratags'] = $aPlace['sExtraTags'];
|
||||
if (isset($aPlace['sNameDetails'])) $aFilteredPlaces['namedetails'] = $aPlace['sNameDetails'];
|
||||
if (isset($aPlace['sExtraTags'])) {
|
||||
$aFilteredPlaces['extratags'] = $aPlace['sExtraTags'];
|
||||
}
|
||||
if (isset($aPlace['sNameDetails'])) {
|
||||
$aFilteredPlaces['namedetails'] = $aPlace['sNameDetails'];
|
||||
}
|
||||
|
||||
if (isset($aPlace['aBoundingBox'])) {
|
||||
$aFilteredPlaces['boundingbox'] = $aPlace['aBoundingBox'];
|
||||
|
||||
@@ -12,17 +12,29 @@ echo " querystring='".htmlspecialchars($_SERVER['QUERY_STRING'], ENT_QUOTES)."'"
|
||||
echo ">\n";
|
||||
|
||||
if (empty($aPlace)) {
|
||||
if (isset($sError))
|
||||
if (isset($sError)) {
|
||||
echo "<error>$sError</error>";
|
||||
else echo '<error>Unable to geocode</error>';
|
||||
} else {
|
||||
echo '<error>Unable to geocode</error>';
|
||||
}
|
||||
} else {
|
||||
echo '<result';
|
||||
if ($aPlace['place_id']) echo ' place_id="'.$aPlace['place_id'].'"';
|
||||
if ($aPlace['place_id']) {
|
||||
echo ' place_id="'.$aPlace['place_id'].'"';
|
||||
}
|
||||
$sOSMType = formatOSMType($aPlace['osm_type']);
|
||||
if ($sOSMType) echo ' osm_type="'.$sOSMType.'"'.' osm_id="'.$aPlace['osm_id'].'"';
|
||||
if ($aPlace['ref']) echo ' ref="'.htmlspecialchars($aPlace['ref']).'"';
|
||||
if (isset($aPlace['lat'])) echo ' lat="'.htmlspecialchars($aPlace['lat']).'"';
|
||||
if (isset($aPlace['lon'])) echo ' lon="'.htmlspecialchars($aPlace['lon']).'"';
|
||||
if ($sOSMType) {
|
||||
echo ' osm_type="'.$sOSMType.'"'.' osm_id="'.$aPlace['osm_id'].'"';
|
||||
}
|
||||
if ($aPlace['ref']) {
|
||||
echo ' ref="'.htmlspecialchars($aPlace['ref']).'"';
|
||||
}
|
||||
if (isset($aPlace['lat'])) {
|
||||
echo ' lat="'.htmlspecialchars($aPlace['lat']).'"';
|
||||
}
|
||||
if (isset($aPlace['lon'])) {
|
||||
echo ' lon="'.htmlspecialchars($aPlace['lon']).'"';
|
||||
}
|
||||
if (isset($aPlace['aBoundingBox'])) {
|
||||
echo ' boundingbox="';
|
||||
echo join(',', $aPlace['aBoundingBox']);
|
||||
|
||||
@@ -43,29 +43,26 @@ $aPlaceDetails['centroid'] = array(
|
||||
$aPlaceDetails['geometry'] = json_decode($aPointDetails['asgeojson']);
|
||||
|
||||
$funcMapAddressLine = function ($aFull) {
|
||||
$aMapped = array(
|
||||
'localname' => $aFull['localname'],
|
||||
'place_id' => isset($aFull['place_id']) ? (int) $aFull['place_id'] : null,
|
||||
'osm_id' => isset($aFull['osm_id']) ? (int) $aFull['osm_id'] : null,
|
||||
'osm_type' => isset($aFull['osm_type']) ? $aFull['osm_type'] : null,
|
||||
'place_type' => isset($aFull['place_type']) ? $aFull['place_type'] : null,
|
||||
'class' => $aFull['class'],
|
||||
'type' => $aFull['type'],
|
||||
'admin_level' => isset($aFull['admin_level']) ? (int) $aFull['admin_level'] : null,
|
||||
'rank_address' => $aFull['rank_address'] ? (int) $aFull['rank_address'] : null,
|
||||
'distance' => (float) $aFull['distance'],
|
||||
'isaddress' => isset($aFull['isaddress']) ? (bool) $aFull['isaddress'] : null
|
||||
);
|
||||
|
||||
return $aMapped;
|
||||
return array(
|
||||
'localname' => $aFull['localname'],
|
||||
'place_id' => isset($aFull['place_id']) ? (int) $aFull['place_id'] : null,
|
||||
'osm_id' => isset($aFull['osm_id']) ? (int) $aFull['osm_id'] : null,
|
||||
'osm_type' => isset($aFull['osm_type']) ? $aFull['osm_type'] : null,
|
||||
'place_type' => isset($aFull['place_type']) ? $aFull['place_type'] : null,
|
||||
'class' => $aFull['class'],
|
||||
'type' => $aFull['type'],
|
||||
'admin_level' => isset($aFull['admin_level']) ? (int) $aFull['admin_level'] : null,
|
||||
'rank_address' => $aFull['rank_address'] ? (int) $aFull['rank_address'] : null,
|
||||
'distance' => (float) $aFull['distance'],
|
||||
'isaddress' => isset($aFull['isaddress']) ? (bool) $aFull['isaddress'] : null
|
||||
);
|
||||
};
|
||||
|
||||
$funcMapKeyword = function ($aFull) {
|
||||
$aMapped = array(
|
||||
'id' => (int) $aFull['word_id'],
|
||||
'token' => $aFull['word_token']
|
||||
);
|
||||
return $aMapped;
|
||||
return array(
|
||||
'id' => (int) $aFull['word_id'],
|
||||
'token' => $aFull['word_token']
|
||||
);
|
||||
};
|
||||
|
||||
if ($aAddressLines) {
|
||||
@@ -81,10 +78,14 @@ if ($bIncludeKeywords) {
|
||||
|
||||
if ($aPlaceSearchNameKeywords) {
|
||||
$aPlaceDetails['keywords']['name'] = array_map($funcMapKeyword, $aPlaceSearchNameKeywords);
|
||||
} else {
|
||||
$aPlaceDetails['keywords']['name'] = array();
|
||||
}
|
||||
|
||||
if ($aPlaceSearchAddressKeywords) {
|
||||
$aPlaceDetails['keywords']['address'] = array_map($funcMapKeyword, $aPlaceSearchAddressKeywords);
|
||||
} else {
|
||||
$aPlaceDetails['keywords']['address'] = array();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -92,11 +93,15 @@ if ($bIncludeHierarchy) {
|
||||
if ($bGroupHierarchy) {
|
||||
$aPlaceDetails['hierarchy'] = array();
|
||||
foreach ($aHierarchyLines as $aAddressLine) {
|
||||
if ($aAddressLine['type'] == 'yes') $sType = $aAddressLine['class'];
|
||||
else $sType = $aAddressLine['type'];
|
||||
if ($aAddressLine['type'] == 'yes') {
|
||||
$sType = $aAddressLine['class'];
|
||||
} else {
|
||||
$sType = $aAddressLine['type'];
|
||||
}
|
||||
|
||||
if (!isset($aPlaceDetails['hierarchy'][$sType]))
|
||||
if (!isset($aPlaceDetails['hierarchy'][$sType])) {
|
||||
$aPlaceDetails['hierarchy'][$sType] = array();
|
||||
}
|
||||
$aPlaceDetails['hierarchy'][$sType][] = $funcMapAddressLine($aAddressLine);
|
||||
}
|
||||
} else {
|
||||
|
||||
@@ -8,4 +8,4 @@
|
||||
$error['details'] = $exception->getFile() . '('. $exception->getLine() . ')';
|
||||
}
|
||||
|
||||
echo javascript_renderData(array('error' => $error));
|
||||
javascript_renderData(array('error' => $error));
|
||||
|
||||
@@ -5,7 +5,9 @@ $aOutput['licence'] = 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm
|
||||
$aOutput['batch'] = array();
|
||||
|
||||
foreach ($aBatchResults as $aSearchResults) {
|
||||
if (!$aSearchResults) $aSearchResults = array();
|
||||
if (!$aSearchResults) {
|
||||
$aSearchResults = array();
|
||||
}
|
||||
$aFilteredPlaces = array();
|
||||
foreach ($aSearchResults as $iResNum => $aPointDetails) {
|
||||
$aPlace = array(
|
||||
|
||||
@@ -9,7 +9,9 @@ foreach ($aSearchResults as $iResNum => $aPointDetails) {
|
||||
)
|
||||
);
|
||||
|
||||
if (isset($aPointDetails['place_id'])) $aPlace['properties']['geocoding']['place_id'] = $aPointDetails['place_id'];
|
||||
if (isset($aPointDetails['place_id'])) {
|
||||
$aPlace['properties']['geocoding']['place_id'] = $aPointDetails['place_id'];
|
||||
}
|
||||
$sOSMType = formatOSMType($aPointDetails['osm_type']);
|
||||
if ($sOSMType) {
|
||||
$aPlace['properties']['geocoding']['osm_type'] = $sOSMType;
|
||||
|
||||
@@ -8,7 +8,7 @@ foreach ($aSearchResults as $iResNum => $aPointDetails) {
|
||||
'place_id'=>$aPointDetails['place_id'],
|
||||
)
|
||||
);
|
||||
|
||||
|
||||
$sOSMType = formatOSMType($aPointDetails['osm_type']);
|
||||
if ($sOSMType) {
|
||||
$aPlace['properties']['osm_type'] = $sOSMType;
|
||||
@@ -58,8 +58,12 @@ foreach ($aSearchResults as $iResNum => $aPointDetails) {
|
||||
}
|
||||
|
||||
|
||||
if (isset($aPointDetails['sExtraTags'])) $aPlace['properties']['extratags'] = $aPointDetails['sExtraTags'];
|
||||
if (isset($aPointDetails['sNameDetails'])) $aPlace['properties']['namedetails'] = $aPointDetails['sNameDetails'];
|
||||
if (isset($aPointDetails['sExtraTags'])) {
|
||||
$aPlace['properties']['extratags'] = $aPointDetails['sExtraTags'];
|
||||
}
|
||||
if (isset($aPointDetails['sNameDetails'])) {
|
||||
$aPlace['properties']['namedetails'] = $aPointDetails['sNameDetails'];
|
||||
}
|
||||
|
||||
$aFilteredPlaces[] = $aPlace;
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ foreach ($aSearchResults as $iResNum => $aPointDetails) {
|
||||
'place_id'=>$aPointDetails['place_id'],
|
||||
'licence'=>'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright',
|
||||
);
|
||||
|
||||
|
||||
$sOSMType = formatOSMType($aPointDetails['osm_type']);
|
||||
if ($sOSMType) {
|
||||
$aPlace['osm_type'] = $sOSMType;
|
||||
@@ -60,8 +60,12 @@ foreach ($aSearchResults as $iResNum => $aPointDetails) {
|
||||
$aPlace['geokml'] = $aPointDetails['askml'];
|
||||
}
|
||||
|
||||
if (isset($aPointDetails['sExtraTags'])) $aPlace['extratags'] = $aPointDetails['sExtraTags'];
|
||||
if (isset($aPointDetails['sNameDetails'])) $aPlace['namedetails'] = $aPointDetails['sNameDetails'];
|
||||
if (isset($aPointDetails['sExtraTags'])) {
|
||||
$aPlace['extratags'] = $aPointDetails['sExtraTags'];
|
||||
}
|
||||
if (isset($aPointDetails['sNameDetails'])) {
|
||||
$aPlace['namedetails'] = $aPointDetails['sNameDetails'];
|
||||
}
|
||||
|
||||
$aFilteredPlaces[] = $aPlace;
|
||||
}
|
||||
|
||||
@@ -10,7 +10,9 @@ echo (isset($sXmlRootTag)?$sXmlRootTag:'searchresults');
|
||||
echo " timestamp='".date(DATE_RFC822)."'";
|
||||
echo " attribution='Data © OpenStreetMap contributors, ODbL 1.0. http://www.openstreetmap.org/copyright'";
|
||||
echo " querystring='".htmlspecialchars($sQuery, ENT_QUOTES)."'";
|
||||
if (isset($aMoreParams['viewbox'])) echo " viewbox='".htmlspecialchars($aMoreParams['viewbox'], ENT_QUOTES)."'";
|
||||
if (isset($aMoreParams['viewbox'])) {
|
||||
echo " viewbox='".htmlspecialchars($aMoreParams['viewbox'], ENT_QUOTES)."'";
|
||||
}
|
||||
if (isset($aMoreParams['exclude_place_ids'])) {
|
||||
echo " exclude_place_ids='".htmlspecialchars($aMoreParams['exclude_place_ids'])."'";
|
||||
}
|
||||
|
||||
246
lib-php/tokenizer/legacy_icu_tokenizer.php
Normal file
246
lib-php/tokenizer/legacy_icu_tokenizer.php
Normal file
@@ -0,0 +1,246 @@
|
||||
<?php
|
||||
|
||||
namespace Nominatim;
|
||||
|
||||
/**
 * Query tokenizer based on ICU transliteration rules.
 *
 * Phrases are split into words with PHP's intl Transliterator and the
 * resulting tokens are looked up in the `word` table.
 */
class Tokenizer
{
    // Database connection used for all lookups in the word table.
    private $oDB;

    // Transliterator created from CONST_Term_Normalization_Rules.
    private $oNormalizer;
    // Transliterator created from CONST_Transliteration.
    private $oTransliterator;
    // Country codes that country tokens are restricted to (if any).
    private $aCountryRestriction;

    /**
     * @param object $oDB Database connection (kept by reference).
     */
    public function __construct(&$oDB)
    {
        $this->oDB =& $oDB;
        $this->oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules);
        $this->oTransliterator = \Transliterator::createFromRules(CONST_Transliteration);
    }

    /**
     * Check that the word table can be queried and contains data.
     *
     * @return void
     *
     * @throws \Exception Code 703 when the query fails, code 704 when no
     *                    word ID could be read.
     */
    public function checkStatus()
    {
        $sSQL = 'SELECT word_id FROM word limit 1';
        $iWordID = $this->oDB->getOne($sSQL);
        if ($iWordID === false) {
            throw new \Exception('Query failed', 703);
        }
        if (!$iWordID) {
            throw new \Exception('No value', 704);
        }
    }


    /**
     * Restrict country tokens to the given list of countries.
     *
     * @param string[] $aCountries Country identifiers as stored in the
     *                             `word` column of country tokens.
     *
     * @return void
     */
    public function setCountryRestriction($aCountries)
    {
        $this->aCountryRestriction = $aCountries;
    }


    /**
     * Apply the term normalization rules to a string.
     *
     * @param string $sTerm Term to normalize.
     *
     * @return string Normalized term, or the unchanged term when no
     *                normalizer is available.
     */
    public function normalizeString($sTerm)
    {
        if ($this->oNormalizer === null) {
            return $sTerm;
        }

        return $this->oNormalizer->transliterate($sTerm);
    }

    /**
     * Convert a term into the transliterated form used for word tokens.
     *
     * The term is padded with spaces before transliteration and the
     * result is trimmed again.
     *
     * @param string $sTerm Term to convert.
     *
     * @return string Transliterated term without surrounding whitespace.
     */
    private function makeStandardWord($sTerm)
    {
        return trim($this->oTransliterator->transliterate(' '.$sTerm.' '));
    }


    /**
     * Look up the special-term tokens (word type 'S') matching a term.
     *
     * @param string $sTerm Term as entered by the user.
     *
     * @return Token\SpecialTerm[] One token per matching row.
     */
    public function tokensForSpecialTerm($sTerm)
    {
        $aResults = array();

        $sSQL = "SELECT word_id, info->>'class' as class, info->>'type' as type ";
        $sSQL .= ' FROM word WHERE word_token = :term and type = \'S\'';

        Debug::printVar('Term', $sTerm);
        Debug::printSQL($sSQL);
        $aSearchWords = $this->oDB->getAll($sSQL, array(':term' => $this->makeStandardWord($sTerm)));

        Debug::printVar('Results', $aSearchWords);

        foreach ($aSearchWords as $aSearchTerm) {
            $aResults[] = new \Nominatim\Token\SpecialTerm(
                $aSearchTerm['word_id'],
                $aSearchTerm['class'],
                $aSearchTerm['type'],
                \Nominatim\Operator::TYPE
            );
        }

        Debug::printVar('Special term tokens', $aResults);

        return $aResults;
    }


    /**
     * Tokenize the given phrases and compute their word sets.
     *
     * @param object[] $aPhrases Phrases of the query. computeWordSets()
     *                           is called on each of them.
     *
     * @return TokenList Valid tokens found for the phrases.
     */
    public function extractTokensFromPhrases(&$aPhrases)
    {
        $sNormQuery = '';
        $aWordLists = array();
        $aTokens = array();
        foreach ($aPhrases as $iPhrase => $oPhrase) {
            $sNormQuery .= ','.$this->normalizeString($oPhrase->getPhrase());
            $sPhrase = $this->makeStandardWord($oPhrase->getPhrase());
            Debug::printVar('Phrase', $sPhrase);
            // Store the word list under the key of its phrase, so that
            // the lookup below is correct even when the phrase array is
            // not indexed 0..n-1.
            if (strlen($sPhrase) > 0) {
                $aWords = explode(' ', $sPhrase);
                self::addTokens($aTokens, $aWords);
                $aWordLists[$iPhrase] = $aWords;
            } else {
                $aWordLists[$iPhrase] = array();
            }
        }

        Debug::printVar('Tokens', $aTokens);
        Debug::printVar('WordLists', $aWordLists);

        $oValidTokens = $this->computeValidTokens($aTokens, $sNormQuery);

        foreach ($aPhrases as $iPhrase => $oPhrase) {
            $oPhrase->computeWordSets($aWordLists[$iPhrase], $oValidTokens);
        }

        return $oValidTokens;
    }


    /**
     * Build the list of valid tokens for a set of token strings.
     *
     * Tokens are first looked up in the database. Tokens without a match
     * are then checked for the US ZIP+4 and plain number patterns.
     *
     * @param string[] $aTokens    Token strings to look up.
     * @param string   $sNormQuery Normalized query string.
     *
     * @return TokenList
     */
    private function computeValidTokens($aTokens, $sNormQuery)
    {
        $oValidTokens = new TokenList();

        if (!empty($aTokens)) {
            $this->addTokensFromDB($oValidTokens, $aTokens, $sNormQuery);

            // Try more interpretations for Tokens that could not be matched.
            foreach ($aTokens as $sToken) {
                // An empty token can never match one of the patterns
                // below; skip it to avoid reading offset 0 of ''.
                if ($sToken === '') {
                    continue;
                }
                if ($sToken[0] != ' ' && !$oValidTokens->contains($sToken)) {
                    if (preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
                        // US ZIP+4 codes - merge in the 5-digit ZIP code
                        $oValidTokens->addToken(
                            $sToken,
                            new Token\Postcode(null, $aData[1], 'us')
                        );
                    } elseif (preg_match('/^[0-9]+$/', $sToken)) {
                        // Unknown single word token with a number.
                        // Assume it is a house number.
                        $oValidTokens->addToken(
                            $sToken,
                            new Token\HouseNumber(null, trim($sToken))
                        );
                    }
                }
            }
        }

        return $oValidTokens;
    }


    /**
     * Look up tokens in the word table and add the matches to the list.
     *
     * @param TokenList $oValidTokens Token list to add to.
     * @param string[]  $aTokens      Token strings to look up.
     * @param string    $sNormQuery   Normalized query string, used to
     *                                verify postcode matches.
     *
     * @return void
     */
    private function addTokensFromDB(&$oValidTokens, $aTokens, $sNormQuery)
    {
        // Check which tokens we have, get the ID numbers
        $sSQL = 'SELECT word_id, word_token, type, word,';
        $sSQL .= " info->>'op' as operator,";
        $sSQL .= " info->>'class' as class, info->>'type' as ctype,";
        $sSQL .= " info->>'count' as count";
        $sSQL .= ' FROM word WHERE word_token in (';
        $sSQL .= join(',', $this->oDB->getDBQuotedList($aTokens)).')';

        Debug::printSQL($sSQL);

        $aDBWords = $this->oDB->getAll($sSQL, null, 'Could not get word tokens.');

        foreach ($aDBWords as $aWord) {
            $iId = (int) $aWord['word_id'];
            $sTok = $aWord['word_token'];

            switch ($aWord['type']) {
                case 'C':  // country name tokens
                    if ($aWord['word'] !== null
                        && (!$this->aCountryRestriction
                            || in_array($aWord['word'], $this->aCountryRestriction))
                    ) {
                        $oValidTokens->addToken(
                            $sTok,
                            new Token\Country($iId, $aWord['word'])
                        );
                    }
                    break;
                case 'H':  // house number tokens
                    $oValidTokens->addToken($sTok, new Token\HouseNumber($iId, $aWord['word_token']));
                    break;
                case 'P':  // postcode tokens
                    // Postcodes are not normalized, so they may have content
                    // that makes SQL injection possible. Reject postcodes
                    // that would need special escaping.
                    if ($aWord['word'] !== null
                        && pg_escape_string($aWord['word']) == $aWord['word']
                    ) {
                        // Only accept the postcode when its normalized
                        // form appears in the normalized query.
                        $sNormPostcode = $this->normalizeString($aWord['word']);
                        if (strpos($sNormQuery, $sNormPostcode) !== false) {
                            $oValidTokens->addToken(
                                $sTok,
                                new Token\Postcode($iId, $aWord['word'], null)
                            );
                        }
                    }
                    break;
                case 'S':  // tokens for classification terms (special phrases)
                    if ($aWord['class'] !== null && $aWord['ctype'] !== null) {
                        $oValidTokens->addToken($sTok, new Token\SpecialTerm(
                            $iId,
                            $aWord['class'],
                            $aWord['ctype'],
                            (isset($aWord['operator'])) ? Operator::NEAR : Operator::NONE
                        ));
                    }
                    break;
                case 'W': // full-word tokens
                    $oValidTokens->addToken($sTok, new Token\Word(
                        $iId,
                        (int) $aWord['count'],
                        substr_count($aWord['word_token'], ' ')
                    ));
                    break;
                case 'w':  // partial word terms
                    $oValidTokens->addToken($sTok, new Token\Partial(
                        $iId,
                        $aWord['word_token'],
                        (int) $aWord['count']
                    ));
                    break;
                default:
                    break;
            }
        }
    }


    /**
     * Add all tokens that can be built from a word list to a token list.
     *
     * Every run of consecutive words, joined with single spaces, is
     * added. Tokens are keyed by their own text, so duplicates collapse.
     *
     * @param string[] $aTokens List of tokens to append to (by reference).
     * @param string[] $aWords  Words of one phrase, in order.
     *
     * @return void
     */
    private static function addTokens(&$aTokens, $aWords)
    {
        $iNumWords = count($aWords);

        for ($i = 0; $i < $iNumWords; $i++) {
            $sPhrase = $aWords[$i];
            $aTokens[$sPhrase] = $sPhrase;

            for ($j = $i + 1; $j < $iNumWords; $j++) {
                $sPhrase .= ' '.$aWords[$j];
                $aTokens[$sPhrase] = $sPhrase;
            }
        }
    }
}
|
||||
266
lib-php/tokenizer/legacy_tokenizer.php
Normal file
266
lib-php/tokenizer/legacy_tokenizer.php
Normal file
@@ -0,0 +1,266 @@
|
||||
<?php
|
||||
|
||||
namespace Nominatim;
|
||||
|
||||
/**
 * Query tokenizer that relies on the database function
 * make_standard_name() to create the word tokens of a phrase.
 */
class Tokenizer
{
    // Database connection used for all lookups.
    private $oDB;

    // Transliterator created from CONST_Term_Normalization_Rules.
    private $oNormalizer = null;
    // Country codes that country tokens are restricted to (if any).
    private $aCountryRestriction = null;

    /**
     * @param object $oDB Database connection (kept by reference).
     */
    public function __construct(&$oDB)
    {
        $this->oDB =& $oDB;
        $this->oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules);
    }

    /**
     * Check that make_standard_name() works and the word table has data.
     *
     * @return void
     *
     * @throws \Exception Code 701/702 when make_standard_name() fails or
     *                    returns an unexpected value, code 703/704 when
     *                    the word table cannot be read or has no match.
     */
    public function checkStatus()
    {
        $sStandardWord = $this->oDB->getOne("SELECT make_standard_name('a')");
        if ($sStandardWord === false) {
            throw new \Exception('Module failed', 701);
        }

        if ($sStandardWord != 'a') {
            throw new \Exception('Module call failed', 702);
        }

        $sSQL = "SELECT word_id FROM word WHERE word_token IN (' a')";
        $iWordID = $this->oDB->getOne($sSQL);
        if ($iWordID === false) {
            throw new \Exception('Query failed', 703);
        }
        if (!$iWordID) {
            throw new \Exception('No value', 704);
        }
    }


    /**
     * Restrict country tokens to the given list of countries.
     *
     * @param string[] $aCountries Country codes to accept.
     *
     * @return void
     */
    public function setCountryRestriction($aCountries)
    {
        $this->aCountryRestriction = $aCountries;
    }


    /**
     * Apply the term normalization rules to a string.
     *
     * @param string $sTerm Term to normalize.
     *
     * @return string Normalized term, or the unchanged term when no
     *                normalizer is available.
     */
    public function normalizeString($sTerm)
    {
        if ($this->oNormalizer === null) {
            return $sTerm;
        }

        return $this->oNormalizer->transliterate($sTerm);
    }


    /**
     * Look up the special-term tokens matching a term.
     *
     * Only full-word tokens with a class other than 'place' are returned.
     *
     * @param string $sTerm Term as entered by the user.
     *
     * @return Token\SpecialTerm[] One token per matching row.
     */
    public function tokensForSpecialTerm($sTerm)
    {
        $aResults = array();

        $sSQL = 'SELECT word_id, class, type FROM word ';
        $sSQL .= '   WHERE word_token = \' \' || make_standard_name(:term)';
        $sSQL .= '   AND class is not null AND class not in (\'place\')';

        Debug::printVar('Term', $sTerm);
        Debug::printSQL($sSQL);
        $aSearchWords = $this->oDB->getAll($sSQL, array(':term' => $sTerm));

        Debug::printVar('Results', $aSearchWords);

        foreach ($aSearchWords as $aSearchTerm) {
            $aResults[] = new \Nominatim\Token\SpecialTerm(
                $aSearchTerm['word_id'],
                $aSearchTerm['class'],
                $aSearchTerm['type'],
                \Nominatim\Operator::TYPE
            );
        }

        Debug::printVar('Special term tokens', $aResults);

        return $aResults;
    }


    /**
     * Tokenize the given phrases and compute their word sets.
     *
     * @param object[] $aPhrases Phrases of the query. computeWordSets()
     *                           is called on each of them.
     *
     * @return TokenList Valid tokens found for the phrases.
     */
    public function extractTokensFromPhrases(&$aPhrases)
    {
        // First get the normalized version of all phrases
        $sNormQuery = '';
        $sSQL = 'SELECT ';
        $aParams = array();
        foreach ($aPhrases as $iPhrase => $oPhrase) {
            $sNormQuery .= ','.$this->normalizeString($oPhrase->getPhrase());
            $sSQL .= 'make_standard_name(:' .$iPhrase.') as p'.$iPhrase.',';
            $aParams[':'.$iPhrase] = $oPhrase->getPhrase();
        }
        // Remove the trailing comma.
        $sSQL = substr($sSQL, 0, -1);

        Debug::printSQL($sSQL);
        Debug::printVar('SQL parameters', $aParams);

        $aNormPhrases = $this->oDB->getRow($sSQL, $aParams);

        Debug::printVar('SQL result', $aNormPhrases);

        // now compute all possible tokens
        $aWordLists = array();
        $aTokens = array();
        // The result columns come back in the order in which the phrases
        // were added to the query. Pair them up by position and store
        // each word list under the key of its phrase, so that the lookup
        // below is correct even when the phrase array is not indexed
        // 0..n-1. (Assumes getRow() yields exactly one column per phrase.)
        $aPhraseKeys = array_keys($aPhrases);
        foreach (array_values($aNormPhrases) as $iPos => $sPhrase) {
            $iPhrase = $aPhraseKeys[$iPos];
            if (strlen($sPhrase) > 0) {
                $aWords = explode(' ', $sPhrase);
                self::addTokens($aTokens, $aWords);
                $aWordLists[$iPhrase] = $aWords;
            } else {
                $aWordLists[$iPhrase] = array();
            }
        }

        Debug::printVar('Tokens', $aTokens);
        Debug::printVar('WordLists', $aWordLists);

        $oValidTokens = $this->computeValidTokens($aTokens, $sNormQuery);

        foreach ($aPhrases as $iPhrase => $oPhrase) {
            $oPhrase->computeWordSets($aWordLists[$iPhrase], $oValidTokens);
        }

        return $oValidTokens;
    }


    /**
     * Build the list of valid tokens for a set of token strings.
     *
     * Tokens are first looked up in the database. Partial tokens (those
     * without leading space) that have no match are then checked for the
     * US ZIP+4 and plain number patterns.
     *
     * @param string[] $aTokens    Token strings to look up.
     * @param string   $sNormQuery Normalized query string.
     *
     * @return TokenList
     */
    private function computeValidTokens($aTokens, $sNormQuery)
    {
        $oValidTokens = new TokenList();

        if (!empty($aTokens)) {
            $this->addTokensFromDB($oValidTokens, $aTokens, $sNormQuery);

            // Try more interpretations for Tokens that could not be matched.
            foreach ($aTokens as $sToken) {
                // An empty token can never match one of the patterns
                // below; skip it to avoid reading offset 0 of ''.
                if ($sToken === '') {
                    continue;
                }
                if ($sToken[0] != ' ' && !$oValidTokens->contains($sToken)) {
                    if (preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
                        // US ZIP+4 codes - merge in the 5-digit ZIP code
                        $oValidTokens->addToken(
                            $sToken,
                            new Token\Postcode(null, $aData[1], 'us')
                        );
                    } elseif (preg_match('/^[0-9]+$/', $sToken)) {
                        // Unknown single word token with a number.
                        // Assume it is a house number.
                        $oValidTokens->addToken(
                            $sToken,
                            new Token\HouseNumber(null, trim($sToken))
                        );
                    }
                }
            }
        }

        return $oValidTokens;
    }


    /**
     * Look up tokens in the word table and add the matches to the list.
     *
     * Tokens are added without the leading space that marks full words
     * in the word table.
     *
     * @param TokenList $oValidTokens Token list to add to.
     * @param string[]  $aTokens      Token strings to look up.
     * @param string    $sNormQuery   Normalized query string, used to
     *                                verify special terms and postcodes.
     *
     * @return void
     */
    private function addTokensFromDB(&$oValidTokens, $aTokens, $sNormQuery)
    {
        // Check which tokens we have, get the ID numbers
        $sSQL = 'SELECT word_id, word_token, word, class, type, country_code,';
        $sSQL .= ' operator, coalesce(search_name_count, 0) as count';
        $sSQL .= ' FROM word WHERE word_token in (';
        $sSQL .= join(',', $this->oDB->getDBQuotedList($aTokens)).')';

        Debug::printSQL($sSQL);

        $aDBWords = $this->oDB->getAll($sSQL, null, 'Could not get word tokens.');

        foreach ($aDBWords as $aWord) {
            $oToken = null;
            $iId = (int) $aWord['word_id'];

            if ($aWord['class']) {
                // Special terms need to appear in their normalized form.
                // (postcodes are not normalized in the word table)
                // The word is only normalized when there is one, so that
                // NULL is never handed to the transliterator.
                if ($aWord['word']
                    && strpos($sNormQuery, $this->normalizeString($aWord['word'])) === false
                ) {
                    continue;
                }

                if ($aWord['class'] == 'place' && $aWord['type'] == 'house') {
                    $oToken = new Token\HouseNumber($iId, trim($aWord['word_token']));
                } elseif ($aWord['class'] == 'place' && $aWord['type'] == 'postcode') {
                    // Reject postcodes that would need special escaping.
                    if ($aWord['word']
                        && pg_escape_string($aWord['word']) == $aWord['word']
                    ) {
                        $oToken = new Token\Postcode(
                            $iId,
                            $aWord['word'],
                            $aWord['country_code']
                        );
                    }
                } else {
                    // near and in operator the same at the moment
                    $oToken = new Token\SpecialTerm(
                        $iId,
                        $aWord['class'],
                        $aWord['type'],
                        $aWord['operator'] ? Operator::NEAR : Operator::NONE
                    );
                }
            } elseif ($aWord['country_code']) {
                // Filter country tokens that do not match restricted countries.
                if (!$this->aCountryRestriction
                    || in_array($aWord['country_code'], $this->aCountryRestriction)
                ) {
                    $oToken = new Token\Country($iId, $aWord['country_code']);
                }
            } elseif ($aWord['word_token'][0] == ' ') {
                $oToken = new Token\Word(
                    $iId,
                    (int) $aWord['count'],
                    substr_count($aWord['word_token'], ' ')
                );
            // For backward compatibility: ignore all partial tokens with more
            // than one word.
            } elseif (strpos($aWord['word_token'], ' ') === false) {
                $oToken = new Token\Partial(
                    $iId,
                    $aWord['word_token'],
                    (int) $aWord['count']
                );
            }

            if ($oToken) {
                // remove any leading spaces
                if ($aWord['word_token'][0] == ' ') {
                    $oValidTokens->addToken(substr($aWord['word_token'], 1), $oToken);
                } else {
                    $oValidTokens->addToken($aWord['word_token'], $oToken);
                }
            }
        }
    }


    /**
     * Add all tokens that can be built from a word list to a token list.
     *
     * Every run of consecutive words, joined with single spaces, is added
     * twice: once with a leading space and once without. Tokens are keyed
     * by their own text, so duplicates collapse.
     *
     * @param string[] $aTokens List of tokens to append to (by reference).
     * @param string[] $aWords  Words of one phrase, in order.
     *
     * @return void
     */
    private static function addTokens(&$aTokens, $aWords)
    {
        $iNumWords = count($aWords);

        for ($i = 0; $i < $iNumWords; $i++) {
            $sPhrase = $aWords[$i];
            $aTokens[' '.$sPhrase] = ' '.$sPhrase;
            $aTokens[$sPhrase] = $sPhrase;

            for ($j = $i + 1; $j < $iNumWords; $j++) {
                $sPhrase .= ' '.$aWords[$j];
                $aTokens[' '.$sPhrase] = ' '.$sPhrase;
                $aTokens[$sPhrase] = $sPhrase;
            }
        }
    }
}
|
||||
@@ -83,7 +83,7 @@ if ($sOsmType && $iOsmId > 0) {
|
||||
}
|
||||
|
||||
if ($sPlaceId === false) {
|
||||
throw new Exception('No place with that OSM ID found.', 404);
|
||||
throw new \Exception('No place with that OSM ID found.', 404);
|
||||
}
|
||||
} else {
|
||||
if ($sPlaceId === false) {
|
||||
@@ -95,20 +95,21 @@ $iPlaceID = (int)$sPlaceId;
|
||||
|
||||
if (CONST_Use_US_Tiger_Data) {
|
||||
$iParentPlaceID = $oDB->getOne('SELECT parent_place_id FROM location_property_tiger WHERE place_id = '.$iPlaceID);
|
||||
if ($iParentPlaceID) $iPlaceID = $iParentPlaceID;
|
||||
if ($iParentPlaceID) {
|
||||
$iPlaceID = $iParentPlaceID;
|
||||
}
|
||||
}
|
||||
|
||||
// interpolated house numbers
|
||||
$iParentPlaceID = $oDB->getOne('SELECT parent_place_id FROM location_property_osmline WHERE place_id = '.$iPlaceID);
|
||||
if ($iParentPlaceID) $iPlaceID = $iParentPlaceID;
|
||||
if ($iParentPlaceID) {
|
||||
$iPlaceID = $iParentPlaceID;
|
||||
}
|
||||
|
||||
// artificial postcodes
|
||||
$iParentPlaceID = $oDB->getOne('SELECT parent_place_id FROM location_postcode WHERE place_id = '.$iPlaceID);
|
||||
if ($iParentPlaceID) $iPlaceID = $iParentPlaceID;
|
||||
|
||||
if (CONST_Use_Aux_Location_data) {
|
||||
$iParentPlaceID = $oDB->getOne('SELECT parent_place_id FROM location_property_aux WHERE place_id = '.$iPlaceID);
|
||||
if ($iParentPlaceID) $iPlaceID = $iParentPlaceID;
|
||||
if ($iParentPlaceID) {
|
||||
$iPlaceID = $iParentPlaceID;
|
||||
}
|
||||
|
||||
$hLog = logStart($oDB, 'details', $_SERVER['QUERY_STRING'], $aLangPrefOrder);
|
||||
@@ -145,7 +146,7 @@ $sSQL .= " WHERE place_id = $iPlaceID";
|
||||
$aPointDetails = $oDB->getRow($sSQL, null, 'Could not get details of place object.');
|
||||
|
||||
if (!$aPointDetails) {
|
||||
throw new Exception('No place with that place ID found.', 404);
|
||||
throw new \Exception('No place with that place ID found.', 404);
|
||||
}
|
||||
|
||||
$aPointDetails['localname'] = $aPointDetails['localname']?$aPointDetails['localname']:$aPointDetails['housenumber'];
|
||||
|
||||
@@ -35,8 +35,10 @@ if (count($aOsmIds) > CONST_Places_Max_ID_count) {
|
||||
|
||||
foreach ($aOsmIds as $sItem) {
|
||||
// Skip empty sItem
|
||||
if (empty($sItem)) continue;
|
||||
|
||||
if (empty($sItem)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$sType = $sItem[0];
|
||||
$iId = (int) substr($sItem, 1);
|
||||
if ($iId > 0 && ($sType == 'N' || $sType == 'W' || $sType == 'R')) {
|
||||
@@ -48,7 +50,9 @@ foreach ($aOsmIds as $sItem) {
|
||||
// key names
|
||||
$oResult = $oPlace;
|
||||
unset($oResult['aAddress']);
|
||||
if (isset($oPlace['aAddress'])) $oResult['address'] = $oPlace['aAddress'];
|
||||
if (isset($oPlace['aAddress'])) {
|
||||
$oResult['address'] = $oPlace['aAddress'];
|
||||
}
|
||||
if ($sOutputFormat != 'geocodejson') {
|
||||
unset($oResult['langaddress']);
|
||||
$oResult['name'] = $oPlace['langaddress'];
|
||||
@@ -71,7 +75,9 @@ foreach ($aOsmIds as $sItem) {
|
||||
}
|
||||
|
||||
|
||||
if (CONST_Debug) exit;
|
||||
if (CONST_Debug) {
|
||||
exit;
|
||||
}
|
||||
|
||||
$sXmlRootTag = 'lookupresults';
|
||||
$sQuery = join(',', $aCleanedQueryParts);
|
||||
|
||||
@@ -30,8 +30,12 @@ while ($iTotalBroken && empty($aPolygons)) {
|
||||
$iDays++;
|
||||
}
|
||||
|
||||
if ($bReduced) $aWhere[] = "errormessage like 'Area reduced%'";
|
||||
if ($sClass) $sWhere[] = "class = '".pg_escape_string($sClass)."'";
|
||||
if ($bReduced) {
|
||||
$aWhere[] = "errormessage like 'Area reduced%'";
|
||||
}
|
||||
if ($sClass) {
|
||||
$sWhere[] = "class = '".pg_escape_string($sClass)."'";
|
||||
}
|
||||
|
||||
if (!empty($aWhere)) {
|
||||
$sSQL .= ' WHERE '.join(' and ', $aWhere);
|
||||
|
||||
12
lib-php/website/reverse-only-search.php
Normal file
12
lib-php/website/reverse-only-search.php
Normal file
@@ -0,0 +1,12 @@
|
||||
<?php
/**
 * Search endpoint for reverse-only imports.
 *
 * Reads the requested output format, registers the exception handler for
 * that format and then unconditionally fails with code 404.
 */

require_once(CONST_LibDir.'/init-website.php');
require_once(CONST_LibDir.'/ParameterParser.php');

$oParams = new \Nominatim\ParameterParser();

// Determine the output format first so that the exception handler can be
// registered for it before the error is thrown.
$sOutputFormat = $oParams->getSet(
    'format',
    array('xml', 'json', 'jsonv2', 'geojson', 'geocodejson'),
    'jsonv2'
);
set_exception_handler_by_format($sOutputFormat);

throw new \Exception('Reverse-only import does not support forward searching.', 404);
|
||||
@@ -74,10 +74,12 @@ if (isset($_SERVER['REQUEST_SCHEME'])
|
||||
.$_SERVER['HTTP_HOST'].$_SERVER['DOCUMENT_URI'].'/?'
|
||||
.http_build_query($aMoreParams);
|
||||
} else {
|
||||
$sMoreURL = '/search.php'.http_build_query($aMoreParams);
|
||||
$sMoreURL = '/search.php?'.http_build_query($aMoreParams);
|
||||
}
|
||||
|
||||
if (CONST_Debug) exit;
|
||||
if (CONST_Debug) {
|
||||
exit;
|
||||
}
|
||||
|
||||
$sOutputTemplate = ($sOutputFormat == 'jsonv2') ? 'json' : $sOutputFormat;
|
||||
include(CONST_LibDir.'/template/search-'.$sOutputTemplate.'.php');
|
||||
|
||||
@@ -17,6 +17,23 @@ if ($sOutputFormat == 'json') {
|
||||
try {
|
||||
$oStatus = new Nominatim\Status($oDB);
|
||||
$oStatus->status();
|
||||
|
||||
if ($sOutputFormat == 'json') {
|
||||
$epoch = $oStatus->dataDate();
|
||||
$aResponse = array(
|
||||
'status' => 0,
|
||||
'message' => 'OK',
|
||||
'data_updated' => (new DateTime('@'.$epoch))->format(DateTime::RFC3339),
|
||||
'software_version' => CONST_NominatimVersion
|
||||
);
|
||||
$sDatabaseVersion = $oStatus->databaseVersion();
|
||||
if ($sDatabaseVersion) {
|
||||
$aResponse['database_version'] = $sDatabaseVersion;
|
||||
}
|
||||
javascript_renderData($aResponse);
|
||||
} else {
|
||||
echo 'OK';
|
||||
}
|
||||
} catch (Exception $oErr) {
|
||||
if ($sOutputFormat == 'json') {
|
||||
$aResponse = array(
|
||||
@@ -28,25 +45,4 @@ try {
|
||||
header('HTTP/1.0 500 Internal Server Error');
|
||||
echo 'ERROR: '.$oErr->getMessage();
|
||||
}
|
||||
exit;
|
||||
}
|
||||
|
||||
|
||||
if ($sOutputFormat == 'json') {
|
||||
$epoch = $oStatus->dataDate();
|
||||
$aResponse = array(
|
||||
'status' => 0,
|
||||
'message' => 'OK',
|
||||
'data_updated' => (new DateTime('@'.$epoch))->format(DateTime::RFC3339),
|
||||
'software_version' => CONST_NominatimVersion
|
||||
);
|
||||
$sDatabaseVersion = $oStatus->databaseVersion();
|
||||
if ($sDatabaseVersion) {
|
||||
$aResponse['database_version'] = $sDatabaseVersion;
|
||||
}
|
||||
javascript_renderData($aResponse);
|
||||
} else {
|
||||
echo 'OK';
|
||||
}
|
||||
|
||||
exit;
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
-- Auxiliary address table. It declares no columns of its own; everything
-- is inherited from location_property.
CREATE TABLE location_property_aux ()
  INHERITS (location_property);

-- Lookup by place, by parent, and by parent plus house number.
CREATE INDEX idx_location_property_aux_place_id
  ON location_property_aux USING BTREE (place_id);
CREATE INDEX idx_location_property_aux_parent_place_id
  ON location_property_aux USING BTREE (parent_place_id);
CREATE INDEX idx_location_property_aux_housenumber_parent_place_id
  ON location_property_aux USING BTREE (parent_place_id, housenumber);

-- NOTE(review): "{www-user}" looks like a placeholder that is substituted
-- before the script is run - confirm against the installer.
GRANT SELECT ON location_property_aux TO "{www-user}";
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
{% include('functions/utils.sql') %}
|
||||
{% include('functions/normalization.sql') %}
|
||||
{% include('functions/ranking.sql') %}
|
||||
{% include('functions/importance.sql') %}
|
||||
{% include('functions/address_lookup.sql') %}
|
||||
|
||||
@@ -135,20 +135,6 @@ BEGIN
|
||||
END IF;
|
||||
{% endif %}
|
||||
|
||||
-- then additional data
|
||||
{% if config.get_bool('USE_AUX_LOCATION_DATA') %}
|
||||
IF place IS NULL THEN
|
||||
SELECT parent_place_id as place_id, 'us' as country_code,
|
||||
housenumber, postcode,
|
||||
'place' as class, 'house' as type,
|
||||
null as name, null as address,
|
||||
centroid
|
||||
INTO place
|
||||
FROM location_property_aux
|
||||
WHERE place_id = in_place_id;
|
||||
END IF;
|
||||
{% endif %}
|
||||
|
||||
-- postcode table
|
||||
IF place IS NULL THEN
|
||||
SELECT parent_place_id as place_id, country_code,
|
||||
|
||||
@@ -1,53 +0,0 @@
|
||||
-- Functions for adding external data (currently unused).
|
||||
|
||||
-- Add one address point to location_property_aux.
--
-- Parameters:
--   pointgeo        geometry of the address; its centroid is stored
--   in_housenumber  house number to store
--   in_street       street name used to look for a matching named road
--   in_isin         not used by this function
--   in_postcode     postcode to store; when NULL the postcode of the
--                   parent place in placex is used instead
--   in_countrycode  country code used to select the partition
--
-- The parent is the nearest road whose name matches in_street or, failing
-- that, the nearest road of any name.
--
-- Returns the number of rows inserted, which is always 1.
CREATE OR REPLACE FUNCTION aux_create_property(pointgeo GEOMETRY, in_housenumber TEXT,
                                               in_street TEXT, in_isin TEXT,
                                               in_postcode TEXT, in_countrycode char(2))
  RETURNS INTEGER
  AS $$
DECLARE
  place_centroid GEOMETRY;
  out_partition INTEGER;
  out_parent_place_id BIGINT;
  address_street_word_ids INTEGER[];
  out_postcode TEXT;
BEGIN
  place_centroid := ST_Centroid(pointgeo);
  out_partition := get_partition(in_countrycode);
  out_parent_place_id := null;

  -- Prefer a road that carries the given street name.
  address_street_word_ids := word_ids_from_name(in_street);
  IF address_street_word_ids IS NOT NULL THEN
    out_parent_place_id := getNearestNamedRoadPlaceId(out_partition, place_centroid,
                                                      address_street_word_ids);
  END IF;

  -- Fall back to the nearest road regardless of its name.
  -- (A stray 'END LOOP;' without an open loop used to follow this SELECT
  -- and made the function body invalid.)
  IF out_parent_place_id IS NULL THEN
    SELECT getNearestRoadPlaceId(out_partition, place_centroid)
      INTO out_parent_place_id;
  END IF;

  out_postcode := in_postcode;
  IF out_postcode IS NULL THEN
    SELECT postcode FROM placex WHERE place_id = out_parent_place_id INTO out_postcode;
  END IF;
  -- XXX look into postcode table

  INSERT INTO location_property_aux (place_id, partition, parent_place_id,
                                     housenumber, postcode, centroid)
    VALUES (nextval('seq_place'), out_partition, out_parent_place_id,
            in_housenumber, out_postcode, place_centroid);

  -- Exactly one row is inserted per call.
  RETURN 1;
END;
$$
LANGUAGE plpgsql;
|
||||
|
||||
@@ -12,39 +12,47 @@ $$
|
||||
LANGUAGE plpgsql IMMUTABLE;
|
||||
|
||||
|
||||
-- Compute the address tags to use for an interpolation way.
--
-- If in_address holds any key besides 'interpolation', it is returned
-- unchanged. Otherwise the address of a node of way `wayid` that has a
-- 'street' or 'place' tag is merged with in_address and marked with an
-- '_inherited' key. Without such a node, in_address is returned as is.
CREATE OR REPLACE FUNCTION get_interpolation_address(in_address HSTORE, wayid BIGINT)
  RETURNS HSTORE
  LANGUAGE plpgsql STABLE
  AS $$
DECLARE
  node_ids BIGINT[];
  node_place RECORD;
BEGIN
  -- Ways with their own address tags keep them.
  IF akeys(in_address) != ARRAY['interpolation'] THEN
    RETURN in_address;
  END IF;

  SELECT nodes INTO node_ids FROM planet_osm_ways WHERE id = wayid;

  FOR node_place IN
    SELECT placex.address, placex.osm_id
      FROM placex
     WHERE osm_type = 'N'
       AND osm_id = ANY(node_ids)
       AND placex.address is not null
       AND (placex.address ? 'street' or placex.address ? 'place')
       AND indexed_status < 100
  LOOP
    -- The first row found wins; mark the result as a derived address.
    RETURN node_place.address || in_address || hstore('_inherited', '');
  END LOOP;

  RETURN in_address;
END;
$$;
|
||||
|
||||
|
||||
|
||||
-- find the parent road of the cut road parts
|
||||
CREATE OR REPLACE FUNCTION get_interpolation_parent(wayid BIGINT, street TEXT,
|
||||
place TEXT, partition SMALLINT,
|
||||
CREATE OR REPLACE FUNCTION get_interpolation_parent(street INTEGER[], place INTEGER[],
|
||||
partition SMALLINT,
|
||||
centroid GEOMETRY, geom GEOMETRY)
|
||||
RETURNS BIGINT
|
||||
AS $$
|
||||
DECLARE
|
||||
addr_street TEXT;
|
||||
addr_place TEXT;
|
||||
parent_place_id BIGINT;
|
||||
|
||||
waynodes BIGINT[];
|
||||
|
||||
location RECORD;
|
||||
BEGIN
|
||||
addr_street = street;
|
||||
addr_place = place;
|
||||
|
||||
IF addr_street is null and addr_place is null THEN
|
||||
select nodes from planet_osm_ways where id = wayid INTO waynodes;
|
||||
FOR location IN SELECT placex.address from placex
|
||||
where osm_type = 'N' and osm_id = ANY(waynodes)
|
||||
and placex.address is not null
|
||||
and (placex.address ? 'street' or placex.address ? 'place')
|
||||
and indexed_status < 100
|
||||
limit 1 LOOP
|
||||
addr_street = location.address->'street';
|
||||
addr_place = location.address->'place';
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
parent_place_id := find_parent_for_address(addr_street, addr_place,
|
||||
partition, centroid);
|
||||
parent_place_id := find_parent_for_address(street, place, partition, centroid);
|
||||
|
||||
IF parent_place_id is null THEN
|
||||
FOR location IN SELECT place_id FROM placex
|
||||
@@ -147,15 +155,15 @@ BEGIN
|
||||
NEW.interpolationtype = NEW.address->'interpolation';
|
||||
|
||||
place_centroid := ST_PointOnSurface(NEW.linegeo);
|
||||
NEW.parent_place_id = get_interpolation_parent(NEW.osm_id, NEW.address->'street',
|
||||
NEW.address->'place',
|
||||
NEW.parent_place_id = get_interpolation_parent(token_addr_street_match_tokens(NEW.token_info),
|
||||
token_addr_place_match_tokens(NEW.token_info),
|
||||
NEW.partition, place_centroid, NEW.linegeo);
|
||||
|
||||
IF NEW.address is not NULL AND NEW.address ? 'postcode' AND NEW.address->'postcode' not similar to '%(,|;)%' THEN
|
||||
interpol_postcode := NEW.address->'postcode';
|
||||
housenum := getorcreate_postcode_id(NEW.address->'postcode');
|
||||
ELSE
|
||||
interpol_postcode := NULL;
|
||||
interpol_postcode := token_normalized_postcode(NEW.address->'postcode');
|
||||
|
||||
NEW.token_info := token_strip_info(NEW.token_info);
|
||||
IF NEW.address ? '_inherited' THEN
|
||||
NEW.address := hstore('interpolation', NEW.interpolationtype);
|
||||
END IF;
|
||||
|
||||
-- if the line was newly inserted, split the line as necessary
|
||||
@@ -202,12 +210,13 @@ BEGIN
|
||||
|
||||
-- determine postcode
|
||||
postcode := coalesce(interpol_postcode,
|
||||
prevnode.address->'postcode',
|
||||
nextnode.address->'postcode',
|
||||
token_normalized_postcode(prevnode.address->'postcode'),
|
||||
token_normalized_postcode(nextnode.address->'postcode'),
|
||||
postcode);
|
||||
|
||||
IF postcode is NULL THEN
|
||||
SELECT placex.postcode FROM placex WHERE place_id = NEW.parent_place_id INTO postcode;
|
||||
SELECT token_normalized_postcode(placex.postcode)
|
||||
FROM placex WHERE place_id = NEW.parent_place_id INTO postcode;
|
||||
END IF;
|
||||
IF postcode is NULL THEN
|
||||
postcode := get_nearest_postcode(NEW.country_code, nextnode.geometry);
|
||||
@@ -217,7 +226,7 @@ BEGIN
|
||||
NEW.startnumber := startnumber;
|
||||
NEW.endnumber := endnumber;
|
||||
NEW.linegeo := sectiongeo;
|
||||
NEW.postcode := upper(trim(postcode));
|
||||
NEW.postcode := postcode;
|
||||
ELSE
|
||||
insert into location_property_osmline
|
||||
(linegeo, partition, osm_id, parent_place_id,
|
||||
|
||||
@@ -1,545 +0,0 @@
|
||||
-- Functions for term normalisation and access to the 'word' table.
|
||||
|
||||
-- SQL binding for the C symbol 'transliteration' in the nominatim module.
-- STRICT: a NULL argument yields NULL without calling the C code.
CREATE OR REPLACE FUNCTION transliteration(text)
  RETURNS text
  LANGUAGE c IMMUTABLE STRICT
  AS '{{ modulepath }}/nominatim.so', 'transliteration';
|
||||
|
||||
|
||||
-- SQL binding for the C symbol 'gettokenstring' in the nominatim module.
-- STRICT: a NULL argument yields NULL without calling the C code.
CREATE OR REPLACE FUNCTION gettokenstring(text)
  RETURNS text
  LANGUAGE c IMMUTABLE STRICT
  AS '{{ modulepath }}/nominatim.so', 'gettokenstring';
|
||||
|
||||
|
||||
-- Return the standard form of a name: the input is transliterated, turned
-- into a token string and stripped of leading and trailing blanks.
-- Returns NULL for NULL input because both helpers are STRICT.
CREATE OR REPLACE FUNCTION make_standard_name(name TEXT)
  RETURNS TEXT
  LANGUAGE plpgsql IMMUTABLE
  AS $$
BEGIN
  RETURN trim(public.gettokenstring(public.transliteration(name)));
END;
$$;
|
||||
|
||||
-- Look up the id of the plain word token (no class, no type) for
-- lookup_word and create the token if it does not exist yet.
--
-- Returns NULL when the token exists but its search_name_count exceeds
-- get_maxwordfreq(), i.e. the word is too common.
CREATE OR REPLACE FUNCTION getorcreate_word_id(lookup_word TEXT)
  RETURNS INTEGER
  LANGUAGE plpgsql
  AS $$
DECLARE
  v_token TEXT := trim(lookup_word);
  v_word_id INTEGER;
  v_frequency INTEGER;
BEGIN
  SELECT min(word_id), max(search_name_count)
    INTO v_word_id, v_frequency
    FROM word
   WHERE word_token = v_token and class is null and type is null;

  -- Unknown token: register it with a usage count of 0.
  IF v_word_id IS NULL THEN
    v_word_id := nextval('seq_word');
    INSERT INTO word VALUES (v_word_id, v_token, null, null, null, null, 0);
    RETURN v_word_id;
  END IF;

  IF v_frequency > get_maxwordfreq() THEN
    RETURN NULL;
  END IF;

  RETURN v_word_id;
END;
$$;
|
||||
|
||||
-- Create house number tokens from an OSM addr:housenumber.
-- The house number is split at commas and semicolons; every part is
-- normalized with make_standard_name() and gets a token via
-- getorcreate_housenumber_id().
-- Returns the normalized parts joined with ';', suitable for comparison.
--
-- NOTE(review): the function is declared STABLE although the helper it
-- calls inserts into the word table - confirm that this is intended.
CREATE OR REPLACE FUNCTION create_housenumber_id(housenumber TEXT)
  RETURNS TEXT
  LANGUAGE plpgsql STABLE STRICT
  AS $$
DECLARE
  joined TEXT;
BEGIN
  SELECT array_to_string(array_agg(tokenized.normalized), ';')
    INTO joined
    FROM (SELECT parts.normalized AS normalized,
                 -- evaluated only for its side effect of creating the token
                 getorcreate_housenumber_id(parts.normalized)
            FROM (SELECT make_standard_name(piece) AS normalized
                    FROM regexp_split_to_table(housenumber, '[,;]') piece) parts
         ) tokenized;

  RETURN joined;
END;
$$;
|
||||
|
||||
-- Return the id of the house number token (class 'place', type 'house')
-- for lookup_word, creating the token when it is missing.
-- The token is stored with a leading blank.
CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT)
  RETURNS INTEGER
  LANGUAGE plpgsql
  AS $$
DECLARE
  v_token TEXT := ' ' || trim(lookup_word);
  v_word_id INTEGER;
BEGIN
  SELECT min(word_id) INTO v_word_id
    FROM word
   WHERE word_token = v_token and class = 'place' and type = 'house';

  IF v_word_id IS NOT NULL THEN
    RETURN v_word_id;
  END IF;

  v_word_id := nextval('seq_word');
  INSERT INTO word VALUES (v_word_id, v_token, null,
                           'place', 'house', null, 0);
  RETURN v_word_id;
END;
$$;
|
||||
|
||||
|
||||
-- Return the id of the postcode token (class 'place', type 'postcode') for
-- the given postcode, creating the token when it is missing.
-- The postcode is trimmed and upper-cased before it is stored as word; the
-- token is its standard name with a leading blank.
CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT)
  RETURNS INTEGER
  LANGUAGE plpgsql
  AS $$
DECLARE
  v_postcode TEXT;
  v_token TEXT;
  v_word_id INTEGER;
BEGIN
  v_postcode := upper(trim(postcode));
  v_token := ' ' || make_standard_name(v_postcode);

  SELECT min(word_id) INTO v_word_id
    FROM word
   WHERE word_token = v_token and word = v_postcode
         and class = 'place' and type = 'postcode';

  IF v_word_id IS NOT NULL THEN
    RETURN v_word_id;
  END IF;

  v_word_id := nextval('seq_word');
  INSERT INTO word VALUES (v_word_id, v_token, v_postcode,
                           'place', 'postcode', null, 0);
  RETURN v_word_id;
END;
$$;
|
||||
|
||||
|
||||
-- Return the id of the token that links lookup_word to the country
-- lookup_country_code, creating the token when it is missing.
-- The token is stored with a leading blank.
CREATE OR REPLACE FUNCTION getorcreate_country(lookup_word TEXT,
                                               lookup_country_code varchar(2))
  RETURNS INTEGER
  LANGUAGE plpgsql
  AS $$
DECLARE
  v_token TEXT := ' ' || trim(lookup_word);
  v_word_id INTEGER;
BEGIN
  SELECT min(word_id) INTO v_word_id
    FROM word
   WHERE word_token = v_token and country_code = lookup_country_code;

  IF v_word_id IS NOT NULL THEN
    RETURN v_word_id;
  END IF;

  v_word_id := nextval('seq_word');
  INSERT INTO word VALUES (v_word_id, v_token, null,
                           null, null, lookup_country_code, 0);
  RETURN v_word_id;
END;
$$;
|
||||
|
||||
|
||||
-- Return the id of the token that maps lookup_word to the given
-- class/type pair, creating the token when it is missing.
-- normalized_word is stored in the word column; the token itself is
-- lookup_word with a leading blank.
CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT,
                                               lookup_class text, lookup_type text)
  RETURNS INTEGER
  LANGUAGE plpgsql
  AS $$
DECLARE
  v_token TEXT := ' ' || trim(lookup_word);
  v_word_id INTEGER;
BEGIN
  SELECT min(word_id) INTO v_word_id
    FROM word
   WHERE word_token = v_token and word = normalized_word
         and class = lookup_class and type = lookup_type;

  IF v_word_id IS NOT NULL THEN
    RETURN v_word_id;
  END IF;

  v_word_id := nextval('seq_word');
  INSERT INTO word VALUES (v_word_id, v_token, normalized_word,
                           lookup_class, lookup_type, null, 0);
  RETURN v_word_id;
END;
$$;
|
||||
|
||||
|
||||
-- Same as getorcreate_amenity() but for tokens that additionally carry an
-- operator: the lookup also matches on the operator column and a newly
-- created row stores `op` there.
CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT,
                                                       normalized_word TEXT,
                                                       lookup_class text,
                                                       lookup_type text,
                                                       op text)
  RETURNS INTEGER
  LANGUAGE plpgsql
  AS $$
DECLARE
  v_token TEXT := ' ' || trim(lookup_word);
  v_word_id INTEGER;
BEGIN
  SELECT min(word_id) INTO v_word_id
    FROM word
   WHERE word_token = v_token and word = normalized_word
         and class = lookup_class and type = lookup_type and operator = op;

  IF v_word_id IS NOT NULL THEN
    RETURN v_word_id;
  END IF;

  v_word_id := nextval('seq_word');
  INSERT INTO word VALUES (v_word_id, v_token, normalized_word,
                           lookup_class, lookup_type, null, 0, op);
  RETURN v_word_id;
END;
$$;
|
||||
|
||||
|
||||
-- Return the id of the full-name token (leading blank, no class, no type)
-- for lookup_word, creating the token when it is missing.
-- src_word is stored in the word column of a newly created row.
CREATE OR REPLACE FUNCTION getorcreate_name_id(lookup_word TEXT, src_word TEXT)
  RETURNS INTEGER
  LANGUAGE plpgsql
  AS $$
DECLARE
  v_token TEXT := ' ' || trim(lookup_word);
  v_word_id INTEGER;
BEGIN
  SELECT min(word_id) INTO v_word_id
    FROM word
   WHERE word_token = v_token and class is null and type is null;

  IF v_word_id IS NOT NULL THEN
    RETURN v_word_id;
  END IF;

  v_word_id := nextval('seq_word');
  INSERT INTO word VALUES (v_word_id, v_token, src_word,
                           null, null, null, 0);
  RETURN v_word_id;
END;
$$;
|
||||
|
||||
|
||||
-- Convenience overload of getorcreate_name_id(lookup_word, src_word) that
-- passes an empty string as the source word.
CREATE OR REPLACE FUNCTION getorcreate_name_id(lookup_word TEXT)
  RETURNS INTEGER
  LANGUAGE plpgsql
  AS $$
BEGIN
  RETURN getorcreate_name_id(lookup_word, '');
END;
$$;
|
||||
|
||||
-- Normalize a string and look up its word ids (partial words).
--
-- The standard name of lookup_word is split at blanks. For every non-empty
-- term the ids of all plain tokens (no class, no type) are collected; a
-- term without any token gets a new one. Returns NULL when no term was
-- processed.
CREATE OR REPLACE FUNCTION addr_ids_from_name(lookup_word TEXT)
  RETURNS INTEGER[]
  LANGUAGE plpgsql
  AS $$
DECLARE
  terms TEXT[];
  term TEXT;
  known_ids INTEGER[];
  new_id INTEGER;
  return_word_id INTEGER[];
BEGIN
  terms := string_to_array(make_standard_name(lookup_word), ' ');

  -- array_upper() yields NULL for a NULL as well as for an empty array.
  IF array_upper(terms, 1) IS NULL THEN
    RETURN return_word_id;
  END IF;

  FOREACH term IN ARRAY terms LOOP
    IF term != '' THEN
      SELECT array_agg(word_id) INTO known_ids
        FROM word
       WHERE word_token = term and class is null and type is null;

      IF known_ids IS NULL THEN
        new_id := nextval('seq_word');
        INSERT INTO word VALUES (new_id, term, null, null, null, null, 0);
        return_word_id := return_word_id || new_id;
      ELSE
        return_word_id := array_merge(return_word_id, known_ids);
      END IF;
    END IF;
  END LOOP;

  RETURN return_word_id;
END;
$$;
|
||||
|
||||
|
||||
-- Normalize a string and look up its name ids (full words).
--
-- Returns the ids of all tokens without class and type whose token equals
-- the standard name of lookup_word prefixed with a blank, or NULL when
-- there is none. Nothing is created.
CREATE OR REPLACE FUNCTION word_ids_from_name(lookup_word TEXT)
  RETURNS INTEGER[]
  LANGUAGE plpgsql STABLE STRICT
  AS $$
DECLARE
  v_token TEXT := ' ' || make_standard_name(lookup_word);
  v_ids INTEGER[];
BEGIN
  SELECT array_agg(word_id) INTO v_ids
    FROM word
   WHERE word_token = v_token and class is null and type is null;

  RETURN v_ids;
END;
$$;
|
||||
|
||||
|
||||
-- Create country tokens for all values of the hstore `src`.
--
-- Every value gets a token for its complete standard name. Values that
-- split into more than one piece at ',', ';', '(' or ')' additionally get
-- one token per non-empty piece.
CREATE OR REPLACE FUNCTION create_country(src HSTORE, country_code varchar(2))
  RETURNS VOID
  LANGUAGE plpgsql
  AS $$
DECLARE
  name_value TEXT;
  fragments TEXT[];
  fragment TEXT;
  std_name TEXT;
BEGIN
  FOR name_value IN SELECT e.value FROM each(src) e LOOP
    -- Only the side effect (token creation) matters; the id is not needed.
    PERFORM getorcreate_country(make_standard_name(name_value), country_code);

    fragments := regexp_split_to_array(name_value, E'[,;()]');
    IF array_upper(fragments, 1) != 1 THEN
      FOREACH fragment IN ARRAY fragments LOOP
        std_name := make_standard_name(fragment);
        IF std_name != '' THEN
          PERFORM getorcreate_country(std_name, country_code);
        END IF;
      END LOOP;
    END IF;
  END LOOP;
END;
$$;
|
||||
|
||||
|
||||
-- Compute the token ids for all values of the hstore `src`.
--
-- Each value is handed to make_keywords(TEXT), which creates missing
-- tokens. The ids are appended to the result in the order they are
-- produced, skipping ids that are already present.
--
-- This overload used to repeat the whole body of make_keywords(TEXT) inside
-- its loop. Delegating keeps the two overloads from drifting apart while
-- producing the same ids in the same order.
--
-- Returns an empty array when `src` has no entries.
CREATE OR REPLACE FUNCTION make_keywords(src HSTORE)
  RETURNS INTEGER[]
  AS $$
DECLARE
  result INTEGER[];
  name_value TEXT;
  token_id INTEGER;
BEGIN
  result := '{}'::INTEGER[];

  FOR name_value IN SELECT e.value FROM each(src) e LOOP
    -- make_keywords(TEXT) never returns NULL, so FOREACH is safe here.
    FOREACH token_id IN ARRAY make_keywords(name_value) LOOP
      IF NOT (ARRAY[token_id] <@ result) THEN
        result := result || token_id;
      END IF;
    END LOOP;
  END LOOP;

  RETURN result;
END;
$$
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
-- Compute the token ids for a single name, creating missing tokens.
--
-- The result contains, without duplicates and in this order:
--   1. the name token (getorcreate_name_id) of the complete standard name,
--   2. the word token (getorcreate_word_id) of the complete standard name,
--   3. the word tokens of its blank-separated words,
--   4. the word tokens of the pieces between ',', ';', '(' and ')' when
--      the name splits into more than one piece,
--   5. for names ending in '市', the name token of the name without it.
-- Word tokens for which getorcreate_word_id() returns NULL are left out.
CREATE OR REPLACE FUNCTION make_keywords(src TEXT)
  RETURNS INTEGER[]
  LANGUAGE plpgsql
  AS $$
DECLARE
  result INTEGER[];
  std_name TEXT;
  token_id INTEGER;
  parts TEXT[];
  part TEXT;
BEGIN
  -- (1) Nothing can be in the result yet, so the id is always added.
  std_name := make_standard_name(src);
  result := ARRAY[getorcreate_name_id(std_name, src)];

  -- (2)
  token_id := getorcreate_word_id(std_name);
  IF token_id IS NOT NULL AND NOT (ARRAY[token_id] <@ result) THEN
    result := result || token_id;
  END IF;

  -- (3)
  parts := string_to_array(std_name, ' ');
  IF array_upper(parts, 1) IS NOT NULL THEN
    FOREACH part IN ARRAY parts LOOP
      IF part != '' THEN
        token_id := getorcreate_word_id(part);
        IF token_id IS NOT NULL AND NOT (ARRAY[token_id] <@ result) THEN
          result := result || token_id;
        END IF;
      END IF;
    END LOOP;
  END IF;

  -- (4)
  parts := regexp_split_to_array(src, E'[,;()]');
  IF array_upper(parts, 1) != 1 THEN
    FOREACH part IN ARRAY parts LOOP
      std_name := make_standard_name(part);
      IF std_name != '' THEN
        token_id := getorcreate_word_id(std_name);
        IF token_id IS NOT NULL AND NOT (ARRAY[token_id] <@ result) THEN
          result := result || token_id;
        END IF;
      END IF;
    END LOOP;
  END IF;

  -- (5)
  std_name := regexp_replace(src, '市$', '');
  IF std_name != src THEN
    std_name := make_standard_name(std_name);
    IF std_name != '' THEN
      token_id := getorcreate_name_id(std_name, src);
      IF NOT (ARRAY[token_id] <@ result) THEN
        result := result || token_id;
      END IF;
    END IF;
  END IF;

  RETURN result;
END;
$$;
|
||||
|
||||
|
||||
-- Compute the search terms of a POI from its parent and its address tags.
--
-- Output:
--   name_vector         initial_name_vector, extended by a house number
--                       token and/or an addr:place name token where the
--                       parent does not cover the address (see below)
--   nameaddress_vector  terms from address tags not covered by the parent,
--                       followed by the parent's name and address terms
--
-- Side effect: for address tags that resolve to a place which is not yet
-- known for the parent, a row is added to place_addressline for
-- obj_place_id.
CREATE OR REPLACE FUNCTION create_poi_search_terms(obj_place_id BIGINT,
                                                   in_partition SMALLINT,
                                                   parent_place_id BIGINT,
                                                   address HSTORE,
                                                   country TEXT,
                                                   housenumber TEXT,
                                                   initial_name_vector INTEGER[],
                                                   geometry GEOMETRY,
                                                   OUT name_vector INTEGER[],
                                                   OUT nameaddress_vector INTEGER[])
  LANGUAGE plpgsql
  AS $$
DECLARE
  parent_name_vector INTEGER[];
  parent_address_vector INTEGER[];
  addr_place_ids INTEGER[];

  addr_item RECORD;
  parent_address_place_ids BIGINT[];
  filtered_address HSTORE;
BEGIN
  name_vector := initial_name_vector;
  nameaddress_vector := '{}'::INTEGER[];

  SELECT s.name_vector, s.nameaddress_vector
    INTO parent_name_vector, parent_address_vector
    FROM search_name s
   WHERE s.place_id = parent_place_id;

  -- Find all address tags that don't appear in the parent search names.
  SELECT hstore(array_agg(ARRAY[k, v])) INTO filtered_address
    FROM (SELECT skeys(address) as k, svals(address) as v) a
   WHERE not addr_ids_from_name(v) && parent_address_vector
         AND k not in ('country', 'street', 'place', 'postcode',
                       'housenumber', 'streetnumber', 'conscriptionnumber');

  -- Compute all search terms from the addr: tags.
  IF filtered_address IS NOT NULL THEN
    FOR addr_item IN
      SELECT * FROM
        get_places_for_addr_tags(in_partition, geometry, filtered_address, country)
    LOOP
      IF addr_item.place_id is null THEN
        -- No place found for the tag: only its keywords are used.
        nameaddress_vector := array_merge(nameaddress_vector,
                                          addr_item.keywords);
      ELSE
        -- Fetched lazily, on the first tag that resolves to a place.
        -- NOTE(review): parent_place_id in array_agg() looks like it
        -- resolves to the function parameter; the INSERT below suggests
        -- address_place_id may have been intended - confirm.
        IF parent_address_place_ids is null THEN
          SELECT array_agg(parent_place_id) INTO parent_address_place_ids
            FROM place_addressline
           WHERE place_id = parent_place_id;
        END IF;

        IF not parent_address_place_ids @> ARRAY[addr_item.place_id] THEN
          nameaddress_vector := array_merge(nameaddress_vector,
                                            addr_item.keywords);

          INSERT INTO place_addressline (place_id, address_place_id, fromarea,
                                         isaddress, distance, cached_rank_address)
            VALUES (obj_place_id, addr_item.place_id, not addr_item.isguess,
                    true, addr_item.distance, addr_item.rank_address);
        END IF;
      END IF;
    END LOOP;
  END IF;

  -- Check if the parent covers all address terms.
  -- If not, create a search name entry with the house number as the name.
  -- This is unusual for the search_name table but prevents the place from
  -- being returned when we only search for the street/place.
  IF housenumber is not null and not nameaddress_vector <@ parent_address_vector THEN
    name_vector := array_merge(name_vector,
                               ARRAY[getorcreate_housenumber_id(make_standard_name(housenumber))]);
  END IF;

  IF not address ? 'street' and address ? 'place' THEN
    addr_place_ids := addr_ids_from_name(address->'place');
    IF not addr_place_ids <@ parent_name_vector THEN
      -- Make sure addr:place terms are always searchable.
      nameaddress_vector := array_merge(nameaddress_vector, addr_place_ids);
      -- If there is a house number, also add the place name as a name,
      -- so we can search it by the usual housenumber+place algorithms.
      IF housenumber is not null THEN
        name_vector := array_merge(name_vector,
                                   ARRAY[getorcreate_name_id(make_standard_name(address->'place'))]);
      END IF;
    END IF;
  END IF;

  -- Cheating here by not recomputing all terms but simply using the ones
  -- from the parent object: first its names, then its address terms.
  nameaddress_vector := array_merge(
                          array_merge(nameaddress_vector, parent_name_vector),
                          parent_address_vector);
END;
$$;
|
||||
@@ -63,54 +63,36 @@ END
|
||||
$$
|
||||
LANGUAGE plpgsql STABLE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION get_places_for_addr_tags(in_partition SMALLINT,
|
||||
feature GEOMETRY,
|
||||
address HSTORE, country TEXT)
|
||||
RETURNS SETOF nearfeaturecentr
|
||||
|
||||
CREATE OR REPLACE FUNCTION get_address_place(in_partition SMALLINT, feature GEOMETRY,
|
||||
from_rank SMALLINT, to_rank SMALLINT,
|
||||
extent FLOAT, tokens INT[])
|
||||
RETURNS nearfeaturecentr
|
||||
AS $$
|
||||
DECLARE
|
||||
r nearfeaturecentr%rowtype;
|
||||
item RECORD;
|
||||
BEGIN
|
||||
FOR item IN
|
||||
SELECT (get_addr_tag_rank(key, country)).*, key, name FROM
|
||||
(SELECT skeys(address) as key, svals(address) as name) x
|
||||
LOOP
|
||||
IF item.from_rank is null THEN
|
||||
CONTINUE;
|
||||
END IF;
|
||||
|
||||
{% for partition in db.partitions %}
|
||||
IF in_partition = {{ partition }} THEN
|
||||
SELECT place_id, keywords, rank_address, rank_search,
|
||||
min(ST_Distance(feature, centroid)) as distance,
|
||||
isguess, postcode, centroid INTO r
|
||||
IF in_partition = {{ partition }} THEN
|
||||
SELECT place_id, keywords, rank_address, rank_search,
|
||||
min(ST_Distance(feature, centroid)) as distance,
|
||||
isguess, postcode, centroid INTO r
|
||||
FROM location_area_large_{{ partition }}
|
||||
WHERE geometry && ST_Expand(feature, item.extent)
|
||||
AND rank_address between item.from_rank and item.to_rank
|
||||
AND word_ids_from_name(item.name) && keywords
|
||||
WHERE geometry && ST_Expand(feature, extent)
|
||||
AND rank_address between from_rank and to_rank
|
||||
AND tokens && keywords
|
||||
GROUP BY place_id, keywords, rank_address, rank_search, isguess, postcode, centroid
|
||||
ORDER BY bool_or(ST_Intersects(geometry, feature)), distance LIMIT 1;
|
||||
IF r.place_id is null THEN
|
||||
-- If we cannot find a place for the term, just return the
|
||||
-- search term for the given name. That ensures that the address
|
||||
-- element can still be searched for, even though it will not be
|
||||
-- displayed.
|
||||
RETURN NEXT ROW(null, addr_ids_from_name(item.name), null, null,
|
||||
null, null, null, null)::nearfeaturecentr;
|
||||
ELSE
|
||||
RETURN NEXT r;
|
||||
END IF;
|
||||
CONTINUE;
|
||||
END IF;
|
||||
RETURN r;
|
||||
END IF;
|
||||
{% endfor %}
|
||||
|
||||
RAISE EXCEPTION 'Unknown partition %', in_partition;
|
||||
END LOOP;
|
||||
RAISE EXCEPTION 'Unknown partition %', in_partition;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql STABLE;
|
||||
|
||||
|
||||
create or replace function deleteLocationArea(in_partition INTEGER, in_place_id BIGINT, in_rank_search INTEGER) RETURNS BOOLEAN AS $$
|
||||
DECLARE
|
||||
BEGIN
|
||||
|
||||
@@ -99,6 +99,27 @@ BEGIN
|
||||
DELETE FROM place where osm_type = NEW.osm_type and osm_id = NEW.osm_id and class = NEW.class;
|
||||
END IF;
|
||||
|
||||
-- Pure postcodes are never queried from placex so we don't add them.
|
||||
-- location_postcodes is filled from the place table directly.
|
||||
IF NEW.class = 'place' AND NEW.type = 'postcode' THEN
|
||||
-- Remove old placex entry.
|
||||
DELETE FROM placex where osm_type = NEW.osm_type and osm_id = NEW.osm_id;
|
||||
|
||||
IF existing.osm_type IS NOT NULL THEN
|
||||
IF coalesce(existing.address, ''::hstore) != coalesce(NEW.address, ''::hstore)
|
||||
OR existing.geometry::text != NEW.geometry::text
|
||||
THEN
|
||||
|
||||
update place set address = NEW.address, geometry = NEW.geometry
|
||||
where osm_type = NEW.osm_type and osm_id = NEW.osm_id and class = NEW.class and type = NEW.type;
|
||||
END IF;
|
||||
|
||||
RETURN NULL;
|
||||
END IF;
|
||||
|
||||
RETURN NEW;
|
||||
END IF;
|
||||
|
||||
{% if debug %}RAISE WARNING 'Existing: %',existing.osm_id;{% endif %}
|
||||
{% if debug %}RAISE WARNING 'Existing PlaceX: %',existingplacex.place_id;{% endif %}
|
||||
|
||||
@@ -201,7 +222,7 @@ BEGIN
|
||||
where osm_type = NEW.osm_type and osm_id = NEW.osm_id and class = NEW.class and type = NEW.type;
|
||||
|
||||
|
||||
IF NEW.class in ('place','boundary') AND NEW.type in ('postcode','postal_code') THEN
|
||||
IF NEW.class = 'boundary' AND NEW.type = 'postal_code' THEN
|
||||
IF NEW.address is NULL OR NOT NEW.address ? 'postcode' THEN
|
||||
-- postcode was deleted, no longer retain in placex
|
||||
DELETE FROM placex where place_id = existingplacex.place_id;
|
||||
|
||||
@@ -1,5 +1,84 @@
|
||||
-- Trigger functions for the placex table.
|
||||
|
||||
-- Retrieve the data needed by the indexer for updating the place.
|
||||
--
|
||||
-- Parameters:
-- p placex row of the place to prepare
--
-- Return parameters:
|
||||
-- name list of names (copied unchanged from p.name)
|
||||
-- address list of address tags, either from the object or a surrounding
|
||||
-- building; the '_unlisted_place' key is always removed
|
||||
-- country_feature If the place is a country feature, this contains the
|
||||
-- country code, otherwise it is null.
|
||||
CREATE OR REPLACE FUNCTION placex_prepare_update(p placex,
|
||||
OUT name HSTORE,
|
||||
OUT address HSTORE,
|
||||
OUT country_feature VARCHAR)
|
||||
AS $$
|
||||
BEGIN
|
||||
-- For POI nodes, check if the address should be derived from a surrounding
|
||||
-- building. The place's own address is used as-is unless the place is a
-- node with rank_search >= 30 and no address tags at all.
|
||||
IF p.rank_search < 30 OR p.osm_type != 'N' OR p.address is not null THEN
|
||||
address := p.address;
|
||||
ELSE
|
||||
-- Take the address of one rank-30 polygon that covers the centroid and
-- carries a housenumber, street or place tag. An '_inherited' marker key
-- (with empty value) is added to the copied tags. There is no ORDER BY,
-- so with several candidates the chosen one is arbitrary.
-- The additional && condition works around the misguided query
|
||||
-- planner of postgis 3.0.
|
||||
SELECT placex.address || hstore('_inherited', '') INTO address
|
||||
FROM placex
|
||||
WHERE ST_Covers(geometry, p.centroid)
|
||||
and geometry && p.centroid
|
||||
and placex.address is not null
|
||||
and (placex.address ? 'housenumber' or placex.address ? 'street' or placex.address ? 'place')
|
||||
and rank_search = 30 AND ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon')
|
||||
LIMIT 1;
|
||||
END IF;
|
||||
|
||||
-- Drop the '_unlisted_place' key from whatever address was selected above.
address := address - '_unlisted_place'::TEXT;
|
||||
name := p.name;
|
||||
|
||||
-- Only relations tagged boundary=administrative with admin_level 2 are
-- reported as country features.
country_feature := CASE WHEN p.admin_level = 2
|
||||
and p.class = 'boundary' and p.type = 'administrative'
|
||||
and p.osm_type = 'R'
|
||||
THEN p.country_code
|
||||
ELSE null
|
||||
END;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql STABLE;
|
||||
|
||||
|
||||
-- Find the street of an object via an associatedStreet relation.
--
-- \param poi_osm_type  OSM type of the object; it is lower-cased and
--                      prefixed to the id to match the relation member list.
-- \param poi_osm_id    OSM id of the object.
--
-- \returns Place ID of the first matching street way in placex or NULL
--          if the object is in no such relation or no street qualifies.
CREATE OR REPLACE FUNCTION find_associated_street(poi_osm_type CHAR(1),
|
||||
poi_osm_id BIGINT)
|
||||
RETURNS BIGINT
|
||||
AS $$
|
||||
DECLARE
|
||||
location RECORD;
|
||||
parent RECORD;
|
||||
BEGIN
|
||||
-- Go through all relations that contain the object as a member and have
-- 'associatedStreet' in their tags array.
FOR location IN
|
||||
SELECT members FROM planet_osm_rels
|
||||
WHERE parts @> ARRAY[poi_osm_id]
|
||||
and members @> ARRAY[lower(poi_osm_type) || poi_osm_id]
|
||||
and tags @> ARRAY['associatedStreet']
|
||||
LOOP
|
||||
-- The members array is read in pairs: members[i] is the member reference
-- and members[i+1] its role.
FOR i IN 1..array_upper(location.members, 1) BY 2 LOOP
|
||||
IF location.members[i+1] = 'street' THEN
|
||||
-- The first character of the member reference is skipped and the rest
-- is used as OSM id. Only ways that have a name and a search rank of
-- 26 or 27 are accepted as street.
FOR parent IN
|
||||
SELECT place_id from placex
|
||||
WHERE osm_type = 'W' and osm_id = substring(location.members[i],2)::bigint
|
||||
and name is not null
|
||||
and rank_search between 26 and 27
|
||||
LOOP
|
||||
-- The first hit wins.
RETURN parent.place_id;
|
||||
END LOOP;
|
||||
END IF;
|
||||
END LOOP;
|
||||
END LOOP;
|
||||
|
||||
RETURN NULL;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql STABLE;
|
||||
|
||||
|
||||
-- Find the parent road of a POI.
|
||||
--
|
||||
-- \returns Place ID of parent object or NULL if none
|
||||
@@ -10,118 +89,89 @@ CREATE OR REPLACE FUNCTION find_parent_for_poi(poi_osm_type CHAR(1),
|
||||
poi_osm_id BIGINT,
|
||||
poi_partition SMALLINT,
|
||||
bbox GEOMETRY,
|
||||
addr_street TEXT,
|
||||
addr_place TEXT,
|
||||
fallback BOOL = true)
|
||||
addr_street INTEGER[],
|
||||
addr_place INTEGER[],
|
||||
is_place_addr BOOLEAN)
|
||||
RETURNS BIGINT
|
||||
AS $$
|
||||
DECLARE
|
||||
parent_place_id BIGINT DEFAULT NULL;
|
||||
location RECORD;
|
||||
parent RECORD;
|
||||
BEGIN
|
||||
{% if debug %}RAISE WARNING 'finding street for % %', poi_osm_type, poi_osm_id;{% endif %}
|
||||
{% if debug %}RAISE WARNING 'finding street for % %', poi_osm_type, poi_osm_id;{% endif %}
|
||||
|
||||
-- Is this object part of an associatedStreet relation?
|
||||
FOR location IN
|
||||
SELECT members FROM planet_osm_rels
|
||||
WHERE parts @> ARRAY[poi_osm_id]
|
||||
and members @> ARRAY[lower(poi_osm_type) || poi_osm_id]
|
||||
and tags @> ARRAY['associatedStreet']
|
||||
LOOP
|
||||
FOR i IN 1..array_upper(location.members, 1) BY 2 LOOP
|
||||
IF location.members[i+1] = 'street' THEN
|
||||
FOR parent IN
|
||||
SELECT place_id from placex
|
||||
WHERE osm_type = 'W' and osm_id = substring(location.members[i],2)::bigint
|
||||
and name is not null
|
||||
and rank_search between 26 and 27
|
||||
LOOP
|
||||
RETURN parent.place_id;
|
||||
END LOOP;
|
||||
END IF;
|
||||
END LOOP;
|
||||
END LOOP;
|
||||
-- Is this object part of an associatedStreet relation?
|
||||
parent_place_id := find_associated_street(poi_osm_type, poi_osm_id);
|
||||
|
||||
IF parent_place_id is null THEN
|
||||
parent_place_id := find_parent_for_address(addr_street, addr_place,
|
||||
poi_partition, bbox);
|
||||
IF parent_place_id is not null THEN
|
||||
RETURN parent_place_id;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
IF poi_osm_type = 'N' THEN
|
||||
-- Is this node part of an interpolation?
|
||||
FOR parent IN
|
||||
SELECT q.parent_place_id
|
||||
FROM location_property_osmline q, planet_osm_ways x
|
||||
WHERE q.linegeo && bbox and x.id = q.osm_id
|
||||
and poi_osm_id = any(x.nodes)
|
||||
LIMIT 1
|
||||
LOOP
|
||||
{% if debug %}RAISE WARNING 'Get parent from interpolation: %', parent.parent_place_id;{% endif %}
|
||||
RETURN parent.parent_place_id;
|
||||
END LOOP;
|
||||
IF parent_place_id is null and poi_osm_type = 'N' THEN
|
||||
-- Is this node part of an interpolation?
|
||||
FOR location IN
|
||||
SELECT q.parent_place_id
|
||||
FROM location_property_osmline q, planet_osm_ways x
|
||||
WHERE q.linegeo && bbox and x.id = q.osm_id
|
||||
and poi_osm_id = any(x.nodes)
|
||||
LIMIT 1
|
||||
LOOP
|
||||
{% if debug %}RAISE WARNING 'Get parent from interpolation: %', location.parent_place_id;{% endif %}
|
||||
RETURN location.parent_place_id;
|
||||
END LOOP;
|
||||
|
||||
-- Is this node part of any other way?
|
||||
FOR location IN
|
||||
SELECT p.place_id, p.osm_id, p.rank_search, p.address,
|
||||
coalesce(p.centroid, ST_Centroid(p.geometry)) as centroid
|
||||
FROM placex p, planet_osm_ways w
|
||||
WHERE p.osm_type = 'W' and p.rank_search >= 26
|
||||
and p.geometry && bbox
|
||||
and w.id = p.osm_id and poi_osm_id = any(w.nodes)
|
||||
LOOP
|
||||
{% if debug %}RAISE WARNING 'Node is part of way % ', location.osm_id;{% endif %}
|
||||
FOR location IN
|
||||
SELECT p.place_id, p.osm_id, p.rank_search, p.address,
|
||||
coalesce(p.centroid, ST_Centroid(p.geometry)) as centroid
|
||||
FROM placex p, planet_osm_ways w
|
||||
WHERE p.osm_type = 'W' and p.rank_search >= 26
|
||||
and p.geometry && bbox
|
||||
and w.id = p.osm_id and poi_osm_id = any(w.nodes)
|
||||
LOOP
|
||||
{% if debug %}RAISE WARNING 'Node is part of way % ', location.osm_id;{% endif %}
|
||||
|
||||
-- Way IS a road then we are on it - that must be our road
|
||||
IF location.rank_search < 28 THEN
|
||||
{% if debug %}RAISE WARNING 'node in way that is a street %',location;{% endif %}
|
||||
return location.place_id;
|
||||
END IF;
|
||||
|
||||
SELECT find_parent_for_poi('W', location.osm_id, poi_partition,
|
||||
location.centroid,
|
||||
location.address->'street',
|
||||
location.address->'place',
|
||||
false)
|
||||
INTO parent_place_id;
|
||||
IF parent_place_id is not null THEN
|
||||
RETURN parent_place_id;
|
||||
END IF;
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
IF fallback THEN
|
||||
IF addr_street is null and addr_place is not null THEN
|
||||
-- The address is attached to a place we don't know.
|
||||
-- Instead simply use the containing area with the largest rank.
|
||||
FOR location IN
|
||||
SELECT place_id FROM placex
|
||||
WHERE bbox && geometry AND _ST_Covers(geometry, ST_Centroid(bbox))
|
||||
AND rank_address between 5 and 25
|
||||
ORDER BY rank_address desc
|
||||
LOOP
|
||||
RETURN location.place_id;
|
||||
END LOOP;
|
||||
ELSEIF ST_Area(bbox) < 0.005 THEN
|
||||
-- for smaller features get the nearest road
|
||||
SELECT getNearestRoadPlaceId(poi_partition, bbox) INTO parent_place_id;
|
||||
{% if debug %}RAISE WARNING 'Checked for nearest way (%)', parent_place_id;{% endif %}
|
||||
ELSE
|
||||
-- for larger features simply find the area with the largest rank that
|
||||
-- contains the bbox, only use addressable features
|
||||
FOR location IN
|
||||
SELECT place_id FROM placex
|
||||
WHERE bbox && geometry AND _ST_Covers(geometry, ST_Centroid(bbox))
|
||||
AND rank_address between 5 and 25
|
||||
ORDER BY rank_address desc
|
||||
LOOP
|
||||
RETURN location.place_id;
|
||||
END LOOP;
|
||||
-- Way IS a road then we are on it - that must be our road
|
||||
IF location.rank_search < 28 THEN
|
||||
{% if debug %}RAISE WARNING 'node in way that is a street %',location;{% endif %}
|
||||
RETURN location.place_id;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
RETURN parent_place_id;
|
||||
parent_place_id := find_associated_street('W', location.osm_id);
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
IF parent_place_id is NULL THEN
|
||||
IF is_place_addr THEN
|
||||
-- The address is attached to a place we don't know.
|
||||
-- Instead simply use the containing area with the largest rank.
|
||||
FOR location IN
|
||||
SELECT place_id FROM placex
|
||||
WHERE bbox && geometry AND _ST_Covers(geometry, ST_Centroid(bbox))
|
||||
AND rank_address between 5 and 25
|
||||
ORDER BY rank_address desc
|
||||
LOOP
|
||||
RETURN location.place_id;
|
||||
END LOOP;
|
||||
ELSEIF ST_Area(bbox) < 0.005 THEN
|
||||
-- for smaller features get the nearest road
|
||||
SELECT getNearestRoadPlaceId(poi_partition, bbox) INTO parent_place_id;
|
||||
{% if debug %}RAISE WARNING 'Checked for nearest way (%)', parent_place_id;{% endif %}
|
||||
ELSE
|
||||
-- for larger features simply find the area with the largest rank that
|
||||
-- contains the bbox, only use addressable features
|
||||
FOR location IN
|
||||
SELECT place_id FROM placex
|
||||
WHERE bbox && geometry AND _ST_Covers(geometry, ST_Centroid(bbox))
|
||||
AND rank_address between 5 and 25
|
||||
ORDER BY rank_address desc
|
||||
LOOP
|
||||
RETURN location.place_id;
|
||||
END LOOP;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
RETURN parent_place_id;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql STABLE;
|
||||
@@ -169,7 +219,7 @@ BEGIN
|
||||
END IF;
|
||||
|
||||
IF bnd.name ? 'name' THEN
|
||||
bnd_name := make_standard_name(bnd.name->'name');
|
||||
bnd_name := lower(bnd.name->'name');
|
||||
IF bnd_name = '' THEN
|
||||
bnd_name := NULL;
|
||||
END IF;
|
||||
@@ -180,12 +230,14 @@ BEGIN
|
||||
IF bnd.extratags ? 'place' and bnd_name is not null THEN
|
||||
FOR linked_placex IN
|
||||
SELECT * FROM placex
|
||||
WHERE make_standard_name(name->'name') = bnd_name
|
||||
WHERE (position(lower(name->'name') in bnd_name) > 0
|
||||
OR position(bnd_name in lower(name->'name')) > 0)
|
||||
AND placex.class = 'place' AND placex.type = bnd.extratags->'place'
|
||||
AND placex.osm_type = 'N'
|
||||
AND placex.linked_place_id is null
|
||||
AND placex.rank_search < 26 -- needed to select the right index
|
||||
AND _st_covers(bnd.geometry, placex.geometry)
|
||||
AND placex.type != 'postcode'
|
||||
AND ST_Covers(bnd.geometry, placex.geometry)
|
||||
LOOP
|
||||
{% if debug %}RAISE WARNING 'Found type-matching place node %', linked_placex.osm_id;{% endif %}
|
||||
RETURN linked_placex;
|
||||
@@ -201,7 +253,7 @@ BEGIN
|
||||
AND placex.linked_place_id is null
|
||||
AND placex.rank_search < 26
|
||||
AND _st_covers(bnd.geometry, placex.geometry)
|
||||
ORDER BY make_standard_name(name->'name') = bnd_name desc
|
||||
ORDER BY lower(name->'name') = bnd_name desc
|
||||
LOOP
|
||||
{% if debug %}RAISE WARNING 'Found wikidata-matching place node %', linked_placex.osm_id;{% endif %}
|
||||
RETURN linked_placex;
|
||||
@@ -213,7 +265,7 @@ BEGIN
|
||||
{% if debug %}RAISE WARNING 'Looking for nodes with matching names';{% endif %}
|
||||
FOR linked_placex IN
|
||||
SELECT placex.* from placex
|
||||
WHERE make_standard_name(name->'name') = bnd_name
|
||||
WHERE lower(name->'name') = bnd_name
|
||||
AND ((bnd.rank_address > 0
|
||||
and bnd.rank_address = (compute_place_rank(placex.country_code,
|
||||
'N', placex.class,
|
||||
@@ -221,9 +273,11 @@ BEGIN
|
||||
false, placex.postcode)).address_rank)
|
||||
OR (bnd.rank_address = 0 and placex.rank_search = bnd.rank_search))
|
||||
AND placex.osm_type = 'N'
|
||||
AND placex.class = 'place'
|
||||
AND placex.linked_place_id is null
|
||||
AND placex.rank_search < 26 -- needed to select the right index
|
||||
AND _st_covers(bnd.geometry, placex.geometry)
|
||||
AND placex.type != 'postcode'
|
||||
AND ST_Covers(bnd.geometry, placex.geometry)
|
||||
LOOP
|
||||
{% if debug %}RAISE WARNING 'Found matching place node %', linked_placex.osm_id;{% endif %}
|
||||
RETURN linked_placex;
|
||||
@@ -236,6 +290,101 @@ $$
|
||||
LANGUAGE plpgsql STABLE;
|
||||
|
||||
|
||||
-- Compute the search terms for a POI that has a known parent place.
--
-- \param obj_place_id     Place ID of the POI; used as place_id for rows
--                         inserted into place_addressline.
-- \param in_partition     Partition, handed on to get_address_place().
-- \param parent_place_id  Place ID of the parent whose search_name entry
--                         is reused.
-- \param is_place_addr    If true, the addr:place tokens are added as well.
-- \param country          Country, handed on to get_addr_tag_rank().
-- \param token_info       Tokenizer data; only read through the token_*
--                         accessor functions.
-- \param geometry         Geometry, handed on to get_address_place().
--
-- Return parameters:
--   name_vector         name tokens, possibly extended by house number and
--                       addr:place tokens
--   nameaddress_vector  address tokens including the name and address
--                       tokens of the parent
--
-- The function may insert rows into place_addressline and is therefore
-- not declared STABLE.
CREATE OR REPLACE FUNCTION create_poi_search_terms(obj_place_id BIGINT,
|
||||
in_partition SMALLINT,
|
||||
parent_place_id BIGINT,
|
||||
is_place_addr BOOLEAN,
|
||||
country TEXT,
|
||||
token_info JSONB,
|
||||
geometry GEOMETRY,
|
||||
OUT name_vector INTEGER[],
|
||||
OUT nameaddress_vector INTEGER[])
|
||||
AS $$
|
||||
DECLARE
|
||||
parent_name_vector INTEGER[];
|
||||
parent_address_vector INTEGER[];
|
||||
addr_place_ids INTEGER[];
|
||||
hnr_vector INTEGER[];
|
||||
|
||||
addr_item RECORD;
|
||||
addr_place RECORD;
|
||||
-- Loaded lazily inside the loop below, at most once per call.
parent_address_place_ids BIGINT[];
|
||||
BEGIN
|
||||
nameaddress_vector := '{}'::INTEGER[];
|
||||
|
||||
-- Get the search terms of the parent.
SELECT s.name_vector, s.nameaddress_vector
|
||||
INTO parent_name_vector, parent_address_vector
|
||||
FROM search_name s
|
||||
WHERE s.place_id = parent_place_id;
|
||||
|
||||
-- Handle all address tags whose search tokens are not already contained
-- in the address terms of the parent.
-- NOTE(review): if the parent has no search_name row, parent_address_vector
-- is NULL and the filter below presumably evaluates to NULL for every row,
-- so that no address tag is processed - confirm that this is intended.
FOR addr_item IN
|
||||
SELECT (get_addr_tag_rank(key, country)).*, match_tokens, search_tokens
|
||||
FROM token_get_address_tokens(token_info)
|
||||
WHERE not search_tokens <@ parent_address_vector
|
||||
LOOP
|
||||
addr_place := get_address_place(in_partition, geometry,
|
||||
addr_item.from_rank, addr_item.to_rank,
|
||||
addr_item.extent, addr_item.match_tokens);
|
||||
|
||||
IF addr_place is null THEN
|
||||
-- No place found in OSM that matches. Make it at least searchable.
|
||||
nameaddress_vector := array_merge(nameaddress_vector, addr_item.search_tokens);
|
||||
ELSE
|
||||
IF parent_address_place_ids is null THEN
|
||||
-- NOTE(review): parent_place_id appears to resolve to the function
-- parameter here, not to a column of place_addressline (the INSERT
-- below names the column address_place_id). If so, the array only
-- holds copies of the parent's own id - verify whether
-- address_place_id was meant.
SELECT array_agg(parent_place_id) INTO parent_address_place_ids
|
||||
FROM place_addressline
|
||||
WHERE place_id = parent_place_id;
|
||||
END IF;
|
||||
|
||||
-- If the parent already lists the place in place_addressline, then we
|
||||
-- are done. Otherwise, add a place_addressline entry for the POI itself.
|
||||
IF not parent_address_place_ids @> ARRAY[addr_place.place_id] THEN
|
||||
nameaddress_vector := array_merge(nameaddress_vector, addr_place.keywords);
|
||||
|
||||
INSERT INTO place_addressline (place_id, address_place_id, fromarea,
|
||||
isaddress, distance, cached_rank_address)
|
||||
VALUES (obj_place_id, addr_place.place_id, not addr_place.isguess,
|
||||
true, addr_place.distance, addr_place.rank_address);
|
||||
END IF;
|
||||
END IF;
|
||||
END LOOP;
|
||||
|
||||
name_vector := token_get_name_search_tokens(token_info);
|
||||
|
||||
-- Check if the parent covers all address terms.
|
||||
-- If not, create a search name entry with the house number as the name.
|
||||
-- This is unusual for the search_name table but prevents the place
|
||||
-- from being returned when we only search for the street/place.
|
||||
|
||||
hnr_vector := token_get_housenumber_search_tokens(token_info);
|
||||
|
||||
IF hnr_vector is not null and not nameaddress_vector <@ parent_address_vector THEN
|
||||
name_vector := array_merge(name_vector, hnr_vector);
|
||||
END IF;
|
||||
|
||||
IF is_place_addr THEN
|
||||
addr_place_ids := token_addr_place_search_tokens(token_info);
|
||||
IF not addr_place_ids <@ parent_name_vector THEN
|
||||
-- make sure addr:place terms are always searchable
|
||||
nameaddress_vector := array_merge(nameaddress_vector, addr_place_ids);
|
||||
-- If there is a housenumber, also add the place name as a name,
|
||||
-- so we can search it by the usual housenumber+place algorithms.
|
||||
IF hnr_vector is not null THEN
|
||||
name_vector := array_merge(name_vector, addr_place_ids);
|
||||
END IF;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
-- Cheating here by not recomputing all terms but simply using the ones
|
||||
-- from the parent object.
|
||||
nameaddress_vector := array_merge(nameaddress_vector, parent_name_vector);
|
||||
nameaddress_vector := array_merge(nameaddress_vector, parent_address_vector);
|
||||
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
-- Insert address of a place into the place_addressline table.
|
||||
--
|
||||
-- \param obj_place_id Place_id of the place to compute the address for.
|
||||
@@ -256,7 +405,7 @@ LANGUAGE plpgsql STABLE;
|
||||
CREATE OR REPLACE FUNCTION insert_addresslines(obj_place_id BIGINT,
|
||||
partition SMALLINT,
|
||||
maxrank SMALLINT,
|
||||
address HSTORE,
|
||||
token_info JSONB,
|
||||
geometry GEOMETRY,
|
||||
country TEXT,
|
||||
OUT parent_place_id BIGINT,
|
||||
@@ -271,7 +420,8 @@ DECLARE
|
||||
current_node_area GEOMETRY := NULL;
|
||||
|
||||
parent_place_rank INT := 0;
|
||||
addr_place_ids BIGINT[];
|
||||
addr_place_ids BIGINT[] := '{}'::int[];
|
||||
new_address_vector INT[];
|
||||
|
||||
location RECORD;
|
||||
BEGIN
|
||||
@@ -281,16 +431,21 @@ BEGIN
|
||||
address_havelevel := array_fill(false, ARRAY[maxrank]);
|
||||
|
||||
FOR location IN
|
||||
SELECT * FROM get_places_for_addr_tags(partition, geometry,
|
||||
address, country)
|
||||
ORDER BY rank_address, distance, isguess desc
|
||||
SELECT (get_address_place(partition, geometry, from_rank, to_rank,
|
||||
extent, match_tokens)).*, search_tokens
|
||||
FROM (SELECT (get_addr_tag_rank(key, country)).*, match_tokens, search_tokens
|
||||
FROM token_get_address_tokens(token_info)) x
|
||||
ORDER BY rank_address, distance, isguess desc
|
||||
LOOP
|
||||
{% if not db.reverse_only %}
|
||||
nameaddress_vector := array_merge(nameaddress_vector,
|
||||
location.keywords::int[]);
|
||||
{% endif %}
|
||||
IF location.place_id is null THEN
|
||||
{% if not db.reverse_only %}
|
||||
nameaddress_vector := array_merge(nameaddress_vector, location.search_tokens);
|
||||
{% endif %}
|
||||
ELSE
|
||||
{% if not db.reverse_only %}
|
||||
nameaddress_vector := array_merge(nameaddress_vector, location.keywords::INTEGER[]);
|
||||
{% endif %}
|
||||
|
||||
IF location.place_id is not null THEN
|
||||
location_isaddress := not address_havelevel[location.rank_address];
|
||||
IF not address_havelevel[location.rank_address] THEN
|
||||
address_havelevel[location.rank_address] := true;
|
||||
@@ -305,13 +460,13 @@ BEGIN
|
||||
VALUES (obj_place_id, location.place_id, not location.isguess,
|
||||
true, location.distance, location.rank_address);
|
||||
|
||||
addr_place_ids := array_append(addr_place_ids, location.place_id);
|
||||
addr_place_ids := addr_place_ids || location.place_id;
|
||||
END IF;
|
||||
END LOOP;
|
||||
|
||||
FOR location IN
|
||||
SELECT * FROM getNearFeatures(partition, geometry, maxrank)
|
||||
WHERE addr_place_ids is null or not addr_place_ids @> ARRAY[place_id]
|
||||
WHERE not addr_place_ids @> ARRAY[place_id]
|
||||
ORDER BY rank_address, isguess asc,
|
||||
distance *
|
||||
CASE WHEN rank_address = 16 AND rank_search = 15 THEN 0.2
|
||||
@@ -393,10 +548,11 @@ BEGIN
|
||||
NEW.place_id := nextval('seq_place');
|
||||
NEW.indexed_status := 1; --STATUS_NEW
|
||||
|
||||
NEW.country_code := lower(get_country_code(NEW.geometry));
|
||||
NEW.centroid := ST_PointOnSurface(NEW.geometry);
|
||||
NEW.country_code := lower(get_country_code(NEW.centroid));
|
||||
|
||||
NEW.partition := get_partition(NEW.country_code);
|
||||
NEW.geometry_sector := geometry_sector(NEW.partition, NEW.geometry);
|
||||
NEW.geometry_sector := geometry_sector(NEW.partition, NEW.centroid);
|
||||
|
||||
IF NEW.osm_type = 'X' THEN
|
||||
-- E'X'ternal records should already be in the right format so do nothing
|
||||
@@ -518,8 +674,8 @@ DECLARE
|
||||
parent_address_level SMALLINT;
|
||||
place_address_level SMALLINT;
|
||||
|
||||
addr_street TEXT;
|
||||
addr_place TEXT;
|
||||
addr_street INTEGER[];
|
||||
addr_place INTEGER[];
|
||||
|
||||
max_rank SMALLINT;
|
||||
|
||||
@@ -527,12 +683,11 @@ DECLARE
|
||||
nameaddress_vector INTEGER[];
|
||||
addr_nameaddress_vector INTEGER[];
|
||||
|
||||
inherited_address HSTORE;
|
||||
|
||||
linked_node_id BIGINT;
|
||||
linked_importance FLOAT;
|
||||
linked_wikipedia TEXT;
|
||||
|
||||
is_place_address BOOLEAN;
|
||||
result BOOLEAN;
|
||||
BEGIN
|
||||
-- deferred delete
|
||||
@@ -562,9 +717,9 @@ BEGIN
|
||||
-- update not necessary for osmline, cause linked_place_id does not exist
|
||||
|
||||
NEW.extratags := NEW.extratags - 'linked_place'::TEXT;
|
||||
NEW.address := NEW.address - '_unlisted_place'::TEXT;
|
||||
|
||||
IF NEW.linked_place_id is not null THEN
|
||||
NEW.token_info := null;
|
||||
{% if debug %}RAISE WARNING 'place already linked to %', NEW.linked_place_id;{% endif %}
|
||||
RETURN NEW;
|
||||
END IF;
|
||||
@@ -575,13 +730,34 @@ BEGIN
|
||||
-- imported as place=postcode. That's why relations are allowed to pass here.
|
||||
-- This can go away in a couple of versions.
|
||||
IF NEW.class = 'place' and NEW.type = 'postcode' and NEW.osm_type != 'R' THEN
|
||||
NEW.token_info := null;
|
||||
RETURN NEW;
|
||||
END IF;
|
||||
|
||||
-- Speed up searches - just use the centroid of the feature
|
||||
-- cheaper but less accurate
|
||||
-- Compute a preliminary centroid.
|
||||
NEW.centroid := ST_PointOnSurface(NEW.geometry);
|
||||
{% if debug %}RAISE WARNING 'Computing preliminary centroid at %',ST_AsText(NEW.centroid);{% endif %}
|
||||
|
||||
-- recalculate country and partition
|
||||
IF NEW.rank_search = 4 AND NEW.address is not NULL AND NEW.address ? 'country' THEN
|
||||
-- for countries, believe the mapped country code,
|
||||
-- so that we remain in the right partition if the boundaries
|
||||
-- suddenly expand.
|
||||
NEW.country_code := lower(NEW.address->'country');
|
||||
NEW.partition := get_partition(lower(NEW.country_code));
|
||||
IF NEW.partition = 0 THEN
|
||||
NEW.country_code := lower(get_country_code(NEW.centroid));
|
||||
NEW.partition := get_partition(NEW.country_code);
|
||||
END IF;
|
||||
ELSE
|
||||
IF NEW.rank_search >= 4 THEN
|
||||
NEW.country_code := lower(get_country_code(NEW.centroid));
|
||||
ELSE
|
||||
NEW.country_code := NULL;
|
||||
END IF;
|
||||
NEW.partition := get_partition(NEW.country_code);
|
||||
END IF;
|
||||
{% if debug %}RAISE WARNING 'Country updated: "%"', NEW.country_code;{% endif %}
|
||||
|
||||
|
||||
-- recompute the ranks, they might change when linking changes
|
||||
SELECT * INTO NEW.rank_search, NEW.rank_address
|
||||
@@ -661,54 +837,12 @@ BEGIN
|
||||
parent_address_level := 3;
|
||||
END IF;
|
||||
|
||||
{% if debug %}RAISE WARNING 'Copy over address tags';{% endif %}
|
||||
-- housenumber is a computed field, so start with an empty value
|
||||
NEW.housenumber := NULL;
|
||||
IF NEW.address is not NULL THEN
|
||||
IF NEW.address ? 'conscriptionnumber' THEN
|
||||
IF NEW.address ? 'streetnumber' THEN
|
||||
NEW.housenumber := (NEW.address->'conscriptionnumber') || '/' || (NEW.address->'streetnumber');
|
||||
ELSE
|
||||
NEW.housenumber := NEW.address->'conscriptionnumber';
|
||||
END IF;
|
||||
ELSEIF NEW.address ? 'streetnumber' THEN
|
||||
NEW.housenumber := NEW.address->'streetnumber';
|
||||
ELSEIF NEW.address ? 'housenumber' THEN
|
||||
NEW.housenumber := NEW.address->'housenumber';
|
||||
END IF;
|
||||
NEW.housenumber := create_housenumber_id(NEW.housenumber);
|
||||
|
||||
addr_street := NEW.address->'street';
|
||||
addr_place := NEW.address->'place';
|
||||
|
||||
IF NEW.address ? 'postcode' and NEW.address->'postcode' not similar to '%(:|,|;)%' THEN
|
||||
i := getorcreate_postcode_id(NEW.address->'postcode');
|
||||
END IF;
|
||||
END IF;
|
||||
NEW.housenumber := token_normalized_housenumber(NEW.token_info);
|
||||
addr_street := token_addr_street_match_tokens(NEW.token_info);
|
||||
addr_place := token_addr_place_match_tokens(NEW.token_info);
|
||||
|
||||
NEW.postcode := null;
|
||||
|
||||
-- recalculate country and partition
|
||||
IF NEW.rank_search = 4 AND NEW.address is not NULL AND NEW.address ? 'country' THEN
|
||||
-- for countries, believe the mapped country code,
|
||||
-- so that we remain in the right partition if the boundaries
|
||||
-- suddenly expand.
|
||||
NEW.country_code := lower(NEW.address->'country');
|
||||
NEW.partition := get_partition(lower(NEW.country_code));
|
||||
IF NEW.partition = 0 THEN
|
||||
NEW.country_code := lower(get_country_code(NEW.centroid));
|
||||
NEW.partition := get_partition(NEW.country_code);
|
||||
END IF;
|
||||
ELSE
|
||||
IF NEW.rank_search >= 4 THEN
|
||||
NEW.country_code := lower(get_country_code(NEW.centroid));
|
||||
ELSE
|
||||
NEW.country_code := NULL;
|
||||
END IF;
|
||||
NEW.partition := get_partition(NEW.country_code);
|
||||
END IF;
|
||||
{% if debug %}RAISE WARNING 'Country updated: "%"', NEW.country_code;{% endif %}
|
||||
|
||||
-- waterway ways are linked when they are part of a relation and have the same class/type
|
||||
IF NEW.osm_type = 'R' and NEW.class = 'waterway' THEN
|
||||
FOR relation_members IN select members from planet_osm_rels r where r.id = NEW.osm_id and r.parts != array[]::bigint[]
|
||||
@@ -745,33 +879,14 @@ BEGIN
|
||||
|
||||
{% if debug %}RAISE WARNING 'finding street for % %', NEW.osm_type, NEW.osm_id;{% endif %}
|
||||
NEW.parent_place_id := null;
|
||||
|
||||
-- if we have a POI and there is no address information,
|
||||
-- see if we can get it from a surrounding building
|
||||
inherited_address := ''::HSTORE;
|
||||
IF NEW.osm_type = 'N' AND addr_street IS NULL AND addr_place IS NULL
|
||||
AND NEW.housenumber IS NULL THEN
|
||||
FOR location IN
|
||||
-- The additional && condition works around the misguided query
|
||||
-- planner of postgis 3.0.
|
||||
SELECT address from placex where ST_Covers(geometry, NEW.centroid)
|
||||
and geometry && NEW.centroid
|
||||
and (address ? 'housenumber' or address ? 'street' or address ? 'place')
|
||||
and rank_search > 28 AND ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon')
|
||||
limit 1
|
||||
LOOP
|
||||
NEW.housenumber := location.address->'housenumber';
|
||||
addr_street := location.address->'street';
|
||||
addr_place := location.address->'place';
|
||||
inherited_address := location.address;
|
||||
END LOOP;
|
||||
END IF;
|
||||
is_place_address := coalesce(not NEW.address ? 'street' and NEW.address ? 'place', FALSE);
|
||||
|
||||
-- We have to find our parent road.
|
||||
NEW.parent_place_id := find_parent_for_poi(NEW.osm_type, NEW.osm_id,
|
||||
NEW.partition,
|
||||
ST_Envelope(NEW.geometry),
|
||||
addr_street, addr_place);
|
||||
addr_street, addr_place,
|
||||
is_place_address);
|
||||
|
||||
-- If we found the road take a shortcut here.
|
||||
-- Otherwise fall back to the full address getting method below.
|
||||
@@ -781,12 +896,12 @@ BEGIN
|
||||
SELECT p.country_code, p.postcode, p.name FROM placex p
|
||||
WHERE p.place_id = NEW.parent_place_id INTO location;
|
||||
|
||||
IF addr_street is null and addr_place is not null THEN
|
||||
IF is_place_address THEN
|
||||
-- Check if the addr:place tag is part of the parent name
|
||||
SELECT count(*) INTO i
|
||||
FROM svals(location.name) AS pname WHERE pname = addr_place;
|
||||
FROM svals(location.name) AS pname WHERE pname = NEW.address->'place';
|
||||
IF i = 0 THEN
|
||||
NEW.address = NEW.address || hstore('_unlisted_place', addr_place);
|
||||
NEW.address = NEW.address || hstore('_unlisted_place', NEW.address->'place');
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
@@ -794,39 +909,21 @@ BEGIN
|
||||
{% if debug %}RAISE WARNING 'Got parent details from search name';{% endif %}
|
||||
|
||||
-- determine postcode
|
||||
IF NEW.address is not null AND NEW.address ? 'postcode' THEN
|
||||
NEW.postcode = upper(trim(NEW.address->'postcode'));
|
||||
ELSE
|
||||
NEW.postcode := location.postcode;
|
||||
END IF;
|
||||
IF NEW.postcode is null THEN
|
||||
NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry);
|
||||
END IF;
|
||||
NEW.postcode := coalesce(token_normalized_postcode(NEW.address->'postcode'),
|
||||
location.postcode,
|
||||
get_nearest_postcode(NEW.country_code, NEW.centroid));
|
||||
|
||||
IF NEW.name is not NULL THEN
|
||||
NEW.name := add_default_place_name(NEW.country_code, NEW.name);
|
||||
name_vector := make_keywords(NEW.name);
|
||||
|
||||
IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN
|
||||
result := add_location(NEW.place_id, NEW.country_code, NEW.partition,
|
||||
name_vector, NEW.rank_search, NEW.rank_address,
|
||||
upper(trim(NEW.address->'postcode')), NEW.geometry,
|
||||
NEW.centroid);
|
||||
{% if debug %}RAISE WARNING 'Place added to location table';{% endif %}
|
||||
END IF;
|
||||
|
||||
END IF;
|
||||
|
||||
{% if not db.reverse_only %}
|
||||
IF array_length(name_vector, 1) is not NULL
|
||||
OR inherited_address is not NULL OR NEW.address is not NULL
|
||||
THEN
|
||||
IF NEW.name is not NULL OR NEW.address is not NULL THEN
|
||||
SELECT * INTO name_vector, nameaddress_vector
|
||||
FROM create_poi_search_terms(NEW.place_id,
|
||||
NEW.partition, NEW.parent_place_id,
|
||||
inherited_address || NEW.address,
|
||||
NEW.country_code, NEW.housenumber,
|
||||
name_vector, NEW.centroid);
|
||||
is_place_address, NEW.country_code,
|
||||
NEW.token_info, NEW.centroid);
|
||||
|
||||
IF array_length(name_vector, 1) is not NULL THEN
|
||||
INSERT INTO search_name (place_id, search_rank, address_rank,
|
||||
@@ -840,6 +937,17 @@ BEGIN
|
||||
END IF;
|
||||
{% endif %}
|
||||
|
||||
NEW.token_info := token_strip_info(NEW.token_info);
|
||||
-- If the address was inherited from a surrounding building,
|
||||
-- do not add it permanently to the table.
|
||||
IF NEW.address ? '_inherited' THEN
|
||||
IF NEW.address ? '_unlisted_place' THEN
|
||||
NEW.address := hstore('_unlisted_place', NEW.address->'_unlisted_place');
|
||||
ELSE
|
||||
NEW.address := null;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
RETURN NEW;
|
||||
END IF;
|
||||
|
||||
@@ -910,19 +1018,11 @@ BEGIN
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
-- Initialise the name vector using our name
|
||||
NEW.name := add_default_place_name(NEW.country_code, NEW.name);
|
||||
name_vector := make_keywords(NEW.name);
|
||||
|
||||
-- make sure all names are in the word table
|
||||
IF NEW.admin_level = 2
|
||||
AND NEW.class = 'boundary' AND NEW.type = 'administrative'
|
||||
AND NEW.country_code IS NOT NULL AND NEW.osm_type = 'R'
|
||||
THEN
|
||||
PERFORM create_country(NEW.name, lower(NEW.country_code));
|
||||
{% if debug %}RAISE WARNING 'Country names updated';{% endif %}
|
||||
|
||||
-- Also update the list of country names. Adding an additional sanity
|
||||
-- Update the list of country names. Adding an additional sanity
|
||||
-- check here: make sure the country does overlap with the area where
|
||||
-- we expect it to be as per static country grid.
|
||||
FOR location IN
|
||||
@@ -955,29 +1055,28 @@ BEGIN
|
||||
ELSEIF NEW.rank_address > 25 THEN
|
||||
max_rank := 25;
|
||||
ELSE
|
||||
max_rank = NEW.rank_address;
|
||||
max_rank := NEW.rank_address;
|
||||
END IF;
|
||||
|
||||
SELECT * FROM insert_addresslines(NEW.place_id, NEW.partition, max_rank,
|
||||
NEW.address, geom, NEW.country_code)
|
||||
NEW.token_info, geom, NEW.country_code)
|
||||
INTO NEW.parent_place_id, NEW.postcode, nameaddress_vector;
|
||||
|
||||
{% if debug %}RAISE WARNING 'RETURN insert_addresslines: %, %, %', NEW.parent_place_id, NEW.postcode, nameaddress_vector;{% endif %}
|
||||
|
||||
IF NEW.address is not null AND NEW.address ? 'postcode'
|
||||
AND NEW.address->'postcode' not similar to '%(,|;)%' THEN
|
||||
NEW.postcode := upper(trim(NEW.address->'postcode'));
|
||||
END IF;
|
||||
|
||||
IF NEW.postcode is null AND NEW.rank_search > 8 THEN
|
||||
NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry);
|
||||
END IF;
|
||||
NEW.postcode := coalesce(token_normalized_postcode(NEW.address->'postcode'),
|
||||
NEW.postcode);
|
||||
|
||||
-- if we have a name add this to the name search table
|
||||
IF NEW.name IS NOT NULL THEN
|
||||
-- Initialise the name vector using our name
|
||||
NEW.name := add_default_place_name(NEW.country_code, NEW.name);
|
||||
name_vector := token_get_name_search_tokens(NEW.token_info);
|
||||
|
||||
IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN
|
||||
result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, upper(trim(NEW.address->'postcode')), NEW.geometry, NEW.centroid);
|
||||
result := add_location(NEW.place_id, NEW.country_code, NEW.partition,
|
||||
name_vector, NEW.rank_search, NEW.rank_address,
|
||||
NEW.postcode, NEW.geometry, NEW.centroid);
|
||||
{% if debug %}RAISE WARNING 'added to location (full)';{% endif %}
|
||||
END IF;
|
||||
|
||||
@@ -986,8 +1085,11 @@ BEGIN
|
||||
{% if debug %}RAISE WARNING 'insert into road location table (full)';{% endif %}
|
||||
END IF;
|
||||
|
||||
result := insertSearchName(NEW.partition, NEW.place_id, name_vector,
|
||||
NEW.rank_search, NEW.rank_address, NEW.geometry);
|
||||
IF NEW.rank_address between 16 and 27 THEN
|
||||
result := insertSearchName(NEW.partition, NEW.place_id,
|
||||
token_get_name_match_tokens(NEW.token_info),
|
||||
NEW.rank_search, NEW.rank_address, NEW.geometry);
|
||||
END IF;
|
||||
{% if debug %}RAISE WARNING 'added to search name (full)';{% endif %}
|
||||
|
||||
{% if not db.reverse_only %}
|
||||
@@ -998,11 +1100,15 @@ BEGIN
|
||||
NEW.importance, NEW.country_code, name_vector,
|
||||
nameaddress_vector, NEW.centroid);
|
||||
{% endif %}
|
||||
END IF;
|
||||
|
||||
IF NEW.postcode is null AND NEW.rank_search > 8 THEN
|
||||
NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry);
|
||||
END IF;
|
||||
|
||||
{% if debug %}RAISE WARNING 'place update % % finsihed.', NEW.osm_type, NEW.osm_id;{% endif %}
|
||||
|
||||
NEW.token_info := token_strip_info(NEW.token_info);
|
||||
RETURN NEW;
|
||||
END;
|
||||
$$
|
||||
|
||||
@@ -221,37 +221,30 @@ LANGUAGE plpgsql STABLE;
|
||||
-- \param centroid Location of the address.
|
||||
--
|
||||
-- \return Place ID of the parent if one was found, NULL otherwise.
|
||||
CREATE OR REPLACE FUNCTION find_parent_for_address(street TEXT, place TEXT,
|
||||
CREATE OR REPLACE FUNCTION find_parent_for_address(street INTEGER[], place INTEGER[],
|
||||
partition SMALLINT,
|
||||
centroid GEOMETRY)
|
||||
RETURNS BIGINT
|
||||
AS $$
|
||||
DECLARE
|
||||
parent_place_id BIGINT;
|
||||
word_ids INTEGER[];
|
||||
BEGIN
|
||||
IF street is not null THEN
|
||||
-- Check for addr:street attributes
|
||||
-- Note that addr:street links can only be indexed, once the street itself is indexed
|
||||
word_ids := word_ids_from_name(street);
|
||||
IF word_ids is not null THEN
|
||||
parent_place_id := getNearestNamedRoadPlaceId(partition, centroid, word_ids);
|
||||
IF parent_place_id is not null THEN
|
||||
{% if debug %}RAISE WARNING 'Get parent form addr:street: %', parent_place_id;{% endif %}
|
||||
RETURN parent_place_id;
|
||||
END IF;
|
||||
parent_place_id := getNearestNamedRoadPlaceId(partition, centroid, street);
|
||||
IF parent_place_id is not null THEN
|
||||
{% if debug %}RAISE WARNING 'Get parent form addr:street: %', parent_place_id;{% endif %}
|
||||
RETURN parent_place_id;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
-- Check for addr:place attributes.
|
||||
IF place is not null THEN
|
||||
word_ids := word_ids_from_name(place);
|
||||
IF word_ids is not null THEN
|
||||
parent_place_id := getNearestNamedPlacePlaceId(partition, centroid, word_ids);
|
||||
IF parent_place_id is not null THEN
|
||||
{% if debug %}RAISE WARNING 'Get parent form addr:place: %', parent_place_id;{% endif %}
|
||||
RETURN parent_place_id;
|
||||
END IF;
|
||||
parent_place_id := getNearestNamedPlacePlaceId(partition, centroid, place);
|
||||
IF parent_place_id is not null THEN
|
||||
{% if debug %}RAISE WARNING 'Get parent form addr:place: %', parent_place_id;{% endif %}
|
||||
RETURN parent_place_id;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
|
||||
@@ -1,71 +1,62 @@
|
||||
-- Indices used only during search and update.
|
||||
-- These indices are created only after the indexing process is done.
|
||||
|
||||
CREATE INDEX {{sql.if_index_not_exists}} idx_word_word_id
|
||||
ON word USING BTREE (word_id) {{db.tablespace.search_index}};
|
||||
|
||||
CREATE INDEX {{sql.if_index_not_exists}} idx_place_addressline_address_place_id
|
||||
CREATE INDEX IF NOT EXISTS idx_place_addressline_address_place_id
|
||||
ON place_addressline USING BTREE (address_place_id) {{db.tablespace.search_index}};
|
||||
|
||||
CREATE INDEX {{sql.if_index_not_exists}} idx_placex_rank_search
|
||||
CREATE INDEX IF NOT EXISTS idx_placex_rank_search
|
||||
ON placex USING BTREE (rank_search) {{db.tablespace.search_index}};
|
||||
|
||||
CREATE INDEX {{sql.if_index_not_exists}} idx_placex_rank_address
|
||||
CREATE INDEX IF NOT EXISTS idx_placex_rank_address
|
||||
ON placex USING BTREE (rank_address) {{db.tablespace.search_index}};
|
||||
|
||||
CREATE INDEX {{sql.if_index_not_exists}} idx_placex_parent_place_id
|
||||
CREATE INDEX IF NOT EXISTS idx_placex_parent_place_id
|
||||
ON placex USING BTREE (parent_place_id) {{db.tablespace.search_index}}
|
||||
WHERE parent_place_id IS NOT NULL;
|
||||
|
||||
CREATE INDEX {{sql.if_index_not_exists}} idx_placex_geometry_reverse_lookupPolygon
|
||||
CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPolygon
|
||||
ON placex USING gist (geometry) {{db.tablespace.search_index}}
|
||||
WHERE St_GeometryType(geometry) in ('ST_Polygon', 'ST_MultiPolygon')
|
||||
AND rank_address between 4 and 25 AND type != 'postcode'
|
||||
AND name is not null AND indexed_status = 0 AND linked_place_id is null;
|
||||
|
||||
CREATE INDEX {{sql.if_index_not_exists}} idx_placex_geometry_reverse_placeNode
|
||||
ON placex USING gist (geometry) {{db.tablespace.search_index}}
|
||||
WHERE osm_type = 'N' AND rank_search between 5 and 25
|
||||
AND class = 'place' AND type != 'postcode'
|
||||
AND name is not null AND indexed_status = 0 AND linked_place_id is null;
|
||||
|
||||
CREATE INDEX {{sql.if_index_not_exists}} idx_osmline_parent_place_id
|
||||
CREATE INDEX IF NOT EXISTS idx_osmline_parent_place_id
|
||||
ON location_property_osmline USING BTREE (parent_place_id) {{db.tablespace.search_index}};
|
||||
|
||||
CREATE INDEX {{sql.if_index_not_exists}} idx_osmline_parent_osm_id
|
||||
CREATE INDEX IF NOT EXISTS idx_osmline_parent_osm_id
|
||||
ON location_property_osmline USING BTREE (osm_id) {{db.tablespace.search_index}};
|
||||
|
||||
CREATE INDEX {{sql.if_index_not_exists}} idx_postcode_postcode
|
||||
CREATE INDEX IF NOT EXISTS idx_postcode_postcode
|
||||
ON location_postcode USING BTREE (postcode) {{db.tablespace.search_index}};
|
||||
|
||||
-- Indices only needed for updating.
|
||||
|
||||
{% if not drop %}
|
||||
CREATE INDEX {{sql.if_index_not_exists}} idx_placex_pendingsector
|
||||
CREATE INDEX IF NOT EXISTS idx_placex_pendingsector
|
||||
ON placex USING BTREE (rank_address,geometry_sector) {{db.tablespace.address_index}}
|
||||
WHERE indexed_status > 0;
|
||||
|
||||
CREATE INDEX {{sql.if_index_not_exists}} idx_location_area_country_place_id
|
||||
CREATE INDEX IF NOT EXISTS idx_location_area_country_place_id
|
||||
ON location_area_country USING BTREE (place_id) {{db.tablespace.address_index}};
|
||||
|
||||
CREATE UNIQUE INDEX {{sql.if_index_not_exists}} idx_place_osm_unique
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_place_osm_unique
|
||||
ON place USING btree(osm_id, osm_type, class, type) {{db.tablespace.address_index}};
|
||||
{% endif %}
|
||||
|
||||
-- Indices only needed for search.
|
||||
|
||||
{% if 'search_name' in db.tables %}
|
||||
CREATE INDEX {{sql.if_index_not_exists}} idx_search_name_nameaddress_vector
|
||||
CREATE INDEX IF NOT EXISTS idx_search_name_nameaddress_vector
|
||||
ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) {{db.tablespace.search_index}};
|
||||
CREATE INDEX {{sql.if_index_not_exists}} idx_search_name_name_vector
|
||||
CREATE INDEX IF NOT EXISTS idx_search_name_name_vector
|
||||
ON search_name USING GIN (name_vector) WITH (fastupdate = off) {{db.tablespace.search_index}};
|
||||
CREATE INDEX {{sql.if_index_not_exists}} idx_search_name_centroid
|
||||
CREATE INDEX IF NOT EXISTS idx_search_name_centroid
|
||||
ON search_name USING GIST (centroid) {{db.tablespace.search_index}};
|
||||
|
||||
{% if postgres.has_index_non_key_column %}
|
||||
CREATE INDEX {{sql.if_index_not_exists}} idx_placex_housenumber
|
||||
CREATE INDEX IF NOT EXISTS idx_placex_housenumber
|
||||
ON placex USING btree (parent_place_id) INCLUDE (housenumber) WHERE housenumber is not null;
|
||||
CREATE INDEX {{sql.if_index_not_exists}} idx_osmline_parent_osm_id_with_hnr
|
||||
CREATE INDEX IF NOT EXISTS idx_osmline_parent_osm_id_with_hnr
|
||||
ON location_property_osmline USING btree(parent_place_id) INCLUDE (startnumber, endnumber);
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
|
||||
@@ -43,22 +43,6 @@ CREATE TABLE nominatim_properties (
|
||||
);
|
||||
GRANT SELECT ON TABLE nominatim_properties TO "{{config.DATABASE_WEBUSER}}";
|
||||
|
||||
drop table IF EXISTS word;
|
||||
CREATE TABLE word (
|
||||
word_id INTEGER,
|
||||
word_token text,
|
||||
word text,
|
||||
class text,
|
||||
type text,
|
||||
country_code varchar(2),
|
||||
search_name_count INTEGER,
|
||||
operator TEXT
|
||||
) {{db.tablespace.search_data}};
|
||||
CREATE INDEX idx_word_word_token on word USING BTREE (word_token) {{db.tablespace.search_index}};
|
||||
GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}" ;
|
||||
DROP SEQUENCE IF EXISTS seq_word;
|
||||
CREATE SEQUENCE seq_word start 1;
|
||||
|
||||
drop table IF EXISTS location_area CASCADE;
|
||||
CREATE TABLE location_area (
|
||||
place_id BIGINT,
|
||||
@@ -84,22 +68,6 @@ CREATE TABLE location_area_country (
|
||||
CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry) {{db.tablespace.address_index}};
|
||||
|
||||
|
||||
drop table IF EXISTS location_property CASCADE;
|
||||
CREATE TABLE location_property (
|
||||
place_id BIGINT,
|
||||
parent_place_id BIGINT,
|
||||
partition SMALLINT,
|
||||
housenumber TEXT,
|
||||
postcode TEXT,
|
||||
centroid GEOMETRY(Point, 4326)
|
||||
);
|
||||
|
||||
CREATE TABLE location_property_aux () INHERITS (location_property);
|
||||
CREATE INDEX idx_location_property_aux_place_id ON location_property_aux USING BTREE (place_id);
|
||||
CREATE INDEX idx_location_property_aux_parent_place_id ON location_property_aux USING BTREE (parent_place_id);
|
||||
CREATE INDEX idx_location_property_aux_housenumber_parent_place_id ON location_property_aux USING BTREE (parent_place_id, housenumber);
|
||||
GRANT SELECT ON location_property_aux TO "{{config.DATABASE_WEBUSER}}";
|
||||
|
||||
CREATE TABLE location_property_tiger (
|
||||
place_id BIGINT,
|
||||
parent_place_id BIGINT,
|
||||
@@ -125,6 +93,7 @@ CREATE TABLE location_property_osmline (
|
||||
linegeo GEOMETRY,
|
||||
interpolationtype TEXT,
|
||||
address HSTORE,
|
||||
token_info JSONB, -- custom column for tokenizer use only
|
||||
postcode TEXT,
|
||||
country_code VARCHAR(2)
|
||||
){{db.tablespace.search_data}};
|
||||
@@ -174,6 +143,7 @@ CREATE TABLE placex (
|
||||
indexed_status SMALLINT,
|
||||
LIKE place INCLUDING CONSTRAINTS,
|
||||
wikipedia TEXT, -- calculated wikipedia article name (language:title)
|
||||
token_info JSONB, -- custom column for tokenizer use only
|
||||
country_code varchar(2),
|
||||
housenumber TEXT,
|
||||
postcode TEXT,
|
||||
@@ -184,14 +154,20 @@ CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id) {{db.tabl
|
||||
CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) {{db.tablespace.address_index}} WHERE linked_place_id IS NOT NULL;
|
||||
CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}};
|
||||
CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) {{db.tablespace.search_index}};
|
||||
CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name')) {{db.tablespace.address_index}} WHERE osm_type='N' and rank_search < 26;
|
||||
CREATE INDEX idx_placex_geometry_buildings ON placex
|
||||
USING GIST (geometry) {{db.tablespace.search_index}}
|
||||
WHERE address is not null and rank_search = 30
|
||||
and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon');
|
||||
CREATE INDEX idx_placex_geometry_placenode ON placex
|
||||
USING GIST (geometry) {{db.tablespace.search_index}}
|
||||
WHERE osm_type = 'N' and rank_search < 26
|
||||
and class = 'place' and type != 'postcode' and linked_place_id is null;
|
||||
CREATE INDEX idx_placex_wikidata on placex USING BTREE ((extratags -> 'wikidata')) {{db.tablespace.address_index}} WHERE extratags ? 'wikidata' and class = 'place' and osm_type = 'N' and rank_search < 26;
|
||||
|
||||
DROP SEQUENCE IF EXISTS seq_place;
|
||||
CREATE SEQUENCE seq_place start 1;
|
||||
GRANT SELECT on placex to "{{config.DATABASE_WEBUSER}}" ;
|
||||
GRANT SELECT on place_addressline to "{{config.DATABASE_WEBUSER}}" ;
|
||||
GRANT SELECT ON seq_word to "{{config.DATABASE_WEBUSER}}" ;
|
||||
GRANT SELECT ON planet_osm_ways to "{{config.DATABASE_WEBUSER}}" ;
|
||||
GRANT SELECT ON planet_osm_rels to "{{config.DATABASE_WEBUSER}}" ;
|
||||
GRANT SELECT on location_area to "{{config.DATABASE_WEBUSER}}" ;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
--index only on parent_place_id
|
||||
CREATE INDEX {{sql.if_index_not_exists}} idx_location_property_tiger_place_id_imp
|
||||
CREATE INDEX IF NOT EXISTS idx_location_property_tiger_parent_place_id_imp
|
||||
ON location_property_tiger_import (parent_place_id) {{db.tablespace.aux_index}};
|
||||
CREATE UNIQUE INDEX {{sql.if_index_not_exists}} idx_location_property_tiger_place_id_imp
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_location_property_tiger_place_id_imp
|
||||
ON location_property_tiger_import (place_id) {{db.tablespace.aux_index}};
|
||||
|
||||
GRANT SELECT ON location_property_tiger_import TO "{{config.DATABASE_WEBUSER}}";
|
||||
@@ -12,4 +12,6 @@ ALTER TABLE location_property_tiger_import RENAME TO location_property_tiger;
|
||||
ALTER INDEX IF EXISTS idx_location_property_tiger_parent_place_id_imp RENAME TO idx_location_property_tiger_housenumber_parent_place_id;
|
||||
ALTER INDEX IF EXISTS idx_location_property_tiger_place_id_imp RENAME TO idx_location_property_tiger_place_id;
|
||||
|
||||
DROP FUNCTION tiger_line_import (linegeo geometry, in_startnumber integer, in_endnumber integer, interpolationtype text, in_street text, in_isin text, in_postcode text);
|
||||
DROP FUNCTION tiger_line_import (linegeo GEOMETRY, in_startnumber INTEGER,
|
||||
in_endnumber INTEGER, interpolationtype TEXT,
|
||||
token_info JSONB, in_postcode TEXT);
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
DROP TABLE IF EXISTS location_property_tiger_import;
|
||||
CREATE TABLE location_property_tiger_import (linegeo GEOMETRY, place_id BIGINT, partition INTEGER, parent_place_id BIGINT, startnumber INTEGER, endnumber INTEGER, interpolationtype TEXT, postcode TEXT);
|
||||
|
||||
CREATE OR REPLACE FUNCTION tiger_line_import(linegeo GEOMETRY, in_startnumber INTEGER,
|
||||
in_endnumber INTEGER, interpolationtype TEXT,
|
||||
in_street TEXT, in_isin TEXT, in_postcode TEXT) RETURNS INTEGER
|
||||
CREATE OR REPLACE FUNCTION tiger_line_import(linegeo GEOMETRY, in_startnumber INTEGER,
|
||||
in_endnumber INTEGER, interpolationtype TEXT,
|
||||
token_info JSONB, in_postcode TEXT) RETURNS INTEGER
|
||||
AS $$
|
||||
DECLARE
|
||||
startnumber INTEGER;
|
||||
@@ -27,13 +27,13 @@ BEGIN
|
||||
END IF;
|
||||
|
||||
IF startnumber < 0 THEN
|
||||
RAISE WARNING 'Negative house number range (% to %) on %, %', startnumber, endnumber, in_street, in_isin;
|
||||
RAISE WARNING 'Negative house number range (% to %)', startnumber, endnumber;
|
||||
RETURN 0;
|
||||
END IF;
|
||||
|
||||
numberrange := endnumber - startnumber;
|
||||
|
||||
IF (interpolationtype = 'odd' AND startnumber%2 = 0) OR (interpolationtype = 'even' AND startnumber%2 = 1) THEN
|
||||
IF (interpolationtype = 'odd' AND startnumber % 2 = 0) OR (interpolationtype = 'even' AND startnumber % 2 = 1) THEN
|
||||
startnumber := startnumber + 1;
|
||||
stepsize := 2;
|
||||
ELSE
|
||||
@@ -45,10 +45,10 @@ BEGIN
|
||||
END IF;
|
||||
|
||||
-- Filter out really broken tiger data
|
||||
IF numberrange > 0 AND (numberrange::float/stepsize::float > 500)
|
||||
IF numberrange > 0 AND (numberrange::float/stepsize::float > 500)
|
||||
AND ST_length(linegeo)/(numberrange::float/stepsize::float) < 0.000001 THEN
|
||||
RAISE WARNING 'Road too short for number range % to % on %, % (%)',startnumber,endnumber,in_street,in_isin,
|
||||
ST_length(linegeo)/(numberrange::float/stepsize::float);
|
||||
RAISE WARNING 'Road too short for number range % to % (%)',startnumber,endnumber,
|
||||
ST_length(linegeo)/(numberrange::float/stepsize::float);
|
||||
RETURN 0;
|
||||
END IF;
|
||||
|
||||
@@ -56,7 +56,7 @@ BEGIN
|
||||
out_partition := get_partition('us');
|
||||
out_parent_place_id := null;
|
||||
|
||||
address_street_word_ids := word_ids_from_name(in_street);
|
||||
address_street_word_ids := token_addr_street_match_tokens(token_info);
|
||||
IF address_street_word_ids IS NOT NULL THEN
|
||||
out_parent_place_id := getNearestNamedRoadPlaceId(out_partition, place_centroid,
|
||||
address_street_word_ids);
|
||||
|
||||
29
lib-sql/tokenizer/icu_tokenizer_tables.sql
Normal file
29
lib-sql/tokenizer/icu_tokenizer_tables.sql
Normal file
@@ -0,0 +1,29 @@
|
||||
-- Token table of the tokenizer. Any existing table is dropped and
-- recreated empty.
DROP TABLE IF EXISTS word;
CREATE TABLE word (
  word_id     INTEGER,
  word_token  TEXT NOT NULL,
  type        TEXT NOT NULL,
  word        TEXT,
  info        JSONB
) {{db.tablespace.search_data}};

-- Lookup of rows by their token string.
CREATE INDEX idx_word_word_token
  ON word USING BTREE (word_token) {{db.tablespace.search_index}};

-- Used when updating country names from the boundary relation.
CREATE INDEX idx_word_country_names
  ON word USING BTREE (word) {{db.tablespace.address_index}}
  WHERE type = 'C';

-- Used when inserting new postcodes on updates.
CREATE INDEX idx_word_postcodes
  ON word USING BTREE (word) {{db.tablespace.address_index}}
  WHERE type = 'P';

-- Used when inserting full words.
CREATE INDEX idx_word_full_word
  ON word USING BTREE (word) {{db.tablespace.address_index}}
  WHERE type = 'W';

-- The web user only ever reads the table.
GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}";

-- Sequence for word ids; recreated so numbering starts at 1 again.
DROP SEQUENCE IF EXISTS seq_word;
CREATE SEQUENCE seq_word START 1;
GRANT SELECT ON seq_word TO "{{config.DATABASE_WEBUSER}}";
|
||||
157
lib-sql/tokenizer/legacy_icu_tokenizer.sql
Normal file
157
lib-sql/tokenizer/legacy_icu_tokenizer.sql
Normal file
@@ -0,0 +1,157 @@
|
||||
-- Get tokens used for searching the given place.
--
-- These are the tokens that will be saved in the search_name table.
-- The 'names' entry of the token info is read as text and cast to an
-- integer array. NULL input yields NULL (STRICT).
CREATE OR REPLACE FUNCTION token_get_name_search_tokens(info JSONB)
  RETURNS INTEGER[]
  LANGUAGE SQL IMMUTABLE STRICT
AS $$
  SELECT CAST(info->>'names' AS INTEGER[]);
$$;
|
||||
|
||||
|
||||
-- Get tokens for matching the place name against others.
--
-- This should usually be restricted to full name tokens. This
-- implementation returns the same 'names' entry of the token info that
-- is used for searching, cast to an integer array.
CREATE OR REPLACE FUNCTION token_get_name_match_tokens(info JSONB)
  RETURNS INTEGER[]
  LANGUAGE SQL IMMUTABLE STRICT
AS $$
  SELECT CAST(info->>'names' AS INTEGER[]);
$$;
|
||||
|
||||
|
||||
-- Return the housenumber tokens applicable for the place.
--
-- Reads the 'hnr_tokens' entry of the token info as text and casts it
-- to an integer array. NULL input yields NULL (STRICT).
CREATE OR REPLACE FUNCTION token_get_housenumber_search_tokens(info JSONB)
  RETURNS INTEGER[]
  LANGUAGE SQL IMMUTABLE STRICT
AS $$
  SELECT CAST(info->>'hnr_tokens' AS INTEGER[]);
$$;
|
||||
|
||||
|
||||
-- Return the housenumber in the form that it can be matched during search.
--
-- This is the 'hnr' entry of the token info, returned as text.
CREATE OR REPLACE FUNCTION token_normalized_housenumber(info JSONB)
  RETURNS TEXT
  LANGUAGE SQL IMMUTABLE STRICT
AS $$
  SELECT info ->> 'hnr';
$$;
|
||||
|
||||
|
||||
-- Return the 'street' entry of the token info as an integer array.
-- NULL input yields NULL (STRICT).
CREATE OR REPLACE FUNCTION token_addr_street_match_tokens(info JSONB)
  RETURNS INTEGER[]
  LANGUAGE SQL IMMUTABLE STRICT
AS $$
  SELECT CAST(info->>'street' AS INTEGER[]);
$$;
|
||||
|
||||
|
||||
-- Return the 'place_match' entry of the token info as an integer array.
-- NULL input yields NULL (STRICT).
CREATE OR REPLACE FUNCTION token_addr_place_match_tokens(info JSONB)
  RETURNS INTEGER[]
  LANGUAGE SQL IMMUTABLE STRICT
AS $$
  SELECT CAST(info->>'place_match' AS INTEGER[]);
$$;
|
||||
|
||||
|
||||
-- Return the 'place_search' entry of the token info as an integer array.
-- NULL input yields NULL (STRICT).
CREATE OR REPLACE FUNCTION token_addr_place_search_tokens(info JSONB)
  RETURNS INTEGER[]
  LANGUAGE SQL IMMUTABLE STRICT
AS $$
  SELECT CAST(info->>'place_search' AS INTEGER[]);
$$;
|
||||
|
||||
|
||||
-- Row type returned by token_get_address_tokens(): the key of one address
-- part together with its token arrays for matching and for searching.
-- CASCADE also removes objects that depend on a previous version of the type.
DROP TYPE IF EXISTS token_addresstoken CASCADE;
CREATE TYPE token_addresstoken AS (
  key           TEXT,
  match_tokens  INTEGER[],
  search_tokens INTEGER[]
);
|
||||
|
||||
-- Expand the 'addr' object of the token info into one row per key.
--
-- For every key, element 1 of the value supplies match_tokens and
-- element 0 supplies search_tokens; both are read as text and cast to
-- integer arrays.
CREATE OR REPLACE FUNCTION token_get_address_tokens(info JSONB)
  RETURNS SETOF token_addresstoken
  LANGUAGE SQL IMMUTABLE STRICT
AS $$
  SELECT addr.key,
         CAST(addr.value->>1 AS INTEGER[]) AS match_tokens,
         CAST(addr.value->>0 AS INTEGER[]) AS search_tokens
    FROM jsonb_each(info->'addr') AS addr;
$$;
|
||||
|
||||
|
||||
-- Normalize a postcode: trimmed and upper-cased.
--
-- Values containing a comma or semicolon yield NULL, as does NULL
-- input (STRICT).
CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
  RETURNS TEXT
  LANGUAGE SQL IMMUTABLE STRICT
AS $$
  SELECT CASE WHEN postcode NOT SIMILAR TO '%(,|;)%'
              THEN upper(trim(postcode))
         END;
$$;
|
||||
|
||||
|
||||
-- Return token info that should be saved permanently in the database.
--
-- This implementation keeps nothing: the result is always NULL.
CREATE OR REPLACE FUNCTION token_strip_info(info JSONB)
  RETURNS JSONB
  LANGUAGE SQL IMMUTABLE STRICT
AS $$
  SELECT CAST(NULL AS JSONB);
$$;
|
||||
|
||||
--------------- private functions ----------------------------------------------
|
||||
|
||||
-- Look up or create the tokens for a full name and its partial terms.
--
-- \param norm_term     Name as stored in word.word of the full-word
--                      rows (type 'W').
-- \param lookup_terms  Token strings of the name. When the name is new,
--                      one 'W' row is inserted per entry, all sharing one
--                      word_id. The entries are also split at blanks to
--                      obtain the partial terms.
-- \param[out] full_token      word_id of the full-word rows.
-- \param[out] partial_tokens  word_ids of the partial terms (type 'w')
--                             whose recorded count is below max_word_freq.
CREATE OR REPLACE FUNCTION getorcreate_full_word(norm_term TEXT, lookup_terms TEXT[],
                                                 OUT full_token INT,
                                                 OUT partial_tokens INT[])
  AS $$
DECLARE
  partial_terms TEXT[] = '{}'::TEXT[];
  term TEXT;
  term_id INTEGER;
  term_count INTEGER;
BEGIN
  SELECT min(word_id) INTO full_token
    FROM word WHERE word = norm_term and type = 'W';

  IF full_token IS NULL THEN
    full_token := nextval('seq_word');
    INSERT INTO word (word_id, word_token, type, word, info)
      SELECT full_token, lookup_term, 'W', norm_term,
             json_build_object('count', 0)
        FROM unnest(lookup_terms) as lookup_term;
  END IF;

  -- Collect the distinct blank-separated terms of all lookup terms,
  -- keeping the order of first appearance.
  FOR term IN SELECT unnest(string_to_array(unnest(lookup_terms), ' ')) LOOP
    term := trim(term);
    IF NOT (ARRAY[term] <@ partial_terms) THEN
      partial_terms := partial_terms || term;
    END IF;
  END LOOP;

  partial_tokens := '{}'::INT[];
  FOR term IN SELECT unnest(partial_terms) LOOP
    -- info->>'count' is TEXT. Cast before aggregating so that max()
    -- compares numbers; a text maximum would rank '9' above '10'.
    SELECT min(word_id), max((info->>'count')::INTEGER)
      INTO term_id, term_count
      FROM word WHERE word_token = term and type = 'w';

    IF term_id IS NULL THEN
      term_id := nextval('seq_word');
      term_count := 0;
      INSERT INTO word (word_id, word_token, type, info)
        VALUES (term_id, term, 'w', json_build_object('count', term_count));
    END IF;

    -- Too frequent partial terms are left out of the result.
    IF term_count < {{ max_word_freq }} THEN
      partial_tokens := array_merge(partial_tokens, ARRAY[term_id]);
    END IF;
  END LOOP;
END;
$$
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
-- Return the word id of the housenumber token (type 'H') with the given
-- token string, inserting a new row with a fresh id from seq_word when
-- none exists yet.
CREATE OR REPLACE FUNCTION getorcreate_hnr_id(lookup_term TEXT)
  RETURNS INTEGER
  AS $$
DECLARE
  hnr_id INTEGER;
BEGIN
  SELECT min(word_id)
    INTO hnr_id
    FROM word
   WHERE type = 'H' AND word_token = lookup_term;

  -- Known token: nothing to create.
  IF hnr_id IS NOT NULL THEN
    RETURN hnr_id;
  END IF;

  hnr_id := nextval('seq_word');
  INSERT INTO word (word_id, word_token, type)
    VALUES (hnr_id, lookup_term, 'H');

  RETURN hnr_id;
END;
$$
LANGUAGE plpgsql;
|
||||
393
lib-sql/tokenizer/legacy_tokenizer.sql
Normal file
393
lib-sql/tokenizer/legacy_tokenizer.sql
Normal file
@@ -0,0 +1,393 @@
|
||||
-- Get tokens used for searching the given place.
--
-- These are the tokens that will be saved in the search_name table.
-- The 'names' entry of the token info is read as text and cast to an
-- integer array. NULL input yields NULL (STRICT).
CREATE OR REPLACE FUNCTION token_get_name_search_tokens(info JSONB)
  RETURNS INTEGER[]
  LANGUAGE SQL IMMUTABLE STRICT
AS $$
  SELECT CAST(info->>'names' AS INTEGER[]);
$$;
|
||||
|
||||
|
||||
-- Get tokens for matching the place name against others.
--
-- This should usually be restricted to full name tokens. This
-- implementation returns the same 'names' entry of the token info that
-- is used for searching, cast to an integer array.
CREATE OR REPLACE FUNCTION token_get_name_match_tokens(info JSONB)
  RETURNS INTEGER[]
  LANGUAGE SQL IMMUTABLE STRICT
AS $$
  SELECT CAST(info->>'names' AS INTEGER[]);
$$;
|
||||
|
||||
|
||||
-- Return the housenumber tokens applicable for the place.
--
-- Reads the 'hnr_tokens' entry of the token info as text and casts it
-- to an integer array. NULL input yields NULL (STRICT).
CREATE OR REPLACE FUNCTION token_get_housenumber_search_tokens(info JSONB)
  RETURNS INTEGER[]
  LANGUAGE SQL IMMUTABLE STRICT
AS $$
  SELECT CAST(info->>'hnr_tokens' AS INTEGER[]);
$$;
|
||||
|
||||
|
||||
-- Return the housenumber in the form that it can be matched during search.
--
-- This is the 'hnr' entry of the token info, returned as text.
CREATE OR REPLACE FUNCTION token_normalized_housenumber(info JSONB)
  RETURNS TEXT
  LANGUAGE SQL IMMUTABLE STRICT
AS $$
  SELECT info ->> 'hnr';
$$;
|
||||
|
||||
|
||||
-- Return the 'street' entry of the token info as an integer array.
-- NULL input yields NULL (STRICT).
CREATE OR REPLACE FUNCTION token_addr_street_match_tokens(info JSONB)
  RETURNS INTEGER[]
  LANGUAGE SQL IMMUTABLE STRICT
AS $$
  SELECT CAST(info->>'street' AS INTEGER[]);
$$;
|
||||
|
||||
|
||||
-- Return the 'place_match' entry of the token info as an integer array.
-- NULL input yields NULL (STRICT).
CREATE OR REPLACE FUNCTION token_addr_place_match_tokens(info JSONB)
  RETURNS INTEGER[]
  LANGUAGE SQL IMMUTABLE STRICT
AS $$
  SELECT CAST(info->>'place_match' AS INTEGER[]);
$$;
|
||||
|
||||
|
||||
-- Return the 'place_search' entry of the token info as an integer array.
-- NULL input yields NULL (STRICT).
CREATE OR REPLACE FUNCTION token_addr_place_search_tokens(info JSONB)
  RETURNS INTEGER[]
  LANGUAGE SQL IMMUTABLE STRICT
AS $$
  SELECT CAST(info->>'place_search' AS INTEGER[]);
$$;
|
||||
|
||||
|
||||
-- Row type returned by token_get_address_tokens(): the key of one address
-- part together with its token arrays for matching and for searching.
-- CASCADE also removes objects that depend on a previous version of the type.
DROP TYPE IF EXISTS token_addresstoken CASCADE;
CREATE TYPE token_addresstoken AS (
  key           TEXT,
  match_tokens  INTEGER[],
  search_tokens INTEGER[]
);
|
||||
|
||||
-- Expand the 'addr' object of the token info into one row per key.
--
-- For every key, element 1 of the value supplies match_tokens and
-- element 0 supplies search_tokens; both are read as text and cast to
-- integer arrays.
CREATE OR REPLACE FUNCTION token_get_address_tokens(info JSONB)
  RETURNS SETOF token_addresstoken
  LANGUAGE SQL IMMUTABLE STRICT
AS $$
  SELECT addr.key,
         CAST(addr.value->>1 AS INTEGER[]) AS match_tokens,
         CAST(addr.value->>0 AS INTEGER[]) AS search_tokens
    FROM jsonb_each(info->'addr') AS addr;
$$;
|
||||
|
||||
|
||||
-- Normalize a postcode: trimmed and upper-cased.
--
-- Values containing a comma or semicolon yield NULL, as does NULL
-- input (STRICT).
CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
  RETURNS TEXT
  LANGUAGE SQL IMMUTABLE STRICT
AS $$
  SELECT CASE WHEN postcode NOT SIMILAR TO '%(,|;)%'
              THEN upper(trim(postcode))
         END;
$$;
|
||||
|
||||
|
||||
-- Return token info that should be saved permanently in the database.
--
-- This implementation keeps nothing: the result is always NULL.
CREATE OR REPLACE FUNCTION token_strip_info(info JSONB)
  RETURNS JSONB
  LANGUAGE SQL IMMUTABLE STRICT
AS $$
  SELECT CAST(NULL AS JSONB);
$$;
|
||||
|
||||
--------------- private functions ----------------------------------------------
|
||||
|
||||
-- Functions for term normalisation and access to the 'word' table.
|
||||
|
||||
-- SQL binding for the C function 'transliteration' exported by the
-- nominatim.so module found under the configured module path.
CREATE OR REPLACE FUNCTION transliteration(text)
  RETURNS text
  LANGUAGE c IMMUTABLE STRICT
  AS '{{ modulepath }}/nominatim.so', 'transliteration';
|
||||
|
||||
|
||||
-- SQL binding for the C function 'gettokenstring' exported by the
-- nominatim.so module found under the configured module path.
CREATE OR REPLACE FUNCTION gettokenstring(text)
  RETURNS text
  LANGUAGE c IMMUTABLE STRICT
  AS '{{ modulepath }}/nominatim.so', 'gettokenstring';
|
||||
|
||||
|
||||
-- Normalize a name: transliterate it, convert the result into its token
-- string and strip surrounding blanks. NULL input yields NULL because
-- both helper functions are STRICT.
CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT
  AS $$
BEGIN
  RETURN trim(public.gettokenstring(public.transliteration(name)));
END;
$$
LANGUAGE plpgsql IMMUTABLE;
|
||||
|
||||
-- Return the id of the word row (class and type both NULL) for the
-- trimmed lookup word, creating the row with a count of 0 if necessary.
-- Returns NULL if the word is too common, i.e. its search_name_count
-- exceeds max_word_freq.
CREATE OR REPLACE FUNCTION getorcreate_word_id(lookup_word TEXT)
  RETURNS INTEGER
  AS $$
DECLARE
  token TEXT := trim(lookup_word);
  found_id INTEGER;
  usage_count INTEGER;
BEGIN
  SELECT min(word_id), max(search_name_count)
    INTO found_id, usage_count
    FROM word
   WHERE word_token = token AND class IS NULL AND type IS NULL;

  -- Unknown word: create it. A new word is never too common.
  IF found_id IS NULL THEN
    found_id := nextval('seq_word');
    INSERT INTO word VALUES (found_id, token, null, null, null, null, 0);
    RETURN found_id;
  END IF;

  IF usage_count > {{ max_word_freq }} THEN
    RETURN NULL;
  END IF;

  RETURN found_id;
END;
$$
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
-- Create housenumber tokens from OSM addr:housenumber values.
--
-- Each entry of `housenumbers` is normalized with make_standard_name()
-- and its token is looked up or created with getorcreate_housenumber_id().
-- NOTE(review): the function receives an array, so splitting the raw tag
-- at comma and semicolon is presumably done by the caller - confirm.
--
-- \param housenumbers   Housenumbers to tokenize.
-- \param[out] tokens    Text form of the array of token ids.
-- \param[out] normtext  Normalized housenumbers joined with ';', suitable
--                       for comparison.
--
-- The function is VOLATILE because getorcreate_housenumber_id() may insert
-- rows into the word table and advance seq_word; functions with side
-- effects must not be declared STABLE.
CREATE OR REPLACE FUNCTION create_housenumbers(housenumbers TEXT[],
                                               OUT tokens TEXT,
                                               OUT normtext TEXT)
  AS $$
BEGIN
  SELECT array_to_string(array_agg(trans), ';'), array_agg(tid)::TEXT
    INTO normtext, tokens
    FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word) as tid
          FROM (SELECT make_standard_name(h) as lookup_word
                FROM unnest(housenumbers) h) x) y;
END;
$$ LANGUAGE plpgsql VOLATILE STRICT;
|
||||
|
||||
|
||||
-- Return the word id of the housenumber token (class 'place', type
-- 'house') for the given word, creating the row when it is missing.
-- The token is the trimmed word prefixed with a single blank.
CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT)
  RETURNS INTEGER
  AS $$
DECLARE
  token TEXT := ' ' || trim(lookup_word);
  hnr_id INTEGER;
BEGIN
  SELECT min(word_id)
    INTO hnr_id
    FROM word
   WHERE word_token = token AND class = 'place' AND type = 'house';

  -- Known token: nothing to create.
  IF hnr_id IS NOT NULL THEN
    RETURN hnr_id;
  END IF;

  hnr_id := nextval('seq_word');
  INSERT INTO word VALUES (hnr_id, token, null,
                           'place', 'house', null, 0);
  RETURN hnr_id;
END;
$$
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
-- Make sure a postcode token (class 'place', type 'postcode') exists for
-- the given postcode. The token is the normalized postcode prefixed with
-- a single blank; the original postcode is stored in the word column.
--
-- Returns true when a new row was inserted, false when a matching row
-- was already present.
CREATE OR REPLACE FUNCTION create_postcode_id(postcode TEXT)
  RETURNS BOOLEAN
  AS $$
DECLARE
  lookup_token TEXT;
BEGIN
  lookup_token := ' ' || make_standard_name(postcode);

  IF EXISTS (SELECT 1 FROM word
              WHERE word_token = lookup_token and word = postcode
                    and class='place' and type='postcode')
  THEN
    RETURN false;
  END IF;

  INSERT INTO word VALUES (nextval('seq_word'), lookup_token, postcode,
                           'place', 'postcode', null, 0);
  RETURN true;
END;
$$
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION getorcreate_name_id(lookup_word TEXT, src_word TEXT)
|
||||
RETURNS INTEGER
|
||||
AS $$
|
||||
DECLARE
|
||||
lookup_token TEXT;
|
||||
nospace_lookup_token TEXT;
|
||||
return_word_id INTEGER;
|
||||
BEGIN
|
||||
lookup_token := ' '||trim(lookup_word);
|
||||
SELECT min(word_id) FROM word
|
||||
WHERE word_token = lookup_token and class is null and type is null
|
||||
INTO return_word_id;
|
||||
IF return_word_id IS NULL THEN
|
||||
return_word_id := nextval('seq_word');
|
||||
INSERT INTO word VALUES (return_word_id, lookup_token, src_word,
|
||||
null, null, null, 0);
|
||||
END IF;
|
||||
RETURN return_word_id;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
-- Normalize a string and lookup its word ids (partial words).
|
||||
CREATE OR REPLACE FUNCTION addr_ids_from_name(lookup_word TEXT)
|
||||
RETURNS INTEGER[]
|
||||
AS $$
|
||||
DECLARE
|
||||
words TEXT[];
|
||||
id INTEGER;
|
||||
return_word_id INTEGER[];
|
||||
word_ids INTEGER[];
|
||||
j INTEGER;
|
||||
BEGIN
|
||||
words := string_to_array(make_standard_name(lookup_word), ' ');
|
||||
IF array_upper(words, 1) IS NOT NULL THEN
|
||||
FOR j IN 1..array_upper(words, 1) LOOP
|
||||
IF (words[j] != '') THEN
|
||||
SELECT array_agg(word_id) INTO word_ids
|
||||
FROM word
|
||||
WHERE word_token = words[j] and class is null and type is null;
|
||||
|
||||
IF word_ids IS NULL THEN
|
||||
id := nextval('seq_word');
|
||||
INSERT INTO word VALUES (id, words[j], null, null, null, null, 0);
|
||||
return_word_id := return_word_id || id;
|
||||
ELSE
|
||||
return_word_id := array_merge(return_word_id, word_ids);
|
||||
END IF;
|
||||
END IF;
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
RETURN return_word_id;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
-- Normalize a string and look up its name ids (full words).
|
||||
CREATE OR REPLACE FUNCTION word_ids_from_name(lookup_word TEXT)
|
||||
RETURNS INTEGER[]
|
||||
AS $$
|
||||
DECLARE
|
||||
lookup_token TEXT;
|
||||
return_word_ids INTEGER[];
|
||||
BEGIN
|
||||
lookup_token := ' '|| make_standard_name(lookup_word);
|
||||
SELECT array_agg(word_id) FROM word
|
||||
WHERE word_token = lookup_token and class is null and type is null
|
||||
INTO return_word_ids;
|
||||
RETURN return_word_ids;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql STABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION make_keywords(src HSTORE)
|
||||
RETURNS INTEGER[]
|
||||
AS $$
|
||||
DECLARE
|
||||
result INTEGER[];
|
||||
s TEXT;
|
||||
w INTEGER;
|
||||
words TEXT[];
|
||||
value TEXT;
|
||||
j INTEGER;
|
||||
BEGIN
|
||||
result := '{}'::INTEGER[];
|
||||
|
||||
FOR value IN SELECT unnest(regexp_split_to_array(svals(src), E'[,;]')) LOOP
|
||||
-- full name
|
||||
s := make_standard_name(value);
|
||||
w := getorcreate_name_id(s, value);
|
||||
|
||||
IF not(ARRAY[w] <@ result) THEN
|
||||
result := result || w;
|
||||
END IF;
|
||||
|
||||
-- partial single-word terms
|
||||
words := string_to_array(s, ' ');
|
||||
IF array_upper(words, 1) IS NOT NULL THEN
|
||||
FOR j IN 1..array_upper(words, 1) LOOP
|
||||
IF (words[j] != '') THEN
|
||||
w = getorcreate_word_id(words[j]);
|
||||
IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
|
||||
result := result || w;
|
||||
END IF;
|
||||
END IF;
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
-- consider parts before an opening bracket a full word as well
|
||||
words := regexp_split_to_array(value, E'[(]');
|
||||
IF array_upper(words, 1) > 1 THEN
|
||||
s := make_standard_name(words[1]);
|
||||
IF s != '' THEN
|
||||
w := getorcreate_name_id(s, words[1]);
|
||||
IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
|
||||
result := result || w;
|
||||
END IF;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
s := regexp_replace(value, '市$', '');
|
||||
IF s != value THEN
|
||||
s := make_standard_name(s);
|
||||
IF s != '' THEN
|
||||
w := getorcreate_name_id(s, value);
|
||||
IF NOT (ARRAY[w] <@ result) THEN
|
||||
result := result || w;
|
||||
END IF;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
END LOOP;
|
||||
|
||||
RETURN result;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION precompute_words(src TEXT)
|
||||
RETURNS INTEGER
|
||||
AS $$
|
||||
DECLARE
|
||||
s TEXT;
|
||||
w INTEGER;
|
||||
words TEXT[];
|
||||
i INTEGER;
|
||||
j INTEGER;
|
||||
BEGIN
|
||||
s := make_standard_name(src);
|
||||
w := getorcreate_name_id(s, src);
|
||||
|
||||
w := getorcreate_word_id(s);
|
||||
|
||||
words := string_to_array(s, ' ');
|
||||
IF array_upper(words, 1) IS NOT NULL THEN
|
||||
FOR j IN 1..array_upper(words, 1) LOOP
|
||||
IF (words[j] != '') THEN
|
||||
w := getorcreate_word_id(words[j]);
|
||||
END IF;
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
words := regexp_split_to_array(src, E'[,;()]');
|
||||
IF array_upper(words, 1) != 1 THEN
|
||||
FOR j IN 1..array_upper(words, 1) LOOP
|
||||
s := make_standard_name(words[j]);
|
||||
IF s != '' THEN
|
||||
w := getorcreate_word_id(s);
|
||||
END IF;
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
s := regexp_replace(src, '市$', '');
|
||||
IF s != src THEN
|
||||
s := make_standard_name(s);
|
||||
IF s != '' THEN
|
||||
w := getorcreate_name_id(s, src);
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
RETURN 1;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
2
lib-sql/tokenizer/legacy_tokenizer_indices.sql
Normal file
2
lib-sql/tokenizer/legacy_tokenizer_indices.sql
Normal file
@@ -0,0 +1,2 @@
|
||||
CREATE INDEX IF NOT EXISTS idx_word_word_id
|
||||
ON word USING BTREE (word_id) {{db.tablespace.search_index}};
|
||||
21
lib-sql/tokenizer/legacy_tokenizer_tables.sql
Normal file
21
lib-sql/tokenizer/legacy_tokenizer_tables.sql
Normal file
@@ -0,0 +1,21 @@
|
||||
DROP TABLE IF EXISTS word;
|
||||
CREATE TABLE word (
|
||||
word_id INTEGER,
|
||||
word_token text NOT NULL,
|
||||
word text,
|
||||
class text,
|
||||
type text,
|
||||
country_code varchar(2),
|
||||
search_name_count INTEGER,
|
||||
operator TEXT
|
||||
) {{db.tablespace.search_data}};
|
||||
|
||||
CREATE INDEX idx_word_word_token ON word
|
||||
USING BTREE (word_token) {{db.tablespace.search_index}};
|
||||
CREATE INDEX idx_word_word ON word
|
||||
USING BTREE (word) {{db.tablespace.search_index}} WHERE word is not null;
|
||||
GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}";
|
||||
|
||||
DROP SEQUENCE IF EXISTS seq_word;
|
||||
CREATE SEQUENCE seq_word start 1;
|
||||
GRANT SELECT ON seq_word to "{{config.DATABASE_WEBUSER}}";
|
||||
@@ -1,58 +0,0 @@
|
||||
-- Create a temporary table with postcodes from placex.
|
||||
|
||||
CREATE TEMP TABLE tmp_new_postcode_locations AS
|
||||
SELECT country_code,
|
||||
upper(trim (both ' ' from address->'postcode')) as pc,
|
||||
ST_Centroid(ST_Collect(ST_Centroid(geometry))) as centroid
|
||||
FROM placex
|
||||
WHERE address ? 'postcode'
|
||||
AND address->'postcode' NOT SIMILAR TO '%(,|;|:)%'
|
||||
AND geometry IS NOT null
|
||||
GROUP BY country_code, pc;
|
||||
|
||||
CREATE INDEX idx_tmp_new_postcode_locations
|
||||
ON tmp_new_postcode_locations (pc, country_code);
|
||||
|
||||
-- add extra US postcodes
|
||||
INSERT INTO tmp_new_postcode_locations (country_code, pc, centroid)
|
||||
SELECT 'us', postcode, ST_SetSRID(ST_Point(x,y),4326)
|
||||
FROM us_postcode u
|
||||
WHERE NOT EXISTS (SELECT 0 FROM tmp_new_postcode_locations new
|
||||
WHERE new.country_code = 'us' AND new.pc = u.postcode);
|
||||
-- add extra UK postcodes
|
||||
INSERT INTO tmp_new_postcode_locations (country_code, pc, centroid)
|
||||
SELECT 'gb', postcode, geometry FROM gb_postcode g
|
||||
WHERE NOT EXISTS (SELECT 0 FROM tmp_new_postcode_locations new
|
||||
WHERE new.country_code = 'gb' and new.pc = g.postcode);
|
||||
|
||||
-- Remove all postcodes that are no longer valid
|
||||
DELETE FROM location_postcode old
|
||||
WHERE NOT EXISTS(SELECT 0 FROM tmp_new_postcode_locations new
|
||||
WHERE old.postcode = new.pc
|
||||
AND old.country_code = new.country_code);
|
||||
|
||||
-- Update geometries where necessary
|
||||
UPDATE location_postcode old SET geometry = new.centroid, indexed_status = 1
|
||||
FROM tmp_new_postcode_locations new
|
||||
WHERE old.postcode = new.pc AND old.country_code = new.country_code
|
||||
AND ST_AsText(old.geometry) != ST_AsText(new.centroid);
|
||||
|
||||
-- Remove all postcodes that already exist from the temporary table
|
||||
DELETE FROM tmp_new_postcode_locations new
|
||||
WHERE EXISTS(SELECT 0 FROM location_postcode old
|
||||
WHERE old.postcode = new.pc AND old.country_code = new.country_code);
|
||||
|
||||
-- Add newly added postcode
|
||||
INSERT INTO location_postcode
|
||||
(place_id, indexed_status, country_code, postcode, geometry)
|
||||
SELECT nextval('seq_place'), 1, country_code, pc, centroid
|
||||
FROM tmp_new_postcode_locations new;
|
||||
|
||||
-- Remove unused word entries
|
||||
DELETE FROM word
|
||||
WHERE class = 'place' AND type = 'postcode'
|
||||
AND NOT EXISTS (SELECT 0 FROM location_postcode p
|
||||
WHERE p.postcode = word.word);
|
||||
|
||||
-- Finally index the newly inserted postcodes
|
||||
UPDATE location_postcode SET indexed_status = 0 WHERE indexed_status > 0;
|
||||
@@ -1,14 +0,0 @@
|
||||
CREATE TABLE word_frequencies AS
|
||||
(SELECT unnest(make_keywords(v)) as id, sum(count) as count
|
||||
FROM (select svals(name) as v, count(*)from place group by v) cnt
|
||||
WHERE v is not null
|
||||
GROUP BY id);
|
||||
|
||||
select count(getorcreate_postcode_id(v)) from (select distinct address->'postcode' as v from place where address ? 'postcode') as w where v is not null;
|
||||
select count(create_housenumber_id(v)) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w;
|
||||
|
||||
-- copy the word frequencies
|
||||
update word set search_name_count = count from word_frequencies wf where wf.id = word.word_id;
|
||||
|
||||
-- and drop the temporary frequency table again
|
||||
drop table word_frequencies;
|
||||
@@ -3,7 +3,7 @@
|
||||
nominatim
|
||||
.SH SYNOPSIS
|
||||
.B nominatim
|
||||
[-h] {import,freeze,replication,special-phrases,add-data,index,refresh,admin,export,serve,search,reverse,lookup,details,status,transition} ...
|
||||
[-h] {import,freeze,replication,special-phrases,add-data,index,refresh,admin,export,serve,search,reverse,lookup,details,status} ...
|
||||
.SH DESCRIPTION
|
||||
Command\-line tools for importing, updating, administrating and
|
||||
querying the Nominatim database.
|
||||
@@ -58,9 +58,6 @@ nominatim
|
||||
.TP
|
||||
\fBnominatim\fR \fI\,status\/\fR
|
||||
Execute API status query.
|
||||
.TP
|
||||
\fBnominatim\fR \fI\,transition\/\fR
|
||||
Internal functions for code transition. Do not use.
|
||||
.SH OPTIONS 'nominatim import'
|
||||
usage: nominatim import [-h] [-q] [-v] [--project-dir DIR] [-j NUM]
|
||||
(--osm-file FILE | --continue {load-data,indexing,db-postprocess})
|
||||
@@ -244,7 +241,7 @@ usage: nominatim add-data [-h] [-q] [-v] [--project-dir DIR] [-j NUM]
|
||||
|
||||
Add additional data from a file or an online source.
|
||||
|
||||
Data is only imported, not indexed. You need to call `nominatim\-update index`
|
||||
Data is only imported, not indexed. You need to call `nominatim index`
|
||||
to complete the process.
|
||||
|
||||
|
||||
@@ -909,106 +906,6 @@ Number of parallel threads to use
|
||||
\fB\-\-format\fR {text,json}
|
||||
Format of result
|
||||
|
||||
.SH OPTIONS 'nominatim transition'
|
||||
usage: nominatim transition [-h] [-q] [-v] [--project-dir DIR] [-j NUM]
|
||||
[--create-db] [--setup-db] [--import-data]
|
||||
[--load-data] [--create-tables]
|
||||
[--create-partition-tables] [--index]
|
||||
[--create-search-indices] [--create-country-names]
|
||||
[--no-partitions] [--osm-file FILE] [--drop]
|
||||
[--osm2pgsql-cache SIZE] [--no-analyse]
|
||||
[--ignore-errors] [--reverse-only]
|
||||
[--tiger-data FILE]
|
||||
|
||||
Internal functions for code transition. Do not use.
|
||||
|
||||
|
||||
|
||||
|
||||
.TP
|
||||
\fB\-q\fR, \fB\-\-quiet\fR
|
||||
Print only error messages
|
||||
|
||||
.TP
|
||||
\fB\-v\fR, \fB\-\-verbose\fR
|
||||
Increase verboseness of output
|
||||
|
||||
.TP
|
||||
\fB\-\-project\-dir\fR DIR
|
||||
Base directory of the Nominatim installation (default:.)
|
||||
|
||||
.TP
|
||||
\fB\-j\fR NUM, \fB\-\-threads\fR NUM
|
||||
Number of parallel threads to use
|
||||
|
||||
.TP
|
||||
\fB\-\-create\-db\fR
|
||||
Create nominatim db
|
||||
|
||||
.TP
|
||||
\fB\-\-setup\-db\fR
|
||||
Build a blank nominatim db
|
||||
|
||||
.TP
|
||||
\fB\-\-import\-data\fR
|
||||
Import an OSM file
|
||||
|
||||
.TP
|
||||
\fB\-\-load\-data\fR
|
||||
Copy data to live tables from import table
|
||||
|
||||
.TP
|
||||
\fB\-\-create\-tables\fR
|
||||
Create main tables
|
||||
|
||||
.TP
|
||||
\fB\-\-create\-partition\-tables\fR
|
||||
Create required partition tables
|
||||
|
||||
.TP
|
||||
\fB\-\-index\fR
|
||||
Index the data
|
||||
|
||||
.TP
|
||||
\fB\-\-create\-search\-indices\fR
|
||||
Create additional indices required for search and update
|
||||
|
||||
.TP
|
||||
\fB\-\-create\-country\-names\fR
|
||||
Create search index for default country names.
|
||||
|
||||
.TP
|
||||
\fB\-\-no\-partitions\fR
|
||||
Do not partition search indices
|
||||
|
||||
.TP
|
||||
\fB\-\-osm\-file\fR FILE
|
||||
File to import
|
||||
|
||||
.TP
|
||||
\fB\-\-drop\fR
|
||||
Drop tables needed for updates, making the database readonly
|
||||
|
||||
.TP
|
||||
\fB\-\-osm2pgsql\-cache\fR SIZE
|
||||
Size of cache to be used by osm2pgsql (in MB)
|
||||
|
||||
.TP
|
||||
\fB\-\-no\-analyse\fR
|
||||
Do not perform analyse operations during index
|
||||
|
||||
.TP
|
||||
\fB\-\-ignore\-errors\fR
|
||||
Ignore certain errors on import.
|
||||
|
||||
.TP
|
||||
\fB\-\-reverse\-only\fR
|
||||
Do not create search tables and indexes
|
||||
|
||||
.TP
|
||||
\fB\-\-tiger\-data\fR FILE
|
||||
File to import
|
||||
|
||||
.SH DISTRIBUTION
|
||||
The latest version of Nominatim may be downloaded from
|
||||
.UR https://nominatim.org
|
||||
|
||||
@@ -8,12 +8,11 @@ import sys
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
from .config import Configuration
|
||||
from .tools.exec_utils import run_legacy_script, run_php_server
|
||||
from .errors import UsageError
|
||||
from . import clicmd
|
||||
from .clicmd.args import NominatimArgs
|
||||
from .tools import tiger_data
|
||||
from nominatim.config import Configuration
|
||||
from nominatim.tools.exec_utils import run_legacy_script, run_php_server
|
||||
from nominatim.errors import UsageError
|
||||
from nominatim import clicmd
|
||||
from nominatim.clicmd.args import NominatimArgs
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
@@ -84,6 +83,11 @@ class CommandlineParser:
|
||||
|
||||
args.config = Configuration(args.project_dir, args.config_dir,
|
||||
environ=kwargs.get('environ', os.environ))
|
||||
args.config.set_libdirs(module=args.module_dir,
|
||||
osm2pgsql=args.osm2pgsql_path,
|
||||
php=args.phplib_dir,
|
||||
sql=args.sqllib_dir,
|
||||
data=args.data_dir)
|
||||
|
||||
log = logging.getLogger()
|
||||
log.warning('Using project directory: %s', str(args.project_dir))
|
||||
@@ -99,7 +103,7 @@ class CommandlineParser:
|
||||
return 1
|
||||
|
||||
|
||||
##### Subcommand classes
|
||||
# Subcommand classes
|
||||
#
|
||||
# Each class needs to implement two functions: add_args() adds the CLI parameters
|
||||
# for the subfunction, run() executes the subcommand.
|
||||
@@ -110,61 +114,6 @@ class CommandlineParser:
|
||||
#
|
||||
# No need to document the functions each time.
|
||||
# pylint: disable=C0111
|
||||
# Using non-top-level imports to make pyosmium optional for replication only.
|
||||
# pylint: disable=E0012,C0415
|
||||
class UpdateAddData:
|
||||
"""\
|
||||
Add additional data from a file or an online source.
|
||||
|
||||
Data is only imported, not indexed. You need to call `nominatim-update index`
|
||||
to complete the process.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
group_name = parser.add_argument_group('Source')
|
||||
group = group_name.add_mutually_exclusive_group(required=True)
|
||||
group.add_argument('--file', metavar='FILE',
|
||||
help='Import data from an OSM file')
|
||||
group.add_argument('--diff', metavar='FILE',
|
||||
help='Import data from an OSM diff file')
|
||||
group.add_argument('--node', metavar='ID', type=int,
|
||||
help='Import a single node from the API')
|
||||
group.add_argument('--way', metavar='ID', type=int,
|
||||
help='Import a single way from the API')
|
||||
group.add_argument('--relation', metavar='ID', type=int,
|
||||
help='Import a single relation from the API')
|
||||
group.add_argument('--tiger-data', metavar='DIR',
|
||||
help='Add housenumbers from the US TIGER census database.')
|
||||
group = parser.add_argument_group('Extra arguments')
|
||||
group.add_argument('--use-main-api', action='store_true',
|
||||
help='Use OSM API instead of Overpass to download objects')
|
||||
|
||||
@staticmethod
|
||||
def run(args):
|
||||
if args.tiger_data:
|
||||
return tiger_data.add_tiger_data(args.config.get_libpq_dsn(),
|
||||
args.tiger_data,
|
||||
args.threads or 1,
|
||||
args.config,
|
||||
args.sqllib_dir)
|
||||
|
||||
params = ['update.php']
|
||||
if args.file:
|
||||
params.extend(('--import-file', args.file))
|
||||
elif args.diff:
|
||||
params.extend(('--import-diff', args.diff))
|
||||
elif args.node:
|
||||
params.extend(('--import-node', args.node))
|
||||
elif args.way:
|
||||
params.extend(('--import-way', args.way))
|
||||
elif args.relation:
|
||||
params.extend(('--import-relation', args.relation))
|
||||
if args.use_main_api:
|
||||
params.append('--use-main-api')
|
||||
return run_legacy_script(*params, nominatim_env=args)
|
||||
|
||||
|
||||
class QueryExport:
|
||||
"""\
|
||||
Export addresses as CSV file from the database.
|
||||
@@ -255,9 +204,9 @@ def get_set_parser(**kwargs):
|
||||
|
||||
parser.add_subcommand('special-phrases', clicmd.ImportSpecialPhrases)
|
||||
|
||||
parser.add_subcommand('add-data', UpdateAddData)
|
||||
parser.add_subcommand('add-data', clicmd.UpdateAddData)
|
||||
parser.add_subcommand('index', clicmd.UpdateIndex)
|
||||
parser.add_subcommand('refresh', clicmd.UpdateRefresh)
|
||||
parser.add_subcommand('refresh', clicmd.UpdateRefresh())
|
||||
|
||||
parser.add_subcommand('admin', clicmd.AdminFuncs)
|
||||
|
||||
@@ -273,8 +222,6 @@ def get_set_parser(**kwargs):
|
||||
else:
|
||||
parser.parser.epilog = 'php-cgi not found. Query commands not available.'
|
||||
|
||||
parser.add_subcommand('transition', clicmd.AdminTransition)
|
||||
|
||||
return parser
|
||||
|
||||
|
||||
|
||||
@@ -2,12 +2,12 @@
|
||||
Subcommand definitions for the command-line tool.
|
||||
"""
|
||||
|
||||
from .setup import SetupAll
|
||||
from .replication import UpdateReplication
|
||||
from .api import APISearch, APIReverse, APILookup, APIDetails, APIStatus
|
||||
from .index import UpdateIndex
|
||||
from .refresh import UpdateRefresh
|
||||
from .admin import AdminFuncs
|
||||
from .freeze import SetupFreeze
|
||||
from .transition import AdminTransition
|
||||
from .special_phrases import ImportSpecialPhrases
|
||||
from nominatim.clicmd.setup import SetupAll
|
||||
from nominatim.clicmd.replication import UpdateReplication
|
||||
from nominatim.clicmd.api import APISearch, APIReverse, APILookup, APIDetails, APIStatus
|
||||
from nominatim.clicmd.index import UpdateIndex
|
||||
from nominatim.clicmd.refresh import UpdateRefresh
|
||||
from nominatim.clicmd.add_data import UpdateAddData
|
||||
from nominatim.clicmd.admin import AdminFuncs
|
||||
from nominatim.clicmd.freeze import SetupFreeze
|
||||
from nominatim.clicmd.special_phrases import ImportSpecialPhrases
|
||||
|
||||
76
nominatim/clicmd/add_data.py
Normal file
76
nominatim/clicmd/add_data.py
Normal file
@@ -0,0 +1,76 @@
|
||||
"""
|
||||
Implementation of the 'add-data' subcommand.
|
||||
"""
|
||||
import logging
|
||||
|
||||
# Do not repeat documentation of subcommand classes.
|
||||
# pylint: disable=C0111
|
||||
# Using non-top-level imports to avoid potentially unused imports.
|
||||
# pylint: disable=E0012,C0415
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
class UpdateAddData:
|
||||
"""\
|
||||
Add additional data from a file or an online source.
|
||||
|
||||
Data is only imported, not indexed. You need to call `nominatim index`
|
||||
to complete the process.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
group_name = parser.add_argument_group('Source')
|
||||
group = group_name.add_mutually_exclusive_group(required=True)
|
||||
group.add_argument('--file', metavar='FILE',
|
||||
help='Import data from an OSM file or diff file')
|
||||
group.add_argument('--diff', metavar='FILE',
|
||||
help='Import data from an OSM diff file (deprecated: use --file)')
|
||||
group.add_argument('--node', metavar='ID', type=int,
|
||||
help='Import a single node from the API')
|
||||
group.add_argument('--way', metavar='ID', type=int,
|
||||
help='Import a single way from the API')
|
||||
group.add_argument('--relation', metavar='ID', type=int,
|
||||
help='Import a single relation from the API')
|
||||
group.add_argument('--tiger-data', metavar='DIR',
|
||||
help='Add housenumbers from the US TIGER census database.')
|
||||
group = parser.add_argument_group('Extra arguments')
|
||||
group.add_argument('--use-main-api', action='store_true',
|
||||
help='Use OSM API instead of Overpass to download objects')
|
||||
group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
|
||||
help='Size of cache to be used by osm2pgsql (in MB)')
|
||||
group.add_argument('--socket-timeout', dest='socket_timeout', type=int, default=60,
|
||||
help='Set timeout for file downloads.')
|
||||
|
||||
@staticmethod
|
||||
def run(args):
|
||||
from nominatim.tokenizer import factory as tokenizer_factory
|
||||
from nominatim.tools import tiger_data, add_osm_data
|
||||
|
||||
if args.tiger_data:
|
||||
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
|
||||
return tiger_data.add_tiger_data(args.tiger_data,
|
||||
args.config, args.threads or 1,
|
||||
tokenizer)
|
||||
|
||||
osm2pgsql_params = args.osm2pgsql_options(default_cache=1000, default_threads=1)
|
||||
if args.file or args.diff:
|
||||
return add_osm_data.add_data_from_file(args.file or args.diff,
|
||||
osm2pgsql_params)
|
||||
|
||||
if args.node:
|
||||
return add_osm_data.add_osm_object('node', args.node,
|
||||
args.use_main_api,
|
||||
osm2pgsql_params)
|
||||
|
||||
if args.way:
|
||||
return add_osm_data.add_osm_object('way', args.way,
|
||||
args.use_main_api,
|
||||
osm2pgsql_params)
|
||||
|
||||
if args.relation:
|
||||
return add_osm_data.add_osm_object('relation', args.relation,
|
||||
args.use_main_api,
|
||||
osm2pgsql_params)
|
||||
|
||||
return 0
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user