Compare commits

...

18 Commits

Author SHA1 Message Date
Sarah Hoffmann
e943a2c8a4 prepare release 4.0.2 2023-02-20 17:41:33 +01:00
Sarah Hoffmann
95958458c6 harmonize flags for PHP's htmlspecialchars 2023-02-20 17:33:53 +01:00
Sarah Hoffmann
3c703c3f14 adapt PHP tests for debug output 2023-02-20 17:33:10 +01:00
Sarah Hoffmann
cb66887c3b properly encode special HTML characters in debug mode 2023-02-20 17:33:08 +01:00
Sarah Hoffmann
e56add9888 prepare 4.0.1 release 2021-11-22 14:18:54 +01:00
Sarah Hoffmann
9628df3031 Merge pull request #2528 from lonvia/allow-french-extra-housenumbers
Don't penalize French 'bis' housenumbers
2021-11-21 10:53:20 +01:00
Sarah Hoffmann
423f338d04 Merge pull request #2526 from lonvia/docs-moving-database
Add a section about moving the database to another machine
2021-11-19 21:14:53 +01:00
Sarah Hoffmann
3a2597e5c4 don't penalize French 'bis' housenumbers
House numbers of the form '9 bis' are usual in France. So
be a bit more lenient before adding penalties to house numbers
with letters in them.

Fixes #2527.
2021-11-19 21:12:17 +01:00
Sarah Hoffmann
641f261495 Merge pull request #2525 from lonvia/fix-replication-indexer
Fix instantiation of indexer for replication
2021-11-19 16:16:30 +01:00
Sarah Hoffmann
5884a6e7a6 add a section about moving the database to another machine 2021-11-19 16:11:32 +01:00
Sarah Hoffmann
10e979e841 only instantiate indexer once for replication
Also makes sure that indexer object exists everywhere where needed.

See #2518.
2021-11-19 14:48:58 +01:00
Sarah Hoffmann
8dc1441635 Merge pull request #2517 from lonvia/transliteration-special-chars
ICU: avoid non-alphanumerical characters in transliteration
2021-11-11 07:42:42 +01:00
Sarah Hoffmann
c79dcfad9a make sure housenumbers are properly quoted 2021-11-10 20:44:28 +01:00
Sarah Hoffmann
1886952666 avoid special characters in word tokens
Transliteration should only consist of ASCII letters
and numbers. Avoid any other characters.
2021-11-10 17:14:13 +01:00
Sarah Hoffmann
7326b246b7 Merge pull request #2516 from lonvia/test-for-website-dir
Better error reporting when API script does not exist
2021-11-10 13:27:09 +01:00
Sarah Hoffmann
345c812e43 better error reporting when API script does not exist
Check if the API script exists on the expected location before
running php-cli. This way we can add a useful hint about the
project directory.

Fixes #2513.
2021-11-10 11:58:20 +01:00
Sarah Hoffmann
fd4ba3989e Merge pull request #2511 from lonvia/fix-combination-error-needs-address
Fix boolean combination of NeedsAddress flag
2021-11-06 12:11:55 +01:00
Sarah Hoffmann
e2d2571ad0 fix combination of NeedsAddress flag
When dealing with multiple partial terms, only keep the
flag, when all partial terms are so frequent as to need
an address.

Fixes #2510.
2021-11-05 22:18:37 +01:00
13 changed files with 162 additions and 69 deletions

View File

@@ -20,7 +20,7 @@ project(nominatim)
set(NOMINATIM_VERSION_MAJOR 4)
set(NOMINATIM_VERSION_MINOR 0)
set(NOMINATIM_VERSION_PATCH 0)
set(NOMINATIM_VERSION_PATCH 2)
set(NOMINATIM_VERSION "${NOMINATIM_VERSION_MAJOR}.${NOMINATIM_VERSION_MINOR}.${NOMINATIM_VERSION_PATCH}")

View File

@@ -1,3 +1,16 @@
4.0.2
* fix XSS vulnerability in debug view
4.0.1
* fix initialisation error in replication script
* ICU tokenizer: avoid any special characters in word tokens
* better error message when API php script does not exist
* fix quoting of house numbers in SQL queries
* small fixes and improvements in search query parsing
* add documentation for moving the database to a different machine
4.0.0
* refactor name token computation and introduce ICU tokenizer
@@ -27,6 +40,10 @@
* add testing of installation scripts via CI
* drop support for Python < 3.6 and Postgresql < 9.5
3.7.3
* fix XSS vulnerability in debug view
3.7.2
* fix database check for reverse-only imports

View File

@@ -101,7 +101,7 @@ This will get diffs from the replication server, import diffs and index
the database. The default replication server in the
script([Geofabrik](https://download.geofabrik.de)) provides daily updates.
## Importing Nominatim to an external PostgreSQL database
## Using an external PostgreSQL database
You can install Nominatim using a database that runs on a different server when
you have physical access to the file system on the other server. Nominatim
@@ -109,6 +109,11 @@ uses a custom normalization library that needs to be made accessible to the
PostgreSQL server. This section explains how to set up the normalization
library.
!!! note
The external module is only needed when using the legacy tokenizer.
If you have chosen the ICU tokenizer, then you can ignore this section
and follow the standard import documentation.
### Option 1: Compiling the library on the database server
The most sure way to get a working library is to compile it on the database
@@ -167,3 +172,44 @@ NOMINATIM_DATABASE_MODULE_PATH="<directory on the database server where nominati
Now change the `NOMINATIM_DATABASE_DSN` to point to your remote server and continue
to follow the [standard instructions for importing](Import.md).
## Moving the database to another machine
For some configurations it may be useful to run the import on one machine, then
move the database to another machine and run the Nominatim service from there.
For example, you might want to use a large machine to be able to run the import
quickly but only want a smaller machine for production because there is not so
much load. Or you might want to do the import once and then replicate the
database to many machines.
The important thing to keep in mind when transferring the Nominatim installation
is that you need to transfer the database _and the project directory_. Both
parts are essential for your installation.
The Nominatim database can be transferred using the `pg_dump`/`pg_restore` tool.
Make sure to use the same version of PostgreSQL and PostGIS on source and
target machine.
!!! note
Before creating a dump of your Nominatim database, consider running
`nominatim freeze` first. Your database loses the ability to receive further
data updates but the resulting database is only about a third of the size
of a full database.
Next install Nominatim on the target machine by following the standard installation
instructions. Again make sure to use the same version as the source machine.
You can now copy the project directory from the source machine to the new machine.
If necessary, edit the `.env` file to point it to the restored database.
Finally run
nominatim refresh --website
to make sure that the local installation of Nominatim will be used.
If you are using the legacy tokenizer you might also have to switch to the
PostgreSQL module that was compiled on your target machine. If you get errors
that PostgreSQL cannot find or access `nominatim.so` then copy the installed
version into the `module` directory of your project directory. The installed
copy can usually be found under `/usr/local/lib/nominatim/module/nominatim.so`.

View File

@@ -127,7 +127,7 @@ class Debug
public static function printSQL($sSQL)
{
echo '<p><tt><font color="#aaa">'.$sSQL.'</font></tt></p>'."\n";
echo '<p><tt><font color="#aaa">'.htmlspecialchars($sSQL, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401).'</font></tt></p>'."\n";
}
private static function outputVar($mVar, $sPreNL)
@@ -170,11 +170,12 @@ class Debug
}
if (is_string($mVar)) {
echo "'$mVar'";
return strlen($mVar) + 2;
$sOut = "'$mVar'";
} else {
$sOut = (string)$mVar;
}
echo (string)$mVar;
return strlen((string)$mVar);
echo htmlspecialchars($sOut, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401);
return strlen($sOut);
}
}

View File

@@ -257,7 +257,7 @@ class SearchDescription
if (empty($this->aName)) {
$this->bNameNeedsAddress = $bNeedsAddress;
} else {
$this->bNameNeedsAddress |= $bNeedsAddress;
$this->bNameNeedsAddress &= $bNeedsAddress;
}
if ($bSearchable) {
$this->aName[$iId] = $iId;
@@ -584,11 +584,11 @@ class SearchDescription
// will be narrowed down by an address. Remember that with ordering
// every single result has to be checked.
if ($this->sHouseNumber && ($this->bRareName || !empty($this->aAddress) || $this->sPostcode)) {
$sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M';
$sHouseNumberRegex = $oDB->getDBQuoted('\\\\m'.$this->sHouseNumber.'\\\\M');
// Housenumbers on streets and places.
$sChildHnr = 'SELECT * FROM placex WHERE parent_place_id = search_name.place_id';
$sChildHnr .= " AND housenumber ~* E'".$sHouseNumberRegex."'";
$sChildHnr .= ' AND housenumber ~* E'.$sHouseNumberRegex;
// Interpolations on streets and places.
if (preg_match('/^[0-9]+$/', $this->sHouseNumber)) {
$sIpolHnr = 'SELECT * FROM location_property_osmline ';
@@ -601,7 +601,7 @@ class SearchDescription
}
// Housenumbers on the object itself for unlisted places.
$sSelfHnr = 'SELECT * FROM placex WHERE place_id = search_name.place_id';
$sSelfHnr .= " AND housenumber ~* E'".$sHouseNumberRegex."'";
$sSelfHnr .= ' AND housenumber ~* E'.$sHouseNumberRegex;
$sSql = '(CASE WHEN address_rank = 30 THEN EXISTS('.$sSelfHnr.') ';
$sSql .= ' ELSE EXISTS('.$sChildHnr.') ';
@@ -739,9 +739,9 @@ class SearchDescription
return $aResults;
}
$sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M';
$sHouseNumberRegex = $oDB->getDBQuoted('\\\\m'.$this->sHouseNumber.'\\\\M');
$sSQL = 'SELECT place_id FROM placex WHERE';
$sSQL .= " housenumber ~* E'".$sHouseNumberRegex."'";
$sSQL .= ' housenumber ~* E'.$sHouseNumberRegex;
$sSQL .= ' AND ('.join(' OR ', $aIDCondition).')';
$sSQL .= $this->oContext->excludeSQL(' AND place_id');

View File

@@ -58,7 +58,7 @@ class HouseNumber
// up of numbers, add a penalty
$iSearchCost = 1;
if (preg_match('/\\d/', $this->sToken) === 0
|| preg_match_all('/[^0-9]/', $this->sToken, $aMatches) > 2) {
|| preg_match_all('/[^0-9 ]/', $this->sToken, $aMatches) > 3) {
$iSearchCost += strlen($this->sToken) - 1;
}
if (!$oSearch->hasOperator(\Nominatim\Operator::NONE)) {

View File

@@ -4,6 +4,7 @@ Subcommand definitions for API calls from the command line.
import logging
from nominatim.tools.exec_utils import run_api_script
from nominatim.errors import UsageError
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
@@ -53,6 +54,18 @@ def _add_api_output_arguments(parser):
"Parameter is difference tolerance in degrees."))
def _run_api(endpoint, args, params):
    """ Run the PHP API script for `endpoint` and return its result.

        The script is looked up as `website/<endpoint>.php` below
        `args.project_dir`. When the file is there, the call is handed
        over to `run_api_script` together with `args.phpcgi_path` and
        `params`. When it is missing, a hint about the project directory
        is logged and a UsageError is raised.
    """
    php_script = args.project_dir / 'website' / (endpoint + '.php')

    if php_script.exists():
        return run_api_script(endpoint, args.project_dir,
                              phpcgi_bin=args.phpcgi_path, params=params)

    # Most likely the command was started outside of the project directory,
    # so point the user to the way to fix that.
    LOG.error("Cannot find API script file.\n\n"
              "Make sure to run 'nominatim' from the project directory \n"
              "or use the option --project-dir.")
    raise UsageError("API script not found.")
class APISearch:
"""\
Execute a search query.
@@ -114,8 +127,7 @@ class APISearch:
if not args.dedupe:
params['dedupe'] = '0'
return run_api_script('search', args.project_dir,
phpcgi_bin=args.phpcgi_path, params=params)
return _run_api('search', args, params)
class APIReverse:
"""\
@@ -158,8 +170,7 @@ class APIReverse:
if args.polygon_threshold:
params['polygon_threshold'] = args.polygon_threshold
return run_api_script('reverse', args.project_dir,
phpcgi_bin=args.phpcgi_path, params=params)
return _run_api('reverse', args, params)
class APILookup:
@@ -198,8 +209,7 @@ class APILookup:
if args.polygon_threshold:
params['polygon_threshold'] = args.polygon_threshold
return run_api_script('lookup', args.project_dir,
phpcgi_bin=args.phpcgi_path, params=params)
return _run_api('lookup', args, params)
class APIDetails:
@@ -249,8 +259,7 @@ class APIDetails:
for name, _ in DETAILS_SWITCHES:
params[name] = '1' if getattr(args, name) else '0'
return run_api_script('details', args.project_dir,
phpcgi_bin=args.phpcgi_path, params=params)
return _run_api('details', args, params)
class APIStatus:
@@ -271,6 +280,4 @@ class APIStatus:
@staticmethod
def run(args):
return run_api_script('status', args.project_dir,
phpcgi_bin=args.phpcgi_path,
params=dict(format=args.format))
return _run_api('status', args, dict(format=args.format))

View File

@@ -136,6 +136,7 @@ class UpdateReplication:
recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL')
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, args.threads or 1)
while True:
with connect(args.config.get_libpq_dsn()) as conn:
@@ -148,8 +149,6 @@ class UpdateReplication:
if state is not replication.UpdateState.NO_CHANGES and args.do_index:
index_start = dt.datetime.now(dt.timezone.utc)
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
args.threads or 1)
indexer.index_full(analyse=False)
with connect(args.config.get_libpq_dsn()) as conn:

View File

@@ -10,7 +10,7 @@ Version information for Nominatim.
# and must always be increased when there is a change to the database or code
# that requires a migration.
# Released versions always have a database patch level of 0.
NOMINATIM_VERSION = (4, 0, 0, 0)
NOMINATIM_VERSION = (4, 0, 2, 0)
POSTGRESQL_REQUIRED_VERSION = (9, 5)
POSTGIS_REQUIRED_VERSION = (2, 2)

View File

@@ -21,8 +21,8 @@ transliteration:
- !include icu-rules/extended-unicode-to-asccii.yaml
- ":: Ascii ()"
- ":: NFD ()"
- "[^[:Ascii:]] >"
- ":: lower ()"
- "[^a-z0-9[:Space:]] >"
- ":: NFC ()"
sanitizers:
- step: split-name-list

View File

@@ -29,14 +29,14 @@ class DebugTest extends \PHPUnit\Framework\TestCase
<pre><b>Var1:</b> <i>True</i></pre>
<pre><b>Var2:</b> <i>False</i></pre>
<pre><b>Var3:</b> 0</pre>
<pre><b>Var4:</b> 'String'</pre>
<pre><b>Var5:</b> 0 => 'one'
1 => 'two'
2 => 'three'</pre>
<pre><b>Var6:</b> 'key' => 'value'
'key2' => 'value2'</pre>
<pre><b>Var4:</b> &#039;String&#039;</pre>
<pre><b>Var5:</b> 0 => &#039;one&#039;
1 => &#039;two&#039;
2 => &#039;three&#039;</pre>
<pre><b>Var6:</b> &#039;key&#039; => &#039;value&#039;
&#039;key2&#039; => &#039;value2&#039;</pre>
<pre><b>Var7:</b> me as string</pre>
<pre><b>Var8:</b> 'value', 'value2'</pre>
<pre><b>Var8:</b> &#039;value&#039;, &#039;value2&#039;</pre>
EOT
);
@@ -56,10 +56,10 @@ EOT
public function testDebugArray()
{
$this->expectOutputString(<<<EOT
<pre><b>Arr0:</b> 'null'</pre>
<pre><b>Arr1:</b> 'key1' => 'val1'
'key2' => 'val2'
'key3' => 'val3'</pre>
<pre><b>Arr0:</b> &#039;null&#039;</pre>
<pre><b>Arr1:</b> &#039;key1&#039; => &#039;val1&#039;
&#039;key2&#039; => &#039;val2&#039;
&#039;key3&#039; => &#039;val3&#039;</pre>
EOT
);
@@ -85,12 +85,12 @@ EOT
<th><small>1</small></th>
</tr>
<tr>
<td><pre>'one'</pre></td>
<td><pre>'two'</pre></td>
<td><pre>&#039;one&#039;</pre></td>
<td><pre>&#039;two&#039;</pre></td>
</tr>
<tr>
<td><pre>'three'</pre></td>
<td><pre>'four'</pre></td>
<td><pre>&#039;three&#039;</pre></td>
<td><pre>&#039;four&#039;</pre></td>
</tr>
</table>
<b>Table4:</b>
@@ -101,9 +101,9 @@ EOT
<th><small>key3</small></th>
</tr>
<tr>
<td><pre>'val1'</pre></td>
<td><pre>'val2'</pre></td>
<td><pre>'val3'</pre></td>
<td><pre>&#039;val1&#039;</pre></td>
<td><pre>&#039;val2&#039;</pre></td>
<td><pre>&#039;val3&#039;</pre></td>
</tr>
</table>
@@ -139,18 +139,18 @@ EOT
</tr>
<tr>
<td><pre>group1</pre></td>
<td><pre>'val1'</pre></td>
<td><pre>'val2'</pre></td>
<td><pre>&#039;val1&#039;</pre></td>
<td><pre>&#039;val2&#039;</pre></td>
</tr>
<tr>
<td><pre>group1</pre></td>
<td><pre>'one'</pre></td>
<td><pre>'two'</pre></td>
<td><pre>&#039;one&#039;</pre></td>
<td><pre>&#039;two&#039;</pre></td>
</tr>
<tr>
<td><pre>group2</pre></td>
<td><pre>'val1'</pre></td>
<td><pre>'val2'</pre></td>
<td><pre>&#039;val1&#039;</pre></td>
<td><pre>&#039;val2&#039;</pre></td>
</tr>
</table>
<b>Table4:</b>
@@ -163,15 +163,15 @@ EOT
</tr>
<tr>
<td><pre>group1</pre></td>
<td><pre>'val1'</pre></td>
<td><pre>'val2'</pre></td>
<td><pre>'val3'</pre></td>
<td><pre>&#039;val1&#039;</pre></td>
<td><pre>&#039;val2&#039;</pre></td>
<td><pre>&#039;val3&#039;</pre></td>
</tr>
<tr>
<td><pre>group1</pre></td>
<td><pre>'val1'</pre></td>
<td><pre>'val2'</pre></td>
<td><pre>'val3'</pre></td>
<td><pre>&#039;val1&#039;</pre></td>
<td><pre>&#039;val2&#039;</pre></td>
<td><pre>&#039;val3&#039;</pre></td>
</tr>
</table>

View File

@@ -113,23 +113,36 @@ class TestCli:
assert func.called == 1
@pytest.mark.parametrize("params", [('search', '--query', 'new'),
('reverse', '--lat', '0', '--lon', '0'),
('lookup', '--id', 'N1'),
('details', '--node', '1'),
('details', '--way', '1'),
('details', '--relation', '1'),
('details', '--place_id', '10001'),
('status',)])
def test_api_commands_simple(self, mock_func_factory, params):
@pytest.mark.parametrize("params", [('search', '--query', 'new'),
('reverse', '--lat', '0', '--lon', '0'),
('lookup', '--id', 'N1'),
('details', '--node', '1'),
('details', '--way', '1'),
('details', '--relation', '1'),
('details', '--place_id', '10001'),
('status',)])
class TestCliApiCall:
@pytest.fixture(autouse=True)
def setup_cli_call(self, cli_call):
self.call_nominatim = cli_call
def test_api_commands_simple(self, mock_func_factory, params, tmp_path):
(tmp_path / 'website').mkdir()
(tmp_path / 'website' / (params[0] + '.php')).write_text('')
mock_run_api = mock_func_factory(nominatim.clicmd.api, 'run_api_script')
assert self.call_nominatim(*params) == 0
assert self.call_nominatim(*params, '--project-dir', str(tmp_path)) == 0
assert mock_run_api.called == 1
assert mock_run_api.last_args[0] == params[0]
def test_bad_project_idr(self, mock_func_factory, params):
mock_run_api = mock_func_factory(nominatim.clicmd.api, 'run_api_script')
assert self.call_nominatim(*params) == 1
class TestCliWithDb:

View File

@@ -106,6 +106,16 @@ class TestCliReplication:
assert str(update_mock.last_args[1]['osm2pgsql']) == '/secret/osm2pgsql'
# Runs the `--catch-up` command once per update interval listed below and
# expects it to return 0 each time.
@pytest.mark.parametrize("update_interval", [60, 3600])
def test_replication_catchup(self, monkeypatch, index_mock, update_interval, placex_table):
# The interval under test is handed to the command through the environment.
monkeypatch.setenv('NOMINATIM_REPLICATION_UPDATE_INTERVAL', str(update_interval))
# Replace replication.update with a stub that pops its result from `states`:
# the first call returns NO_CHANGES, any further call would raise IndexError
# on the empty list.
# NOTE(review): presumably the command stops after the first NO_CHANGES
# result - confirm against the replication loop.
states = [nominatim.tools.replication.UpdateState.NO_CHANGES]
monkeypatch.setattr(nominatim.tools.replication, 'update',
lambda *args, **kwargs: states.pop())
assert self.call_nominatim('--catch-up') == 0
def test_replication_update_custom_threads(self, update_mock):
assert self.call_nominatim('--once', '--no-index', '--threads', '4') == 0