mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-14 18:37:58 +00:00
Use GB postcode table as definitive source. Re-sort by presence of search word in output address, plus other misc very minor changes not committed from MQ servers
This commit is contained in:
committed by
Brian Quinion
parent
ad125a9913
commit
b99a043c78
50
lib/lib.php
50
lib/lib.php
@@ -40,9 +40,9 @@
|
||||
|
||||
function byImportance($a, $b)
|
||||
{
|
||||
/*
|
||||
if ($a['importance'] != $b['importance'])
|
||||
return ($a['importance'] > $b['importance']?-1:1);
|
||||
/*
|
||||
if ($a['aPointPolygon']['numfeatures'] != $b['aPointPolygon']['numfeatures'])
|
||||
return ($a['aPointPolygon']['numfeatures'] > $b['aPointPolygon']['numfeatures']?-1:1);
|
||||
if ($a['aPointPolygon']['area'] != $b['aPointPolygon']['area'])
|
||||
@@ -173,60 +173,12 @@
|
||||
exit;
|
||||
}
|
||||
|
||||
|
||||
if (sizeof($aNearPostcodes))
|
||||
{
|
||||
return array(array('lat' => $aNearPostcodes[0]['lat'], 'lon' => $aNearPostcodes[0]['lon'], 'radius' => 0.005));
|
||||
}
|
||||
|
||||
return false;
|
||||
|
||||
/* partial search disabled because it sequentially scans placex
|
||||
|
||||
$sSQL = 'select substring(upper(postcode) from \'^[A-Z][A-Z]?[0-9][0-9A-Z]? [0-9]([A-Z][A-Z])$\'),ST_X(ST_Centroid(geometry)) as lon,ST_Y(ST_Centroid(geometry)) as lat from placex where country_code::text = \'gb\'::text AND substring(postcode from \'^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])[A-Z][A-Z]$\') = \''.$sPostcodeSector.'\' and class=\'place\' and type=\'postcode\' ';
|
||||
$sSQL .= ' union ';
|
||||
$sSQL .= 'select substring(upper(postcode) from \'^[A-Z][A-Z]?[0-9][0-9A-Z]? [0-9]([A-Z][A-Z])$\'),ST_X(ST_Centroid(geometry)) as lon,ST_Y(ST_Centroid(geometry)) as lat from gb_postcode where substring(postcode from \'^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])[A-Z][A-Z]$\') = \''.$sPostcodeSector.'\'';
|
||||
$aNearPostcodes = $oDB->getAll($sSQL);
|
||||
if (PEAR::IsError($aNearPostcodes))
|
||||
{
|
||||
var_dump($sSQL, $aNearPostcodes);
|
||||
exit;
|
||||
}
|
||||
|
||||
if (!sizeof($aNearPostcodes))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
$fTotalLat = 0;
|
||||
$fTotalLon = 0;
|
||||
$fTotalFac = 0;
|
||||
foreach($aNearPostcodes as $aPostcode)
|
||||
{
|
||||
$iDiff = gbPostcodeAlphaDifference($sPostcodeEnd, $aPostcode['substring'])*2 + 1;
|
||||
if ($iDiff == 0)
|
||||
$fFac = 1;
|
||||
else
|
||||
$fFac = 1/($iDiff*$iDiff);
|
||||
|
||||
$fTotalFac += $fFac;
|
||||
$fTotalLat += $aPostcode['lat'] * $fFac;
|
||||
$fTotalLon += $aPostcode['lon'] * $fFac;
|
||||
}
|
||||
if ($fTotalFac)
|
||||
{
|
||||
$fLat = $fTotalLat / $fTotalFac;
|
||||
$fLon = $fTotalLon / $fTotalFac;
|
||||
$fRadius = min(0.1 / $fTotalFac, 0.02);
|
||||
return array(array('lat' => $fLat, 'lon' => $fLon, 'radius' => $fRadius));
|
||||
}
|
||||
return false;
|
||||
*/
|
||||
/*
|
||||
$fTotalFac is a surprisingly good indicator of accuracy
|
||||
$iZoom = 18 + round(log($fTotalFac,32));
|
||||
$iZoom = max(13,min(18,$iZoom));
|
||||
*/
|
||||
}
|
||||
|
||||
function usPostcodeCalculate($sPostcode, &$oDB)
|
||||
|
||||
@@ -26,7 +26,8 @@
|
||||
$oDB->query($sSQL);
|
||||
}
|
||||
|
||||
if (CONST_Log_File && CONST_Log_File_ReverseLog != '') {
|
||||
if (CONST_Log_File && CONST_Log_File_ReverseLog != '')
|
||||
{
|
||||
if ($sType == 'reverse')
|
||||
{
|
||||
$aStartTime = explode('.',$hLog[0]);
|
||||
@@ -68,7 +69,8 @@
|
||||
$oDB->query($sSQL);
|
||||
}
|
||||
|
||||
if (CONST_Log_File && CONST_Log_File_SearchLog != '') {
|
||||
if (CONST_Log_File && CONST_Log_File_SearchLog != '')
|
||||
{
|
||||
$aStartTime = explode('.',$hLog[0]);
|
||||
file_put_contents(CONST_Log_File_SearchLog,
|
||||
$aStartTime[0].','.$aStartTime[1].','.
|
||||
|
||||
@@ -74,11 +74,12 @@ body {
|
||||
var proj_map = map.getProjectionObject();
|
||||
var latlon;
|
||||
<?php
|
||||
if (isset($aPolyPoints)) {
|
||||
foreach($aPolyPoints as $aPolyPoint)
|
||||
if (isset($aPolyPoints))
|
||||
{
|
||||
echo " pointList.push(new OpenLayers.Geometry.Point(".$aPolyPoint[1].",".$aPolyPoint[2]."));\n";
|
||||
}
|
||||
foreach($aPolyPoints as $aPolyPoint)
|
||||
{
|
||||
echo " pointList.push(new OpenLayers.Geometry.Point(".$aPolyPoint[1].",".$aPolyPoint[2]."));\n";
|
||||
}
|
||||
}
|
||||
?>
|
||||
var linearRing = new OpenLayers.Geometry.LinearRing(pointList).transform(proj_EPSG4326, proj_map);;
|
||||
|
||||
@@ -515,15 +515,18 @@ void EndElement(xmlTextReaderPtr reader, const xmlChar *name)
|
||||
// insert into place_address
|
||||
paramValues[0] = (const char *)place_id;
|
||||
paramValues[1] = (const char *)featureAddress[i].distance;
|
||||
if (paramValues[1] == NULL || strlen(paramValues[1]) == 0) paramValues[1] = "0";
|
||||
paramValues[2] = (const char *)featureAddress[i].type;
|
||||
paramValues[3] = (const char *)featureAddress[i].id;
|
||||
paramValues[4] = (const char *)featureAddress[i].key;
|
||||
paramValues[5] = (const char *)featureAddress[i].value;
|
||||
paramValues[6] = (const char *)featureAddress[i].isAddress;
|
||||
if (verbose) fprintf(stderr, "placex_insert: %s %s\n", paramValues[2], paramValues[3]);
|
||||
res = PQexecPrepared(conn, "place_addressline_insert", 7, paramValues, NULL, NULL, 0);
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
fprintf(stderr, "place_addressline_insert: INSERT failed: %s", PQerrorMessage(conn));
|
||||
fprintf(stderr, "(%s,%s,%s,%s,%s,%s,%s)",paramValues[0],paramValues[1],paramValues[2],paramValues[3],paramValues[4],paramValues[5],paramValues[6]);
|
||||
PQclear(res);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
@@ -270,7 +270,7 @@ void nominatim_index(int rank_min, int rank_max, int num_threads, const char *co
|
||||
usleep(1000);
|
||||
|
||||
// Aim for one update per second
|
||||
if (sleepcount++ > 2000)
|
||||
if (sleepcount++ > 500)
|
||||
{
|
||||
rankPerSecond = ((float)rankCountTuples + (float)count) / MAX(difftime(time(0), rankStartTime),1);
|
||||
fprintf(stderr, " Done %i in %i @ %f per second - Rank %i ETA (seconds): %f\n", (rankCountTuples + count), (int)(difftime(time(0), rankStartTime)), rankPerSecond, rank, ((float)(rankTotalTuples - (rankCountTuples + count)))/rankPerSecond);
|
||||
|
||||
@@ -10,8 +10,8 @@
|
||||
@define('CONST_Postgresql_Version', '9.1');
|
||||
@define('CONST_Path_Postgresql_Contrib', '/usr/share/postgresql/'.CONST_Postgresql_Version.'/contrib');
|
||||
@define('CONST_Path_Postgresql_Postgis', CONST_Path_Postgresql_Contrib.'/postgis-1.5');
|
||||
@define('CONST_Osm2pgsql_Binary', CONST_BasePath.'/osm2pgsql/osm2pgsql');
|
||||
@define('CONST_Osmosis_Binary', CONST_BasePath.'/osmosis-0.38/bin/osmosis');
|
||||
@define('CONST_Osm2pgsql_Binary', CONST_BasePath.'/osm2pgsql/osm2pgsql');
|
||||
@define('CONST_Osmosis_Binary', CONST_BasePath.'/osmosis-0.38/bin/osmosis');
|
||||
|
||||
// Website settings
|
||||
@define('CONST_ClosedForIndexing', false);
|
||||
|
||||
@@ -940,7 +940,50 @@ BEGIN
|
||||
NEW.rank_address := NEW.rank_search;
|
||||
|
||||
-- By doing in postgres we have the country available to us - currently only used for postcode
|
||||
IF NEW.class = 'place' THEN
|
||||
IF NEW.class in ('place','boundary') AND NEW.type in ('postcode','postal_code') THEN
|
||||
|
||||
NEW.name := 'ref'=>NEW.postcode;
|
||||
|
||||
IF NEW.country_code = 'gb' THEN
|
||||
|
||||
IF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9][A-Z][A-Z])$' THEN
|
||||
NEW.rank_search := 25;
|
||||
NEW.rank_address := 5;
|
||||
ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])$' THEN
|
||||
NEW.rank_search := 23;
|
||||
NEW.rank_address := 5;
|
||||
ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z])$' THEN
|
||||
NEW.rank_search := 21;
|
||||
NEW.rank_address := 5;
|
||||
END IF;
|
||||
|
||||
ELSEIF NEW.country_code = 'de' THEN
|
||||
|
||||
IF NEW.postcode ~ '^([0-9]{5})$' THEN
|
||||
NEW.rank_search := 21;
|
||||
NEW.rank_address := 11;
|
||||
END IF;
|
||||
|
||||
ELSE
|
||||
-- Guess at the postcode format and coverage (!)
|
||||
IF upper(NEW.postcode) ~ '^[A-Z0-9]{1,5}$' THEN -- Probably too short to be very local
|
||||
NEW.rank_search := 21;
|
||||
NEW.rank_address := 11;
|
||||
ELSE
|
||||
-- Does it look splittable into an area and local code?
|
||||
postcode := substring(upper(NEW.postcode) from '^([- :A-Z0-9]+)([- :][A-Z0-9]+)$');
|
||||
|
||||
IF postcode IS NOT NULL THEN
|
||||
NEW.rank_search := 25;
|
||||
NEW.rank_address := 11;
|
||||
ELSEIF NEW.postcode ~ '^[- :A-Z0-9]{6,}$' THEN
|
||||
NEW.rank_search := 21;
|
||||
NEW.rank_address := 11;
|
||||
END IF;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
ELSEIF NEW.class = 'place' THEN
|
||||
IF NEW.type in ('continent') THEN
|
||||
NEW.rank_search := 2;
|
||||
NEW.rank_address := NEW.rank_search;
|
||||
@@ -992,49 +1035,6 @@ BEGIN
|
||||
ELSEIF NEW.type in ('hall_of_residence','neighbourhood','housing_estate','nature_reserve') THEN
|
||||
NEW.rank_search := 22;
|
||||
NEW.rank_address := 22;
|
||||
ELSEIF NEW.type in ('postcode') THEN
|
||||
|
||||
NEW.name := 'ref'=>NEW.postcode;
|
||||
|
||||
IF NEW.country_code = 'gb' THEN
|
||||
|
||||
IF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9][A-Z][A-Z])$' THEN
|
||||
NEW.rank_search := 25;
|
||||
NEW.rank_address := 5;
|
||||
ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])$' THEN
|
||||
NEW.rank_search := 23;
|
||||
NEW.rank_address := 5;
|
||||
ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z])$' THEN
|
||||
NEW.rank_search := 21;
|
||||
NEW.rank_address := 5;
|
||||
END IF;
|
||||
|
||||
ELSEIF NEW.country_code = 'de' THEN
|
||||
|
||||
IF NEW.postcode ~ '^([0-9]{5})$' THEN
|
||||
NEW.rank_search := 21;
|
||||
NEW.rank_address := 11;
|
||||
END IF;
|
||||
|
||||
ELSE
|
||||
-- Guess at the postcode format and coverage (!)
|
||||
IF upper(NEW.postcode) ~ '^[A-Z0-9]{1,5}$' THEN -- Probably too short to be very local
|
||||
NEW.rank_search := 21;
|
||||
NEW.rank_address := 11;
|
||||
ELSE
|
||||
-- Does it look splittable into an area and local code?
|
||||
postcode := substring(upper(NEW.postcode) from '^([- :A-Z0-9]+)([- :][A-Z0-9]+)$');
|
||||
|
||||
IF postcode IS NOT NULL THEN
|
||||
NEW.rank_search := 25;
|
||||
NEW.rank_address := 11;
|
||||
ELSEIF NEW.postcode ~ '^[- :A-Z0-9]{6,}$' THEN
|
||||
NEW.rank_search := 21;
|
||||
NEW.rank_address := 11;
|
||||
END IF;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
ELSEIF NEW.type in ('airport','street') THEN
|
||||
NEW.rank_search := 26;
|
||||
NEW.rank_address := NEW.rank_search;
|
||||
@@ -1115,7 +1115,8 @@ BEGIN
|
||||
IF st_area(NEW.geometry) < 1 THEN
|
||||
-- mark items within the geometry for re-indexing
|
||||
-- RAISE WARNING 'placex poly insert: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;
|
||||
-- work around bug in postgis
|
||||
|
||||
-- work around bug in postgis, this may have been fixed in 2.0.0 (see http://trac.osgeo.org/postgis/ticket/547)
|
||||
update placex set indexed_status = 2 where (ST_Contains(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry))
|
||||
AND rank_search > NEW.rank_search and indexed_status = 0 and ST_geometrytype(placex.geometry) = 'ST_Point';
|
||||
update placex set indexed_status = 2 where (ST_Contains(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry))
|
||||
@@ -1203,6 +1204,7 @@ DECLARE
|
||||
|
||||
tagpairid INTEGER;
|
||||
|
||||
default_language TEXT;
|
||||
name_vector INTEGER[];
|
||||
nameaddress_vector INTEGER[];
|
||||
|
||||
@@ -1256,6 +1258,19 @@ BEGIN
|
||||
-- cheaper but less accurate
|
||||
place_centroid := ST_Centroid(NEW.geometry);
|
||||
|
||||
-- Thought this wasn't needed but when we add new languages to the country_name table
|
||||
-- we need to update the existing names
|
||||
IF NEW.name is not null AND array_upper(%#NEW.name,1) > 1 THEN
|
||||
default_language := get_country_language_code(NEW.country_code);
|
||||
IF default_language IS NOT NULL THEN
|
||||
IF NEW.name ? 'name' AND NOT NEW.name ? ('name:'||default_language) THEN
|
||||
NEW.name := NEW.name || (('name:'||default_language) => (NEW.name -> 'name'));
|
||||
ELSEIF NEW.name ? ('name:'||default_language) AND NOT NEW.name ? 'name' THEN
|
||||
NEW.name := NEW.name || ('name' => (NEW.name -> 'name:'||default_language));
|
||||
END IF;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
-- Initialise the name vector using our name
|
||||
name_vector := make_keywords(NEW.name);
|
||||
nameaddress_vector := '{}'::int[];
|
||||
|
||||
@@ -212,6 +212,7 @@ CREATE TABLE placex (
|
||||
geometry_sector INTEGER
|
||||
);
|
||||
SELECT AddGeometryColumn('placex', 'geometry', 4326, 'GEOMETRY', 2);
|
||||
SELECT AddGeometryColumn('placex', 'centroid', 4326, 'GEOMETRY', 2);
|
||||
CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id);
|
||||
CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id);
|
||||
CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector);
|
||||
|
||||
@@ -346,7 +346,7 @@
|
||||
if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
|
||||
}
|
||||
|
||||
if ($aCMDResult['osmosis-init'] && isset($aCMDResult['osmosis-init-date']))
|
||||
if (($aCMDResult['osmosis-init'] || $aCMDResult['all']) && isset($aCMDResult['osmosis-init-date']))
|
||||
{
|
||||
$bDidSomething = true;
|
||||
|
||||
|
||||
@@ -107,6 +107,9 @@
|
||||
|
||||
foreach($aPairs as $aPair)
|
||||
{
|
||||
if ($aPair[0] == 'yes') continue;
|
||||
if ($aPair[1] == 'yes') continue;
|
||||
if ($aPair[0] == 'highway') continue;
|
||||
if ($aPair[1] == 'highway') continue;
|
||||
|
||||
echo "create table place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." as ";
|
||||
|
||||
@@ -247,6 +247,7 @@
|
||||
$sToken = $oDB->getOne("select make_standard_name('".$aSpecialTerm[1]."') as string");
|
||||
$sSQL = 'select * from (select word_id,word_token, word, class, type, location, country_code, operator';
|
||||
$sSQL .= ' from word where word_token in (\' '.$sToken.'\')) as x where (class is not null and class not in (\'place\',\'highway\')) or country_code is not null';
|
||||
if (CONST_Debug) var_Dump($sSQL);
|
||||
$aSearchWords = $oDB->getAll($sSQL);
|
||||
$aNewSearches = array();
|
||||
foreach($aSearches as $aSearch)
|
||||
@@ -374,7 +375,8 @@
|
||||
// Try and calculate GB postcodes we might be missing
|
||||
foreach($aTokens as $sToken)
|
||||
{
|
||||
if (!isset($aValidTokens[$sToken]) && !isset($aValidTokens[' '.$sToken]) && preg_match('/^([A-Z][A-Z]?[0-9][0-9A-Z]? ?[0-9])([A-Z][A-Z])$/', strtoupper(trim($sToken)), $aData))
|
||||
// Source of gb postcodes is now definitive - always use
|
||||
if (preg_match('/^([A-Z][A-Z]?[0-9][0-9A-Z]? ?[0-9])([A-Z][A-Z])$/', strtoupper(trim($sToken)), $aData))
|
||||
{
|
||||
if (substr($aData[1],-2,1) != ' ')
|
||||
{
|
||||
@@ -416,7 +418,6 @@
|
||||
|
||||
Score how good the search is so they can be ordered
|
||||
*/
|
||||
|
||||
foreach($aPhrases as $iPhrase => $sPhrase)
|
||||
{
|
||||
$aNewPhraseSearches = array();
|
||||
@@ -503,7 +504,7 @@
|
||||
if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
||||
}
|
||||
}
|
||||
else
|
||||
elseif (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])
|
||||
{
|
||||
if (sizeof($aSearch['aName']))
|
||||
{
|
||||
@@ -530,6 +531,8 @@
|
||||
// Allow searching for a word - but at extra cost
|
||||
foreach($aValidTokens[$sToken] as $aSearchTerm)
|
||||
{
|
||||
if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])
|
||||
{
|
||||
//var_Dump('<hr>',$aSearch['aName']);
|
||||
|
||||
if (sizeof($aCurrentSearch['aName']) && strlen($sToken) >= 4)
|
||||
@@ -549,6 +552,7 @@
|
||||
$aSearch['iNamePhrase'] = $iPhrase;
|
||||
if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -745,6 +749,9 @@
|
||||
// First we need a position, either aName or fLat or both
|
||||
$aTerms = array();
|
||||
$aOrder = array();
|
||||
|
||||
// TODO: filter out the pointless search terms (2 letter name tokens and less)
|
||||
// they might be right - but they are just too darned expensive to run
|
||||
if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'],",")."]";
|
||||
if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'],",")."]";
|
||||
if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'";
|
||||
@@ -1132,6 +1139,11 @@
|
||||
//var_Dump($aSearchResults);
|
||||
//exit;
|
||||
$aClassType = getClassTypesWithImportance();
|
||||
$aRecheckWords = preg_split('/\b/',$sQuery);
|
||||
foreach($aRecheckWords as $i => $sWord)
|
||||
{
|
||||
if (!$sWord) unset($aRecheckWords[$i]);
|
||||
}
|
||||
foreach($aSearchResults as $iResNum => $aResult)
|
||||
{
|
||||
if (CONST_Search_AreaPolygons || true)
|
||||
@@ -1249,6 +1261,16 @@
|
||||
//exit;
|
||||
}
|
||||
|
||||
// Adjust importance for the number of exact string matches in the result
|
||||
$aResult['importance'] = max(0.001,$aResult['importance']);
|
||||
$iCountWords = 0;
|
||||
$sAddress = $aResult['langaddress'];
|
||||
foreach($aRecheckWords as $i => $sWord)
|
||||
{
|
||||
if (stripos($sAddress, $sWord)!==false) $iCountWords++;
|
||||
}
|
||||
$aResult['importance'] = $aResult['importance'] + $iCountWords;
|
||||
|
||||
//if (CONST_Debug) var_dump($aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']);
|
||||
/*
|
||||
if (isset($aClassType[$aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']]['importance'])
|
||||
@@ -1270,7 +1292,6 @@
|
||||
$aResult['foundorder'] = $iResNum;
|
||||
$aSearchResults[$iResNum] = $aResult;
|
||||
}
|
||||
|
||||
uasort($aSearchResults, 'byImportance');
|
||||
|
||||
//var_dump($aSearchResults);exit;
|
||||
|
||||
Reference in New Issue
Block a user