Use GB postcode table as definitive source. resort by pressence of search word in output address and other misc very minor changes not commited from MQ servers

This commit is contained in:
Brian Quinion
2012-03-22 00:33:28 +00:00
committed by Brian Quinion
parent ad125a9913
commit b99a043c78
11 changed files with 106 additions and 108 deletions

View File

@@ -40,9 +40,9 @@
function byImportance($a, $b)
{
/*
if ($a['importance'] != $b['importance'])
return ($a['importance'] > $b['importance']?-1:1);
/*
if ($a['aPointPolygon']['numfeatures'] != $b['aPointPolygon']['numfeatures'])
return ($a['aPointPolygon']['numfeatures'] > $b['aPointPolygon']['numfeatures']?-1:1);
if ($a['aPointPolygon']['area'] != $b['aPointPolygon']['area'])
@@ -173,60 +173,12 @@
exit;
}
if (sizeof($aNearPostcodes))
{
return array(array('lat' => $aNearPostcodes[0]['lat'], 'lon' => $aNearPostcodes[0]['lon'], 'radius' => 0.005));
}
return false;
/* partial search disabled because it sequentially scans placex
$sSQL = 'select substring(upper(postcode) from \'^[A-Z][A-Z]?[0-9][0-9A-Z]? [0-9]([A-Z][A-Z])$\'),ST_X(ST_Centroid(geometry)) as lon,ST_Y(ST_Centroid(geometry)) as lat from placex where country_code::text = \'gb\'::text AND substring(postcode from \'^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])[A-Z][A-Z]$\') = \''.$sPostcodeSector.'\' and class=\'place\' and type=\'postcode\' ';
$sSQL .= ' union ';
$sSQL .= 'select substring(upper(postcode) from \'^[A-Z][A-Z]?[0-9][0-9A-Z]? [0-9]([A-Z][A-Z])$\'),ST_X(ST_Centroid(geometry)) as lon,ST_Y(ST_Centroid(geometry)) as lat from gb_postcode where substring(postcode from \'^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])[A-Z][A-Z]$\') = \''.$sPostcodeSector.'\'';
$aNearPostcodes = $oDB->getAll($sSQL);
if (PEAR::IsError($aNearPostcodes))
{
var_dump($sSQL, $aNearPostcodes);
exit;
}
if (!sizeof($aNearPostcodes))
{
return false;
}
$fTotalLat = 0;
$fTotalLon = 0;
$fTotalFac = 0;
foreach($aNearPostcodes as $aPostcode)
{
$iDiff = gbPostcodeAlphaDifference($sPostcodeEnd, $aPostcode['substring'])*2 + 1;
if ($iDiff == 0)
$fFac = 1;
else
$fFac = 1/($iDiff*$iDiff);
$fTotalFac += $fFac;
$fTotalLat += $aPostcode['lat'] * $fFac;
$fTotalLon += $aPostcode['lon'] * $fFac;
}
if ($fTotalFac)
{
$fLat = $fTotalLat / $fTotalFac;
$fLon = $fTotalLon / $fTotalFac;
$fRadius = min(0.1 / $fTotalFac, 0.02);
return array(array('lat' => $fLat, 'lon' => $fLon, 'radius' => $fRadius));
}
return false;
*/
/*
$fTotalFac is a suprisingly good indicator of accuracy
$iZoom = 18 + round(log($fTotalFac,32));
$iZoom = max(13,min(18,$iZoom));
*/
}
function usPostcodeCalculate($sPostcode, &$oDB)

View File

@@ -26,7 +26,8 @@
$oDB->query($sSQL);
}
if (CONST_Log_File && CONST_Log_File_ReverseLog != '') {
if (CONST_Log_File && CONST_Log_File_ReverseLog != '')
{
if ($sType == 'reverse')
{
$aStartTime = explode('.',$hLog[0]);
@@ -68,7 +69,8 @@
$oDB->query($sSQL);
}
if (CONST_Log_File && CONST_Log_File_SearchLog != '') {
if (CONST_Log_File && CONST_Log_File_SearchLog != '')
{
$aStartTime = explode('.',$hLog[0]);
file_put_contents(CONST_Log_File_SearchLog,
$aStartTime[0].','.$aStartTime[1].','.

View File

@@ -74,11 +74,12 @@ body {
var proj_map = map.getProjectionObject();
var latlon;
<?php
if (isset($aPolyPoints)) {
foreach($aPolyPoints as $aPolyPoint)
if (isset($aPolyPoints))
{
echo " pointList.push(new OpenLayers.Geometry.Point(".$aPolyPoint[1].",".$aPolyPoint[2]."));\n";
}
foreach($aPolyPoints as $aPolyPoint)
{
echo " pointList.push(new OpenLayers.Geometry.Point(".$aPolyPoint[1].",".$aPolyPoint[2]."));\n";
}
}
?>
var linearRing = new OpenLayers.Geometry.LinearRing(pointList).transform(proj_EPSG4326, proj_map);;

View File

@@ -515,15 +515,18 @@ void EndElement(xmlTextReaderPtr reader, const xmlChar *name)
// insert into place_address
paramValues[0] = (const char *)place_id;
paramValues[1] = (const char *)featureAddress[i].distance;
if (paramValues[1] == NULL || strlen(paramValues[1]) == 0) paramValues[1] = "0";
paramValues[2] = (const char *)featureAddress[i].type;
paramValues[3] = (const char *)featureAddress[i].id;
paramValues[4] = (const char *)featureAddress[i].key;
paramValues[5] = (const char *)featureAddress[i].value;
paramValues[6] = (const char *)featureAddress[i].isAddress;
if (verbose) fprintf(stderr, "placex_insert: %s %s\n", paramValues[2], paramValues[3]);
res = PQexecPrepared(conn, "place_addressline_insert", 7, paramValues, NULL, NULL, 0);
if (PQresultStatus(res) != PGRES_COMMAND_OK)
{
fprintf(stderr, "place_addressline_insert: INSERT failed: %s", PQerrorMessage(conn));
fprintf(stderr, "(%s,%s,%s,%s,%s,%s,%s)",paramValues[0],paramValues[1],paramValues[2],paramValues[3],paramValues[4],paramValues[5],paramValues[6]);
PQclear(res);
exit(EXIT_FAILURE);
}

View File

@@ -270,7 +270,7 @@ void nominatim_index(int rank_min, int rank_max, int num_threads, const char *co
usleep(1000);
// Aim for one update per second
if (sleepcount++ > 2000)
if (sleepcount++ > 500)
{
rankPerSecond = ((float)rankCountTuples + (float)count) / MAX(difftime(time(0), rankStartTime),1);
fprintf(stderr, " Done %i in %i @ %f per second - Rank %i ETA (seconds): %f\n", (rankCountTuples + count), (int)(difftime(time(0), rankStartTime)), rankPerSecond, rank, ((float)(rankTotalTuples - (rankCountTuples + count)))/rankPerSecond);

View File

@@ -10,8 +10,8 @@
@define('CONST_Postgresql_Version', '9.1');
@define('CONST_Path_Postgresql_Contrib', '/usr/share/postgresql/'.CONST_Postgresql_Version.'/contrib');
@define('CONST_Path_Postgresql_Postgis', CONST_Path_Postgresql_Contrib.'/postgis-1.5');
@define('CONST_Osm2pgsql_Binary', CONST_BasePath.'/osm2pgsql/osm2pgsql');
@define('CONST_Osmosis_Binary', CONST_BasePath.'/osmosis-0.38/bin/osmosis');
@define('CONST_Osm2pgsql_Binary', CONST_BasePath.'/osm2pgsql/osm2pgsql');
@define('CONST_Osmosis_Binary', CONST_BasePath.'/osmosis-0.38/bin/osmosis');
// Website settings
@define('CONST_ClosedForIndexing', false);

View File

@@ -940,7 +940,50 @@ BEGIN
NEW.rank_address := NEW.rank_search;
-- By doing in postgres we have the country available to us - currently only used for postcode
IF NEW.class = 'place' THEN
IF NEW.class in ('place','boundary') AND NEW.type in ('postcode','postal_code') THEN
NEW.name := 'ref'=>NEW.postcode;
IF NEW.country_code = 'gb' THEN
IF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9][A-Z][A-Z])$' THEN
NEW.rank_search := 25;
NEW.rank_address := 5;
ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])$' THEN
NEW.rank_search := 23;
NEW.rank_address := 5;
ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z])$' THEN
NEW.rank_search := 21;
NEW.rank_address := 5;
END IF;
ELSEIF NEW.country_code = 'de' THEN
IF NEW.postcode ~ '^([0-9]{5})$' THEN
NEW.rank_search := 21;
NEW.rank_address := 11;
END IF;
ELSE
-- Guess at the postcode format and coverage (!)
IF upper(NEW.postcode) ~ '^[A-Z0-9]{1,5}$' THEN -- Probably too short to be very local
NEW.rank_search := 21;
NEW.rank_address := 11;
ELSE
-- Does it look splitable into and area and local code?
postcode := substring(upper(NEW.postcode) from '^([- :A-Z0-9]+)([- :][A-Z0-9]+)$');
IF postcode IS NOT NULL THEN
NEW.rank_search := 25;
NEW.rank_address := 11;
ELSEIF NEW.postcode ~ '^[- :A-Z0-9]{6,}$' THEN
NEW.rank_search := 21;
NEW.rank_address := 11;
END IF;
END IF;
END IF;
ELSEIF NEW.class = 'place' THEN
IF NEW.type in ('continent') THEN
NEW.rank_search := 2;
NEW.rank_address := NEW.rank_search;
@@ -992,49 +1035,6 @@ BEGIN
ELSEIF NEW.type in ('hall_of_residence','neighbourhood','housing_estate','nature_reserve') THEN
NEW.rank_search := 22;
NEW.rank_address := 22;
ELSEIF NEW.type in ('postcode') THEN
NEW.name := 'ref'=>NEW.postcode;
IF NEW.country_code = 'gb' THEN
IF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9][A-Z][A-Z])$' THEN
NEW.rank_search := 25;
NEW.rank_address := 5;
ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])$' THEN
NEW.rank_search := 23;
NEW.rank_address := 5;
ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z])$' THEN
NEW.rank_search := 21;
NEW.rank_address := 5;
END IF;
ELSEIF NEW.country_code = 'de' THEN
IF NEW.postcode ~ '^([0-9]{5})$' THEN
NEW.rank_search := 21;
NEW.rank_address := 11;
END IF;
ELSE
-- Guess at the postcode format and coverage (!)
IF upper(NEW.postcode) ~ '^[A-Z0-9]{1,5}$' THEN -- Probably too short to be very local
NEW.rank_search := 21;
NEW.rank_address := 11;
ELSE
-- Does it look splitable into and area and local code?
postcode := substring(upper(NEW.postcode) from '^([- :A-Z0-9]+)([- :][A-Z0-9]+)$');
IF postcode IS NOT NULL THEN
NEW.rank_search := 25;
NEW.rank_address := 11;
ELSEIF NEW.postcode ~ '^[- :A-Z0-9]{6,}$' THEN
NEW.rank_search := 21;
NEW.rank_address := 11;
END IF;
END IF;
END IF;
ELSEIF NEW.type in ('airport','street') THEN
NEW.rank_search := 26;
NEW.rank_address := NEW.rank_search;
@@ -1115,7 +1115,8 @@ BEGIN
IF st_area(NEW.geometry) < 1 THEN
-- mark items within the geometry for re-indexing
-- RAISE WARNING 'placex poly insert: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;
-- work around bug in postgis
-- work around bug in postgis, this may have been fixed in 2.0.0 (see http://trac.osgeo.org/postgis/ticket/547)
update placex set indexed_status = 2 where (ST_Contains(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry))
AND rank_search > NEW.rank_search and indexed_status = 0 and ST_geometrytype(placex.geometry) = 'ST_Point';
update placex set indexed_status = 2 where (ST_Contains(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry))
@@ -1203,6 +1204,7 @@ DECLARE
tagpairid INTEGER;
default_language TEXT;
name_vector INTEGER[];
nameaddress_vector INTEGER[];
@@ -1256,6 +1258,19 @@ BEGIN
-- cheaper but less acurate
place_centroid := ST_Centroid(NEW.geometry);
-- Thought this wasn't needed but when we add new languages to the country_name table
-- we need to update the existing names
IF NEW.name is not null AND array_upper(%#NEW.name,1) > 1 THEN
default_language := get_country_language_code(NEW.country_code);
IF default_language IS NOT NULL THEN
IF NEW.name ? 'name' AND NOT NEW.name ? ('name:'||default_language) THEN
NEW.name := NEW.name || (('name:'||default_language) => (NEW.name -> 'name'));
ELSEIF NEW.name ? ('name:'||default_language) AND NOT NEW.name ? 'name' THEN
NEW.name := NEW.name || ('name' => (NEW.name -> 'name:'||default_language));
END IF;
END IF;
END IF;
-- Initialise the name vector using our name
name_vector := make_keywords(NEW.name);
nameaddress_vector := '{}'::int[];

View File

@@ -212,6 +212,7 @@ CREATE TABLE placex (
geometry_sector INTEGER
);
SELECT AddGeometryColumn('placex', 'geometry', 4326, 'GEOMETRY', 2);
SELECT AddGeometryColumn('placex', 'centroid', 4326, 'GEOMETRY', 2);
CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id);
CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id);
CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector);

View File

@@ -346,7 +346,7 @@
if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
}
if ($aCMDResult['osmosis-init'] && isset($aCMDResult['osmosis-init-date']))
if (($aCMDResult['osmosis-init'] || $aCMDResult['all']) && isset($aCMDResult['osmosis-init-date']))
{
$bDidSomething = true;

View File

@@ -107,6 +107,9 @@
foreach($aPairs as $aPair)
{
if ($aPair[0] == 'yes') continue;
if ($aPair[1] == 'yes') continue;
if ($aPair[0] == 'highway') continue;
if ($aPair[1] == 'highway') continue;
echo "create table place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." as ";

View File

@@ -247,6 +247,7 @@
$sToken = $oDB->getOne("select make_standard_name('".$aSpecialTerm[1]."') as string");
$sSQL = 'select * from (select word_id,word_token, word, class, type, location, country_code, operator';
$sSQL .= ' from word where word_token in (\' '.$sToken.'\')) as x where (class is not null and class not in (\'place\',\'highway\')) or country_code is not null';
if (CONST_Debug) var_Dump($sSQL);
$aSearchWords = $oDB->getAll($sSQL);
$aNewSearches = array();
foreach($aSearches as $aSearch)
@@ -374,7 +375,8 @@
// Try and calculate GB postcodes we might be missing
foreach($aTokens as $sToken)
{
if (!isset($aValidTokens[$sToken]) && !isset($aValidTokens[' '.$sToken]) && preg_match('/^([A-Z][A-Z]?[0-9][0-9A-Z]? ?[0-9])([A-Z][A-Z])$/', strtoupper(trim($sToken)), $aData))
// Source of gb postcodes is now definitive - always use
if (preg_match('/^([A-Z][A-Z]?[0-9][0-9A-Z]? ?[0-9])([A-Z][A-Z])$/', strtoupper(trim($sToken)), $aData))
{
if (substr($aData[1],-2,1) != ' ')
{
@@ -416,7 +418,6 @@
Score how good the search is so they can be ordered
*/
foreach($aPhrases as $iPhrase => $sPhrase)
{
$aNewPhraseSearches = array();
@@ -503,7 +504,7 @@
if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
}
}
else
elseif (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])
{
if (sizeof($aSearch['aName']))
{
@@ -530,6 +531,8 @@
// Allow searching for a word - but at extra cost
foreach($aValidTokens[$sToken] as $aSearchTerm)
{
if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])
{
//var_Dump('<hr>',$aSearch['aName']);
if (sizeof($aCurrentSearch['aName']) && strlen($sToken) >= 4)
@@ -549,6 +552,7 @@
$aSearch['iNamePhrase'] = $iPhrase;
if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
}
}
}
}
else
@@ -745,6 +749,9 @@
// First we need a position, either aName or fLat or both
$aTerms = array();
$aOrder = array();
// TODO: filter out the pointless search terms (2 letter name tokens and less)
// they might be right - but they are just too darned expensive to run
if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'],",")."]";
if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'],",")."]";
if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'";
@@ -1132,6 +1139,11 @@
//var_Dump($aSearchResults);
//exit;
$aClassType = getClassTypesWithImportance();
$aRecheckWords = preg_split('/\b/',$sQuery);
foreach($aRecheckWords as $i => $sWord)
{
if (!$sWord) unset($aRecheckWords[$i]);
}
foreach($aSearchResults as $iResNum => $aResult)
{
if (CONST_Search_AreaPolygons || true)
@@ -1249,6 +1261,16 @@
//exit;
}
// Adjust importance for the number of exact string matches in the result
$aResult['importance'] = max(0.001,$aResult['importance']);
$iCountWords = 0;
$sAddress = $aResult['langaddress'];
foreach($aRecheckWords as $i => $sWord)
{
if (stripos($sAddress, $sWord)!==false) $iCountWords++;
}
$aResult['importance'] = $aResult['importance'] + $iCountWords;
//if (CONST_Debug) var_dump($aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']);
/*
if (isset($aClassType[$aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']]['importance'])
@@ -1270,7 +1292,6 @@
$aResult['foundorder'] = $iResNum;
$aSearchResults[$iResNum] = $aResult;
}
uasort($aSearchResults, 'byImportance');
//var_dump($aSearchResults);exit;